├── .dockerignore ├── .gitignore ├── assets └── architecture_diagram.png ├── scripts ├── client └── transact ├── Dockerfile.init ├── src ├── test │ ├── common_test.rs │ ├── slave_test.rs │ ├── node_test.rs │ ├── master_test.rs │ ├── storage_test.rs │ ├── coord_test.rs │ ├── tablet_test.rs │ ├── expression_test.rs │ └── query_converter_test.rs ├── bin │ ├── paxos2pc_sim │ │ ├── main.rs │ │ ├── tests.rs │ │ ├── message.rs │ │ ├── simple_rm_es.rs │ │ ├── simple_tm_es.rs │ │ ├── stm_simple_rm_es.rs │ │ ├── tests_paxos2pc.rs │ │ ├── tests_stmpaxos2pc.rs │ │ └── stm_simple_tm_es.rs │ ├── experimental │ │ └── main.rs │ ├── transact │ │ └── main.rs │ ├── paxos │ │ └── main.rs │ └── simtest │ │ └── main.rs ├── lib.rs ├── simulation_utils.rs ├── experimental.rs ├── lang.rs ├── test_utils.rs ├── finish_query_tm_es.rs ├── finish_query_rm_es.rs ├── shard_split_slave_rm_es.rs ├── shard_split_tablet_rm_es.rs ├── drop_table_rm_es.rs ├── ms_table_read_es.rs ├── alter_table_rm_es.rs ├── network_driver.rs ├── query_planning.rs ├── multiversion_map.rs ├── tm_status.rs ├── create_table_rm_es.rs ├── slave_group_create_es.rs ├── ms_table_delete_es.rs └── shard_snapshot_es.rs ├── Dockerfile ├── Cargo.toml ├── rustfmt.toml ├── LICENSE ├── notes.md └── run /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .idea 3 | target 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .vscode 3 | .idea 4 | 5 | # OS 6 | .DS_Store 7 | 8 | # Build 9 | target 10 | -------------------------------------------------------------------------------- /assets/architecture_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasindumuth/rUniversalDB/HEAD/assets/architecture_diagram.png -------------------------------------------------------------------------------- /scripts/client: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a 1ms egress delay in the network (to be closer to simulating a real network) 4 | tc qdisc add dev eth0 root netem delay 1ms 5 | 6 | # Run the binary 7 | target/debug/client "${@}" 8 | -------------------------------------------------------------------------------- /scripts/transact: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a 1ms egress delay in the network (to be closer to simulating a real network) 4 | tc qdisc add dev eth0 root netem delay 1ms 5 | 6 | # Run the binary 7 | target/debug/transact "${@}" 8 | -------------------------------------------------------------------------------- /Dockerfile.init: -------------------------------------------------------------------------------- 1 | FROM rustlang/rust:nightly 2 | WORKDIR /home 3 | COPY ./ ./ 4 | 5 | # Install the `tc` command 6 | RUN apt-get update && apt-get install -y iproute2 7 | 8 | # Build 9 | RUN cargo build --bin transact; cargo build --bin client; 10 | -------------------------------------------------------------------------------- /src/test/common_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::Timestamp; 2 | 3 | #[test] 4 | fn timestamp_test() { 5 | assert_eq!(Timestamp::new(1, 2).add(Timestamp::new(1, 1)), Timestamp::new(2, 3)); 6 | assert_eq!(Timestamp::new(1, 
2).add(Timestamp::new(1, u64::MAX)), Timestamp::new(3, 1)); 7 | } 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM runiversal:latest 2 | WORKDIR /home 3 | # If we delete files, make sure to run rm -rf to get these files 4 | # out from the previous version of runiversal we bring in. The COPY 5 | # command doesn't remove them for us. 6 | # RUN rm -rf ./* 7 | COPY ./ ./ 8 | RUN cargo build --bin transact; cargo build --bin client; 9 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | use crate::tests::test; 4 | 5 | mod message; 6 | mod simple_rm_es; 7 | mod simple_tm_es; 8 | mod simulation; 9 | mod slave; 10 | mod stm_simple_rm_es; 11 | mod stm_simple_tm_es; 12 | mod tests; 13 | mod tests_paxos2pc; 14 | mod tests_stmpaxos2pc; 15 | 16 | #[macro_use] 17 | extern crate runiversal; 18 | 19 | fn main() { 20 | test() 21 | } 22 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/tests.rs: -------------------------------------------------------------------------------- 1 | use crate::tests_paxos2pc; 2 | use crate::tests_stmpaxos2pc; 3 | use rand::{RngCore, SeedableRng}; 4 | use rand_xorshift::XorShiftRng; 5 | use runiversal::test_utils::mk_seed; 6 | 7 | /// Run `test_single()` multiple times, each with a different seed. 8 | pub fn test() { 9 | let mut orig_rand = XorShiftRng::from_seed([0; 16]); 10 | for i in 0..2000 { 11 | let mut seed = mk_seed(&mut orig_rand); 12 | if i % 2 == 0 { 13 | tests_stmpaxos2pc::test_single(i, seed); 14 | } else { 15 | tests_paxos2pc::test_single(i, seed); 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/slave_test.rs: -------------------------------------------------------------------------------- 1 | use crate::slave::SlaveState; 2 | use crate::test_utils::CheckCtx; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Consistency Testing 6 | // ----------------------------------------------------------------------------------------------- 7 | 8 | pub fn check_slave_clean(slave: &SlaveState, check_ctx: &mut CheckCtx) { 9 | let statuses = &slave.statuses; 10 | 11 | // Check `Statuses` clean 12 | check_ctx.check(statuses.create_table_ess.is_empty()); 13 | check_ctx.check(statuses.shard_split_ess.is_empty()); 14 | } 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "runiversal" 3 | version = "0.1.0" 4 | authors = ["Pasindu Muthukuda "] 5 | edition = "2018" 6 | default-run = "transact" 7 | 8 | [profile.release] 9 | debug-assertions = true 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | serde = { version = "1.0.117", features = ["derive"] } 15 | sqlparser = "0.9.0" 16 | sqlformat = "0.1.8" 17 | rmp-serde = "1.0.0" 18 | rand = "0.7.3" 19 | rand_xorshift = "0.2.0" 20 | byteorder = "1.3.4" 21 | clap = "3.0.13" 22 | rustyline = "9.1.0" 23 | crossterm = "0.23.2" 24 | tui = "0.18.0" 25 | tabled = "0.6.1" 26 | log = "0.4.17" 27 | env_logger = 
"0.9.0" 28 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 2 2 | 3 | # These are the default values for these settings when 4 | # `use_small_heuristics = "Default'`. I generally like them, but I'd 5 | # like to tinker with them a little bit. So I'm turning off 6 | # `use_small_heuristics` and setting them manually. 7 | # 8 | # fn_call_width = 60 9 | # attr_fn_like_width = 70 10 | # struct_lit_width = 18 11 | # struct_variant_width = 35 12 | # array_width = 60 13 | # chain_width = 60 14 | # single_line_if_else_max_width = 50 15 | # use_small_heuristics = "Off" 16 | 17 | # Actually.. this strategy doesn't work. The formatting we get for 18 | # using above values directly isn't the same as using 19 | # `use_small_heuristics = "Default"`. I believe there 20 | # is a bug in rustfmt. We can just use "Max" instead. 21 | use_small_heuristics = "Max" 22 | -------------------------------------------------------------------------------- /src/test/node_test.rs: -------------------------------------------------------------------------------- 1 | use crate::master::master_test::check_master_clean; 2 | use crate::node::{NodeState, State}; 3 | use crate::slave::slave_test::check_slave_clean; 4 | use crate::test_utils::CheckCtx; 5 | 6 | // ----------------------------------------------------------------------------------------------- 7 | // Consistency Testing 8 | // ----------------------------------------------------------------------------------------------- 9 | 10 | pub fn check_node_clean(node: &NodeState, check_ctx: &mut CheckCtx) { 11 | match &node.state { 12 | State::DNEState(_) => {} 13 | State::FreeNodeState(_, _) => {} 14 | State::NominalSlaveState(slave_state, _) => { 15 | check_slave_clean(&slave_state, check_ctx); 16 | } 17 | State::NominalMasterState(master_state, _) => { 18 | check_master_clean(&master_state, check_ctx); 19 | } 20 | State::PostExistence => {} 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/bin/experimental/main.rs: -------------------------------------------------------------------------------- 1 | use sqlparser::dialect::GenericDialect; 2 | use sqlparser::parser::Parser; 3 | use std::collections::BTreeMap; 4 | 5 | fn main() { 6 | let d = true; 7 | let mut f = 3; 8 | println!("{}", f); 9 | 10 | // let mut s = BTreeMap::new(); 11 | // s.insert("hello", 4); 12 | // println!("{:#?}", s); 13 | } 14 | 15 | fn sql() { 16 | // 17 | // let sql = "SELECT a, b, 123, myfunc(b) \ 18 | // FROM table_1 \ 19 | // WHERE a > b AND b < 100 \ 20 | // ORDER BY a DESC, b"; 21 | 22 | let sql = " 23 | INSERT INTO inventory (product_id, email, count) 24 | VALUES (2, 'my_email_2', 25); 25 | 26 | SELECT SUM(DISTINCT count) 27 | FROM inventory; 28 | "; 29 | 30 | let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... 
31 | 32 | let ast = Parser::parse_sql(&dialect, sql); 33 | 34 | println!("AST: {:#?}", ast); 35 | } 36 | -------------------------------------------------------------------------------- /src/test/master_test.rs: -------------------------------------------------------------------------------- 1 | use crate::master::MasterState; 2 | use crate::test_utils::CheckCtx; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Consistency Testing 6 | // ----------------------------------------------------------------------------------------------- 7 | 8 | pub fn check_master_clean(master: &MasterState, check_ctx: &mut CheckCtx) { 9 | let statuses = &master.statuses; 10 | let ctx = &master.ctx; 11 | 12 | // Check `Status` clean 13 | check_ctx.check(statuses.create_table_tm_ess.is_empty()); 14 | check_ctx.check(statuses.alter_table_tm_ess.is_empty()); 15 | check_ctx.check(statuses.drop_table_tm_ess.is_empty()); 16 | check_ctx.check(statuses.shard_split_tm_ess.is_empty()); 17 | check_ctx.check(statuses.planning_ess.is_empty()); 18 | 19 | // Check `Master` clean 20 | check_ctx.check(ctx.external_request_id_map.is_empty()); 21 | } 22 | -------------------------------------------------------------------------------- /src/test/storage_test.rs: -------------------------------------------------------------------------------- 1 | use super::add_version; 2 | use crate::common::{mk_t, Timestamp}; 3 | use crate::common::{ColVal, ColValN}; 4 | 5 | #[test] 6 | fn add_version_test() { 7 | let mut versions = Vec::<(Timestamp, ColValN)>::new(); 8 | 9 | add_version(&mut versions, mk_t(10), None); 10 | assert_eq!(versions, vec![(mk_t(10), None)]); 11 | add_version(&mut versions, mk_t(20), None); 12 | assert_eq!(versions, vec![(mk_t(10), None), (mk_t(20), None)]); 13 | add_version(&mut versions, mk_t(5), None); 14 | assert_eq!(versions, vec![(mk_t(5), None), (mk_t(10), None), (mk_t(20), None)]); 15 | add_version(&mut versions, mk_t(15), None); 16 | assert_eq!(versions, vec![(mk_t(5), None), (mk_t(10), None), (mk_t(15), None), (mk_t(20), None)]); 17 | add_version(&mut versions, mk_t(10), Some(ColVal::Int(10))); 18 | assert_eq!( 19 | versions, 20 | vec![(mk_t(5), None), (mk_t(10), Some(ColVal::Int(10))), (mk_t(15), None), (mk_t(20), None)] 21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 pasindumuth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | extern crate core; 4 | 5 | #[macro_use] 6 | pub mod lang; 7 | pub mod alter_table_rm_es; 8 | pub mod alter_table_tm_es; 9 | pub mod col_usage; 10 | pub mod common; 11 | pub mod coord; 12 | pub mod create_table_rm_es; 13 | pub mod create_table_tm_es; 14 | pub mod drop_table_rm_es; 15 | pub mod drop_table_tm_es; 16 | pub mod experimental; 17 | pub mod expression; 18 | pub mod finish_query_rm_es; 19 | pub mod finish_query_tm_es; 20 | pub mod free_node_manager; 21 | pub mod gr_query_es; 22 | pub mod join_read_es; 23 | pub mod join_util; 24 | pub mod master; 25 | pub mod master_query_planning_es; 26 | pub mod message; 27 | pub mod ms_query_coord_es; 28 | pub mod ms_table_delete_es; 29 | pub mod ms_table_es; 30 | pub mod ms_table_insert_es; 31 | pub mod ms_table_read_es; 32 | pub mod ms_table_write_es; 33 | pub mod multiversion_map; 34 | pub mod net; 35 | pub mod network_driver; 36 | pub mod node; 37 | pub mod paxos; 38 | pub mod paxos2pc_rm; 39 | pub mod paxos2pc_tm; 40 | pub mod query_converter; 41 | pub mod query_planning; 42 | pub mod server; 43 | pub mod shard_pending_es; 44 | pub mod shard_snapshot_es; 45 | pub mod shard_split_slave_rm_es; 46 | pub mod shard_split_tablet_rm_es; 47 | pub mod shard_split_tm_es; 48 | pub mod simulation_utils; 49 | pub mod slave; 50 | pub mod slave_group_create_es; 51 | pub mod slave_reconfig_es; 52 | pub mod sql_ast; 53 | pub mod sql_parser; 54 | pub mod stmpaxos2pc_rm; 55 | pub mod stmpaxos2pc_tm; 56 | pub mod storage; 57 | pub mod table_read_es; 58 | pub mod tablet; 59 | pub mod test_utils; 60 | pub mod tm_status; 61 | pub mod trans_table_read_es; 62 | -------------------------------------------------------------------------------- /src/simulation_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{EndpointId, InternalMode}; 2 | use std::collections::{BTreeMap, VecDeque}; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Utils 6 | // ----------------------------------------------------------------------------------------------- 7 | // Construct the PaxosNode EndpointId of the Paxos node at the given index. 8 | pub fn mk_paxos_eid(i: u32) -> EndpointId { 9 | EndpointId::new(format!("pe{}", i), InternalMode::Internal) 10 | } 11 | 12 | // Construct the Slave EndpointId of the Slave at the given index. 13 | pub fn mk_slave_eid(i: u32) -> EndpointId { 14 | EndpointId::new(format!("se{}", i), InternalMode::Internal) 15 | } 16 | 17 | // Construct the EndpointId of a Node. 18 | pub fn mk_node_eid(i: u32) -> EndpointId { 19 | EndpointId::new(format!("ne{}", i), InternalMode::Internal) 20 | } 21 | 22 | // Construct the Client EndpointId of the client at the given index. 23 | pub fn mk_client_eid(i: u32) -> EndpointId { 24 | EndpointId::new(format!("ce{}", i), InternalMode::External { salt: "".to_string() }) 25 | } 26 | 27 | /// Add a message between two nodes in the network.
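/// If the queue from `from_eid` to `to_eid` was empty beforehand, it is also registered in
/// `nonempty_queues`, so the simulation can poll only the queues that currently hold messages.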
28 | pub fn add_msg<NetworkMessageT>( 29 | queues: &mut BTreeMap<EndpointId, BTreeMap<EndpointId, VecDeque<NetworkMessageT>>>, 30 | nonempty_queues: &mut Vec<(EndpointId, EndpointId)>, 31 | msg: NetworkMessageT, 32 | from_eid: &EndpointId, 33 | to_eid: &EndpointId, 34 | ) { 35 | let queue = queues.get_mut(from_eid).unwrap().get_mut(to_eid).unwrap(); 36 | if queue.is_empty() { 37 | let queue_id = (from_eid.clone(), to_eid.clone()); 38 | nonempty_queues.push(queue_id); 39 | } 40 | queue.push_back(msg); 41 | } 42 | -------------------------------------------------------------------------------- /src/experimental.rs: -------------------------------------------------------------------------------- 1 | use crate::sql_ast::proc; 2 | use std::mem; 3 | use std::ops::Deref; 4 | 5 | // In this file, we store experimental implementations for things. 6 | 7 | // Implementations 8 | 9 | struct SubqueryIter<'a> { 10 | expr: &'a proc::ValExpr, 11 | parent: Option<Box<SubqueryIter<'a>>>, 12 | } 13 | 14 | // impl proc::ValExpr { 15 | // fn subquery_iter(&self) -> SubqueryIter<'_> { 16 | // SubqueryIter { expr: self, parent: None } 17 | // } 18 | // } 19 | 20 | impl<'a> Iterator for SubqueryIter<'a> { 21 | type Item = &'a proc::GRQuery; 22 | // The property here is that we should return the GRQuerys in `expr` in this 23 | // node, then the parent node, then its parent, and so on. 24 | fn next(&mut self) -> Option<Self::Item> { 25 | match self.expr { 26 | proc::ValExpr::ColumnRef(_) => { 27 | if let Some(parent) = self.parent.take() { 28 | *self = *parent; 29 | self.next() 30 | } else { 31 | None 32 | } 33 | } 34 | proc::ValExpr::UnaryExpr { expr, .. } => { 35 | self.expr = expr.deref(); 36 | self.next() 37 | } 38 | proc::ValExpr::BinaryExpr { left, right, .. } => { 39 | self.parent = 40 | Some(Box::new(SubqueryIter { expr: right, parent: mem::take(&mut self.parent) })); 41 | self.expr = left; 42 | self.next() 43 | } 44 | proc::ValExpr::Value { .. } => { 45 | if let Some(parent) = self.parent.take() { 46 | *self = *parent; 47 | self.next() 48 | } else { 49 | None 50 | } 51 | } 52 | proc::ValExpr::Subquery { query } => { 53 | if let Some(parent) = self.parent.take() { 54 | *self = *parent; 55 | Some(query) 56 | } else { 57 | None 58 | } 59 | } 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/lang.rs: -------------------------------------------------------------------------------- 1 | macro_rules! collection { 2 | // map-like 3 | ($($k:expr => $v:expr),* $(,)?) => { 4 | std::iter::Iterator::collect(std::array::IntoIter::new([$(($k, $v),)*])) 5 | }; 6 | // set-like 7 | ($($v:expr),* $(,)?) => { 8 | std::iter::Iterator::collect(std::array::IntoIter::new([$($v,)*])) 9 | }; 10 | } 11 | 12 | /// The first argument is a single-element Tuple Struct Variant 13 | /// whose insides we want to extract. The second argument is the 14 | /// value we want to extract from. If the value is a reference, the return 15 | /// value here is a reference. Otherwise, the value is moved. 16 | #[macro_export] 17 | macro_rules! cast { 18 | ($enum:path, $expr:expr) => {{ 19 | if let $enum(item) = $expr { 20 | Some(item) 21 | } else { 22 | debug_assert!(false); 23 | None 24 | } 25 | }}; 26 | } 27 | 28 | /// Same as the above, but the expected branch might be the `None` 29 | /// branch, so we do not debug assert. 30 | #[macro_export] 31 | macro_rules!
cast_safe { 32 | ($enum:path, $expr:expr) => {{ 33 | if let $enum(item) = $expr { 34 | Some(item) 35 | } else { 36 | None 37 | } 38 | }}; 39 | } 40 | 41 | /// A macro that makes it easy to check that an expression is true, 42 | /// and then exit the current function if it is false (in production, 43 | /// but assert in development). 44 | #[macro_export] 45 | macro_rules! check { 46 | ($expr:expr) => {{ 47 | if $expr { 48 | Some(()) 49 | } else { 50 | debug_assert!(false); 51 | None 52 | }? // We place the `?` here, since it is easy to forget 53 | // when using this macro (since it does not return anything). 54 | }}; 55 | } 56 | 57 | #[cfg(test)] 58 | mod tests { 59 | enum Enum { 60 | V1(i32), 61 | V2(String), 62 | } 63 | 64 | #[test] 65 | fn cast_test() { 66 | let e = Enum::V2("value".to_string()); 67 | let inner_incorrect = cast!(Enum::V1, &e); 68 | assert!(inner_incorrect.is_err()); 69 | let inner_correct = cast!(Enum::V2, &e); 70 | assert_eq!(inner_correct, Ok(&"value".to_string())); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | ColName, ColVal, EndpointId, SlaveGroupId, TablePath, TabletGroupId, TransTableName, 3 | }; 4 | use rand::RngCore; 5 | use rand_xorshift::XorShiftRng; 6 | 7 | pub fn cn(s: &str) -> ColName { 8 | ColName(s.to_string()) 9 | } 10 | 11 | pub fn cno(s: &str) -> Option { 12 | Some(ColName(s.to_string())) 13 | } 14 | 15 | pub fn cvs(s: &str) -> ColVal { 16 | ColVal::String(s.to_string()) 17 | } 18 | 19 | pub fn cvi(i: i32) -> ColVal { 20 | ColVal::Int(i) 21 | } 22 | 23 | pub fn cvb(b: bool) -> ColVal { 24 | ColVal::Bool(b) 25 | } 26 | 27 | pub fn mk_sid(id: &str) -> SlaveGroupId { 28 | SlaveGroupId(id.to_string()) 29 | } 30 | 31 | pub fn mk_tid(id: &str) -> TabletGroupId { 32 | TabletGroupId(id.to_string()) 33 | } 34 | 35 | pub fn mk_tab(table_path: &str) -> TablePath { 36 | TablePath(table_path.to_string()) 37 | } 38 | 39 | pub fn mk_ttab(table_path: &str) -> TransTableName { 40 | TransTableName(table_path.to_string()) 41 | } 42 | 43 | // ----------------------------------------------------------------------------------------------- 44 | // Random 45 | // ----------------------------------------------------------------------------------------------- 46 | 47 | pub fn mk_seed(rand: &mut XorShiftRng) -> [u8; 16] { 48 | let mut seed = [0; 16]; 49 | rand.fill_bytes(&mut seed); 50 | seed 51 | } 52 | 53 | // ----------------------------------------------------------------------------------------------- 54 | // Check Context 55 | // ----------------------------------------------------------------------------------------------- 56 | 57 | /// This is a utility for effectively accumulating the AND result of many boolean expressions. 58 | /// If `check` is called even once with `false` after construction, we remember this fact 59 | /// in `cum_bool`. We do not simply use a `&mut bool` because sometimes, we want to panic 60 | /// if the AND expression would evaluate to false (and we want to do it early). 
61 | pub struct CheckCtx { 62 | pub should_assert: bool, 63 | cum_bool: bool, 64 | } 65 | 66 | impl CheckCtx { 67 | pub fn new(should_assert: bool) -> CheckCtx { 68 | CheckCtx { should_assert, cum_bool: true } 69 | } 70 | 71 | pub fn check(&mut self, boolean: bool) { 72 | if !boolean { 73 | if self.should_assert { 74 | panic!(); 75 | } else { 76 | self.cum_bool = false; 77 | } 78 | } 79 | } 80 | 81 | pub fn get_result(&self) -> bool { 82 | self.cum_bool 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/message.rs: -------------------------------------------------------------------------------- 1 | use crate::simple_tm_es::SimplePayloadTypes; 2 | use crate::stm_simple_tm_es::STMSimpleTMPayloadTypes; 3 | use runiversal::common::{QueryId, SlaveGroupId}; 4 | use runiversal::message as msg; 5 | use runiversal::paxos2pc_tm as paxos2pc; 6 | use runiversal::stmpaxos2pc_tm as stmpaxos2pc; 7 | use serde::{Deserialize, Serialize}; 8 | 9 | // ------------------------------------------------------------------------------------------------- 10 | // NetworkMessage 11 | // ------------------------------------------------------------------------------------------------- 12 | 13 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 14 | pub enum NetworkMessage { 15 | Slave(SlaveMessage), 16 | } 17 | 18 | // ------------------------------------------------------------------------------------------------- 19 | // SlaveMessage 20 | // ------------------------------------------------------------------------------------------------- 21 | 22 | pub type RemoteMessage = msg::RemoteMessage; 23 | pub type RemoteLeaderChangedGossip = msg::RemoteLeaderChangedGossip; 24 | 25 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 26 | pub enum SlaveMessage { 27 | ExternalMessage(ExternalMessage), 28 | RemoteMessage(msg::RemoteMessage), 29 | RemoteLeaderChangedGossip(msg::RemoteLeaderChangedGossip), 30 | } 31 | 32 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 33 | pub enum SlaveRemotePayload { 34 | // Simple STMPaxos2PC 35 | STMRMMessage(stmpaxos2pc::RMMessage), 36 | STMTMMessage(stmpaxos2pc::TMMessage), 37 | 38 | // Simple Paxos2PC 39 | RMMessage(paxos2pc::RMMessage), 40 | TMMessage(paxos2pc::TMMessage), 41 | } 42 | 43 | // ------------------------------------------------------------------------------------------------- 44 | // ExternalMessage 45 | // ------------------------------------------------------------------------------------------------- 46 | 47 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 48 | pub enum ExternalMessage { 49 | STMSimpleRequest(STMSimpleRequest), 50 | SimpleRequest(SimpleRequest), 51 | } 52 | 53 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 54 | pub struct STMSimpleRequest { 55 | pub query_id: QueryId, 56 | pub rms: Vec, 57 | } 58 | 59 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 60 | pub struct SimpleRequest { 61 | pub query_id: QueryId, 62 | pub rms: Vec, 63 | } 64 | 65 | // ------------------------------------------------------------------------------------------------- 66 | // Paxos 67 | // ------------------------------------------------------------------------------------------------- 68 | 69 | pub type LeaderChanged = msg::LeaderChanged; 70 | pub type PLEntry = msg::PLEntry; 71 | -------------------------------------------------------------------------------- /src/test/coord_test.rs: 
-------------------------------------------------------------------------------- 1 | use crate::coord::CoordState; 2 | use crate::test_utils::CheckCtx; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Consistency Testing 6 | // ----------------------------------------------------------------------------------------------- 7 | 8 | /// Asserts various consistency properties in the `CoordState`. 9 | pub fn assert_coord_consistency(coord: &CoordState) { 10 | external_request_id_map_consistency(coord); 11 | } 12 | 13 | // Verify that every `MSCoordES` and every `FinishQueryTMES` is consistent with `external_request_id_map`. 14 | fn external_request_id_map_consistency(coord: &CoordState) { 15 | let statuses = &coord.statuses; 16 | let ctx = &coord.ctx; 17 | 18 | if ctx.is_leader() { 19 | // If this is a Leader, we make sure all RequestIds in the ESs exist in 20 | // the `external_request_id_map`. 21 | for (qid, es) in &statuses.ms_coord_ess { 22 | if let Some(stored_qid) = ctx.external_request_id_map.get(&es.request_id) { 23 | assert_eq!(stored_qid, qid); 24 | } else { 25 | panic!(); 26 | } 27 | } 28 | 29 | for (qid, es) in &statuses.finish_query_tm_ess { 30 | if let Some(response_data) = &es.inner.response_data { 31 | if let Some(stored_qid) = ctx.external_request_id_map.get(&response_data.request_id) { 32 | assert_eq!(stored_qid, qid); 33 | } else { 34 | panic!(); 35 | } 36 | } 37 | } 38 | 39 | // Next, we see if all entries in `external_request_id_map` are in an ES. 40 | for (rid, qid) in &ctx.external_request_id_map { 41 | if let Some(es) = &statuses.ms_coord_ess.get(qid) { 42 | assert_eq!(&es.request_id, rid); 43 | assert!(!statuses.finish_query_tm_ess.contains_key(qid)); 44 | } else if let Some(es) = &statuses.finish_query_tm_ess.get(qid) { 45 | if let Some(response_data) = &es.inner.response_data { 46 | assert_eq!(&response_data.request_id, rid); 47 | } else { 48 | panic!(); 49 | } 50 | } else { 51 | panic!(); 52 | } 53 | } 54 | } else { 55 | // If this is a Follower, we make sure its `external_request_id_map` is empty.
56 | assert!(ctx.external_request_id_map.is_empty()); 57 | assert!(statuses.ms_coord_ess.is_empty()); 58 | } 59 | } 60 | 61 | pub fn check_coord_clean(coord: &CoordState, check_ctx: &mut CheckCtx) { 62 | let statuses = &coord.statuses; 63 | let ctx = &coord.ctx; 64 | 65 | // Check `Status` clean 66 | check_ctx.check(statuses.finish_query_tm_ess.is_empty()); 67 | check_ctx.check(statuses.ms_coord_ess.is_empty()); 68 | check_ctx.check(statuses.gr_query_ess.is_empty()); 69 | check_ctx.check(statuses.join_query_ess.is_empty()); 70 | check_ctx.check(statuses.trans_table_read_ess.is_empty()); 71 | check_ctx.check(statuses.tm_statuss.is_empty()); 72 | 73 | // Check `Coord` clean 74 | check_ctx.check(ctx.external_request_id_map.is_empty()); 75 | } 76 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/simple_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::simple_tm_es::{ 3 | SimplePayloadTypes, SimplePrepare, SimpleRMAborted, SimpleRMCommitted, SimpleRMPrepared, 4 | }; 5 | use crate::slave::SlaveContext; 6 | use rand::RngCore; 7 | use runiversal::common::BasicIOCtx; 8 | use runiversal::common::QueryId; 9 | use runiversal::paxos2pc_rm::{Paxos2PCRMInner, Paxos2PCRMOuter}; 10 | use runiversal::paxos2pc_tm::{PayloadTypes, RMCommittedPLm}; 11 | 12 | // ----------------------------------------------------------------------------------------------- 13 | // SimpleES Implementation 14 | // ----------------------------------------------------------------------------------------------- 15 | 16 | #[derive(Debug)] 17 | pub struct SimpleRMInner {} 18 | 19 | pub type SimpleRMES = Paxos2PCRMOuter<SimplePayloadTypes, SimpleRMInner>; 20 | 21 | impl Paxos2PCRMInner<SimplePayloadTypes> for SimpleRMInner { 22 | fn new<IO: BasicIOCtx<msg::NetworkMessage>>( 23 | _: &mut SlaveContext, 24 | io_ctx: &mut IO, 25 | _: SimplePrepare, 26 | _: &mut (), 27 | ) -> Option<SimpleRMInner> { 28 | // Here, we randomly decide whether to accept the Prepare and continue, or to abort; we abort with a 5% chance.
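// (`next_u32() % 100` is roughly uniform over `0..100`, so the branch below fires about
// 5 times in 100. Returning `None` models an RM that refuses to vote Prepared, which is
// one of the failure paths these Paxos2PC simulations are meant to exercise.)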
29 | if io_ctx.rand().next_u32() % 100 < 5 { 30 | None 31 | } else { 32 | Some(SimpleRMInner {}) 33 | } 34 | } 35 | 36 | fn new_follower>( 37 | _: &mut SlaveContext, 38 | _: &mut IO, 39 | _: SimpleRMPrepared, 40 | ) -> SimpleRMInner { 41 | SimpleRMInner {} 42 | } 43 | 44 | fn early_aborted>( 45 | &mut self, 46 | _: &mut ::RMContext, 47 | _: &mut IO, 48 | ) { 49 | } 50 | 51 | fn mk_prepared_plm>( 52 | &mut self, 53 | _: &mut SlaveContext, 54 | _: &mut IO, 55 | ) -> SimpleRMPrepared { 56 | SimpleRMPrepared {} 57 | } 58 | 59 | fn prepared_plm_inserted>( 60 | &mut self, 61 | _: &mut SlaveContext, 62 | _: &mut IO, 63 | ) { 64 | } 65 | 66 | fn mk_committed_plm>( 67 | &mut self, 68 | _: &mut SlaveContext, 69 | _: &mut IO, 70 | ) -> SimpleRMCommitted { 71 | SimpleRMCommitted {} 72 | } 73 | 74 | fn committed_plm_inserted>( 75 | &mut self, 76 | _: &mut SlaveContext, 77 | _: &mut IO, 78 | _: &QueryId, 79 | ) { 80 | } 81 | 82 | fn mk_aborted_plm>( 83 | &mut self, 84 | _: &mut SlaveContext, 85 | _: &mut IO, 86 | ) -> SimpleRMAborted { 87 | SimpleRMAborted {} 88 | } 89 | 90 | fn aborted_plm_inserted>( 91 | &mut self, 92 | _: &mut SlaveContext, 93 | _: &mut IO, 94 | ) { 95 | } 96 | 97 | fn reconfig_snapshot(&self) -> Self { 98 | unimplemented!() 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/simple_tm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::slave::{SlaveContext, SlavePLm}; 3 | use runiversal::common::BasicIOCtx; 4 | use runiversal::common::{EndpointId, RequestId, SlaveGroupId}; 5 | use runiversal::paxos2pc_tm::{ 6 | Paxos2PCTMInner, Paxos2PCTMOuter, PayloadTypes, RMMessage, RMPLm, TMMessage, 7 | }; 8 | use serde::{Deserialize, Serialize}; 9 | use std::collections::BTreeMap; 10 | 11 | // ----------------------------------------------------------------------------------------------- 12 | // Payloads 13 | // ----------------------------------------------------------------------------------------------- 14 | 15 | // RM PLm 16 | 17 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 18 | pub struct SimpleRMPrepared {} 19 | 20 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 21 | pub struct SimpleRMCommitted {} 22 | 23 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 24 | pub struct SimpleRMAborted {} 25 | 26 | // TM-to-RM 27 | 28 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 29 | pub struct SimplePrepare {} 30 | 31 | // SimplePayloadTypes 32 | 33 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 34 | pub struct SimplePayloadTypes {} 35 | 36 | impl PayloadTypes for SimplePayloadTypes { 37 | // Master 38 | type RMPLm = SlavePLm; 39 | type RMPath = SlaveGroupId; 40 | type TMPath = SlaveGroupId; 41 | type RMMessage = msg::SlaveRemotePayload; 42 | type TMMessage = msg::SlaveRemotePayload; 43 | type NetworkMessageT = msg::NetworkMessage; 44 | type RMContext = SlaveContext; 45 | type RMExtraData = (); 46 | type TMContext = SlaveContext; 47 | 48 | // RM PLm 49 | type RMPreparedPLm = SimpleRMPrepared; 50 | type RMCommittedPLm = SimpleRMCommitted; 51 | type RMAbortedPLm = SimpleRMAborted; 52 | 53 | fn rm_plm(plm: RMPLm) -> Self::RMPLm { 54 | SlavePLm::SimpleRM(plm) 55 | } 56 | 57 | type Prepare = SimplePrepare; 58 | 59 | fn rm_msg(msg: RMMessage) -> Self::RMMessage { 60 | msg::SlaveRemotePayload::RMMessage(msg) 61 | } 62 | 63 | fn tm_msg(msg: TMMessage) -> Self::TMMessage { 
64 | msg::SlaveRemotePayload::TMMessage(msg) 65 | } 66 | } 67 | 68 | // ----------------------------------------------------------------------------------------------- 69 | // Simple Implementation 70 | // ----------------------------------------------------------------------------------------------- 71 | 72 | pub type SimpleTMES = Paxos2PCTMOuter; 73 | 74 | #[derive(Debug)] 75 | pub struct SimpleTMInner {} 76 | 77 | impl Paxos2PCTMInner for SimpleTMInner { 78 | fn new_rec>( 79 | _: &mut SlaveContext, 80 | _: &mut IO, 81 | ) -> SimpleTMInner { 82 | SimpleTMInner {} 83 | } 84 | 85 | fn committed>(&mut self, _: &mut SlaveContext, _: &mut IO) {} 86 | 87 | fn aborted>(&mut self, _: &mut SlaveContext, _: &mut IO) {} 88 | } 89 | -------------------------------------------------------------------------------- /src/finish_query_tm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{BasicIOCtx, ColName, QueryResult, ShardingGen, Timestamp}; 2 | use crate::common::{CNodePath, EndpointId, QueryId, RequestId, TNodePath, TQueryPath, TableView}; 3 | use crate::coord::CoordContext; 4 | use crate::message as msg; 5 | use crate::paxos2pc_tm::{ 6 | Paxos2PCTMInner, Paxos2PCTMOuter, PayloadTypes, RMMessage, RMPLm, TMMessage, 7 | }; 8 | use crate::sql_ast::{iast, proc}; 9 | use crate::storage::GenericTable; 10 | use crate::tablet::{MSQueryES, ReadWriteRegion, TabletContext, TabletPLm}; 11 | use serde::{Deserialize, Serialize}; 12 | use std::collections::BTreeMap; 13 | 14 | // ----------------------------------------------------------------------------------------------- 15 | // Payloads 16 | // ----------------------------------------------------------------------------------------------- 17 | 18 | // RM PLm 19 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 20 | pub struct FinishQueryRMPrepared { 21 | pub sharding_gen: ShardingGen, 22 | pub region_lock: ReadWriteRegion, 23 | pub timestamp: Timestamp, 24 | pub update_view: GenericTable, 25 | } 26 | 27 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 28 | pub struct FinishQueryRMCommitted {} 29 | 30 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 31 | pub struct FinishQueryRMAborted {} 32 | 33 | // TM-to-RM Messages 34 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 35 | pub struct FinishQueryPrepare { 36 | /// Contains the QueryId of the MSQueryES that this `Prepare` has to take over 37 | pub query_id: QueryId, 38 | } 39 | 40 | // FinishQueryPayloadTypes 41 | 42 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 43 | pub struct FinishQueryPayloadTypes {} 44 | 45 | impl PayloadTypes for FinishQueryPayloadTypes { 46 | // Master 47 | type RMPLm = TabletPLm; 48 | type RMPath = TNodePath; 49 | type TMPath = CNodePath; 50 | type RMMessage = msg::TabletMessage; 51 | type TMMessage = msg::CoordMessage; 52 | type NetworkMessageT = msg::NetworkMessage; 53 | type RMContext = TabletContext; 54 | type RMExtraData = BTreeMap; 55 | type TMContext = CoordContext; 56 | 57 | // RM PLm 58 | type RMPreparedPLm = FinishQueryRMPrepared; 59 | type RMCommittedPLm = FinishQueryRMCommitted; 60 | type RMAbortedPLm = FinishQueryRMAborted; 61 | 62 | fn rm_plm(plm: RMPLm) -> Self::RMPLm { 63 | TabletPLm::FinishQuery(plm) 64 | } 65 | 66 | type Prepare = FinishQueryPrepare; 67 | 68 | fn rm_msg(msg: RMMessage) -> Self::RMMessage { 69 | msg::TabletMessage::FinishQuery(msg) 70 | } 71 | 72 | fn tm_msg(msg: TMMessage) -> Self::TMMessage { 73 | 
msg::CoordMessage::FinishQuery(msg) 74 | } 75 | } 76 | 77 | // ----------------------------------------------------------------------------------------------- 78 | // FinishQueryTMES 79 | // ----------------------------------------------------------------------------------------------- 80 | 81 | pub type FinishQueryTMES = Paxos2PCTMOuter; 82 | 83 | #[derive(Debug)] 84 | pub struct ResponseData { 85 | // Request values (values send in the original request) 86 | pub request_id: RequestId, 87 | pub sender_eid: EndpointId, 88 | /// We hold onto the original `Query` in case of an Abort so that we can restart. 89 | pub sql_query: iast::Query, 90 | 91 | // Result values (values computed by the MSCoordES) 92 | pub result: QueryResult, 93 | pub timestamp: Timestamp, 94 | } 95 | 96 | #[derive(Debug)] 97 | pub struct FinishQueryTMInner { 98 | pub response_data: Option, 99 | pub committed: bool, 100 | } 101 | 102 | // ----------------------------------------------------------------------------------------------- 103 | // Implementation 104 | // ----------------------------------------------------------------------------------------------- 105 | 106 | impl Paxos2PCTMInner for FinishQueryTMInner { 107 | fn new_rec(_: &mut CoordContext, _: &mut IO) -> FinishQueryTMInner { 108 | FinishQueryTMInner { response_data: None, committed: false } 109 | } 110 | 111 | fn committed(&mut self, _: &mut CoordContext, _: &mut IO) { 112 | self.committed = true; 113 | } 114 | 115 | fn aborted(&mut self, _: &mut CoordContext, _: &mut IO) { 116 | self.committed = false; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/stm_simple_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::slave::{SlaveContext, SlavePLm}; 3 | use crate::stm_simple_tm_es::{ 4 | STMSimpleClosed, STMSimpleCommit, STMSimplePrepare, STMSimplePrepared, STMSimpleTMPayloadTypes, 5 | }; 6 | use runiversal::common::BasicIOCtx; 7 | use runiversal::common::SlaveGroupId; 8 | use runiversal::stmpaxos2pc_rm::{ 9 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 10 | STMPaxos2PCRMOuter, 11 | }; 12 | use runiversal::stmpaxos2pc_tm::TMMessage; 13 | use serde::{Deserialize, Serialize}; 14 | 15 | // ----------------------------------------------------------------------------------------------- 16 | // Payloads 17 | // ----------------------------------------------------------------------------------------------- 18 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 19 | pub struct STMSimpleRMPayloadTypes {} 20 | 21 | impl RMPayloadTypes for STMSimpleRMPayloadTypes { 22 | type TM = STMSimpleTMPayloadTypes; 23 | type RMContext = SlaveContext; 24 | 25 | // Actions 26 | type RMCommitActionData = (); 27 | 28 | // RM PLm 29 | type RMPreparedPLm = STMSimpleRMPrepared; 30 | type RMCommittedPLm = STMSimpleRMCommitted; 31 | type RMAbortedPLm = STMSimpleRMAborted; 32 | } 33 | 34 | // RM PLm 35 | 36 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 37 | pub struct STMSimpleRMPrepared {} 38 | 39 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 40 | pub struct STMSimpleRMCommitted {} 41 | 42 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 43 | pub struct STMSimpleRMAborted {} 44 | 45 | // ----------------------------------------------------------------------------------------------- 46 | // RMServerContext 47 | // 
----------------------------------------------------------------------------------------------- 48 | 49 | impl RMServerContext for SlaveContext { 50 | fn push_plm(&mut self, plm: RMPLm) { 51 | self.slave_bundle.plms.push(SlavePLm::SimpleSTMRM(plm)); 52 | } 53 | 54 | fn send_to_tm>( 55 | &mut self, 56 | io_ctx: &mut IO, 57 | tm: &SlaveGroupId, 58 | msg: TMMessage, 59 | ) { 60 | self.send(io_ctx, tm, msg::SlaveRemotePayload::STMTMMessage(msg)); 61 | } 62 | 63 | fn mk_node_path(&self) -> SlaveGroupId { 64 | self.this_sid.clone() 65 | } 66 | 67 | fn is_leader(&self) -> bool { 68 | SlaveContext::is_leader(self) 69 | } 70 | } 71 | 72 | // ----------------------------------------------------------------------------------------------- 73 | // SimpleES Implementation 74 | // ----------------------------------------------------------------------------------------------- 75 | 76 | #[derive(Debug)] 77 | pub struct STMSimpleRMInner {} 78 | 79 | pub type STMSimpleRMES = STMPaxos2PCRMOuter; 80 | pub type STMSimpleRMAction = STMPaxos2PCRMAction; 81 | 82 | impl STMPaxos2PCRMInner for STMSimpleRMInner { 83 | fn new>( 84 | _: &mut SlaveContext, 85 | _: &mut IO, 86 | _: STMSimplePrepare, 87 | ) -> STMSimpleRMInner { 88 | STMSimpleRMInner {} 89 | } 90 | 91 | fn new_follower>( 92 | _: &mut SlaveContext, 93 | _: &mut IO, 94 | _: STMSimpleRMPrepared, 95 | ) -> STMSimpleRMInner { 96 | STMSimpleRMInner {} 97 | } 98 | 99 | fn mk_closed() -> STMSimpleClosed { 100 | STMSimpleClosed {} 101 | } 102 | 103 | fn mk_prepared_plm>( 104 | &mut self, 105 | _: &mut SlaveContext, 106 | _: &mut IO, 107 | ) -> Option { 108 | Some(STMSimpleRMPrepared {}) 109 | } 110 | 111 | fn prepared_plm_inserted>( 112 | &mut self, 113 | _: &mut SlaveContext, 114 | _: &mut IO, 115 | ) -> STMSimplePrepared { 116 | STMSimplePrepared {} 117 | } 118 | 119 | fn mk_committed_plm>( 120 | &mut self, 121 | _: &mut SlaveContext, 122 | _: &mut IO, 123 | _: &STMSimpleCommit, 124 | ) -> STMSimpleRMCommitted { 125 | STMSimpleRMCommitted {} 126 | } 127 | 128 | fn committed_plm_inserted>( 129 | &mut self, 130 | _: &mut SlaveContext, 131 | _: &mut IO, 132 | _: &RMCommittedPLm, 133 | ) { 134 | } 135 | 136 | fn mk_aborted_plm>( 137 | &mut self, 138 | _: &mut SlaveContext, 139 | _: &mut IO, 140 | ) -> STMSimpleRMAborted { 141 | STMSimpleRMAborted {} 142 | } 143 | 144 | fn aborted_plm_inserted>( 145 | &mut self, 146 | _: &mut SlaveContext, 147 | _: &mut IO, 148 | ) { 149 | } 150 | 151 | fn reconfig_snapshot(&self) -> Self { 152 | unimplemented!() 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/test/tablet_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{PrimaryKey, QueryId, TabletKeyRange}; 2 | use crate::finish_query_rm_es::FinishQueryRMES; 3 | use crate::tablet::{ShardingState, TabletState, DDLES}; 4 | use crate::test_utils::{cvb, cvi, cvs, CheckCtx}; 5 | use std::collections::BTreeMap; 6 | 7 | #[test] 8 | fn test_key_comparison() { 9 | assert_eq!( 10 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] }, 11 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] } 12 | ); 13 | 14 | assert!( 15 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] } 16 | < PrimaryKey { cols: vec![cvi(3), cvs("a"), cvb(false)] } 17 | ); 18 | 19 | assert!( 20 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] } 21 | < PrimaryKey { cols: vec![cvi(2), cvs("b"), cvb(false)] } 22 | ); 23 | 24 | assert!( 25 | PrimaryKey { cols: vec![cvi(2), cvs("a"), 
cvb(false)] } 26 | < PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(true)] } 27 | ); 28 | } 29 | 30 | // ----------------------------------------------------------------------------------------------- 31 | // Consistency Testing 32 | // ----------------------------------------------------------------------------------------------- 33 | 34 | /// Asserts various consistency properties in the `TabletState`. 35 | pub fn assert_tablet_consistency(tablet: &TabletState) { 36 | let statuses = &tablet.statuses; 37 | 38 | // Verify for every MSQueryES, every ES in `pending_queries` exist. 39 | for (query_id, ms_query_es) in &statuses.ms_query_ess { 40 | for child_qid in &ms_query_es.pending_queries { 41 | if let Some(es) = statuses.top.ms_table_read_ess.get(child_qid) { 42 | assert_eq!(&es.general.ms_query_id, query_id); 43 | } else if let Some(es) = statuses.top.ms_table_write_ess.get(child_qid) { 44 | assert_eq!(&es.general.ms_query_id, query_id); 45 | } else if let Some(es) = statuses.top.ms_table_insert_ess.get(child_qid) { 46 | assert_eq!(&es.general.ms_query_id, query_id); 47 | } else if let Some(es) = statuses.top.ms_table_delete_ess.get(child_qid) { 48 | assert_eq!(&es.general.ms_query_id, query_id); 49 | } else { 50 | panic!(); 51 | } 52 | } 53 | } 54 | 55 | // Verify that for every MSTable*ES, a valid MSQueryES exists. 56 | for (query_id, es) in &statuses.top.ms_table_read_ess { 57 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 58 | assert!(ms_query_es.pending_queries.contains(query_id)); 59 | } else { 60 | panic!() 61 | } 62 | } 63 | for (query_id, es) in &statuses.top.ms_table_write_ess { 64 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 65 | assert!(ms_query_es.pending_queries.contains(query_id)); 66 | } else { 67 | panic!() 68 | } 69 | } 70 | for (query_id, es) in &statuses.top.ms_table_insert_ess { 71 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 72 | assert!(ms_query_es.pending_queries.contains(query_id)); 73 | } else { 74 | panic!() 75 | } 76 | } 77 | for (query_id, es) in &statuses.top.ms_table_delete_ess { 78 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 79 | assert!(ms_query_es.pending_queries.contains(query_id)); 80 | } else { 81 | panic!() 82 | } 83 | } 84 | } 85 | 86 | pub fn check_tablet_clean(tablet: &TabletState, check_ctx: &mut CheckCtx) { 87 | let statuses = &tablet.statuses; 88 | let ctx = &tablet.ctx; 89 | 90 | // Check `Statuses` clean 91 | 92 | check_ctx.check(statuses.perform_query_buffer.is_empty()); 93 | 94 | check_ctx.check(statuses.gr_query_ess.is_empty()); 95 | check_ctx.check(statuses.join_query_ess.is_empty()); 96 | check_ctx.check(statuses.tm_statuss.is_empty()); 97 | check_ctx.check(statuses.ms_query_ess.is_empty()); 98 | check_ctx.check(statuses.top.table_read_ess.is_empty()); 99 | check_ctx.check(statuses.top.trans_table_read_ess.is_empty()); 100 | check_ctx.check(statuses.top.ms_table_read_ess.is_empty()); 101 | check_ctx.check(statuses.top.ms_table_write_ess.is_empty()); 102 | check_ctx.check(statuses.top.ms_table_insert_ess.is_empty()); 103 | check_ctx.check(statuses.top.ms_table_delete_ess.is_empty()); 104 | for (_, es) in &statuses.finish_query_ess { 105 | if let FinishQueryRMES::Paxos2PCRMExecOuter(_) = es { 106 | check_ctx.check(false); 107 | } 108 | } 109 | 110 | check_ctx.check(match &statuses.ddl_es { 111 | DDLES::None => true, 112 | DDLES::Alter(_) => false, 113 | DDLES::Drop(_) => false, 114 | 
DDLES::Dropped(_) => true, 115 | DDLES::ShardSplit(_) => false, 116 | }); 117 | 118 | check_ctx.check(match &statuses.sharding_state { 119 | ShardingState::None => true, 120 | ShardingState::ShardingSnapshotES(_) => false, 121 | }); 122 | 123 | // Check `Tablet` clean 124 | 125 | check_ctx.check(ctx.verifying_writes.is_empty()); 126 | check_ctx.check(ctx.inserting_prepared_writes.is_empty()); 127 | check_ctx.check(ctx.prepared_writes.is_empty()); 128 | 129 | check_ctx.check(ctx.waiting_read_protected.is_empty()); 130 | check_ctx.check(ctx.inserting_read_protected.is_empty()); 131 | 132 | check_ctx.check(ctx.waiting_locked_cols.is_empty()); 133 | check_ctx.check(ctx.inserting_locked_cols.is_empty()); 134 | 135 | check_ctx.check(ctx.ms_root_query_map.is_empty()); 136 | } 137 | -------------------------------------------------------------------------------- /src/finish_query_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{BasicIOCtx, GeneralTraceMessage, Timestamp}; 2 | use crate::common::{QueryId, ShardingGen}; 3 | use crate::finish_query_tm_es::{ 4 | FinishQueryPayloadTypes, FinishQueryPrepare, FinishQueryRMAborted, FinishQueryRMCommitted, 5 | FinishQueryRMPrepared, 6 | }; 7 | use crate::paxos2pc_rm::{Paxos2PCRMInner, Paxos2PCRMOuter}; 8 | use crate::paxos2pc_tm::PayloadTypes; 9 | use crate::sql_ast::proc; 10 | use crate::storage::{commit_to_storage, compress_updates_views, GenericTable}; 11 | use crate::tablet::{MSQueryES, ReadWriteRegion, TabletContext}; 12 | use serde::{Deserialize, Serialize}; 13 | use std::collections::BTreeMap; 14 | 15 | // ----------------------------------------------------------------------------------------------- 16 | // FinishQueryRMES 17 | // ----------------------------------------------------------------------------------------------- 18 | 19 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 20 | pub struct FinishQueryRMInner { 21 | pub sharding_gen: ShardingGen, 22 | pub region_lock: ReadWriteRegion, 23 | pub timestamp: Timestamp, 24 | pub update_view: GenericTable, 25 | } 26 | 27 | pub type FinishQueryRMES = Paxos2PCRMOuter; 28 | 29 | // ----------------------------------------------------------------------------------------------- 30 | // Implementation 31 | // ----------------------------------------------------------------------------------------------- 32 | 33 | impl Paxos2PCRMInner for FinishQueryRMInner { 34 | fn new( 35 | ctx: &mut TabletContext, 36 | _: &mut IO, 37 | payload: FinishQueryPrepare, 38 | extra_data: &mut BTreeMap, 39 | ) -> Option { 40 | if let Some(ms_query_es) = extra_data.remove(&payload.query_id) { 41 | ctx.ms_root_query_map.remove(&ms_query_es.root_query_path.query_id); 42 | debug_assert!(ms_query_es.pending_queries.is_empty()); 43 | 44 | let timestamp = ms_query_es.timestamp; 45 | 46 | // Move the VerifyingReadWrite to inserting. 
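// (Lifecycle, as implemented below: the region lock leaves `verifying_writes` here and is
// staged in `inserting_prepared_writes`; `prepared_plm_inserted` then moves it to
// `prepared_writes`, and `committed_plm_inserted` finally moves it to `committed_writes`.)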
47 | let verifying = ctx.verifying_writes.remove(×tamp).unwrap(); 48 | debug_assert!(verifying.m_waiting_read_protected.is_empty()); 49 | let region_lock = ReadWriteRegion { 50 | orig_p: verifying.orig_p, 51 | m_read_protected: verifying.m_read_protected, 52 | m_write_protected: verifying.m_write_protected, 53 | }; 54 | ctx.inserting_prepared_writes.insert(timestamp.clone(), region_lock.clone()); 55 | 56 | Some(FinishQueryRMInner { 57 | sharding_gen: ms_query_es.sharding_gen, 58 | region_lock, 59 | timestamp, 60 | update_view: compress_updates_views(ms_query_es.update_views), 61 | }) 62 | } else { 63 | // The MSQueryES might not be present because of a DeadlockSafetyWriteAbort. 64 | None 65 | } 66 | } 67 | 68 | fn new_follower( 69 | _: &mut TabletContext, 70 | _: &mut IO, 71 | payload: FinishQueryRMPrepared, 72 | ) -> FinishQueryRMInner { 73 | FinishQueryRMInner { 74 | sharding_gen: payload.sharding_gen, 75 | region_lock: payload.region_lock, 76 | timestamp: payload.timestamp, 77 | update_view: payload.update_view, 78 | } 79 | } 80 | 81 | fn early_aborted(&mut self, ctx: &mut TabletContext, _: &mut IO) { 82 | ctx.inserting_prepared_writes.remove(&self.timestamp); 83 | } 84 | 85 | fn mk_prepared_plm( 86 | &mut self, 87 | _: &mut TabletContext, 88 | _: &mut IO, 89 | ) -> FinishQueryRMPrepared { 90 | FinishQueryRMPrepared { 91 | sharding_gen: self.sharding_gen.clone(), 92 | region_lock: self.region_lock.clone(), 93 | timestamp: self.timestamp.clone(), 94 | update_view: self.update_view.clone(), 95 | } 96 | } 97 | 98 | fn prepared_plm_inserted(&mut self, ctx: &mut TabletContext, _: &mut IO) { 99 | ctx.inserting_prepared_writes.remove(&self.timestamp); 100 | ctx.prepared_writes.insert(self.timestamp.clone(), self.region_lock.clone()); 101 | } 102 | 103 | fn mk_committed_plm( 104 | &mut self, 105 | _: &mut TabletContext, 106 | _: &mut IO, 107 | ) -> FinishQueryRMCommitted { 108 | FinishQueryRMCommitted {} 109 | } 110 | 111 | fn committed_plm_inserted( 112 | &mut self, 113 | ctx: &mut TabletContext, 114 | io_ctx: &mut IO, 115 | query_id: &QueryId, 116 | ) { 117 | commit_to_storage(&mut ctx.storage, &self.timestamp, self.update_view.clone()); 118 | let region_lock = ctx.prepared_writes.remove(&self.timestamp).unwrap(); 119 | ctx.committed_writes.insert(self.timestamp.clone(), region_lock); 120 | 121 | // Trace the commit 122 | io_ctx.general_trace(GeneralTraceMessage::CommittedQueryId( 123 | query_id.clone(), 124 | self.timestamp.clone(), 125 | )); 126 | } 127 | 128 | fn mk_aborted_plm( 129 | &mut self, 130 | _: &mut TabletContext, 131 | _: &mut IO, 132 | ) -> FinishQueryRMAborted { 133 | FinishQueryRMAborted {} 134 | } 135 | 136 | fn aborted_plm_inserted(&mut self, ctx: &mut TabletContext, _: &mut IO) { 137 | ctx.prepared_writes.remove(&self.timestamp).unwrap(); 138 | } 139 | 140 | fn reconfig_snapshot(&self) -> FinishQueryRMInner { 141 | self.clone() 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/shard_split_slave_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{cur_timestamp, QueryId, Timestamp}; 2 | use crate::common::{mk_t, BasicIOCtx}; 3 | use crate::common::{TNodePath, TabletGroupId}; 4 | use crate::message as msg; 5 | use crate::server::ServerContextBase; 6 | use crate::shard_pending_es::ShardingSplitPLm; 7 | use crate::shard_split_tm_es::{ 8 | ShardNodePath, ShardSplitClosed, ShardSplitCommit, ShardSplitPrepare, ShardSplitPrepared, 9 | ShardSplitTMPayloadTypes, 10 
| }; 11 | use crate::slave::{SlaveContext, SlavePLm}; 12 | use crate::stmpaxos2pc_rm::{ 13 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 14 | STMPaxos2PCRMOuter, 15 | }; 16 | use crate::stmpaxos2pc_tm::TMMessage; 17 | use crate::tablet::ShardingSnapshot; 18 | use serde::{Deserialize, Serialize}; 19 | use std::cmp::max; 20 | 21 | // ----------------------------------------------------------------------------------------------- 22 | // Payloads 23 | // ----------------------------------------------------------------------------------------------- 24 | 25 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 26 | pub struct ShardSplitSlaveRMPayloadTypes {} 27 | 28 | impl RMPayloadTypes for ShardSplitSlaveRMPayloadTypes { 29 | type TM = ShardSplitTMPayloadTypes; 30 | type RMContext = SlaveContext; 31 | 32 | // Actions 33 | type RMCommitActionData = (TabletGroupId, QueryId); 34 | 35 | // RM PLm 36 | type RMPreparedPLm = ShardSplitSlaveRMPrepared; 37 | type RMCommittedPLm = ShardSplitSlaveRMCommitted; 38 | type RMAbortedPLm = ShardSplitSlaveRMAborted; 39 | } 40 | 41 | // RM PLm 42 | 43 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 44 | pub struct ShardSplitSlaveRMPrepared {} 45 | 46 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 47 | pub struct ShardSplitSlaveRMCommitted { 48 | /// The `TabletGroupId` for the new Tablet that will be created. 49 | pub tid: TabletGroupId, 50 | } 51 | 52 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 53 | pub struct ShardSplitSlaveRMAborted {} 54 | 55 | // ----------------------------------------------------------------------------------------------- 56 | // RMServerContext ShardSplitSlave 57 | // ----------------------------------------------------------------------------------------------- 58 | 59 | impl RMServerContext for SlaveContext { 60 | fn push_plm(&mut self, plm: RMPLm) { 61 | self.slave_bundle.plms.push(SlavePLm::ShardingSplitPLm(ShardingSplitPLm::ShardSplit(plm))); 62 | } 63 | 64 | fn send_to_tm( 65 | &mut self, 66 | io_ctx: &mut IO, 67 | _: &(), 68 | msg: TMMessage, 69 | ) { 70 | self.send_to_master(io_ctx, msg::MasterRemotePayload::ShardSplit(msg)); 71 | } 72 | 73 | fn mk_node_path(&self) -> ShardNodePath { 74 | ShardNodePath::Slave(self.this_sid.clone()) 75 | } 76 | 77 | fn is_leader(&self) -> bool { 78 | SlaveContext::is_leader(self) 79 | } 80 | } 81 | 82 | // ----------------------------------------------------------------------------------------------- 83 | // ShardSplitSlaveES Implementation 84 | // ----------------------------------------------------------------------------------------------- 85 | 86 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 87 | pub struct ShardSplitSlaveRMInner {} 88 | 89 | pub type ShardSplitSlaveRMES = 90 | STMPaxos2PCRMOuter; 91 | pub type ShardSplitSlaveRMAction = STMPaxos2PCRMAction; 92 | 93 | impl STMPaxos2PCRMInner for ShardSplitSlaveRMInner { 94 | fn new( 95 | _: &mut SlaveContext, 96 | _: &mut IO, 97 | _: ShardSplitPrepare, 98 | ) -> ShardSplitSlaveRMInner { 99 | ShardSplitSlaveRMInner {} 100 | } 101 | 102 | fn new_follower( 103 | _: &mut SlaveContext, 104 | _: &mut IO, 105 | _: ShardSplitSlaveRMPrepared, 106 | ) -> ShardSplitSlaveRMInner { 107 | ShardSplitSlaveRMInner {} 108 | } 109 | 110 | fn mk_closed() -> ShardSplitClosed { 111 | ShardSplitClosed {} 112 | } 113 | 114 | fn mk_prepared_plm( 115 | &mut self, 116 | _: &mut SlaveContext, 117 | _: &mut IO, 118 | ) -> Option { 119 
| Some(ShardSplitSlaveRMPrepared {}) 120 | } 121 | 122 | fn prepared_plm_inserted( 123 | &mut self, 124 | _: &mut SlaveContext, 125 | _: &mut IO, 126 | ) -> ShardSplitPrepared { 127 | ShardSplitPrepared {} 128 | } 129 | 130 | fn mk_committed_plm( 131 | &mut self, 132 | _: &mut SlaveContext, 133 | _: &mut IO, 134 | commit: &ShardSplitCommit, 135 | ) -> ShardSplitSlaveRMCommitted { 136 | ShardSplitSlaveRMCommitted { tid: commit.target_new.tid.clone() } 137 | } 138 | 139 | fn committed_plm_inserted( 140 | &mut self, 141 | _: &mut SlaveContext, 142 | _: &mut IO, 143 | commit: &RMCommittedPLm, 144 | ) -> (TabletGroupId, QueryId) { 145 | (commit.payload.tid.clone(), commit.query_id.clone()) 146 | } 147 | 148 | fn mk_aborted_plm( 149 | &mut self, 150 | _: &mut SlaveContext, 151 | _: &mut IO, 152 | ) -> ShardSplitSlaveRMAborted { 153 | ShardSplitSlaveRMAborted {} 154 | } 155 | 156 | fn aborted_plm_inserted(&mut self, _: &mut SlaveContext, _: &mut IO) {} 157 | 158 | fn reconfig_snapshot(&self) -> ShardSplitSlaveRMInner { 159 | self.clone() 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/test/expression_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::ColVal; 2 | use crate::common::{ColBound, SingleBound}; 3 | use crate::expression::{ 4 | col_bound_intersect_interval, construct_cexpr, construct_colvaln, does_col_regions_intersect, 5 | evaluate_binary_op, evaluate_c_expr, CExpr, EvalError, 6 | }; 7 | use crate::query_converter::flatten_val_expr_r; 8 | use crate::sql_ast::{iast, proc}; 9 | use crate::sql_parser::convert_expr; 10 | use crate::test_utils::cn; 11 | use sqlparser::ast; 12 | use sqlparser::dialect::GenericDialect; 13 | use sqlparser::parser::Parser; 14 | use sqlparser::tokenizer::Tokenizer; 15 | use std::collections::BTreeMap; 16 | 17 | // ----------------------------------------------------------------------------------------------- 18 | // Expression Evaluation 19 | // ----------------------------------------------------------------------------------------------- 20 | 21 | /// Utility for converting a raw SQL expression, not containing `Subquery`s or `ColumnRef`s. 
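/// (Every stage below `unwrap`s, so this helper is only suitable for well-formed test input.)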
22 | fn parse_expr(expr_str: &str) -> CExpr { 23 | let dialect = GenericDialect {}; 24 | let mut tokenizer = Tokenizer::new(&dialect, expr_str); 25 | let tokens = tokenizer.tokenize().unwrap(); 26 | let mut parser = Parser::new(tokens, &dialect); 27 | let sql_expr = parser.parse_expr().unwrap(); 28 | let internal_expr = convert_expr(sql_expr).unwrap(); 29 | let val_expr = flatten_val_expr_r(&internal_expr, &mut 0).unwrap(); 30 | construct_cexpr(&val_expr, &mut BTreeMap::new(), &mut Vec::new(), &mut 0).unwrap() 31 | } 32 | 33 | #[test] 34 | fn construct_colvaln_test() { 35 | // Number 36 | assert_eq!(construct_colvaln(iast::Value::Number("42".to_string())), Ok(Some(ColVal::Int(42)))); 37 | assert_eq!(construct_colvaln(iast::Value::Number("".to_string())), Err(EvalError::GenericError)); 38 | assert_eq!( 39 | construct_colvaln(iast::Value::Number("999999999999".to_string())), 40 | Err(EvalError::GenericError) 41 | ); 42 | assert_eq!( 43 | construct_colvaln(iast::Value::Number("1234hello".to_string())), 44 | Err(EvalError::GenericError) 45 | ); 46 | 47 | // String, Boolean, Null 48 | assert_eq!( 49 | construct_colvaln(iast::Value::QuotedString("hello".to_string())), 50 | Ok(Some(ColVal::String("hello".to_string()))) 51 | ); 52 | assert_eq!(construct_colvaln(iast::Value::Boolean(true)), Ok(Some(ColVal::Bool(true)))); 53 | assert_eq!(construct_colvaln(iast::Value::Null), Ok(None)); 54 | } 55 | 56 | #[test] 57 | fn evaluate_unary_op_test() { 58 | // Plus 59 | assert_eq!(evaluate_c_expr(&parse_expr("+10")), Ok(Some(ColVal::Int(10)))); 60 | assert_eq!(evaluate_c_expr(&parse_expr("-10")), Ok(Some(ColVal::Int(-10)))); 61 | // Not 62 | assert_eq!(evaluate_c_expr(&parse_expr("NOT true")), Ok(Some(ColVal::Bool(false)))); 63 | assert_eq!(evaluate_c_expr(&parse_expr("NOT (NULL)")), Ok(None)); 64 | } 65 | 66 | #[test] 67 | fn evaluate_binary_op_test() { 68 | // Divide 69 | assert_eq!(evaluate_c_expr(&parse_expr("20/10")), Ok(Some(ColVal::Int(2)))); 70 | assert_eq!(evaluate_c_expr(&parse_expr("20/15")), Ok(Some(ColVal::Int(1)))); 71 | assert_eq!(evaluate_c_expr(&parse_expr("20/25")), Ok(Some(ColVal::Int(0)))); 72 | assert_eq!(evaluate_c_expr(&parse_expr("-30/20")), Ok(Some(ColVal::Int(-1)))); 73 | assert_eq!(evaluate_c_expr(&parse_expr("10/0")), Err(EvalError::InvalidBinaryOp)); 74 | // OR 75 | assert_eq!(evaluate_c_expr(&parse_expr("true OR NULL")), Ok(Some(ColVal::Bool(true)))); 76 | assert_eq!(evaluate_c_expr(&parse_expr("NULL OR true")), Ok(Some(ColVal::Bool(true)))); 77 | assert_eq!(evaluate_c_expr(&parse_expr("false OR NULL")), Ok(None)); 78 | assert_eq!(evaluate_c_expr(&parse_expr("NULL OR false")), Ok(None)); 79 | assert_eq!(evaluate_c_expr(&parse_expr("NULL OR NULL")), Ok(None)); 80 | assert_eq!(evaluate_c_expr(&parse_expr("false OR false")), Ok(Some(ColVal::Bool(false)))); 81 | assert_eq!(evaluate_c_expr(&parse_expr("false OR 3")), Err(EvalError::InvalidBinaryOp)); 82 | } 83 | 84 | // ----------------------------------------------------------------------------------------------- 85 | // Region Isolation Property Utilities 86 | // ----------------------------------------------------------------------------------------------- 87 | 88 | fn unb() -> SingleBound { 89 | SingleBound::Unbounded 90 | } 91 | 92 | fn inc(val: T) -> SingleBound { 93 | SingleBound::Included(val) 94 | } 95 | 96 | fn exl(val: T) -> SingleBound { 97 | SingleBound::Excluded(val) 98 | } 99 | 100 | /// `ColBound` of `Int` 101 | fn cb(start: SingleBound, end: SingleBound) -> ColBound { 102 | ColBound { start, end } 103 | } 104 | 105 
| #[test] 106 | fn col_bound_intersect_interval_test() { 107 | assert_eq!( 108 | col_bound_intersect_interval(&cb(inc(3), inc(5)), &cb(inc(4), inc(6))), 109 | (&inc(4), &inc(5)) 110 | ); 111 | assert_eq!( 112 | col_bound_intersect_interval(&cb(unb(), exl(5)), &cb(unb(), inc(4))), 113 | (&unb(), &inc(4)) 114 | ); 115 | assert_eq!( 116 | col_bound_intersect_interval(&cb(exl(3), exl(5)), &cb(unb(), inc(3))), 117 | (&exl(3), &inc(3)) 118 | ); 119 | } 120 | 121 | #[test] 122 | fn does_col_regions_intersect_test() { 123 | let cols1 = vec![cn("c1"), cn("c2")]; 124 | let cols2 = vec![cn("c2"), cn("c3")]; 125 | let cols3 = vec![cn("c4")]; 126 | let cols4 = vec![]; 127 | assert!(does_col_regions_intersect(&cols1, &cols2)); 128 | assert!(does_col_regions_intersect(&cols2, &cols1)); 129 | assert!(!does_col_regions_intersect(&cols1, &cols3)); 130 | assert!(!does_col_regions_intersect(&cols3, &cols1)); 131 | assert!(!does_col_regions_intersect(&cols3, &cols4)); 132 | assert!(!does_col_regions_intersect(&cols4, &cols3)); 133 | } 134 | -------------------------------------------------------------------------------- /notes.md: -------------------------------------------------------------------------------- 1 | # Build & Run 2 | 3 | cargo build 4 | 5 | # Docker 6 | 7 | ## Setup 8 | docker network create --subnet=172.20.0.0/16 runiversal-net 9 | 10 | ## Build 11 | docker build -t runiversal -f Dockerfile.init . 12 | docker build -t runiversal . 13 | 14 | ## Local Build and Test 15 | cargo build 16 | 17 | cargo run --release --bin paxos && 18 | cargo run --release --bin paxos2pc_sim && 19 | cargo run --release --bin simtest 20 | cargo run --release --bin simtest -- -i 9 21 | 22 | cargo run --bin client 2>/dev/null 23 | docker run -it runiversal 24 | 25 | ## Run & Stop 26 | docker run --cap-add=NET_ADMIN -it --name=rclient4 --ip 172.20.0.4 --network=runiversal-net runiversal scripts/client -i 172.20.0.4 -e 172.20.0.10 27 | docker run --cap-add=NET_ADMIN -it --name=runiversal10 --ip 172.20.0.10 --network=runiversal-net runiversal scripts/transact -i 172.20.0.10 -t masterbootup 28 | docker run --cap-add=NET_ADMIN -d --name=runiversal15 --ip 172.20.0.15 --network=runiversal-net runiversal scripts/transact -i 172.20.0.15 -t freenode -f newslave -e 172.20.0.10 29 | 30 | docker kill rclient4; docker container rm rclient4; 31 | docker kill runiversal10; docker container rm runiversal10; 32 | docker kill runiversal15; docker container rm runiversal15; 33 | 34 | ## Run Script Setup 35 | To build: 36 | 37 | ./run build 38 | 39 | To start up the system and create an initial client, do: 40 | 41 | ./run start 42 | 43 | To create extra clients and nodes, do: 44 | 45 | ./run new_client 3 10 46 | ./run new_node 25 reconfig 10 47 | ./run new_node 26 newslave 10 48 | ./run new_node 27 newslave 10 49 | ./run new_node 28 newslave 10 50 | ./run new_node 29 newslave 10 51 | ./run new_node 30 newslave 10 52 | ./run new_node 31 reconfig 10 53 | ./run new_node 32 reconfig 10 54 | ./run new_node 33 reconfig 10 55 | ./run new_node 34 reconfig 10 56 | ./run new_node 35 reconfig 10 57 | 58 | master_target 172.20.0.1 59 | slave_target 172.20.0.16 60 | 61 | To clean up everything, do: 62 | 63 | ./run clean 64 | ./run cclean 2 65 | ./run cclean 3 66 | ./run nclean 25 67 | ./run nclean 26 68 | ./run dclean 69 | 70 | ## Demo 71 | 1. Run `./run start` in a terminal pane. This will start the MasterGroup, 2 SlaveGroups, and a client. Run `live` in that view. 72 | 2. Run `./run new_client 3 10` to start a new client. 73 | 3.
Explicitly connect to `172.20.0.15` with `slave_target 172.20.0.15` (the Leadership for the first SlaveGroup). (This is useful for showcasing node deletion later.) 74 | 4. Run the Basic/Advanced Queries. 75 | 5. Kill `172.20.0.15` with `./run nclean 15` (or similar). (This shows reconfiguration.) 76 | 6. Create a free node so that it can replace the one that was just killed: `./run new_node 25 reconfig 10` 77 | 7. Explicitly connect to `172.20.0.17` with `slave_target 172.20.0.17` and then fire some queries (just to show that the new leaders are actually usable). 78 | 8. Create 5 free nodes as `newslave`, e.g. `./run new_node 26 newslave 10`, to show how new SlaveGroups are formed automatically. 79 | 9. Explicitly connect to `172.20.0.26` with `slave_target 172.20.0.26` and then fire some queries (just to show that the new Groups are actually used). 80 | 10. Run the `./run new_node 31 reconfig 10` commands (nodes 31 through 35) to create lots of free nodes. 81 | 11. Kill `172.20.0.26` with `./run nclean 26` (or similar). (This shows reconfiguration, immediately followed by the consumption of a free node.) 82 | 12. Explicitly connect to `172.20.0.29` with `slave_target 172.20.0.29` and then fire some queries (just to show that the new Groups are actually used). 83 | 13. Quit the live system with `q`, and call `./run dclean` to clean up. 84 | 14. Run the simulation tests and describe them. 85 | 86 | ## Basic Queries 87 | ```sql 88 | CREATE TABLE user(id INT PRIMARY KEY); 89 | INSERT INTO user(id) VALUES (1), (2), (3); 90 | SELECT * FROM user; 91 | 92 | ALTER TABLE user ADD name STRING; 93 | UPDATE user SET name = 'henry' WHERE id = 2; 94 | SELECT * FROM user; 95 | 96 | CREATE TABLE inventory(id INT PRIMARY KEY, name VARCHAR); 97 | INSERT INTO inventory(id, name) VALUES (1, 'pasindu'), (2, 'hello'); 98 | SELECT id, name FROM inventory; 99 | 100 | DROP TABLE user; 101 | DROP TABLE inventory; 102 | ``` 103 | 104 | ## Advanced Queries 105 | 106 | ### DDL 107 | ```sql 108 | CREATE TABLE inventory ( 109 | product_id INT PRIMARY KEY, email VARCHAR, 110 | count INT 111 | ); 112 | -- Separate 113 | INSERT INTO inventory (product_id, email, count) 114 | VALUES 115 | (0, 'my_email_0', 15), 116 | (1, 'my_email_1', 25); 117 | -- Separate 118 | CREATE TABLE user ( 119 | email VARCHAR PRIMARY KEY, balance INT, 120 | ); 121 | -- Separate 122 | INSERT INTO user (email, balance) 123 | VALUES 124 | ('my_email_0', 50), 125 | ('my_email_1', 60), 126 | ('my_email_2', 70); 127 | -- Separate 128 | CREATE TABLE product_stock (id INT PRIMARY KEY, product_id INT,); 129 | -- Separate 130 | INSERT INTO product_stock (id, product_id) 131 | VALUES 132 | (0, 0), 133 | (1, 1), 134 | (2, 1); 135 | ``` 136 | ### DQL 137 | 138 | ```sql 139 | -- Join 140 | SELECT U2.email, U1.balance, product_id 141 | FROM user AS U2 JOIN (user AS U1 LEFT JOIN inventory AS I) 142 | ON ((SELECT count(id) 143 | FROM product_stock 144 | WHERE product_id = I.product_id) = 2) 145 | AND U1.balance <= 60 146 | WHERE U2.email = 'my_email_0'; 147 | 148 | -- CTEs 149 | WITH 150 | v1 AS (SELECT email AS e, balance * 2 151 | FROM user 152 | WHERE email = 'my_email_0') 153 | SELECT * 154 | FROM v1; 155 | 156 | -- Multi Stage 157 | UPDATE user 158 | SET balance = balance + 20 159 | WHERE email = ( 160 | SELECT email 161 | FROM inventory 162 | WHERE product_id = 1); 163 | 164 | UPDATE inventory 165 | SET count = count + 5 166 | WHERE email = ( 167 | SELECT email 168 | FROM user 169 | WHERE balance >= 80); 170 | ```
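The `WHERE` predicates above go through the same sqlparser pipeline that `src/test/expression_test.rs` exercises. A minimal standalone sketch of that flow, assuming only the `sqlparser` 0.9 dependency from `Cargo.toml` (the predicate text is taken from the demo query above):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    // Tokenize and parse one of the demo predicates, mirroring `parse_expr`
    // in src/test/expression_test.rs.
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, "balance >= 80");
    let tokens = tokenizer.tokenize().unwrap();
    let mut parser = Parser::new(tokens, &dialect);
    let sql_expr = parser.parse_expr().unwrap();
    // Prints a BinaryOp-shaped AST: left = "balance", op = GtEq, right = 80.
    println!("{:?}", sql_expr);
}
```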
-------------------------------------------------------------------------------- /src/bin/transact/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | mod server; 4 | 5 | #[macro_use] 6 | extern crate runiversal; 7 | 8 | use crate::server::{ProdCoreIOCtx, ProdIOCtx, TIMER_INCREMENT}; 9 | use clap::{arg, App}; 10 | use env_logger::Builder; 11 | use log::LevelFilter; 12 | use rand::{RngCore, SeedableRng}; 13 | use rand_xorshift::XorShiftRng; 14 | use runiversal::common::{ 15 | mk_t, BasicIOCtx, FreeNodeIOCtx, GossipData, InternalMode, MasterIOCtx, NodeIOCtx, SlaveIOCtx, 16 | }; 17 | use runiversal::common::{CoordGroupId, EndpointId, Gen, LeadershipId, PaxosGroupId, SlaveGroupId}; 18 | use runiversal::coord::{CoordConfig, CoordContext, CoordForwardMsg, CoordState}; 19 | use runiversal::free_node_manager::FreeNodeType; 20 | use runiversal::master::{ 21 | FullMasterInput, MasterConfig, MasterContext, MasterState, MasterTimerInput, 22 | }; 23 | use runiversal::message as msg; 24 | use runiversal::message::FreeNodeMessage; 25 | use runiversal::net::{handle_self_conn, send_msg, start_acceptor_thread, SendAction}; 26 | use runiversal::node::{get_prod_configs, GenericInput, NodeConfig, NodeState}; 27 | use runiversal::paxos::PaxosConfig; 28 | use runiversal::slave::{ 29 | FullSlaveInput, SlaveBackMessage, SlaveConfig, SlaveContext, SlaveState, SlaveTimerInput, 30 | }; 31 | use runiversal::tablet::TabletConfig; 32 | use runiversal::test_utils as tu; 33 | use runiversal::test_utils::mk_seed; 34 | use std::collections::{BTreeMap, LinkedList}; 35 | use std::env; 36 | use std::net::{TcpListener, TcpStream}; 37 | use std::sync::mpsc::Sender; 38 | use std::sync::{mpsc, Arc, Mutex}; 39 | use std::thread; 40 | use std::time::{SystemTime, UNIX_EPOCH}; 41 | 42 | // ----------------------------------------------------------------------------------------------- 43 | // Main 44 | // ----------------------------------------------------------------------------------------------- 45 | 46 | fn main() { 47 | // Setup CLI parsing 48 | let matches = App::new("rUniversalDB") 49 | .version("1.0") 50 | .author("Pasindu M. ") 51 | .arg( 52 | arg!(-t --startup_type ) 53 | .required(true) 54 | .help("Indicates if this is an initial Master node ('masterbootup') or not ('freenode').'") 55 | .possible_values(["masterbootup", "freenode"]), 56 | ) 57 | .arg(arg!(-i --ip ).required(true).help("The IP address of the current host.")) 58 | .arg( 59 | arg!(-f --freenode_type ) 60 | .required(false) 61 | .help("The type of freenode this is.") 62 | .possible_values(["newslave", "reconfig"]), 63 | ) 64 | .arg(arg!(-e --entry_ip ).required(false).help( 65 | "The IP address of the current Master \ 66 | Leader. (This is unused if the startup_type is 'masterbootup').", 67 | )) 68 | .get_matches(); 69 | 70 | // Setup logging 71 | Builder::new().filter_level(LevelFilter::max()).init(); 72 | 73 | // Get required arguments 74 | let startup_type = matches.value_of("startup_type").unwrap().to_string(); 75 | let this_ip = matches.value_of("ip").unwrap().to_string(); 76 | 77 | // The mpsc channel for passing data to the Server Thread from all FromNetwork Threads. 78 | let (to_server_sender, to_server_receiver) = mpsc::channel::(); 79 | // Maps the IP addresses to a FromServer Queue, used to send data to Outgoing Connections. 
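// (`send_msg` below looks up the destination `EndpointId` in this map and enqueues a
// `SendAction` on the corresponding queue.)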
80 | let out_conn_map = Arc::new(Mutex::new(BTreeMap::>::new())); 81 | 82 | // Start the Accepting Thread 83 | start_acceptor_thread(&to_server_sender, this_ip.clone()); 84 | 85 | // Create the self-connection 86 | let this_internal_mode = InternalMode::Internal; 87 | let this_eid = EndpointId::new(this_ip, this_internal_mode.clone()); 88 | handle_self_conn(&this_eid, &out_conn_map, &to_server_sender); 89 | 90 | // Run startup_type specific code. 91 | match &startup_type[..] { 92 | "masterbootup" => {} 93 | "freenode" => { 94 | // Parse entry_ip 95 | let master_ip = matches 96 | .value_of("entry_ip") 97 | .expect("entry_ip is required if startup_type is 'freenode'") 98 | .to_string(); 99 | let master_eid = EndpointId::new(master_ip, InternalMode::Internal); 100 | 101 | // Parse freenode_type 102 | let freenode_type = matches 103 | .value_of("freenode_type") 104 | .expect("freenode_type is required if startup_type is 'freenode'"); 105 | 106 | let node_type = match freenode_type { 107 | "newslave" => FreeNodeType::NewSlaveFreeNode, 108 | "reconfig" => FreeNodeType::ReconfigFreeNode, 109 | _ => unreachable!(), 110 | }; 111 | 112 | // Send RegisterFreeNode 113 | send_msg( 114 | &out_conn_map, 115 | &master_eid, 116 | SendAction::new( 117 | msg::NetworkMessage::Master(msg::MasterMessage::FreeNodeAssoc( 118 | msg::FreeNodeAssoc::RegisterFreeNode(msg::RegisterFreeNode { 119 | sender_eid: this_eid.clone(), 120 | node_type, 121 | }), 122 | )), 123 | None, 124 | ), 125 | &this_internal_mode, 126 | ); 127 | } 128 | _ => unreachable!(), 129 | } 130 | 131 | let mut io_ctx = ProdIOCtx { 132 | rand: XorShiftRng::from_entropy(), 133 | out_conn_map, 134 | exited: false, 135 | to_top: to_server_sender, 136 | tablet_map: Default::default(), 137 | coord_map: Default::default(), 138 | tasks: Arc::new(Mutex::new(Default::default())), 139 | }; 140 | io_ctx.start(); 141 | 142 | let mut node = NodeState::new(this_eid, get_prod_configs()); 143 | node.bootstrap(&mut io_ctx); 144 | 145 | // Enter the main loop forever.
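// Each iteration blocks on `to_server_receiver` until a `GenericInput` arrives (from a
// FromNetwork thread, the self-connection, or a `ProdIOCtx` task), then hands it to
// `NodeState::process_input`.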
146 | loop { 147 | let generic_input = to_server_receiver.recv().unwrap(); 148 | node.process_input(&mut io_ctx, generic_input); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/shard_split_tablet_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{cur_timestamp, QueryId, Timestamp}; 2 | use crate::common::{mk_t, BasicIOCtx}; 3 | use crate::common::{ 4 | ShardingGen, SlaveGroupId, TNodePath, TablePath, TabletGroupId, TabletKeyRange, 5 | }; 6 | use crate::message as msg; 7 | use crate::server::ServerContextBase; 8 | use crate::shard_split_tm_es::{ 9 | STRange, ShardNodePath, ShardSplitClosed, ShardSplitCommit, ShardSplitPrepare, 10 | ShardSplitPrepared, ShardSplitTMPayloadTypes, 11 | }; 12 | use crate::stmpaxos2pc_rm::{ 13 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 14 | STMPaxos2PCRMOuter, 15 | }; 16 | use crate::stmpaxos2pc_tm::TMMessage; 17 | use crate::tablet::{TabletContext, TabletPLm}; 18 | use serde::{Deserialize, Serialize}; 19 | use std::cmp::max; 20 | 21 | // ----------------------------------------------------------------------------------------------- 22 | // Payloads 23 | // ----------------------------------------------------------------------------------------------- 24 | 25 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 26 | pub struct ShardSplitTabletRMPayloadTypes {} 27 | 28 | impl RMPayloadTypes for ShardSplitTabletRMPayloadTypes { 29 | type TM = ShardSplitTMPayloadTypes; 30 | type RMContext = TabletContext; 31 | 32 | // Actions. This contains the target Tablet to create. 33 | type RMCommitActionData = STRange; 34 | 35 | // RM PLm 36 | type RMPreparedPLm = ShardSplitTabletRMPrepared; 37 | type RMCommittedPLm = ShardSplitTabletRMCommitted; 38 | type RMAbortedPLm = ShardSplitTabletRMAborted; 39 | } 40 | 41 | // RM PLm 42 | 43 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 44 | pub struct ShardSplitTabletRMPrepared {} 45 | 46 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 47 | pub struct ShardSplitTabletRMCommitted { 48 | pub sharding_gen: ShardingGen, 49 | pub target_old: STRange, 50 | pub target_new: STRange, 51 | } 52 | 53 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 54 | pub struct ShardSplitTabletRMAborted {} 55 | 56 | // ----------------------------------------------------------------------------------------------- 57 | // RMServerContext ShardSplitTablet 58 | // ----------------------------------------------------------------------------------------------- 59 | 60 | impl RMServerContext for TabletContext { 61 | fn push_plm(&mut self, plm: RMPLm) { 62 | self.tablet_bundle.push(TabletPLm::ShardSplit(plm)); 63 | } 64 | 65 | fn send_to_tm( 66 | &mut self, 67 | io_ctx: &mut IO, 68 | _: &(), 69 | msg: TMMessage, 70 | ) { 71 | self.send_to_master(io_ctx, msg::MasterRemotePayload::ShardSplit(msg)); 72 | } 73 | 74 | fn mk_node_path(&self) -> ShardNodePath { 75 | ShardNodePath::Tablet(TabletContext::mk_node_path(self)) 76 | } 77 | 78 | fn is_leader(&self) -> bool { 79 | TabletContext::is_leader(self) 80 | } 81 | } 82 | 83 | // ----------------------------------------------------------------------------------------------- 84 | // ShardSplitTabletRMES Implementation 85 | // ----------------------------------------------------------------------------------------------- 86 | 87 | #[derive(Serialize, Deserialize, Debug, Clone, 
PartialEq, Eq)] 88 | pub struct ShardSplitTabletRMInner {} 89 | 90 | pub type ShardSplitTabletRMES = 91 | STMPaxos2PCRMOuter; 92 | pub type ShardSplitTabletRMAction = STMPaxos2PCRMAction; 93 | 94 | impl STMPaxos2PCRMInner for ShardSplitTabletRMInner { 95 | fn new( 96 | _: &mut TabletContext, 97 | _: &mut IO, 98 | _: ShardSplitPrepare, 99 | ) -> ShardSplitTabletRMInner { 100 | ShardSplitTabletRMInner {} 101 | } 102 | 103 | fn new_follower( 104 | _: &mut TabletContext, 105 | _: &mut IO, 106 | _: ShardSplitTabletRMPrepared, 107 | ) -> ShardSplitTabletRMInner { 108 | ShardSplitTabletRMInner {} 109 | } 110 | 111 | fn mk_closed() -> ShardSplitClosed { 112 | ShardSplitClosed {} 113 | } 114 | 115 | fn mk_prepared_plm( 116 | &mut self, 117 | ctx: &mut TabletContext, 118 | _: &mut IO, 119 | ) -> Option { 120 | if ctx.pause_ddl() { 121 | None 122 | } else { 123 | Some(ShardSplitTabletRMPrepared {}) 124 | } 125 | } 126 | 127 | fn prepared_plm_inserted( 128 | &mut self, 129 | _: &mut TabletContext, 130 | _: &mut IO, 131 | ) -> ShardSplitPrepared { 132 | ShardSplitPrepared {} 133 | } 134 | 135 | fn mk_committed_plm( 136 | &mut self, 137 | _: &mut TabletContext, 138 | _: &mut IO, 139 | commit: &ShardSplitCommit, 140 | ) -> ShardSplitTabletRMCommitted { 141 | ShardSplitTabletRMCommitted { 142 | sharding_gen: commit.sharding_gen.clone(), 143 | target_old: commit.target_old.clone(), 144 | target_new: commit.target_new.clone(), 145 | } 146 | } 147 | 148 | fn committed_plm_inserted( 149 | &mut self, 150 | ctx: &mut TabletContext, 151 | _: &mut IO, 152 | plm: &RMCommittedPLm, 153 | ) -> STRange { 154 | // Update Sharding data. 155 | ctx.this_sharding_gen = plm.payload.sharding_gen.clone(); 156 | ctx.this_tablet_key_range = plm.payload.target_old.range.clone(); 157 | plm.payload.target_new.clone() 158 | } 159 | 160 | fn mk_aborted_plm( 161 | &mut self, 162 | _: &mut TabletContext, 163 | _: &mut IO, 164 | ) -> ShardSplitTabletRMAborted { 165 | ShardSplitTabletRMAborted {} 166 | } 167 | 168 | fn aborted_plm_inserted(&mut self, _: &mut TabletContext, _: &mut IO) {} 169 | 170 | fn reconfig_snapshot(&self) -> ShardSplitTabletRMInner { 171 | self.clone() 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/drop_table_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::TNodePath; 2 | use crate::common::{cur_timestamp, Timestamp}; 3 | use crate::common::{mk_t, BasicIOCtx}; 4 | use crate::drop_table_tm_es::{ 5 | DropTableClosed, DropTableCommit, DropTablePrepare, DropTablePrepared, DropTableTMPayloadTypes, 6 | }; 7 | use crate::message as msg; 8 | use crate::server::ServerContextBase; 9 | use crate::stmpaxos2pc_rm::{ 10 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 11 | STMPaxos2PCRMOuter, 12 | }; 13 | use crate::stmpaxos2pc_tm::TMMessage; 14 | use crate::tablet::{TabletContext, TabletPLm}; 15 | use serde::{Deserialize, Serialize}; 16 | use std::cmp::max; 17 | 18 | // ----------------------------------------------------------------------------------------------- 19 | // Payloads 20 | // ----------------------------------------------------------------------------------------------- 21 | 22 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 23 | pub struct DropTableRMPayloadTypes {} 24 | 25 | impl RMPayloadTypes for DropTableRMPayloadTypes { 26 | type TM = DropTableTMPayloadTypes; 27 | type RMContext = TabletContext; 28 | 29 | // Actions 30 
| type RMCommitActionData = Timestamp; 31 | 32 | // RM PLm 33 | type RMPreparedPLm = DropTableRMPrepared; 34 | type RMCommittedPLm = DropTableRMCommitted; 35 | type RMAbortedPLm = DropTableRMAborted; 36 | } 37 | 38 | // RM PLm 39 | 40 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 41 | pub struct DropTableRMPrepared { 42 | pub timestamp: Timestamp, 43 | } 44 | 45 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 46 | pub struct DropTableRMCommitted { 47 | pub timestamp: Timestamp, 48 | } 49 | 50 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 51 | pub struct DropTableRMAborted {} 52 | 53 | // ----------------------------------------------------------------------------------------------- 54 | // RMServerContext DropTable 55 | // ----------------------------------------------------------------------------------------------- 56 | 57 | impl RMServerContext for TabletContext { 58 | fn push_plm(&mut self, plm: RMPLm) { 59 | self.tablet_bundle.push(TabletPLm::DropTable(plm)); 60 | } 61 | 62 | fn send_to_tm( 63 | &mut self, 64 | io_ctx: &mut IO, 65 | _: &(), 66 | msg: TMMessage, 67 | ) { 68 | self.send_to_master(io_ctx, msg::MasterRemotePayload::DropTable(msg)); 69 | } 70 | 71 | fn mk_node_path(&self) -> TNodePath { 72 | TabletContext::mk_node_path(self) 73 | } 74 | 75 | fn is_leader(&self) -> bool { 76 | TabletContext::is_leader(self) 77 | } 78 | } 79 | 80 | // ----------------------------------------------------------------------------------------------- 81 | // DropTableES Implementation 82 | // ----------------------------------------------------------------------------------------------- 83 | 84 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 85 | pub struct DropTableRMInner { 86 | pub prepared_timestamp: Timestamp, 87 | } 88 | 89 | pub type DropTableRMES = STMPaxos2PCRMOuter; 90 | pub type DropTableRMAction = STMPaxos2PCRMAction; 91 | 92 | impl STMPaxos2PCRMInner for DropTableRMInner { 93 | fn new( 94 | ctx: &mut TabletContext, 95 | io_ctx: &mut IO, 96 | _: DropTablePrepare, 97 | ) -> DropTableRMInner { 98 | // Construct the `preparing_timestamp` 99 | let mut timestamp = cur_timestamp(io_ctx, ctx.tablet_config.timestamp_suffix_divisor); 100 | timestamp = max(timestamp, ctx.table_schema.val_cols.get_latest_lat()); 101 | timestamp = max(timestamp, ctx.presence_timestamp.clone()); 102 | for (_, req) in ctx.waiting_locked_cols.iter().chain(ctx.inserting_locked_cols.iter()) { 103 | timestamp = max(timestamp, req.timestamp.clone()); 104 | } 105 | timestamp = timestamp.add(mk_t(1)); 106 | 107 | DropTableRMInner { prepared_timestamp: timestamp } 108 | } 109 | 110 | fn new_follower( 111 | _: &mut TabletContext, 112 | _: &mut IO, 113 | payload: DropTableRMPrepared, 114 | ) -> DropTableRMInner { 115 | DropTableRMInner { prepared_timestamp: payload.timestamp } 116 | } 117 | 118 | fn mk_closed() -> DropTableClosed { 119 | DropTableClosed {} 120 | } 121 | 122 | fn mk_prepared_plm( 123 | &mut self, 124 | ctx: &mut TabletContext, 125 | _: &mut IO, 126 | ) -> Option { 127 | if ctx.pause_ddl() { 128 | None 129 | } else { 130 | Some(DropTableRMPrepared { timestamp: self.prepared_timestamp.clone() }) 131 | } 132 | } 133 | 134 | fn prepared_plm_inserted( 135 | &mut self, 136 | _: &mut TabletContext, 137 | _: &mut IO, 138 | ) -> DropTablePrepared { 139 | DropTablePrepared { timestamp: self.prepared_timestamp.clone() } 140 | } 141 | 142 | fn mk_committed_plm( 143 | &mut self, 144 | _: &mut TabletContext, 145 | _: &mut IO, 146 | commit: 
&DropTableCommit, 147 | ) -> DropTableRMCommitted { 148 | DropTableRMCommitted { timestamp: commit.timestamp.clone() } 149 | } 150 | 151 | /// Return the commit `Timestamp`, which serves as this Tablet's `RMCommitActionData`. 152 | fn committed_plm_inserted( 153 | &mut self, 154 | _: &mut TabletContext, 155 | _: &mut IO, 156 | committed_plm: &RMCommittedPLm, 157 | ) -> Timestamp { 158 | committed_plm.payload.timestamp.clone() 159 | } 160 | 161 | fn mk_aborted_plm( 162 | &mut self, 163 | _: &mut TabletContext, 164 | _: &mut IO, 165 | ) -> DropTableRMAborted { 166 | DropTableRMAborted {} 167 | } 168 | 169 | fn aborted_plm_inserted(&mut self, _: &mut TabletContext, _: &mut IO) {} 170 | 171 | fn reconfig_snapshot(&self) -> DropTableRMInner { 172 | self.clone() 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/ms_table_read_es.rs: -------------------------------------------------------------------------------- 1 | use crate::col_usage::{col_collecting_cb, QueryIterator}; 2 | use crate::common::{mk_qid, ColName, CoreIOCtx, OrigP, QueryESResult, WriteRegion}; 3 | use crate::common::{ 4 | ColType, ColVal, ColValN, ContextRow, PrimaryKey, QueryId, TablePath, TableView, TransTableName, 5 | }; 6 | use crate::expression::{is_true, EvalError}; 7 | use crate::gr_query_es::{GRQueryConstructorView, GRQueryES}; 8 | use crate::message as msg; 9 | use crate::ms_table_es::{GeneralQueryES, MSTableES, SqlQueryInner}; 10 | use crate::server::{mk_eval_error, ContextConstructor}; 11 | use crate::sql_ast::proc; 12 | 13 | use crate::storage::{GenericTable, MSStorageView}; 14 | use crate::table_read_es::{compute_read_region, fully_evaluate_select}; 15 | use crate::tablet::{ 16 | compute_subqueries, MSQueryES, RequestedReadProtected, StorageLocalTable, TPESAction, 17 | TabletContext, 18 | }; 19 | use std::collections::BTreeSet; 20 | use std::iter::FromIterator; 21 | use std::ops::Deref; 22 | 23 | // ----------------------------------------------------------------------------------------------- 24 | // MSTableReadES 25 | // ----------------------------------------------------------------------------------------------- 26 | 27 | pub type MSTableReadES = MSTableES; 28 | 29 | #[derive(Debug)] 30 | pub struct SelectInner { 31 | sql_query: proc::TableSelect, 32 | } 33 | 34 | impl SelectInner { 35 | pub fn new(sql_query: proc::TableSelect) -> Self { 36 | SelectInner { sql_query } 37 | } 38 | } 39 | 40 | impl SqlQueryInner for SelectInner { 41 | /// This function should only be called if we know `from` is not a `JoinNode`. 42 | fn table_path(&self) -> &TablePath { 43 | &self.sql_query.from.table_path 44 | } 45 | 46 | fn request_region_locks( 47 | &mut self, 48 | ctx: &mut TabletContext, 49 | io_ctx: &mut IO, 50 | es: &GeneralQueryES, 51 | ) -> Result { 52 | // Get extra columns that must be in the region due to `SELECT *`. 53 | let mut extra_cols = Vec::::new(); 54 | for item in &self.sql_query.projection { 55 | match item { 56 | proc::SelectItem::ExprWithAlias { .. } => {} 57 | proc::SelectItem::Wildcard { .. } => { 58 | // Choose all columns in the Table, and break out early 59 | // since there is no reason to continue. 60 | extra_cols = ctx.table_schema.get_schema_val_cols_static(&es.timestamp); 61 | break; 62 | } 63 | } 64 | } 65 | 66 | // Collect all `ColNames` of this table that all `ColumnRefs` refer to.
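// (`QueryIterator` walks the `TableSelect` AST, and `col_collecting_cb` records every column
// referenced under this table's alias, so the ReadRegion computed below covers them all.)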
67 | let mut safe_present_cols = Vec::::new(); 68 | QueryIterator::new().iterate_table_select( 69 | &mut col_collecting_cb(&self.sql_query.from.alias, &mut safe_present_cols), 70 | &self.sql_query, 71 | ); 72 | 73 | // Compute the ReadRegion 74 | let read_region = compute_read_region( 75 | &ctx.table_schema.key_cols, 76 | &ctx.this_tablet_key_range, 77 | &es.context, 78 | &self.sql_query.selection, 79 | &self.sql_query.from.alias, 80 | safe_present_cols, 81 | extra_cols, 82 | ); 83 | 84 | // Move the MSTableReadES to the Pending state with the given ReadRegion. 85 | let protect_qid = mk_qid(io_ctx.rand()); 86 | 87 | // Add a ReadRegion to the m_waiting_read_protected. 88 | let verifying = ctx.verifying_writes.get_mut(&es.timestamp).unwrap(); 89 | verifying.m_waiting_read_protected.insert(RequestedReadProtected { 90 | orig_p: OrigP::new(es.query_id.clone()), 91 | query_id: protect_qid.clone(), 92 | read_region, 93 | }); 94 | 95 | Ok(protect_qid) 96 | } 97 | 98 | fn compute_subqueries( 99 | &mut self, 100 | ctx: &mut TabletContext, 101 | io_ctx: &mut IO, 102 | es: &GeneralQueryES, 103 | ms_query_es: &mut MSQueryES, 104 | ) -> Vec { 105 | compute_subqueries( 106 | GRQueryConstructorView { 107 | root_query_path: &es.root_query_path, 108 | timestamp: &es.timestamp, 109 | sql_query: &self.sql_query, 110 | query_plan: &es.query_plan, 111 | query_id: &es.query_id, 112 | context: &es.context, 113 | }, 114 | io_ctx.rand(), 115 | StorageLocalTable::new( 116 | &ctx.table_schema, 117 | &es.timestamp, 118 | &self.sql_query.from, 119 | &ctx.this_tablet_key_range, 120 | &self.sql_query.selection, 121 | MSStorageView::new( 122 | &ctx.storage, 123 | &ctx.table_schema, 124 | &ms_query_es.update_views, 125 | es.tier.clone(), 126 | ), 127 | ), 128 | ) 129 | } 130 | 131 | fn finish( 132 | &mut self, 133 | ctx: &mut TabletContext, 134 | _: &mut IO, 135 | es: &GeneralQueryES, 136 | (children, subquery_results): ( 137 | Vec<(Vec, Vec)>, 138 | Vec>, 139 | ), 140 | ms_query_es: &mut MSQueryES, 141 | ) -> Option { 142 | // Create the ContextConstructor. 143 | let context_constructor = ContextConstructor::new( 144 | es.context.context_schema.clone(), 145 | StorageLocalTable::new( 146 | &ctx.table_schema, 147 | &es.timestamp, 148 | &self.sql_query.from, 149 | &ctx.this_tablet_key_range, 150 | &self.sql_query.selection, 151 | MSStorageView::new( 152 | &ctx.storage, 153 | &ctx.table_schema, 154 | &ms_query_es.update_views, 155 | es.tier.clone(), 156 | ), 157 | ), 158 | children, 159 | ); 160 | 161 | // Evaluate 162 | let eval_res = fully_evaluate_select( 163 | context_constructor, 164 | &es.context.deref(), 165 | subquery_results, 166 | &self.sql_query, 167 | ); 168 | 169 | match eval_res { 170 | Ok(res_table_views) => { 171 | // Signal Success and return the data. 
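// (The returned `QueryESResult` carries the evaluated TableViews together with the
// accumulated `new_rms`.)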
172 | Some(TPESAction::Success(QueryESResult { 173 | result: res_table_views, 174 | new_rms: es.new_rms.iter().cloned().collect(), 175 | })) 176 | } 177 | Err(eval_error) => Some(TPESAction::QueryError(mk_eval_error(eval_error))), 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/alter_table_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::alter_table_tm_es::{ 2 | AlterTableClosed, AlterTableCommit, AlterTablePrepare, AlterTablePrepared, 3 | AlterTableTMPayloadTypes, 4 | }; 5 | use crate::common::TNodePath; 6 | use crate::common::{cur_timestamp, mk_t, BasicIOCtx, Timestamp}; 7 | use crate::message as msg; 8 | use crate::server::ServerContextBase; 9 | use crate::sql_ast::proc; 10 | use crate::stmpaxos2pc_rm::{ 11 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 12 | STMPaxos2PCRMOuter, 13 | }; 14 | use crate::stmpaxos2pc_tm::TMMessage; 15 | use crate::tablet::{TabletContext, TabletPLm}; 16 | use serde::{Deserialize, Serialize}; 17 | use std::cmp::max; 18 | 19 | // ----------------------------------------------------------------------------------------------- 20 | // Payloads 21 | // ----------------------------------------------------------------------------------------------- 22 | 23 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 24 | pub struct AlterTableRMPayloadTypes {} 25 | 26 | impl RMPayloadTypes for AlterTableRMPayloadTypes { 27 | type TM = AlterTableTMPayloadTypes; 28 | type RMContext = TabletContext; 29 | 30 | // Actions 31 | type RMCommitActionData = (); 32 | 33 | // RM PLm 34 | type RMPreparedPLm = AlterTableRMPrepared; 35 | type RMCommittedPLm = AlterTableRMCommitted; 36 | type RMAbortedPLm = AlterTableRMAborted; 37 | } 38 | 39 | // RM PLm 40 | 41 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 42 | pub struct AlterTableRMPrepared { 43 | pub alter_op: proc::AlterOp, 44 | pub timestamp: Timestamp, 45 | } 46 | 47 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 48 | pub struct AlterTableRMCommitted { 49 | pub timestamp: Timestamp, 50 | } 51 | 52 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 53 | pub struct AlterTableRMAborted {} 54 | 55 | // ----------------------------------------------------------------------------------------------- 56 | // RMServerContext AlterTable 57 | // ----------------------------------------------------------------------------------------------- 58 | 59 | impl RMServerContext for TabletContext { 60 | fn push_plm(&mut self, plm: RMPLm) { 61 | self.tablet_bundle.push(TabletPLm::AlterTable(plm)); 62 | } 63 | 64 | fn send_to_tm( 65 | &mut self, 66 | io_ctx: &mut IO, 67 | _: &(), 68 | msg: TMMessage, 69 | ) { 70 | self.send_to_master(io_ctx, msg::MasterRemotePayload::AlterTable(msg)); 71 | } 72 | 73 | fn mk_node_path(&self) -> TNodePath { 74 | TabletContext::mk_node_path(self) 75 | } 76 | 77 | fn is_leader(&self) -> bool { 78 | TabletContext::is_leader(self) 79 | } 80 | } 81 | 82 | // ----------------------------------------------------------------------------------------------- 83 | // AlterTableES Implementation 84 | // ----------------------------------------------------------------------------------------------- 85 | 86 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 87 | pub struct AlterTableRMInner { 88 | pub alter_op: proc::AlterOp, 89 | pub prepared_timestamp: Timestamp, 90 | } 91 | 92 | pub 
type AlterTableRMES = STMPaxos2PCRMOuter; 93 | pub type AlterTableRMAction = STMPaxos2PCRMAction; 94 | 95 | impl STMPaxos2PCRMInner for AlterTableRMInner { 96 | fn new( 97 | ctx: &mut TabletContext, 98 | io_ctx: &mut IO, 99 | payload: AlterTablePrepare, 100 | ) -> AlterTableRMInner { 101 | // Construct the `preparing_timestamp` 102 | let mut timestamp = cur_timestamp(io_ctx, ctx.tablet_config.timestamp_suffix_divisor); 103 | let col_name = &payload.alter_op.col_name; 104 | timestamp = max(timestamp, ctx.table_schema.val_cols.get_lat(col_name)); 105 | for (_, req) in ctx.waiting_locked_cols.iter().chain(ctx.inserting_locked_cols.iter()) { 106 | if req.cols.contains(col_name) { 107 | timestamp = max(timestamp, req.timestamp.clone()); 108 | } 109 | } 110 | timestamp = timestamp.add(mk_t(1)); 111 | 112 | AlterTableRMInner { alter_op: payload.alter_op, prepared_timestamp: timestamp } 113 | } 114 | 115 | fn new_follower( 116 | _: &mut TabletContext, 117 | _: &mut IO, 118 | payload: AlterTableRMPrepared, 119 | ) -> AlterTableRMInner { 120 | AlterTableRMInner { alter_op: payload.alter_op, prepared_timestamp: payload.timestamp } 121 | } 122 | 123 | fn mk_closed() -> AlterTableClosed { 124 | AlterTableClosed {} 125 | } 126 | 127 | fn mk_prepared_plm( 128 | &mut self, 129 | ctx: &mut TabletContext, 130 | _: &mut IO, 131 | ) -> Option { 132 | if ctx.pause_ddl() { 133 | None 134 | } else { 135 | Some(AlterTableRMPrepared { 136 | alter_op: self.alter_op.clone(), 137 | timestamp: self.prepared_timestamp.clone(), 138 | }) 139 | } 140 | } 141 | 142 | fn prepared_plm_inserted( 143 | &mut self, 144 | _: &mut TabletContext, 145 | _: &mut IO, 146 | ) -> AlterTablePrepared { 147 | AlterTablePrepared { timestamp: self.prepared_timestamp.clone() } 148 | } 149 | 150 | fn mk_committed_plm( 151 | &mut self, 152 | _: &mut TabletContext, 153 | _: &mut IO, 154 | commit: &AlterTableCommit, 155 | ) -> AlterTableRMCommitted { 156 | AlterTableRMCommitted { timestamp: commit.timestamp.clone() } 157 | } 158 | 159 | /// Apply the `alter_op` to this Tablet's `table_schema`. 
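/// The write lands in the multiversioned `val_cols` map at the commit `timestamp`
/// (see `multiversion_map.rs`).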
160 | fn committed_plm_inserted( 161 | &mut self, 162 | ctx: &mut TabletContext, 163 | _: &mut IO, 164 | committed_plm: &RMCommittedPLm, 165 | ) { 166 | ctx.table_schema.val_cols.write( 167 | &self.alter_op.col_name, 168 | self.alter_op.maybe_col_type.clone(), 169 | committed_plm.payload.timestamp.clone(), 170 | ); 171 | } 172 | 173 | fn mk_aborted_plm( 174 | &mut self, 175 | _: &mut TabletContext, 176 | _: &mut IO, 177 | ) -> AlterTableRMAborted { 178 | AlterTableRMAborted {} 179 | } 180 | 181 | fn aborted_plm_inserted(&mut self, _: &mut TabletContext, _: &mut IO) {} 182 | 183 | fn reconfig_snapshot(&self) -> AlterTableRMInner { 184 | self.clone() 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/tests_paxos2pc.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::simple_tm_es::SimplePayloadTypes; 3 | use crate::simulation::Simulation; 4 | use crate::slave::SlavePLm; 5 | use rand::{RngCore, SeedableRng}; 6 | use rand_xorshift::XorShiftRng; 7 | use runiversal::common::mk_qid; 8 | use runiversal::common::{EndpointId, SlaveGroupId}; 9 | use runiversal::paxos2pc_tm::RMPLm; 10 | use runiversal::simulation_utils::{mk_client_eid, mk_slave_eid}; 11 | use runiversal::slave::SlaveConfig; 12 | use runiversal::test_utils::mk_sid; 13 | use std::collections::BTreeMap; 14 | 15 | enum CompletionResult { 16 | Invalid, 17 | SuccessfullyCommitted, 18 | SuccessfullyAborted, 19 | SuccessfullyTrivial, 20 | } 21 | 22 | /// This checks for 2PC Completion. Recall that 2PC Completion is where every 23 | /// RM either Commits or Aborts. 24 | fn check_completion(sim: &Simulation, rms: &Vec) -> CompletionResult { 25 | let mut rms_plms = BTreeMap::>>::new(); 26 | 27 | // Add RMPLms 28 | for rm in rms { 29 | rms_plms.insert(rm.clone(), vec![]); 30 | for pl_entry in sim.global_pls.get(rm).unwrap() { 31 | if let msg::PLEntry::Bundle(bundle) = pl_entry { 32 | for plm in &bundle.plms { 33 | if let SlavePLm::SimpleRM(rm_plm) = plm { 34 | rms_plms.get_mut(rm).unwrap().push(rm_plm.clone()); 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | // Check if the execution was trivial 42 | let mut is_trivial = true; 43 | for (_, rm_plms) in &rms_plms { 44 | match rm_plms[..] { 45 | [] => continue, 46 | _ => { 47 | is_trivial = false; 48 | break; 49 | } 50 | } 51 | } 52 | if is_trivial { 53 | return CompletionResult::SuccessfullyTrivial; 54 | } 55 | 56 | // Check if the execution committed 57 | let mut did_commit = true; 58 | for (_, rm_plms) in &rms_plms { 59 | match rm_plms[..] { 60 | [RMPLm::Prepared(_), RMPLm::Committed(_)] => continue, 61 | _ => { 62 | did_commit = false; 63 | break; 64 | } 65 | } 66 | } 67 | if did_commit { 68 | return CompletionResult::SuccessfullyCommitted; 69 | } 70 | 71 | // Check if the execution aborted 72 | let mut did_abort = true; 73 | for (_, rm_plms) in &rms_plms { 74 | match rm_plms[..] { 75 | [] => continue, 76 | [RMPLm::Prepared(_), RMPLm::Aborted(_)] => continue, 77 | _ => { 78 | did_abort = false; 79 | break; 80 | } 81 | } 82 | } 83 | if did_abort { 84 | return CompletionResult::SuccessfullyAborted; 85 | } 86 | 87 | // Otherwise, this was an invalid execution. 88 | return CompletionResult::Invalid; 89 | } 90 | 91 | pub fn test_single(test_num: u32, seed: [u8; 16]) { 92 | // Setup Simulation 93 | 94 | // Create 5 SlaveGroups, each with 3 nodes. 
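// (15 `EndpointId`s total; `mk_slave_eid` assigns them contiguously, 3 per group.)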
95 | const NUM_PAXOS_GROUPS: u32 = 5; 96 | const NUM_PAXOS_NODES: u32 = 3; 97 | let mut slave_address_config = BTreeMap::>::new(); 98 | for i in 0..NUM_PAXOS_GROUPS { 99 | let mut eids = Vec::::new(); 100 | for j in 0..NUM_PAXOS_NODES { 101 | eids.push(mk_slave_eid(i * NUM_PAXOS_NODES + j)); 102 | } 103 | slave_address_config.insert(SlaveGroupId(format!("s{}", i)), eids); 104 | } 105 | 106 | let client_eid = mk_client_eid(0); 107 | 108 | let slave_config = SlaveConfig { 109 | timestamp_suffix_divisor: 1, 110 | remote_leader_changed_period_ms: 5, 111 | // The below are not needed 112 | failure_detector_period_ms: 0, 113 | check_unconfirmed_eids_period_ms: 0, 114 | }; 115 | let mut sim = Simulation::new(seed, 1, slave_config, slave_address_config.clone()); 116 | 117 | // Run the simulation to warm it up. Activity here consists of Leadership changes, 118 | // Gossip, Paxos Insertions, etc. 119 | sim.simulate_n_ms(100); 120 | 121 | // Randomly construct a SimpleRequest and send it to a random Slave 122 | // to perform Simple Paxos2PC. 123 | 124 | // Take s0 to be the TM. 125 | let tm = mk_sid("s0"); 126 | let tm_eid = sim.leader_map.get(&tm).unwrap().eid.clone(); 127 | 128 | // Randomly choose RMs, where none of them are the TM. 129 | // Recall that Paxos2PC requires at least one. 130 | let num_rms = (sim.rand.next_u32() % (NUM_PAXOS_GROUPS - 1)) + 1; 131 | let mut all_slaves: Vec = slave_address_config.keys().cloned().collect(); 132 | all_slaves.remove(all_slaves.iter().position(|i| i == &tm).unwrap()); 133 | let mut rms = Vec::::new(); 134 | for _ in 0..num_rms { 135 | let r = sim.rand.next_u32() % all_slaves.len() as u32; 136 | rms.push(all_slaves.remove(r as usize)); 137 | } 138 | 139 | let request = msg::SimpleRequest { query_id: mk_qid(&mut sim.rand), rms: rms.clone() }; 140 | sim.add_msg( 141 | msg::NetworkMessage::Slave(msg::SlaveMessage::ExternalMessage( 142 | msg::ExternalMessage::SimpleRequest(request), 143 | )), 144 | &client_eid, 145 | &tm_eid, 146 | ); 147 | 148 | /// The number of iterations we simulate for, where we check 2PC 149 | /// consistency after each iteration. 150 | const NUM_CONSISTENCY_ITERATIONS: u32 = 5; 151 | /// Number of milliseconds per iteration. 152 | const MS_PER_ITERATION: u32 = 5; 153 | 154 | // Continue simulating, checking 2PC Consistency after each round 155 | sim.simulate_n_ms(NUM_CONSISTENCY_ITERATIONS * MS_PER_ITERATION); 156 | 157 | // Finally, run the Simulation in Cooldown Mode and test for Paxos2PC 158 | // completion at end. "Cooldown Mode" is defined to be where no Leadership changes occur. 159 | sim.sim_params.pl_entry_delivery_prob = 70; 160 | sim.sim_params.global_pl_insertion_prob = 30; 161 | 162 | /// Here, "cooldown ms" is the number of milliseconds within which we expect the Paxos2PC to finish, 163 | /// given that no leadership changes happen during this time. Although this can be calculated, 164 | /// we simply guess a sensible number for expedience. 165 | const EXPECTED_COOLDOWN_MS: u32 = 500; 166 | 167 | sim.simulate_n_ms(EXPECTED_COOLDOWN_MS); 168 | 169 | match check_completion(&mut sim, &rms) { 170 | CompletionResult::Invalid => { 171 | println!( 172 | "{:?}. Paxos2PC Test Failed: Invalid PLs after cooldown. Seed: {:?}", 173 | test_num, seed 174 | ); 175 | panic!() 176 | } 177 | CompletionResult::SuccessfullyCommitted => { 178 | println!("{:?}. Paxos2PC SuccessfullyCommitted!", test_num); 179 | } 180 | CompletionResult::SuccessfullyAborted => { 181 | println!("{:?}. 
Paxos2PC SuccessfullyAborted!", test_num); 182 | } 183 | CompletionResult::SuccessfullyTrivial => { 184 | println!("{:?}. Paxos2PC SuccessfullyTrivial!", test_num); 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/network_driver.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{EndpointId, Gen, LeadershipId, PaxosGroupId}; 2 | use crate::common::{LeaderMap, RemoteLeaderChangedPLm, VersionedValue}; 3 | use crate::message as msg; 4 | use std::collections::BTreeMap; 5 | 6 | pub struct NetworkDriverContext<'a> { 7 | pub this_gid: &'a PaxosGroupId, 8 | pub this_eid: &'a EndpointId, 9 | pub leader_map: &'a VersionedValue, 10 | pub remote_leader_changes: &'a mut Vec, 11 | } 12 | 13 | // TODO: amend the proof for when PaxosGroupIds get removed. It can go something like: 14 | // "Safety: a `remote_message` with PaxosGroupId outside of `network_buffer` will never 15 | // come in unless `leader_map` comes in containing the new PaxosGroupIds. Liveness: a 16 | // buffered messages, either `deliver_blocked_messages` is called with high enough `lid`, 17 | // or the `gid` is removed. 18 | 19 | #[derive(Debug)] 20 | pub struct NetworkDriver { 21 | /// This buffers the NetworkMessages until the corresponding `LeadershipId` in the 22 | /// `LeaderMap` is sufficiently high enough. Some properties: 23 | /// 1. All `RemoteMessage`s for a given `PaxosGroupId` have the same `from_lid`. 24 | network_buffer: BTreeMap>>, 25 | /// The `Gen` of the `ctx.leader_map` that `network_buffer` corresponds to. 26 | gen: Gen, 27 | } 28 | 29 | impl NetworkDriver { 30 | pub fn new(leader_map: &VersionedValue) -> NetworkDriver { 31 | let mut network_buffer = BTreeMap::>>::new(); 32 | for (gid, _) in leader_map.value() { 33 | network_buffer.insert(gid.clone(), Vec::new()); 34 | } 35 | 36 | NetworkDriver { network_buffer, gen: leader_map.gen().clone() } 37 | } 38 | 39 | /// The precondition is that the `remote_message` is always from a `PaxosGroupId` that 40 | /// is in the `leader_map` in `ctx`. 41 | pub fn receive( 42 | &mut self, 43 | ctx: NetworkDriverContext, 44 | remote_message: msg::RemoteMessage, 45 | ) -> Option { 46 | // Update `network_buffer` if the LeaderMap has since been updated. 47 | if &self.gen < ctx.leader_map.gen() { 48 | self.gen = ctx.leader_map.gen().clone(); 49 | 50 | // Add new PaxosGroupIds 51 | for (gid, _) in ctx.leader_map.value() { 52 | if !self.network_buffer.contains_key(gid) { 53 | self.network_buffer.insert(gid.clone(), vec![]); 54 | } 55 | } 56 | 57 | // Remove old PaxosGroupIds 58 | let mut removed_gids = Vec::::new(); 59 | for (gid, _) in &self.network_buffer { 60 | if !ctx.leader_map.value().contains_key(gid) { 61 | removed_gids.push(gid.clone()); 62 | } 63 | } 64 | for gid in removed_gids { 65 | self.network_buffer.remove(&gid); 66 | } 67 | } 68 | 69 | let this_gid = &ctx.this_gid; 70 | let this_lid = ctx.leader_map.value().get(&this_gid).unwrap(); 71 | 72 | // A node only gets to this code if it is the Leader. 73 | debug_assert!(&this_lid.eid == ctx.this_eid); 74 | // Messages should not misrouted. 75 | debug_assert!(remote_message.to_lid.eid == this_lid.eid); 76 | // Messages should not be routed here ahead of this node knowing it is the Leader. 77 | debug_assert!(remote_message.to_lid.gen <= this_lid.gen); 78 | 79 | // Drop the RemoteMessage if it was destined to an older generation. 
80 | if remote_message.to_lid.gen < this_lid.gen { 81 | return None; 82 | } 83 | 84 | // This assertion follows immediately from the above. 85 | debug_assert!(remote_message.to_lid.gen == this_lid.gen); 86 | 87 | let from_gid = remote_message.from_gid.clone(); 88 | let from_lid = remote_message.from_lid.clone(); 89 | let buffer = self.network_buffer.get_mut(&from_gid).unwrap(); 90 | if !buffer.is_empty() { 91 | // This means there are already messages from a new remote Leader. 92 | let new_from_lid = &buffer.get(0).unwrap().from_lid; 93 | if from_lid.gen < new_from_lid.gen { 94 | // The Leadership of the new message is too old, so we drop it. 95 | None 96 | } else if from_lid.gen == new_from_lid.gen { 97 | // The Leadership of the new message is the same as the other new messages, so we push. 98 | buffer.push(remote_message); 99 | None 100 | } else { 101 | // The Leadership of the new message is even newer, so we replace. 102 | buffer.clear(); 103 | buffer.push(remote_message); 104 | // We also add a new RemoteLeaderChanged PLm to be inserted. 105 | ctx.remote_leader_changes.push(RemoteLeaderChangedPLm { gid: from_gid, lid: from_lid }); 106 | None 107 | } 108 | } else { 109 | let cur_from_lid = ctx.leader_map.value().get(&from_gid).unwrap(); 110 | if from_lid.gen < cur_from_lid.gen { 111 | // The Leadership of the new message is old, so we drop it. 112 | None 113 | } else if from_lid.gen == cur_from_lid.gen { 114 | // The Leadership of the new message is current, so we deliver the message. 115 | Some(remote_message.payload) 116 | } else { 117 | // The Leadership of the new message is new, so we buffer it. 118 | buffer.push(remote_message); 119 | // We also add a new RemoteLeaderChanged PLm to be inserted. 120 | ctx.remote_leader_changes.push(RemoteLeaderChangedPLm { gid: from_gid, lid: from_lid }); 121 | None 122 | } 123 | } 124 | } 125 | 126 | /// This is called when a `RemoteLeaderChangedPLm` is inserted. 127 | pub fn deliver_blocked_messages( 128 | &mut self, 129 | from_gid: PaxosGroupId, 130 | from_lid: LeadershipId, 131 | ) -> Vec { 132 | if let Some(buffer) = self.network_buffer.get_mut(&from_gid) { 133 | if !buffer.is_empty() { 134 | // Recall that the `from_lid.gen` of all buffered messages should be the same. 135 | let new_from_lid = &buffer.get(0).unwrap().from_lid; 136 | if from_lid.gen > new_from_lid.gen { 137 | // Here, the new RemoteLeaderChangedPLm is beyond all buffered messages, so we drop them. 138 | buffer.clear(); 139 | Vec::new() 140 | } else if from_lid.gen == new_from_lid.gen { 141 | // Deliver all messages from the buffer. 142 | let remote_messages = std::mem::replace(buffer, Vec::new()); 143 | remote_messages.into_iter().map(|m| m.payload).collect() 144 | } else { 145 | // Here, the newly inserted RemoteLeaderChangedPLm will have no effect. Note that from 146 | // `receive`, an appropriate one is still scheduled for insertion. 147 | Vec::new() 148 | } 149 | } else { 150 | Vec::new() 151 | } 152 | } else { 153 | Vec::new() 154 | } 155 | } 156 | 157 | // Here, we just clear the NetworkBuffer.
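// (After a local Leadership change, delivering messages that were buffered for the old
// Leader would violate the Leadership checks in `receive`, so they are dropped.)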
158 | pub fn leader_changed(&mut self) { 159 | for (_, buffer) in &mut self.network_buffer { 160 | buffer.clear(); 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/tests_stmpaxos2pc.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::simulation::Simulation; 3 | use crate::slave::SlavePLm; 4 | use crate::stm_simple_rm_es::STMSimpleRMPayloadTypes; 5 | use crate::stm_simple_tm_es::STMSimpleTMPayloadTypes; 6 | use rand::{RngCore, SeedableRng}; 7 | use rand_xorshift::XorShiftRng; 8 | use runiversal::common::mk_qid; 9 | use runiversal::common::{EndpointId, SlaveGroupId}; 10 | use runiversal::simulation_utils::{mk_client_eid, mk_slave_eid}; 11 | use runiversal::slave::SlaveConfig; 12 | use runiversal::stmpaxos2pc_rm::RMPLm; 13 | use runiversal::stmpaxos2pc_tm::TMPLm; 14 | use runiversal::test_utils::mk_sid; 15 | use std::collections::BTreeMap; 16 | 17 | enum CompletionResult { 18 | Invalid, 19 | SuccessfullyCommitted, 20 | SuccessfullyAborted, 21 | SuccessfullyTrivial, 22 | } 23 | 24 | /// This checks for 2PC Completion. Recall that 2PC Completion is where every 25 | /// RM either Commits or Aborts. 26 | fn check_completion( 27 | sim: &Simulation, 28 | rms: &Vec, 29 | tm: &SlaveGroupId, 30 | ) -> CompletionResult { 31 | let mut tm_plms = Vec::>::new(); 32 | let mut rms_plms = BTreeMap::>>::new(); 33 | 34 | // Add TMPLms 35 | for pl_entry in sim.global_pls.get(tm).unwrap() { 36 | if let msg::PLEntry::Bundle(bundle) = pl_entry { 37 | for plm in &bundle.plms { 38 | if let SlavePLm::SimpleSTMTM(tm_plm) = plm { 39 | tm_plms.push(tm_plm.clone()); 40 | } 41 | } 42 | } 43 | } 44 | 45 | // Add RMPLms 46 | for rm in rms { 47 | rms_plms.insert(rm.clone(), vec![]); 48 | for pl_entry in sim.global_pls.get(rm).unwrap() { 49 | if let msg::PLEntry::Bundle(bundle) = pl_entry { 50 | for plm in &bundle.plms { 51 | if let SlavePLm::SimpleSTMRM(rm_plm) = plm { 52 | rms_plms.get_mut(rm).unwrap().push(rm_plm.clone()); 53 | } 54 | } 55 | } 56 | } 57 | } 58 | 59 | // For every valid value of `tm_plms`, we verify that all `rms_plms` are as expected. 60 | match tm_plms[..] { 61 | [TMPLm::Prepared(_), TMPLm::Committed(_), TMPLm::Closed(_)] => { 62 | for (_, rm_plms) in &rms_plms { 63 | match rm_plms[..] { 64 | [RMPLm::Prepared(_), RMPLm::Committed(_)] => continue, 65 | _ => return CompletionResult::Invalid, 66 | } 67 | } 68 | CompletionResult::SuccessfullyCommitted 69 | } 70 | [TMPLm::Prepared(_), TMPLm::Aborted(_), TMPLm::Closed(_)] => { 71 | for (_, rm_plms) in &rms_plms { 72 | match rm_plms[..] { 73 | [] | [RMPLm::Prepared(_), RMPLm::Aborted(_)] => continue, 74 | _ => return CompletionResult::Invalid, 75 | } 76 | } 77 | CompletionResult::SuccessfullyAborted 78 | } 79 | [] => { 80 | for (_, rm_plms) in &rms_plms { 81 | match rm_plms[..] { 82 | [] => continue, 83 | _ => return CompletionResult::Invalid, 84 | } 85 | } 86 | CompletionResult::SuccessfullyTrivial 87 | } 88 | _ => CompletionResult::Invalid, 89 | } 90 | } 91 | 92 | pub fn test_single(test_num: u32, seed: [u8; 16]) { 93 | // Setup Simulation 94 | 95 | // Create 5 SlaveGroups, each with 3 nodes. 
96 |   const NUM_PAXOS_GROUPS: u32 = 5;
97 |   const NUM_PAXOS_NODES: u32 = 3;
98 |   let mut slave_address_config = BTreeMap::>::new();
99 |   for i in 0..NUM_PAXOS_GROUPS {
100 |     let mut eids = Vec::::new();
101 |     for j in 0..NUM_PAXOS_NODES {
102 |       eids.push(mk_slave_eid(i * NUM_PAXOS_NODES + j));
103 |     }
104 |     slave_address_config.insert(SlaveGroupId(format!("s{}", i)), eids);
105 |   }
106 | 
107 |   let client_eid = mk_client_eid(0);
108 | 
109 |   let slave_config = SlaveConfig {
110 |     timestamp_suffix_divisor: 1,
111 |     remote_leader_changed_period_ms: 5,
112 |     // The below are not needed
113 |     failure_detector_period_ms: 0,
114 |     check_unconfirmed_eids_period_ms: 0,
115 |   };
116 |   let mut sim = Simulation::new(seed, 1, slave_config, slave_address_config.clone());
117 | 
118 |   // Run the simulation to warm it up. Activity here consists of Leadership changes,
119 |   // Gossip, Paxos Insertions, etc.
120 |   sim.simulate_n_ms(100);
121 | 
122 |   // Randomly construct a STMSimpleRequest and send it to a random Slave
123 |   // to perform Simple STMPaxos2PC.
124 | 
125 |   // Take s0 to be the TM.
126 |   let tm = mk_sid("s0");
127 |   let tm_eid = sim.leader_map.get(&tm).unwrap().eid.clone();
128 | 
129 |   // Randomly choose RMs, where none of them are the TM.
130 |   // Recall that STMPaxos2PC requires at least one.
131 |   let num_rms = (sim.rand.next_u32() % (NUM_PAXOS_GROUPS - 1)) + 1;
132 |   let mut all_slaves: Vec = slave_address_config.keys().cloned().collect();
133 |   all_slaves.remove(all_slaves.iter().position(|i| i == &tm).unwrap());
134 |   let mut rms = Vec::::new();
135 |   for _ in 0..num_rms {
136 |     let r = sim.rand.next_u32() % all_slaves.len() as u32;
137 |     rms.push(all_slaves.remove(r as usize));
138 |   }
139 | 
140 |   let request = msg::STMSimpleRequest { query_id: mk_qid(&mut sim.rand), rms: rms.clone() };
141 |   sim.add_msg(
142 |     msg::NetworkMessage::Slave(msg::SlaveMessage::ExternalMessage(
143 |       msg::ExternalMessage::STMSimpleRequest(request),
144 |     )),
145 |     &client_eid,
146 |     &tm_eid,
147 |   );
148 | 
149 |   /// The number of iterations we simulate for, where we check 2PC
150 |   /// consistency after each iteration.
151 |   const NUM_CONSISTENCY_ITERATIONS: u32 = 5;
152 |   /// The number of milliseconds to simulate per iteration.
153 |   const MS_PER_ITERATION: u32 = 5;
154 | 
155 |   // Continue simulating, checking 2PC Consistency after each round.
156 |   sim.simulate_n_ms(NUM_CONSISTENCY_ITERATIONS * MS_PER_ITERATION);
157 | 
158 |   // Finally, run the Simulation in Cooldown Mode and test for STMPaxos2PC
159 |   // completion at the end. "Cooldown Mode" is defined to be a period where no Leadership changes occur.
160 |   sim.sim_params.pl_entry_delivery_prob = 70;
161 |   sim.sim_params.global_pl_insertion_prob = 30;
162 | 
163 |   /// Here, the "cooldown ms" is the number of milliseconds within which we expect the STMPaxos2PC
164 |   /// to finish, given that no leadership changes happen during this time. Although this can be
165 |   /// calculated, we simply guess a sensible number for expedience.
166 |   const EXPECTED_COOLDOWN_MS: u32 = 500;
167 | 
168 |   sim.simulate_n_ms(EXPECTED_COOLDOWN_MS);
169 | 
170 |   match check_completion(&sim, &rms, &tm) {
171 |     CompletionResult::Invalid => {
172 |       println!(
173 |         "{:?}. STMPaxos2PC Test Failed: Invalid PLs after cooldown. Seed: {:?}",
174 |         test_num, seed
175 |       );
176 |       panic!()
177 |     }
178 |     CompletionResult::SuccessfullyCommitted => {
179 |       println!("{:?}. STMPaxos2PC SuccessfullyCommitted!", test_num);
180 |     }
181 |     CompletionResult::SuccessfullyAborted => {
182 |       println!("{:?}. STMPaxos2PC SuccessfullyAborted!", test_num);
183 |     }
184 |     CompletionResult::SuccessfullyTrivial => {
185 |       println!("{:?}. STMPaxos2PC SuccessfullyTrivial!", test_num);
186 |     }
187 |   }
188 | }
189 | 
--------------------------------------------------------------------------------
/src/test/query_converter_test.rs:
--------------------------------------------------------------------------------
1 | use crate::common::{TablePath, TransTableName};
2 | use crate::message as msg;
3 | use crate::query_converter::{rename_under_query, ConversionContext, RenameContext};
4 | use crate::sql_ast::{iast, proc};
5 | 
6 | // -----------------------------------------------------------------------------------------------
7 | // Common
8 | // -----------------------------------------------------------------------------------------------
9 | 
10 | fn basic_join_node(name: String, alias: Option) -> iast::JoinNode {
11 |   iast::JoinNode::JoinLeaf(iast::JoinLeaf { alias, source: iast::JoinNodeSource::Table(name) })
12 | }
13 | 
14 | fn basic_select(table_ref: &str) -> iast::Select {
15 |   iast::Select {
16 |     distinct: false,
17 |     projection: iast::SelectClause::SelectList(vec![]),
18 |     from: basic_join_node(table_ref.to_string(), None),
19 |     selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) },
20 |   }
21 | }
22 | 
23 | fn basic_select_query(ctes: Vec<(&str, iast::Query)>, table_ref: &str) -> iast::Query {
24 |   iast::Query {
25 |     ctes: ctes.iter().map(|(name, query)| (name.to_string(), query.clone())).collect(),
26 |     body: iast::QueryBody::Select(basic_select(table_ref)),
27 |   }
28 | }
29 | 
30 | // -----------------------------------------------------------------------------------------------
31 | // Renaming
32 | // -----------------------------------------------------------------------------------------------
33 | 
34 | // This test simply checks that TransTables that are shadowed in the
35 | // original Query are still renamed properly, and that references to those
36 | // TransTables are also renamed to match.
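// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of this file; `fresh_name` is hypothetical). The
// renaming exercised by the test below gives every CTE binding a globally unique name of the
// form `tt\<counter>\<original>`, so shadowed TransTable names stay distinct after flattening.
// -----------------------------------------------------------------------------------------------

fn fresh_name(counter: &mut u32, orig: &str) -> String {
  let name = format!("tt\\{}\\{}", counter, orig);
  *counter += 1;
  name
}

#[test]
fn fresh_name_disambiguates_shadowed_trans_tables() {
  let mut counter = 0;
  assert_eq!(fresh_name(&mut counter, "tt1"), "tt\\0\\tt1");
  assert_eq!(fresh_name(&mut counter, "tt1"), "tt\\1\\tt1"); // same name, new binding
  assert_eq!(fresh_name(&mut counter, "tt2"), "tt\\2\\tt2");
}
// --------------------------------------- end of aside -----------------------------------------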
37 | #[test] 38 | fn test_basic_rename() { 39 | let mut in_query = basic_select_query( 40 | vec![ 41 | ("tt1", basic_select_query(vec![], "t2")), 42 | ("tt2", basic_select_query(vec![("tt1", basic_select_query(vec![], "tt1"))], "tt1")), 43 | ], 44 | "tt2", 45 | ); 46 | 47 | // Rename TransTables 48 | let mut ctx = RenameContext { trans_table_map: Default::default(), counter: 0 }; 49 | rename_under_query(&mut ctx, &mut in_query); 50 | 51 | let expected = iast::Query { 52 | ctes: vec![ 53 | ( 54 | "tt\\0\\tt1".to_string(), 55 | iast::Query { 56 | ctes: vec![], 57 | body: iast::QueryBody::Select(iast::Select { 58 | distinct: false, 59 | projection: iast::SelectClause::SelectList(vec![]), 60 | from: basic_join_node("t2".to_string(), None), 61 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 62 | }), 63 | }, 64 | ), 65 | ( 66 | "tt\\2\\tt2".to_string(), 67 | iast::Query { 68 | ctes: vec![( 69 | "tt\\1\\tt1".to_string(), 70 | iast::Query { 71 | ctes: vec![], 72 | body: iast::QueryBody::Select(iast::Select { 73 | distinct: false, 74 | projection: iast::SelectClause::SelectList(vec![]), 75 | from: basic_join_node("tt\\0\\tt1".to_string(), Some("tt1".to_string())), 76 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 77 | }), 78 | }, 79 | )], 80 | body: iast::QueryBody::Select(iast::Select { 81 | distinct: false, 82 | projection: iast::SelectClause::SelectList(vec![]), 83 | from: basic_join_node("tt\\1\\tt1".to_string(), Some("tt1".to_string())), 84 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 85 | }), 86 | }, 87 | ), 88 | ], 89 | body: iast::QueryBody::Select(iast::Select { 90 | distinct: false, 91 | projection: iast::SelectClause::SelectList(vec![]), 92 | from: basic_join_node("tt\\2\\tt2".to_string(), Some("tt2".to_string())), 93 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 94 | }), 95 | }; 96 | 97 | // Verify the result. 98 | assert_eq!(in_query, expected); 99 | } 100 | 101 | // ----------------------------------------------------------------------------------------------- 102 | // Flattening 103 | // ----------------------------------------------------------------------------------------------- 104 | 105 | // This tests for a basic flattening of the Query. 
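// -----------------------------------------------------------------------------------------------
// Aside: an illustrative summary (not from this codebase) of what the flattener should produce
// for the input in the test below: one stage per renamed CTE, innermost-first, plus a final
// synthetic "returning" stage.
// -----------------------------------------------------------------------------------------------

#[test]
fn expected_stage_order() {
  // (TransTable name, the source it reads from), in stage order.
  let stages = vec![
    ("tt\\0\\tt1", "t2"),
    ("tt\\1\\tt1", "tt\\0\\tt1"),
    ("tt\\2\\tt2", "tt\\1\\tt1"),
    ("tt\\3\\", "tt\\2\\tt2"), // synthetic stage returning the top-level body
  ];
  // Every stage only reads from a base table or from a stage defined before it.
  for (i, (_, source)) in stages.iter().enumerate() {
    assert!(stages[..i].iter().any(|(name, _)| name == source) || *source == "t2");
  }
}
// --------------------------------------- end of aside -----------------------------------------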
106 | #[test] 107 | fn test_basic_flatten() { 108 | let query = basic_select_query( 109 | vec![ 110 | ("tt\\0\\tt1", basic_select_query(vec![], "t2")), 111 | ( 112 | "tt\\2\\tt2", 113 | basic_select_query( 114 | vec![("tt\\1\\tt1", basic_select_query(vec![], "tt\\0\\tt1"))], 115 | "tt\\1\\tt1", 116 | ), 117 | ), 118 | ], 119 | "tt\\2\\tt2", 120 | ); 121 | 122 | let expected: Result = Ok(proc::MSQuery { 123 | trans_tables: vec![ 124 | ( 125 | TransTableName("tt\\0\\tt1".to_string()), 126 | proc::MSQueryStage::TableSelect(proc::TableSelect { 127 | distinct: false, 128 | projection: proc::SelectClause::SelectList(vec![]), 129 | from: proc::GeneralSource { 130 | source_ref: proc::GeneralSourceRef::TablePath(TablePath("t2".to_string())), 131 | alias: None, 132 | }, 133 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 134 | }), 135 | ), 136 | ( 137 | TransTableName("tt\\1\\tt1".to_string()), 138 | proc::MSQueryStage::TableSelect(proc::TableSelect { 139 | distinct: false, 140 | projection: proc::SelectClause::SelectList(vec![]), 141 | from: proc::GeneralSource { 142 | source_ref: proc::GeneralSourceRef::TransTableName(TransTableName( 143 | "tt\\0\\tt1".to_string(), 144 | )), 145 | alias: None, 146 | }, 147 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 148 | }), 149 | ), 150 | ( 151 | TransTableName("tt\\2\\tt2".to_string()), 152 | proc::MSQueryStage::TableSelect(proc::TableSelect { 153 | distinct: false, 154 | projection: proc::SelectClause::SelectList(vec![]), 155 | from: proc::GeneralSource { 156 | source_ref: proc::GeneralSourceRef::TransTableName(TransTableName( 157 | "tt\\1\\tt1".to_string(), 158 | )), 159 | alias: None, 160 | }, 161 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 162 | }), 163 | ), 164 | ( 165 | TransTableName("tt\\3\\".to_string()), 166 | proc::MSQueryStage::TableSelect(proc::TableSelect { 167 | distinct: false, 168 | projection: proc::SelectClause::SelectList(vec![]), 169 | from: proc::GeneralSource { 170 | source_ref: proc::GeneralSourceRef::TransTableName(TransTableName( 171 | "tt\\2\\tt2".to_string(), 172 | )), 173 | alias: None, 174 | }, 175 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 176 | }), 177 | ), 178 | ] 179 | .into_iter() 180 | .collect(), 181 | returning: TransTableName("tt\\3\\".to_string()), 182 | }); 183 | 184 | let mut ctx = ConversionContext { col_usage_map: Default::default(), counter: 3 }; 185 | assert_eq!(ctx.flatten_top_level_query(&query).unwrap(), expected); 186 | } 187 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/stm_simple_tm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::slave::{SlaveContext, SlavePLm}; 3 | use runiversal::common::BasicIOCtx; 4 | use runiversal::common::{EndpointId, RequestId, SlaveGroupId}; 5 | use runiversal::stmpaxos2pc_tm::{ 6 | RMMessage, STMPaxos2PCTMInner, STMPaxos2PCTMOuter, TMClosedPLm, TMCommittedPLm, TMMessage, TMPLm, 7 | TMPayloadTypes, TMServerContext, 8 | }; 9 | use serde::{Deserialize, Serialize}; 10 | use std::collections::BTreeMap; 11 | 12 | // ----------------------------------------------------------------------------------------------- 13 | // Payloads 14 | // ----------------------------------------------------------------------------------------------- 15 | 16 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 17 | pub struct STMSimpleTMPayloadTypes {} 
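// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of this file). `STMSimpleTMPayloadTypes` carries no
// data; it only instantiates the associated types of `TMPayloadTypes` below. The same
// zero-sized "type bundle" pattern in miniature, with hypothetical names:
// -----------------------------------------------------------------------------------------------

trait Payloads {
  type Prepare;
  type Commit;
}

struct Simple;

impl Payloads for Simple {
  type Prepare = ();
  type Commit = ();
}

// Generic protocol machinery is then written once against `P: Payloads` and reused by every
// concrete instantiation:
fn send_prepare<P: Payloads>(_prepare: P::Prepare) { /* ... */ }

fn demo() {
  send_prepare::<Simple>(());
}
// --------------------------------------- end of aside -----------------------------------------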
18 | 19 | impl TMPayloadTypes for STMSimpleTMPayloadTypes { 20 | // Master 21 | type RMPath = SlaveGroupId; 22 | type TMPath = SlaveGroupId; 23 | type NetworkMessageT = msg::NetworkMessage; 24 | type TMContext = SlaveContext; 25 | 26 | // TM PLm 27 | type TMPreparedPLm = STMSimpleTMPrepared; 28 | type TMCommittedPLm = STMSimpleTMCommitted; 29 | type TMAbortedPLm = STMSimpleTMAborted; 30 | type TMClosedPLm = STMSimpleTMClosed; 31 | 32 | // TM-to-RM Messages 33 | type Prepare = STMSimplePrepare; 34 | type Abort = STMSimpleAbort; 35 | type Commit = STMSimpleCommit; 36 | 37 | // RM-to-TM Messages 38 | type Prepared = STMSimplePrepared; 39 | type Aborted = STMSimpleAborted; 40 | type Closed = STMSimpleClosed; 41 | } 42 | 43 | // TM PLm 44 | 45 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 46 | pub struct STMSimpleTMPrepared { 47 | pub rms: Vec, 48 | } 49 | 50 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 51 | pub struct STMSimpleTMCommitted {} 52 | 53 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 54 | pub struct STMSimpleTMAborted {} 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 57 | pub struct STMSimpleTMClosed {} 58 | 59 | // TM-to-RM 60 | 61 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 62 | pub struct STMSimplePrepare {} 63 | 64 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 65 | pub struct STMSimpleAbort {} 66 | 67 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 68 | pub struct STMSimpleCommit {} 69 | 70 | // RM-to-TM 71 | 72 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 73 | pub struct STMSimplePrepared {} 74 | 75 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 76 | pub struct STMSimpleAborted {} 77 | 78 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 79 | pub struct STMSimpleClosed {} 80 | 81 | // ----------------------------------------------------------------------------------------------- 82 | // STMTMServerContext 83 | // ----------------------------------------------------------------------------------------------- 84 | 85 | impl TMServerContext for SlaveContext { 86 | fn push_plm(&mut self, plm: TMPLm) { 87 | self.slave_bundle.plms.push(SlavePLm::SimpleSTMTM(plm)); 88 | } 89 | 90 | fn send_to_rm>( 91 | &mut self, 92 | io_ctx: &mut IO, 93 | rm: &SlaveGroupId, 94 | msg: RMMessage, 95 | ) { 96 | self.send(io_ctx, rm, msg::SlaveRemotePayload::STMRMMessage(msg)); 97 | } 98 | 99 | fn mk_node_path(&self) -> SlaveGroupId { 100 | self.this_sid.clone() 101 | } 102 | 103 | fn is_leader(&self) -> bool { 104 | SlaveContext::is_leader(self) 105 | } 106 | } 107 | 108 | // ----------------------------------------------------------------------------------------------- 109 | // General STMPaxos2PC TM Types 110 | // ----------------------------------------------------------------------------------------------- 111 | #[derive(Debug)] 112 | pub struct ResponseData { 113 | pub request_id: RequestId, 114 | pub sender_eid: EndpointId, 115 | } 116 | 117 | // ----------------------------------------------------------------------------------------------- 118 | // Simple Implementation 119 | // ----------------------------------------------------------------------------------------------- 120 | 121 | pub type STMSimpleTMES = STMPaxos2PCTMOuter; 122 | 123 | #[derive(Debug)] 124 | pub struct STMSimpleTMInner { 125 | // RMs to use 126 | pub rms: Vec, 127 | } 128 | 129 | impl STMPaxos2PCTMInner for STMSimpleTMInner { 130 | fn 
new_follower>( 131 | _: &mut SlaveContext, 132 | _: &mut IO, 133 | payload: STMSimpleTMPrepared, 134 | ) -> STMSimpleTMInner { 135 | STMSimpleTMInner { rms: payload.rms } 136 | } 137 | 138 | fn mk_prepared_plm>( 139 | &mut self, 140 | _: &mut SlaveContext, 141 | _: &mut IO, 142 | ) -> STMSimpleTMPrepared { 143 | STMSimpleTMPrepared { rms: self.rms.clone() } 144 | } 145 | 146 | fn prepared_plm_inserted>( 147 | &mut self, 148 | _: &mut SlaveContext, 149 | _: &mut IO, 150 | ) -> BTreeMap { 151 | let mut prepares = BTreeMap::::new(); 152 | for rm in &self.rms { 153 | prepares.insert(rm.clone(), STMSimplePrepare {}); 154 | } 155 | prepares 156 | } 157 | 158 | fn mk_committed_plm>( 159 | &mut self, 160 | _: &mut SlaveContext, 161 | _: &mut IO, 162 | _: &BTreeMap, 163 | ) -> STMSimpleTMCommitted { 164 | STMSimpleTMCommitted {} 165 | } 166 | 167 | fn committed_plm_inserted>( 168 | &mut self, 169 | _: &mut SlaveContext, 170 | _: &mut IO, 171 | _: &TMCommittedPLm, 172 | ) -> BTreeMap { 173 | let mut commits = BTreeMap::::new(); 174 | for rm in &self.rms { 175 | commits.insert(rm.clone(), STMSimpleCommit {}); 176 | } 177 | commits 178 | } 179 | 180 | fn mk_aborted_plm>( 181 | &mut self, 182 | _: &mut SlaveContext, 183 | _: &mut IO, 184 | ) -> STMSimpleTMAborted { 185 | STMSimpleTMAborted {} 186 | } 187 | 188 | fn aborted_plm_inserted>( 189 | &mut self, 190 | _: &mut SlaveContext, 191 | _: &mut IO, 192 | ) -> BTreeMap { 193 | let mut aborts = BTreeMap::::new(); 194 | for rm in &self.rms { 195 | aborts.insert(rm.clone(), STMSimpleAbort {}); 196 | } 197 | aborts 198 | } 199 | 200 | fn mk_closed_plm>( 201 | &mut self, 202 | _: &mut SlaveContext, 203 | _: &mut IO, 204 | ) -> STMSimpleTMClosed { 205 | STMSimpleTMClosed {} 206 | } 207 | 208 | fn closed_plm_inserted>( 209 | &mut self, 210 | _: &mut SlaveContext, 211 | _: &mut IO, 212 | _: &TMClosedPLm, 213 | ) { 214 | } 215 | 216 | fn leader_changed>( 217 | &mut self, 218 | _: &mut SlaveContext, 219 | _: &mut IO, 220 | ) { 221 | } 222 | 223 | fn reconfig_snapshot(&self) -> Self { 224 | unimplemented!() 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/query_planning.rs: -------------------------------------------------------------------------------- 1 | use crate::col_usage::{QueryElement, QueryIterator}; 2 | use crate::common::{ 3 | lookup, ColName, FullGen, Gen, TablePath, TableSchema, TierMap, Timestamp, TransTableName, 4 | }; 5 | use crate::master_query_planning_es::{DBSchemaView, ErrorTrait}; 6 | use crate::message as msg; 7 | use crate::multiversion_map::MVM; 8 | use crate::sql_ast::proc; 9 | use sqlparser::test_utils::table; 10 | use std::collections::{BTreeMap, BTreeSet}; 11 | 12 | /// Gather every reference to a `TablePath` found in the `query`. 
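// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of this file; `Elem` and `visit_all` are hypothetical).
// `collect_table_paths` below follows the usual callback-visitor shape: an iterator walks every
// query element and a `FnMut` closure collects the names it cares about, ignoring the rest.
// -----------------------------------------------------------------------------------------------

use std::collections::BTreeSet;

enum Elem {
  Table(String),
  Other,
}

fn visit_all<F: FnMut(&Elem)>(elems: &[Elem], f: &mut F) {
  for e in elems {
    f(e);
  }
}

#[test]
fn collects_only_tables() {
  let elems = vec![Elem::Table("t1".into()), Elem::Other, Elem::Table("t2".into())];
  let mut tables = BTreeSet::<String>::new();
  visit_all(&elems, &mut |e| {
    if let Elem::Table(name) = e {
      tables.insert(name.clone());
    }
  });
  assert_eq!(tables.len(), 2);
}
// --------------------------------------- end of aside -----------------------------------------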
13 | pub fn collect_table_paths(query: &proc::MSQuery) -> BTreeSet {
14 |   let mut table_paths = BTreeSet::::new();
15 |   QueryIterator::new().iterate_ms_query(
16 |     &mut |stage: QueryElement| match stage {
17 |       QueryElement::TableSelect(query) => {
18 |         table_paths.insert(query.from.table_path.clone());
19 |       }
20 |       QueryElement::TransTableSelect(_) => {}
21 |       QueryElement::JoinSelect(_) => {}
22 |       QueryElement::JoinNode(_) => {}
23 |       QueryElement::JoinLeaf(_) => {}
27 |       QueryElement::Update(query) => {
28 |         table_paths.insert(query.table.table_path.clone());
29 |       }
30 |       QueryElement::Insert(query) => {
31 |         table_paths.insert(query.table.table_path.clone());
32 |       }
33 |       QueryElement::Delete(query) => {
34 |         table_paths.insert(query.table.table_path.clone());
35 |       }
36 |       QueryElement::ValExpr(_) => {}
37 |       QueryElement::MSQuery(_) => {}
38 |       QueryElement::GRQuery(_) => {}
39 |       QueryElement::GRQueryStage(_) => {}
40 |     },
41 |     query,
42 |   );
43 |   table_paths
44 | }
45 | 
46 | /// Compute the `TierMap` for every stage in the `MSQuery`. A `TablePath` should appear
47 | /// in a `TierMap` iff it is written to by the `MSQuery`.
48 | ///
49 | /// The `TierMap` for a stage contains the Tiers that should be used to read the `TablePath`s
50 | /// inside. Note that if a stage is a write (e.g. an Update), the Tier of the written `TablePath`
51 | /// in the `TierMap` is one behind (i.e. one more) the Tier that the write should commit at.
52 | pub fn compute_all_tier_maps(ms_query: &proc::MSQuery) -> BTreeMap {
53 |   let mut all_tier_maps = BTreeMap::::new();
54 |   let mut cur_tier_map = BTreeMap::::new();
55 |   for (_, stage) in &ms_query.trans_tables {
56 |     match stage {
57 |       proc::MSQueryStage::TableSelect(_) => {}
58 |       proc::MSQueryStage::TransTableSelect(_) => {}
59 |       proc::MSQueryStage::JoinSelect(_) => {}
60 |       proc::MSQueryStage::Update(update) => {
61 |         cur_tier_map.insert(update.table.table_path.clone(), 0);
62 |       }
63 |       proc::MSQueryStage::Insert(insert) => {
64 |         cur_tier_map.insert(insert.table.table_path.clone(), 0);
65 |       }
66 |       proc::MSQueryStage::Delete(delete) => {
67 |         cur_tier_map.insert(delete.table.table_path.clone(), 0);
68 |       }
69 |     }
70 |   }
71 |   for (trans_table_name, stage) in ms_query.trans_tables.iter().rev() {
72 |     match stage {
73 |       proc::MSQueryStage::TableSelect(_) => {}
74 |       proc::MSQueryStage::TransTableSelect(_) => {}
75 |       proc::MSQueryStage::JoinSelect(_) => {}
76 |       proc::MSQueryStage::Update(update) => {
77 |         *cur_tier_map.get_mut(&update.table.table_path).unwrap() += 1;
78 |       }
79 |       proc::MSQueryStage::Insert(insert) => {
80 |         *cur_tier_map.get_mut(&insert.table.table_path).unwrap() += 1;
81 |       }
82 |       proc::MSQueryStage::Delete(delete) => {
83 |         *cur_tier_map.get_mut(&delete.table.table_path).unwrap() += 1;
84 |       }
85 |     }
86 |     all_tier_maps.insert(trans_table_name.clone(), TierMap { map: cur_tier_map.clone() });
87 |   }
88 |   all_tier_maps
89 | }
90 | 
91 | /// Computes a map that maps all `TablePath`s used in the MSQuery to the `Gen`
92 | /// in the `table_generation` at `timestamp`.
93 | ///
94 | /// Precondition:
95 | /// 1. All `TablePath`s in the MSQuery must have a non-None `Gen` in `table_generation`.
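// -----------------------------------------------------------------------------------------------
// Aside: a worked example for `compute_all_tier_maps` above (the original file continues below).
// Consider an MSQuery with stages [S1: Update(T), S2: Select, S3: Update(T)]. Walking the stages
// in reverse and bumping the written table's tier *before* snapshotting gives:
//
//   S3 (Update T):  T -> 1   (S3's write commits at tier 0, so S3 reads one behind, at tier 1)
//   S2 (Select):    T -> 1   (reads the same snapshot as S3)
//   S1 (Update T):  T -> 2   (its write commits at tier 1; it reads one behind, at tier 2)
//
// That is, later writes commit at lower tiers, and every stage reads at a tier one behind the
// nearest later write.
// --------------------------------------- end of aside -----------------------------------------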
96 | pub fn compute_table_location_map(
97 |   view: &mut ViewT,
98 |   table_paths: &BTreeSet,
99 | ) -> Result, ViewT::ErrorT> {
100 |   let mut table_location_map = BTreeMap::::new();
101 |   for table_path in table_paths {
102 |     table_location_map.insert(table_path.clone(), view.get_gen(table_path)?);
103 |   }
104 |   Ok(table_location_map)
105 | }
106 | 
107 | /// Validates the `MSQuery` in various ways. In particular, this checks whether the
108 | /// columns that are written by an Insert or Update are valid and present in `view`.
109 | pub fn perform_validations>(
110 |   view: &mut ViewT,
111 |   ms_query: &proc::MSQuery,
112 | ) -> Result<(), ErrorT> {
113 |   for (_, stage) in &ms_query.trans_tables {
114 |     match stage {
115 |       proc::MSQueryStage::TableSelect(_) => {}
116 |       proc::MSQueryStage::TransTableSelect(_) => {}
117 |       proc::MSQueryStage::JoinSelect(_) => {}
118 |       proc::MSQueryStage::Update(query) => {
119 |         // Check that the `stage` is not trying to modify a KeyCol,
120 |         // all assigned columns are unique, and they are present.
121 |         let table_path = &query.table.table_path;
122 |         let key_cols = view.key_cols(table_path)?.clone();
123 |         let mut all_cols = BTreeSet::<&ColName>::new();
124 |         for (col_name, _) in &query.assignment {
125 |           if !all_cols.insert(col_name) || lookup(&key_cols, col_name).is_some() {
126 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidUpdate));
127 |           }
128 |           if !view.contains_col(table_path, col_name)? {
129 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::RequiredColumnDNE(
130 |               col_name.clone(),
131 |             )));
132 |           }
133 |         }
134 |       }
135 |       proc::MSQueryStage::Insert(query) => {
136 |         // Check that the `stage` is inserting to all KeyCols.
137 |         let table_path = &query.table.table_path;
138 |         let key_cols = view.key_cols(table_path)?;
139 |         for (col_name, _) in key_cols {
140 |           if !query.columns.contains(col_name) {
141 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidInsert));
142 |           }
143 |         }
144 | 
145 |         // Check that every inserted column is present.
146 |         for col_name in &query.columns {
147 |           if !view.contains_col(table_path, col_name)? {
148 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::RequiredColumnDNE(
149 |               col_name.clone(),
150 |             )));
151 |           }
152 |         }
153 | 
154 |         // Check that all inserted columns are unique.
155 |         let mut all_cols = BTreeSet::<&ColName>::new();
156 |         for col_name in &query.columns {
157 |           if !all_cols.insert(col_name) {
158 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidInsert));
159 |           }
160 |         }
161 | 
162 |         // Check that `values` has equal length to `columns`.
163 |         for row in &query.values {
164 |           if row.len() != query.columns.len() {
165 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidInsert));
166 |           }
167 |         }
168 |       }
169 |       proc::MSQueryStage::Delete(_) => {}
170 |     }
171 |   }
172 | 
173 |   Ok(())
174 | }
175 | 
--------------------------------------------------------------------------------
/src/multiversion_map.rs:
--------------------------------------------------------------------------------
1 | use crate::common::Timestamp;
2 | use crate::common::{mk_t, update_all_eids};
3 | use serde::{Deserialize, Serialize};
4 | use std::cmp::max;
5 | use std::collections::BTreeMap;
6 | use std::hash::Hash;
7 | 
8 | /// Here, `min_lat` is used to increase the LATs of all Keys in existence (which is an
9 | /// infinite set). When it is incremented, the LATs of every key that is present in `map`
10 | /// are updated as well so that they are always >= `min_lat`.
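// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of multiversion_map.rs; the struct follows below). The
// point of `min_lat` is that even keys never inserted into `map` have a well-defined LAT. A
// rough model of the LAT lookup, using hypothetical `u64` timestamps:
// -----------------------------------------------------------------------------------------------

use std::collections::BTreeMap;

fn get_lat<K: Ord>(map: &BTreeMap<K, (u64, Vec<(u64, Option<()>)>)>, min_lat: u64, key: &K) -> u64 {
  map.get(key).map(|(lat, _)| *lat).unwrap_or(min_lat)
}

#[test]
fn absent_keys_have_min_lat() {
  let mut map = BTreeMap::new();
  map.insert("a", (3u64, vec![]));
  // After an `update_all_lats(5)`, `min_lat` becomes 5 and "a" is raised to 5 as well, so
  // *every* key (including ones never inserted) now has a LAT >= 5.
  let min_lat = 5u64;
  if let Some((lat, _)) = map.get_mut("a") {
    *lat = (*lat).max(min_lat);
  }
  assert_eq!(get_lat(&map, min_lat, &"a"), 5);
  assert_eq!(get_lat(&map, min_lat, &"zzz"), 5);
}
// --------------------------------------- end of aside -----------------------------------------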
11 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
12 | pub struct MVM {
13 |   min_lat: Timestamp,
14 |   map: BTreeMap)>)>,
15 | }
16 | 
17 | impl MVM
18 | where
19 |   K: Eq + Ord + Clone,
20 |   V: Clone,
21 | {
22 |   pub fn new() -> MVM {
23 |     MVM { min_lat: mk_t(0), map: BTreeMap::new() }
24 |   }
25 | 
26 |   pub fn init(init_vals: BTreeMap) -> MVM {
27 |     let mut map = BTreeMap::)>)>::new();
28 |     for (key, value) in init_vals {
29 |       map.insert(key, (mk_t(0), vec![(mk_t(0), Some(value))]));
30 |     }
31 |     MVM { min_lat: mk_t(0), map }
32 |   }
33 | 
34 |   /// Performs an MVMWrite to the MVM. The user *must* be sure that `timestamp` is beyond the
35 |   /// `lat` of the key, otherwise we `assert`. They can verify this by doing static and weak reads.
36 |   pub fn write(&mut self, key: &K, value: Option, timestamp: Timestamp) {
37 |     if let Some((lat, versions)) = self.map.get_mut(key) {
38 |       assert!(*lat < timestamp);
39 |       *lat = timestamp.clone();
40 |       versions.push((timestamp, value));
41 |     } else {
42 |       // Here, the `key` has a LAT of `min_lat` and contains no versions.
43 |       assert!(self.min_lat < timestamp);
44 |       self.map.insert(key.clone(), (timestamp.clone(), vec![(timestamp, value)]));
45 |     }
46 |   }
47 | 
48 |   pub fn read(&mut self, key: &K, timestamp: &Timestamp) -> Option {
49 |     if let Some((lat, versions)) = self.map.get_mut(key) {
50 |       *lat = max(lat.clone(), timestamp.clone());
51 |       find_prior_value(versions, timestamp).cloned()
52 |     } else {
53 |       if timestamp > &self.min_lat {
54 |         self.map.insert(key.clone(), (timestamp.clone(), vec![]));
55 |       }
56 | 
57 |       None
58 |     }
59 |   }
60 | 
61 |   pub fn update_lat(&mut self, key: &K, timestamp: Timestamp) {
62 |     if let Some((lat, _)) = self.map.get_mut(key) {
63 |       *lat = max(lat.clone(), timestamp);
64 |     } else if timestamp > self.min_lat {
65 |       self.map.insert(key.clone(), (timestamp, vec![]));
66 |     }
67 |   }
68 | 
69 |   pub fn update_all_lats(&mut self, timestamp: Timestamp) {
70 |     if timestamp > self.min_lat {
71 |       for (_, (lat, _)) in &mut self.map {
72 |         *lat = max(lat.clone(), timestamp.clone())
73 |       }
74 |       self.min_lat = timestamp;
75 |     }
76 |   }
77 | 
78 |   /// Reads the version prior to the timestamp. This function asserts that the `lat` of
79 |   /// the `key` is `>= timestamp`. Recall that all keys in existence implicitly at
80 |   /// least have a `lat` of 0. Thus, the return value of this function is idempotent.
81 |   pub fn strong_static_read(&self, key: &K, timestamp: &Timestamp) -> Option<&V> {
82 |     if let Some((lat, versions)) = self.map.get(key) {
83 |       assert!(timestamp <= lat);
84 |       find_prior_value(versions, timestamp)
85 |     } else {
86 |       assert!(timestamp <= &self.min_lat);
87 |       None
88 |     }
89 |   }
90 | 
91 |   /// Get the value that would be read if we did a `read` at the `lat`.
92 |   pub fn get_last_version(&self, key: &K) -> Option<&V> {
93 |     if let Some((_, versions)) = self.map.get(key) {
94 |       if let Some((_, val)) = versions.iter().last() {
95 |         val.as_ref()
96 |       } else {
97 |         None
98 |       }
99 |     } else {
100 |       None
101 |     }
102 |   }
103 | 
104 |   /// Get the latest version of the `key` that was non-`None`.
105 |   pub fn get_last_present_version(&self, key: &K) -> Option<&V> {
106 |     if let Some((_, versions)) = self.map.get(key) {
107 |       for (_, val) in versions.iter().rev() {
108 |         if val.is_some() {
109 |           return val.as_ref();
110 |         }
111 |       }
112 |     }
113 |     None
114 |   }
115 | 
116 |   /// Reads the prior value at the timestamp. This does not mutate the `lat` if the read
117 |   /// happens with a future timestamp.
Thus, the values read are not idempotent. 118 | pub fn static_read(&self, key: &K, timestamp: &Timestamp) -> Option<&V> { 119 | let (_, value) = self.static_read_version(key, timestamp)?; 120 | value.as_ref() 121 | } 122 | 123 | /// Reads the prior version at the timestamp. This does not mutate the `lat` if the read 124 | /// happens with a future timestamp. Thus, the values read are not idempotent. 125 | pub fn static_read_version( 126 | &self, 127 | key: &K, 128 | timestamp: &Timestamp, 129 | ) -> Option<&(Timestamp, Option)> { 130 | if let Some((_, versions)) = self.map.get(key) { 131 | find_prior_version(versions, timestamp) 132 | } else { 133 | None 134 | } 135 | } 136 | 137 | /// Returns the values for all keys that are present at the given 138 | /// `timestamp`. This is done statically, so no lats are updated. 139 | pub fn static_snapshot_read(&self, timestamp: &Timestamp) -> BTreeMap { 140 | let mut snapshot = BTreeMap::new(); 141 | for (key, (_, versions)) in &self.map { 142 | if let Some(value) = find_prior_value(versions, timestamp) { 143 | snapshot.insert(key.clone(), value.clone()); 144 | } 145 | } 146 | return snapshot; 147 | } 148 | 149 | /// Recall that abstractly, all keys are mapped to `(0, [])` 150 | pub fn get_lat(&self, key: &K) -> Timestamp { 151 | if let Some((lat, _)) = self.map.get(key) { 152 | lat.clone() 153 | } else { 154 | self.min_lat.clone() 155 | } 156 | } 157 | 158 | /// Get the smallest LAT among all keys. There certainly exists a key with a LAT of 159 | /// `min_lat`, since there are infinite keys. Thus, we simply return `min_lat`. 160 | pub fn get_min_lat(&self) -> Timestamp { 161 | self.min_lat.clone() 162 | } 163 | 164 | /// Get the highest LAT of any key-value pair in the MVM. 165 | pub fn get_latest_lat(&self) -> Timestamp { 166 | let mut latest_lat = self.min_lat.clone(); 167 | for (_, (lat, _)) in &self.map { 168 | latest_lat = max(latest_lat, lat.clone()); 169 | } 170 | latest_lat 171 | } 172 | } 173 | 174 | fn find_prior_value<'a, V>( 175 | versions: &'a Vec<(Timestamp, Option)>, 176 | timestamp: &Timestamp, 177 | ) -> Option<&'a V> { 178 | let (_, value) = find_prior_version(versions, timestamp)?; 179 | value.as_ref() 180 | } 181 | 182 | fn find_prior_version<'a, V>( 183 | versions: &'a Vec<(Timestamp, Option)>, 184 | timestamp: &Timestamp, 185 | ) -> Option<&'a (Timestamp, Option)> { 186 | for version in versions.iter().rev() { 187 | let (t, _) = version; 188 | if t <= timestamp { 189 | return Some(version); 190 | } 191 | } 192 | return None; 193 | } 194 | 195 | #[cfg(test)] 196 | mod tests { 197 | use crate::common::mk_t; 198 | use crate::common::Timestamp; 199 | use crate::multiversion_map::MVM; 200 | 201 | #[test] 202 | fn single_key_test() { 203 | let mut mvm = MVM::new(); 204 | let k = String::from("k"); 205 | let v1 = String::from("v1"); 206 | let v2 = String::from("v2"); 207 | let v3 = String::from("v3"); 208 | assert_eq!(mvm.read(&k, &mk_t(1)), None); 209 | mvm.write(&k, Some(v1.clone()), mk_t(2)); 210 | mvm.write(&k, Some(v2.clone()), mk_t(4)); 211 | assert_eq!(mvm.read(&k, &mk_t(3)), Some(v1)); 212 | assert_eq!(mvm.read(&k, &mk_t(5)), Some(v2)); 213 | mvm.write(&k, Some(v3.clone()), mk_t(6)); 214 | assert_eq!(mvm.read(&k, &mk_t(6)), Some(v3)); 215 | mvm.write(&k, None, mk_t(7)); 216 | assert_eq!(mvm.read(&k, &mk_t(7)), None); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/tm_status.rs: -------------------------------------------------------------------------------- 1 | use 
crate::common::{merge_table_views, mk_qid, CoreIOCtx, OrigP};
2 | use crate::common::{
3 |   CQueryPath, CTNodePath, ColName, LeadershipId, PaxosGroupIdTrait, QueryId, SlaveGroupId,
4 |   TQueryPath, TableView, TabletGroupId, TransTableLocationPrefix,
5 | };
6 | use crate::message as msg;
7 | use crate::server::{CTServerContext, CommonQuery};
8 | use std::collections::{BTreeMap, BTreeSet};
9 | 
10 | // -----------------------------------------------------------------------------------------------
11 | // TMStatus
12 | // -----------------------------------------------------------------------------------------------
13 | 
14 | pub enum SendHelper {
15 |   TableQuery(msg::GeneralQuery, Vec),
16 |   TransTableQuery(msg::GeneralQuery, TransTableLocationPrefix),
17 | }
18 | 
19 | // This is used to perform PCSA over the network for reads and writes.
20 | #[derive(Debug)]
21 | pub struct TMStatus {
22 |   root_query_path: CQueryPath,
23 |   /// The QueryId of the TMStatus.
24 |   pub query_id: QueryId,
25 |   /// This is the QueryId of the PerformQuery. We keep this distinct from the TMStatus'
26 |   /// QueryId, since one of the RMs might be this node.
27 |   child_query_id: QueryId,
28 |   /// Accumulates all transitively accessed Tablets where an `MSQueryES` was used.
29 |   new_rms: BTreeSet,
30 |   /// The current set of Leaderships that this TMStatus is waiting on. Thus, in order to
31 |   /// contact an RM, we just use the `LeadershipId` found here.
32 |   pub leaderships: BTreeMap,
33 |   /// Holds the number of nodes that responded (used to decide when this TM is done).
34 |   responded_count: usize,
35 |   /// Holds all child Querys, initially mapping to `None`. As results come in, we hold them here.
36 |   tm_state: BTreeMap>>,
37 |   pub orig_p: OrigP,
38 | }
39 | 
40 | impl TMStatus {
41 |   pub fn new(
42 |     io_ctx: &mut IO,
43 |     root_query_path: CQueryPath,
44 |     orig_p: OrigP,
45 |   ) -> TMStatus {
46 |     TMStatus {
47 |       root_query_path,
48 |       query_id: mk_qid(io_ctx.rand()),
49 |       child_query_id: mk_qid(io_ctx.rand()),
50 |       new_rms: Default::default(),
51 |       leaderships: Default::default(),
52 |       responded_count: 0,
53 |       tm_state: Default::default(),
54 |       orig_p,
55 |     }
56 |   }
57 | 
58 |   pub fn query_id(&self) -> &QueryId {
59 |     &self.query_id
60 |   }
61 | 
62 |   /// Perform the sending indicated by `helper`.
63 |   /// TODO: when sharding occurs, this query_leader_map.get might be invalid.
64 |   pub fn send_general(
65 |     &mut self,
66 |     ctx: &mut Ctx,
67 |     io_ctx: &mut IO,
68 |     query_leader_map: &BTreeMap,
69 |     helper: SendHelper,
70 |   ) -> bool {
71 |     match helper {
72 |       SendHelper::TableQuery(general_query, tids) => {
73 |         // Validate the LeadershipId of PaxosGroups that the PerformQuery will be sent to.
74 |         // We do this before sending any messages, in case it fails. Recall that the local
75 |         // `leader_map` is allowed to get ahead of the `query_leader_map` which we computed
76 |         // earlier, so this check is necessary.
77 |         for tid in &tids {
78 |           let sid = ctx.gossip().get().tablet_address_config.get(&tid).unwrap();
79 |           if let Some(lid) = query_leader_map.get(sid) {
80 |             if lid.gen < ctx.leader_map().get(&sid.to_gid()).unwrap().gen {
81 |               // The `lid` has since changed, so we cannot finish this MSQueryES.
82 |               return false;
83 |             }
84 |           }
85 |         }
86 | 
87 |         // Having non-empty `tids` solves the TMStatus deadlock and allows the child schema to be determined.
88 |         assert!(tids.len() > 0);
89 |         for tid in tids {
90 |           // Recall we already validated that `lid` in `query_leader_map` is no lower than
91 |           // the one at this node's LeaderMap, so it is safe to use.
92 |           let to_node_path = ctx.mk_tablet_node_path(tid).into_ct();
93 |           let sid = &to_node_path.sid;
94 |           let to_lid = query_leader_map.get(sid).or(ctx.leader_map().get(&sid.to_gid())).unwrap();
95 |           self.send_perform(ctx, io_ctx, general_query.clone(), to_node_path, to_lid.clone());
96 |         }
97 |       }
98 |       SendHelper::TransTableQuery(general_query, location_prefix) => {
99 |         // Validate the LeadershipId of PaxosGroups that the PerformQuery will be sent to.
100 |         // We do this before sending any messages, in case it fails.
101 |         let sid = &location_prefix.source.node_path.sid;
102 |         if let Some(lid) = query_leader_map.get(sid) {
103 |           if lid.gen < ctx.leader_map().get(&sid.to_gid()).unwrap().gen {
104 |             // The `lid` is too old, so we cannot finish this GRQueryES.
105 |             return false;
106 |           }
107 |         }
108 | 
109 |         // Recall we already validated that `lid` in `query_leader_map` is no lower than
110 |         // the one at this node's LeaderMap, so it is safe to use.
111 |         let to_lid = query_leader_map.get(&sid).or(ctx.leader_map().get(&sid.to_gid())).unwrap();
112 |         let to_node_path = location_prefix.source.node_path.clone();
113 |         self.send_perform(ctx, io_ctx, general_query, to_node_path, to_lid.clone());
114 |       }
115 |     }
116 | 
117 |     true
118 |   }
119 | 
120 |   /// Construct a `PerformQuery` and send it to `to_node_path` at `to_lid`, recording the
121 |   /// target in this TMStatus.
122 |   pub fn send_perform(
123 |     &mut self,
124 |     ctx: &mut Ctx,
125 |     io_ctx: &mut IO,
126 |     general_query: msg::GeneralQuery,
127 |     to_node_path: CTNodePath,
128 |     to_lid: LeadershipId,
129 |   ) {
130 |     let sender_path = ctx.mk_this_query_path(self.query_id.clone());
131 |     // Construct PerformQuery
132 |     let perform_query = msg::PerformQuery {
133 |       root_query_path: self.root_query_path.clone(),
134 |       sender_path,
135 |       query_id: self.child_query_id.clone(),
136 |       query: general_query,
137 |     };
138 | 
139 |     // Send out PerformQuery. Recall that this could only be a Tablet.
140 |     let common_query = CommonQuery::PerformQuery(perform_query);
141 |     ctx.send_to_ct_lid(io_ctx, to_node_path.clone(), common_query, to_lid.clone());
142 | 
143 |     // Add the TabletGroup into the TMStatus.
144 |     self.leaderships.insert(to_node_path.sid.clone(), to_lid);
145 |     self.tm_state.insert(to_node_path, None);
146 |   }
147 | 
148 |   /// We accumulate the results of the `query_success` here.
149 |   pub fn handle_query_success(&mut self, query_success: msg::QuerySuccess) {
150 |     let node_path = query_success.responder_path.node_path;
151 |     self.tm_state.insert(node_path, Some(query_success.result.clone()));
152 |     self.new_rms.extend(query_success.new_rms);
153 |     self.responded_count += 1;
154 |   }
155 | 
156 |   /// Merge these `TableView`s together. Note that this should only be called when
157 |   /// all child queries have responded.
158 |   pub fn get_results(self) -> (OrigP, Vec, BTreeSet) {
159 |     debug_assert!(self.is_complete());
160 |     let mut results = Vec::>::new();
161 |     for (_, rm_result) in self.tm_state {
162 |       results.push(rm_result.unwrap());
163 |     }
164 |     (self.orig_p, merge_table_views(results), self.new_rms)
165 |   }
166 | 
167 |   pub fn is_complete(&self) -> bool {
168 |     self.responded_count == self.tm_state.len()
169 |   }
170 | 
171 |   /// We ECU this `TMStatus` by sending `CancelQuery` to all remaining RMs.
171 | pub fn exit_and_clean_up( 172 | self, 173 | ctx: &mut Ctx, 174 | io_ctx: &mut IO, 175 | ) { 176 | for (rm_path, rm_result) in self.tm_state { 177 | if rm_result.is_none() { 178 | let orig_sid = &rm_path.sid; 179 | let orig_lid = self.leaderships.get(&orig_sid).unwrap().clone(); 180 | ctx.send_to_ct_lid( 181 | io_ctx, 182 | rm_path, 183 | CommonQuery::CancelQuery(msg::CancelQuery { query_id: self.child_query_id.clone() }), 184 | orig_lid, 185 | ); 186 | } 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/create_table_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{mk_t, BasicIOCtx, CTSubNodePath, PaxosGroupIdTrait, TableSchema}; 2 | use crate::common::{ 3 | ColName, ColType, Gen, SlaveGroupId, TablePath, TabletGroupId, TabletKeyRange, 4 | }; 5 | use crate::create_table_tm_es::{ 6 | CreateTableClosed, CreateTableCommit, CreateTablePrepare, CreateTablePrepared, 7 | CreateTableTMPayloadTypes, 8 | }; 9 | use crate::message as msg; 10 | use crate::multiversion_map::MVM; 11 | use crate::server::ServerContextBase; 12 | use crate::slave::{SlaveContext, SlavePLm}; 13 | use crate::stmpaxos2pc_rm::{ 14 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 15 | STMPaxos2PCRMOuter, 16 | }; 17 | use crate::stmpaxos2pc_tm::TMMessage; 18 | use crate::storage::GenericMVTable; 19 | use crate::tablet::{TabletConfig, TabletContext}; 20 | use rand::RngCore; 21 | use serde::{Deserialize, Serialize}; 22 | 23 | // ----------------------------------------------------------------------------------------------- 24 | // Payloads 25 | // ----------------------------------------------------------------------------------------------- 26 | 27 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 28 | pub struct CreateTableRMPayloadTypes {} 29 | 30 | impl RMPayloadTypes for CreateTableRMPayloadTypes { 31 | type TM = CreateTableTMPayloadTypes; 32 | type RMContext = SlaveContext; 33 | 34 | // Actions 35 | type RMCommitActionData = TabletContext; 36 | 37 | // RM PLm 38 | type RMPreparedPLm = CreateTableRMPrepared; 39 | type RMCommittedPLm = CreateTableRMCommitted; 40 | type RMAbortedPLm = CreateTableRMAborted; 41 | } 42 | 43 | // RM PLm 44 | 45 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 46 | pub struct CreateTableRMPrepared { 47 | pub tablet_group_id: TabletGroupId, 48 | pub table_path: TablePath, 49 | pub gen: Gen, 50 | 51 | pub key_range: TabletKeyRange, 52 | pub key_cols: Vec<(ColName, ColType)>, 53 | pub val_cols: Vec<(ColName, ColType)>, 54 | } 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 57 | pub struct CreateTableRMCommitted {} 58 | 59 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 60 | pub struct CreateTableRMAborted {} 61 | 62 | // ----------------------------------------------------------------------------------------------- 63 | // RMServerContext 64 | // ----------------------------------------------------------------------------------------------- 65 | 66 | impl RMServerContext for SlaveContext { 67 | fn push_plm(&mut self, plm: RMPLm) { 68 | self.slave_bundle.plms.push(SlavePLm::CreateTable(plm)); 69 | } 70 | 71 | fn send_to_tm( 72 | &mut self, 73 | io_ctx: &mut IO, 74 | _: &(), 75 | msg: TMMessage, 76 | ) { 77 | self.send_to_master(io_ctx, msg::MasterRemotePayload::CreateTable(msg)); 78 | } 79 | 80 | fn mk_node_path(&self) -> SlaveGroupId { 81 | 
self.this_sid.clone()
82 |   }
83 | 
84 |   fn is_leader(&self) -> bool {
85 |     SlaveContext::is_leader(self)
86 |   }
87 | }
88 | 
89 | // -----------------------------------------------------------------------------------------------
90 | //  CreateTableES Implementation
91 | // -----------------------------------------------------------------------------------------------
92 | 
93 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
94 | pub struct CreateTableRMInner {
95 |   pub tablet_group_id: TabletGroupId,
96 |   pub table_path: TablePath,
97 |   pub gen: Gen,
98 | 
99 |   pub key_range: TabletKeyRange,
100 |   pub key_cols: Vec<(ColName, ColType)>,
101 |   pub val_cols: Vec<(ColName, ColType)>,
102 | }
103 | 
104 | pub type CreateTableRMES = STMPaxos2PCRMOuter;
105 | pub type CreateTableRMAction = STMPaxos2PCRMAction;
106 | 
107 | impl STMPaxos2PCRMInner for CreateTableRMInner {
108 |   fn new(
109 |     _: &mut SlaveContext,
110 |     _: &mut IO,
111 |     payload: CreateTablePrepare,
112 |   ) -> CreateTableRMInner {
113 |     CreateTableRMInner {
114 |       tablet_group_id: payload.tablet_group_id,
115 |       table_path: payload.table_path,
116 |       gen: payload.gen,
117 |       key_range: payload.key_range,
118 |       key_cols: payload.key_cols,
119 |       val_cols: payload.val_cols,
120 |     }
121 |   }
122 | 
123 |   fn new_follower(
124 |     _: &mut SlaveContext,
125 |     _: &mut IO,
126 |     payload: CreateTableRMPrepared,
127 |   ) -> CreateTableRMInner {
128 |     CreateTableRMInner {
129 |       tablet_group_id: payload.tablet_group_id,
130 |       table_path: payload.table_path,
131 |       gen: payload.gen,
132 |       key_range: payload.key_range,
133 |       key_cols: payload.key_cols,
134 |       val_cols: payload.val_cols,
135 |     }
136 |   }
137 | 
138 |   fn mk_closed() -> CreateTableClosed {
139 |     CreateTableClosed {}
140 |   }
141 | 
142 |   fn mk_prepared_plm(
143 |     &mut self,
144 |     _: &mut SlaveContext,
145 |     _: &mut IO,
146 |   ) -> Option {
147 |     Some(CreateTableRMPrepared {
148 |       tablet_group_id: self.tablet_group_id.clone(),
149 |       table_path: self.table_path.clone(),
150 |       gen: self.gen.clone(),
151 |       key_range: self.key_range.clone(),
152 |       key_cols: self.key_cols.clone(),
153 |       val_cols: self.val_cols.clone(),
154 |     })
155 |   }
156 | 
157 |   fn prepared_plm_inserted(
158 |     &mut self,
159 |     _: &mut SlaveContext,
160 |     _: &mut IO,
161 |   ) -> CreateTablePrepared {
162 |     CreateTablePrepared {}
163 |   }
164 | 
165 |   fn mk_committed_plm(
166 |     &mut self,
167 |     _: &mut SlaveContext,
168 |     _: &mut IO,
169 |     _: &CreateTableCommit,
170 |   ) -> CreateTableRMCommitted {
171 |     CreateTableRMCommitted {}
172 |   }
173 | 
174 |   /// Construct `TabletContext` so a Tablet can be constructed. We return the `TabletContext`
175 |   /// in the `RMCommitActionData` rather than construct the Tablet here, since we do not have
176 |   /// access to the `SlaveIOCtx`.
177 | fn committed_plm_inserted( 178 | &mut self, 179 | ctx: &mut SlaveContext, 180 | io_ctx: &mut IO, 181 | _: &RMCommittedPLm, 182 | ) -> TabletContext { 183 | let mut rand_seed = [0; 16]; 184 | io_ctx.rand().fill_bytes(&mut rand_seed); 185 | TabletContext { 186 | tablet_config: TabletConfig { 187 | timestamp_suffix_divisor: ctx.slave_config.timestamp_suffix_divisor, 188 | }, 189 | this_sid: ctx.this_sid.clone(), 190 | this_gid: ctx.this_sid.to_gid(), 191 | this_tid: self.tablet_group_id.clone(), 192 | sub_node_path: CTSubNodePath::Tablet(self.tablet_group_id.clone()), 193 | this_eid: ctx.this_eid.clone(), 194 | gossip: ctx.gossip.clone(), 195 | leader_map: ctx.leader_map.value().clone(), 196 | storage: GenericMVTable::new(), 197 | this_table_path: self.table_path.clone(), 198 | this_sharding_gen: Gen(0), 199 | this_tablet_key_range: self.key_range.clone(), 200 | sharding_done: true, 201 | table_schema: TableSchema { 202 | key_cols: self.key_cols.clone(), 203 | val_cols: MVM::init(self.val_cols.clone().into_iter().collect()), 204 | }, 205 | presence_timestamp: mk_t(0), 206 | verifying_writes: Default::default(), 207 | inserting_prepared_writes: Default::default(), 208 | prepared_writes: Default::default(), 209 | committed_writes: Default::default(), 210 | waiting_read_protected: Default::default(), 211 | inserting_read_protected: Default::default(), 212 | read_protected: Default::default(), 213 | waiting_locked_cols: Default::default(), 214 | inserting_locked_cols: Default::default(), 215 | ms_root_query_map: Default::default(), 216 | tablet_bundle: vec![], 217 | } 218 | } 219 | 220 | fn mk_aborted_plm( 221 | &mut self, 222 | _: &mut SlaveContext, 223 | _: &mut IO, 224 | ) -> CreateTableRMAborted { 225 | CreateTableRMAborted {} 226 | } 227 | 228 | fn aborted_plm_inserted(&mut self, _: &mut SlaveContext, _: &mut IO) {} 229 | 230 | fn reconfig_snapshot(&self) -> CreateTableRMInner { 231 | self.clone() 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/slave_group_create_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | mk_cid, mk_sid, update_all_eids, CoordGroupId, EndpointId, Gen, LeadershipId, MasterIOCtx, 3 | PaxosGroupIdTrait, SlaveGroupId, 4 | }; 5 | use crate::master::{MasterContext, MasterPLm}; 6 | use crate::message as msg; 7 | use serde::{Deserialize, Serialize}; 8 | use std::collections::{BTreeMap, BTreeSet}; 9 | 10 | // ----------------------------------------------------------------------------------------------- 11 | // PLms 12 | // ----------------------------------------------------------------------------------------------- 13 | 14 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 15 | pub struct ConfirmCreateGroup { 16 | sid: SlaveGroupId, 17 | } 18 | 19 | // ----------------------------------------------------------------------------------------------- 20 | // SlaveGroupCreateES 21 | // ----------------------------------------------------------------------------------------------- 22 | 23 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 24 | enum State { 25 | Follower, 26 | WaitingConfirmed(BTreeSet), 27 | InsertingConfirmed, 28 | } 29 | 30 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 31 | struct SlaveGroupCreateES { 32 | create_msg: msg::CreateSlaveGroup, 33 | paxos_nodes: Vec, 34 | state: State, 35 | } 36 | 37 | impl SlaveGroupCreateES { 38 | /// Constructs an `ES`, sending out `CreateSlaveGroup` 
if this is the Master node. 39 | fn create( 40 | ctx: &mut MasterContext, 41 | io_ctx: &mut IO, 42 | sid: SlaveGroupId, 43 | paxos_nodes: Vec, 44 | coord_ids: Vec, 45 | ) -> SlaveGroupCreateES { 46 | // Construct the `CreateSlaveGroup` message. 47 | let create_msg = msg::CreateSlaveGroup { 48 | gossip: ctx.gossip.clone(), 49 | leader_map: ctx.leader_map.value().clone(), 50 | sid, 51 | paxos_nodes: paxos_nodes.clone(), 52 | coord_ids, 53 | }; 54 | 55 | // If this is the Leader, start the new Slave Nodes 56 | let state = if ctx.is_leader() { 57 | for eid in &paxos_nodes { 58 | io_ctx.send( 59 | eid, 60 | msg::NetworkMessage::FreeNode(msg::FreeNodeMessage::CreateSlaveGroup(create_msg.clone())), 61 | ) 62 | } 63 | State::WaitingConfirmed(BTreeSet::new()) 64 | } else { 65 | // Otherwise, start in the `Follower` state. 66 | State::Follower 67 | }; 68 | 69 | SlaveGroupCreateES { create_msg, paxos_nodes, state } 70 | } 71 | 72 | /// Handles the `ConfirmSlaveCreation` sent back by a node that successfully constructed itself. 73 | fn handle_confirm_msg( 74 | &mut self, 75 | ctx: &mut MasterContext, 76 | _: &mut IO, 77 | msg: msg::ConfirmSlaveCreation, 78 | ) { 79 | match &mut self.state { 80 | State::WaitingConfirmed(eids) => { 81 | // Add in the incoming `EndpointId`. 82 | debug_assert!(self.paxos_nodes.contains(&msg.sender_eid)); 83 | eids.insert(msg.sender_eid.clone()); 84 | 85 | // If a majority of nodes have responded, we can finish. 86 | if 2 * eids.len() > self.paxos_nodes.len() { 87 | ctx.master_bundle.plms.push(MasterPLm::ConfirmCreateGroup(ConfirmCreateGroup { 88 | sid: self.create_msg.sid.clone(), 89 | })); 90 | self.state = State::InsertingConfirmed; 91 | } 92 | } 93 | _ => {} 94 | } 95 | } 96 | 97 | /// Handles the insertion of the `ConfirmCreateGroup` PLm. 98 | fn handle_confirm_plm(&mut self, ctx: &mut MasterContext, io_ctx: &mut IO) { 99 | match &self.state { 100 | State::Follower | State::InsertingConfirmed => { 101 | // Update the GossipData 102 | let sid = &self.create_msg.sid; 103 | let paxos_nodes = &self.create_msg.paxos_nodes; 104 | ctx.gossip.update(|gossip_data| { 105 | gossip_data.slave_address_config.insert(sid.clone(), paxos_nodes.clone()) 106 | }); 107 | 108 | // Update the LeaderMap 109 | let lid = LeadershipId { gen: Gen(0), eid: paxos_nodes.get(0).unwrap().clone() }; 110 | ctx.leader_map.update(move |leader_map| { 111 | leader_map.insert(sid.to_gid(), lid); 112 | }); 113 | 114 | // Update the `all_eids` 115 | update_all_eids(&mut ctx.all_eids, &vec![], self.create_msg.paxos_nodes.clone()); 116 | 117 | if ctx.is_leader() { 118 | // Broadcast the GossipData. 119 | ctx.broadcast_gossip(io_ctx); 120 | } 121 | } 122 | State::WaitingConfirmed(_) => {} 123 | } 124 | } 125 | 126 | /// Handle the current (Master) leader changing. 127 | fn leader_changed(&mut self, ctx: &mut MasterContext, io_ctx: &mut IO) { 128 | match &self.state { 129 | State::Follower => { 130 | if ctx.is_leader() { 131 | // Broadcast `CreateSlaveGroup` and then go to `WaitingConfirmed`. 132 | for eid in &self.paxos_nodes { 133 | io_ctx.send( 134 | eid, 135 | msg::NetworkMessage::FreeNode(msg::FreeNodeMessage::CreateSlaveGroup( 136 | self.create_msg.clone(), 137 | )), 138 | ) 139 | } 140 | self.state = State::WaitingConfirmed(BTreeSet::new()) 141 | } 142 | } 143 | State::WaitingConfirmed(_) | State::InsertingConfirmed => { 144 | self.state = State::Follower; 145 | } 146 | } 147 | } 148 | 149 | /// If this node is a Follower, a copy of this `SlaveGroupCreateES` is returned. 
If this 150 | /// node is a Leader, then the value of this `SlaveGroupCreateES` that would result from 151 | /// losing Leadership is returned (i.e. after calling `leader_changed`). 152 | fn reconfig_snapshot(&self) -> SlaveGroupCreateES { 153 | SlaveGroupCreateES { 154 | create_msg: self.create_msg.clone(), 155 | paxos_nodes: self.paxos_nodes.clone(), 156 | state: State::Follower, 157 | } 158 | } 159 | } 160 | 161 | // ----------------------------------------------------------------------------------------------- 162 | // ES Container Functions 163 | // ----------------------------------------------------------------------------------------------- 164 | 165 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 166 | pub struct SlaveGroupCreateESS { 167 | ess: BTreeMap, 168 | } 169 | 170 | impl SlaveGroupCreateESS { 171 | pub fn new() -> SlaveGroupCreateESS { 172 | SlaveGroupCreateESS { ess: Default::default() } 173 | } 174 | 175 | // Leader-only 176 | 177 | pub fn handle_msg( 178 | &mut self, 179 | ctx: &mut MasterContext, 180 | io_ctx: &mut IO, 181 | confirm_msg: msg::ConfirmSlaveCreation, 182 | ) { 183 | if let Some(es) = self.ess.get_mut(&confirm_msg.sid) { 184 | es.handle_confirm_msg(ctx, io_ctx, confirm_msg); 185 | } 186 | } 187 | 188 | pub fn handle_new_slaves( 189 | &mut self, 190 | ctx: &mut MasterContext, 191 | io_ctx: &mut IO, 192 | new_slave_groups: BTreeMap, Vec)>, 193 | ) { 194 | // Construct `SlaveGroupCreateES`s accordingly 195 | for (sid, (paxos_nodes, coord_ids)) in new_slave_groups { 196 | let es = SlaveGroupCreateES::create(ctx, io_ctx, sid.clone(), paxos_nodes, coord_ids); 197 | self.ess.insert(sid, es); 198 | } 199 | } 200 | 201 | // Leader and Follower 202 | 203 | pub fn handle_plm( 204 | &mut self, 205 | ctx: &mut MasterContext, 206 | io_ctx: &mut IO, 207 | confirm_create: ConfirmCreateGroup, 208 | ) { 209 | // Here, we remove the ES and then finish it off. 210 | let mut es = self.ess.remove(&confirm_create.sid).unwrap(); 211 | es.handle_confirm_plm(ctx, io_ctx); 212 | } 213 | 214 | pub fn handle_lc(&mut self, ctx: &mut MasterContext, io_ctx: &mut IO) { 215 | for (_, es) in &mut self.ess { 216 | es.leader_changed(ctx, io_ctx); 217 | } 218 | } 219 | 220 | /// Add in the `SlaveGroupCreateES` where at least `ReconfigSlaveGroup` PLm has been inserted. 221 | pub fn handle_reconfig_snapshot(&self) -> SlaveGroupCreateESS { 222 | let mut create_ess = SlaveGroupCreateESS::new(); 223 | for (qid, es) in &self.ess { 224 | let es = es.reconfig_snapshot(); 225 | create_ess.ess.insert(qid.clone(), es); 226 | } 227 | create_ess 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /src/bin/paxos/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | use crate::simulation::{SimConfig, SimpleBundle, Simulation}; 4 | use rand::RngCore; 5 | use rand_xorshift::XorShiftRng; 6 | use runiversal::common::{Gen, LeadershipId}; 7 | use runiversal::message as msg; 8 | use std::iter::FromIterator; 9 | 10 | mod simulation; 11 | 12 | fn main() { 13 | test(); 14 | } 15 | 16 | /** 17 | 18 | Next tests 19 | 1. We might not be exercising retries because of how long they take. We 20 | should reduce the timer event times when doing simulation tests a little 21 | so it is not as expensive. 
22 | 
23 | */
24 | 
25 | fn test() {
26 |   println!("test_basic");
27 |   test_basic();
28 | 
29 |   println!("test_leader_partition");
30 |   test_leader_partition();
31 | 
32 |   println!("test_general_partition");
33 |   test_general_partition();
34 | 
35 |   println!("Test Successful!");
36 | }
37 | 
38 | fn default_config() -> SimConfig {
39 |   SimConfig { target_temp_blocked_frac: 0.5, max_pause_time_ms: 2000 }
40 | }
41 | 
42 | fn print_stats(sim: &Simulation) {
43 |   for (_, paxos_data) in &sim.paxos_data {
44 |     println!("Size: {:#?}", sim.max_common_index + paxos_data.paxos_log.len());
45 |   }
46 | }
47 | 
48 | // -----------------------------------------------------------------------------------------------
49 | //  test_basic
50 | // -----------------------------------------------------------------------------------------------
51 | 
52 | /// This is a basic test where random queues are temporarily paused at random times.
53 | fn test_basic() {
54 |   let mut sim = Simulation::new([0; 16], 5, default_config());
55 |   sim.simulate_n_ms(1000);
56 |   assert!(sim.global_paxos_log.len() > 0, "Failed! No elements in Global Paxos Log.");
57 |   print_stats(&sim);
58 | }
59 | 
60 | // -----------------------------------------------------------------------------------------------
61 | //  test_leader_partition
62 | // -----------------------------------------------------------------------------------------------
63 | 
64 | /// Run the simulation for a bit, find the latest leader, partition it out, and then
65 | /// run the simulation some more. Verify that more `PLEntry`s were added.
66 | fn test_leader_partition() {
67 |   let mut sim = Simulation::new([0; 16], 5, default_config());
68 |   sim.simulate_n_ms(10000);
69 |   print_stats(&sim);
70 | 
71 |   // Find the latest Leader.
72 |   let lid = LeadershipId { gen: Gen(0), eid: sim.address_config[0].clone() };
73 |   let mut latest_leader_changed = msg::LeaderChanged { lid };
74 |   for entry in sim.global_paxos_log.iter().rev() {
75 |     if let msg::PLEntry::LeaderChanged(leader_changed) = entry {
76 |       latest_leader_changed = leader_changed.clone();
77 |       break;
78 |     }
79 |   }
80 | 
81 |   // Partition out this Leader.
82 |   let leader_eid = latest_leader_changed.lid.eid;
83 |   let eids = sim.address_config.clone();
84 |   for eid in eids {
85 |     sim.block_queue_permanently(leader_eid.clone(), eid.clone());
86 |     sim.block_queue_permanently(eid, leader_eid.clone());
87 |   }
88 | 
89 |   let old_log_len = sim.global_paxos_log.len();
90 |   sim.simulate_n_ms(20000);
91 | 
92 |   assert!(
93 |     old_log_len < sim.global_paxos_log.len(),
94 |     "Failed! No new log messages were added since the old Leader died.",
95 |   );
96 | 
97 |   print_stats(&sim);
98 | }
99 | 
100 | // -----------------------------------------------------------------------------------------------
101 | //  test_general_partition
102 | // -----------------------------------------------------------------------------------------------
103 | 
104 | /// Generates a partition out of `indices`, where at least one partition has the
105 | /// majority of nodes (as Paxos requires).
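// -----------------------------------------------------------------------------------------------
// Aside: an illustrative check (not part of this file) of the majority arithmetic used by
// `gen_partition` below. With n nodes, the first block is given n/2 + 1 members (integer
// division), which is a strict majority for both even and odd n, so Paxos can always make
// progress inside that block.
// -----------------------------------------------------------------------------------------------

#[test]
fn majority_block_is_a_strict_majority() {
  for n in 1..100usize {
    let m = n / 2 + 1; // mirrors `majority_partition_len` in `gen_partition`
    assert!(2 * m > n && m <= n);
  }
}
// --------------------------------------- end of aside -----------------------------------------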
106 | fn gen_partition(rand: &mut XorShiftRng, mut indices: Vec) -> Vec> {
107 |   assert!(indices.len() > 0);
108 | 
109 |   fn add_partition(
110 |     rand: &mut XorShiftRng,
111 |     partition: &mut Vec>,
112 |     rem_indices: &mut Vec,
113 |     new_partition_len: usize,
114 |   ) {
115 |     assert!(new_partition_len <= rem_indices.len());
116 |     let mut new_partition = Vec::::new();
117 |     while new_partition.len() < new_partition_len {
118 |       let r = rand.next_u32() as usize % rem_indices.len();
119 |       new_partition.push(rem_indices.remove(r));
120 |     }
121 |     partition.push(new_partition);
122 |   }
123 | 
124 |   let mut partition = Vec::>::new();
125 |   // Construct the majority partition
126 |   let majority_partition_len = indices.len() / 2 + 1;
127 |   add_partition(rand, &mut partition, &mut indices, majority_partition_len);
128 |   // Construct other partitions
129 |   while indices.len() > 0 {
130 |     let next_partition_len = (rand.next_u32() as usize % indices.len()) + 1;
131 |     add_partition(rand, &mut partition, &mut indices, next_partition_len);
132 |   }
133 | 
134 |   partition
135 | }
136 | 
137 | /// Here, `partition` is a partition of the indices of `sim.address_config`. This function
138 | /// permanently blocks queues between these partitions.
139 | fn block_partition(sim: &mut Simulation, partition: &Vec>) {
140 |   let eids = sim.address_config.clone();
141 |   for i in 0..partition.len() {
142 |     for j in 0..partition.len() {
143 |       if i != j {
144 |         for idx_i in partition.get(i).unwrap() {
145 |           for idx_j in partition.get(j).unwrap() {
146 |             let eid_i = eids.get(*idx_i).unwrap().clone();
147 |             let eid_j = eids.get(*idx_j).unwrap().clone();
148 |             sim.block_queue_permanently(eid_i, eid_j);
149 |           }
150 |         }
151 |       }
152 |     }
153 |   }
154 | }
155 | 
156 | /// Here, `partition` is a partition of the indices of `sim.address_config`. This function
157 | /// permanently unblocks queues between these partitions.
158 | fn unblock_partition(sim: &mut Simulation, partition: &Vec>) {
159 |   let eids = sim.address_config.clone();
160 |   for i in 0..partition.len() {
161 |     for j in 0..partition.len() {
162 |       if i != j {
163 |         for idx_i in partition.get(i).unwrap() {
164 |           for idx_j in partition.get(j).unwrap() {
165 |             let eid_i = eids.get(*idx_i).unwrap().clone();
166 |             let eid_j = eids.get(*idx_j).unwrap().clone();
167 |             sim.unblock_queue_permanently(eid_i, eid_j);
168 |           }
169 |         }
170 |       }
171 |     }
172 |   }
173 | }
174 | 
175 | fn verify_leadership_changes(sim: &Simulation, expected_changes: u32) {
176 |   let lid = LeadershipId { gen: Gen(0), eid: sim.address_config[0].clone() };
177 |   // Verify that there were Leadership changes.
178 |   let mut num_leader_changes = 0;
179 |   for entry in sim.global_paxos_log.iter() {
180 |     if let msg::PLEntry::LeaderChanged(leader_changed) = entry {
181 |       assert_ne!(lid, leader_changed.lid);
182 |       num_leader_changes += 1;
183 |     }
184 |   }
185 | 
186 |   assert!(
187 |     num_leader_changes >= expected_changes,
188 |     "Test Failed! Not enough LeaderChanges occurred: {:?} instead of {:?}.",
189 |     num_leader_changes,
190 |     expected_changes
191 |   );
192 | }
193 | 
194 | /// Loop around for some time, creating and changing network partitions. Verify that
195 | /// the algorithm is safe and that new `PLEntry`s constantly get added.
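/// Concretely, each window between partition changes in which no new `PLEntry`
/// lands counts as an "unlive period", and fewer than 30% of all windows may be
/// unlive; this threshold is a heuristic of the test, not a protocol guarantee.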
196 | fn test_general_partition() {
197 |   let sim_config = SimConfig { target_temp_blocked_frac: 0.0, max_pause_time_ms: 0 };
198 |   let mut sim = Simulation::new([0; 16], 5, sim_config);
199 |   let all_indices: Vec = (0..sim.address_config.len()).collect();
200 | 
201 |   // Verification metadata
202 |   let mut num_unlive_periods = 0;
203 |   let mut num_periods = 0;
204 |   let mut last_log_len = 0;
205 | 
206 |   // Simulation
207 |   let mut cur_time = 0;
208 |   let mut cur_partition = gen_partition(&mut sim.rand, all_indices.clone());
209 |   while cur_time < 200000 {
210 |     let time_for_partition = sim.rand.next_u32() as usize % 15000;
211 |     sim.simulate_n_ms(time_for_partition as u32);
212 |     cur_time += time_for_partition;
213 | 
214 |     // Update verification metadata
215 |     if sim.global_paxos_log.len() == last_log_len {
216 |       num_unlive_periods += 1;
217 |     }
218 |     num_periods += 1;
219 |     last_log_len = sim.global_paxos_log.len();
220 | 
221 |     // Change the partition
222 |     unblock_partition(&mut sim, &cur_partition);
223 |     cur_partition = gen_partition(&mut sim.rand, all_indices.clone());
224 |     block_partition(&mut sim, &cur_partition);
225 |   }
226 | 
227 |   // Make simple assertions about the Verification Metadata.
228 |   // Check that the fraction of unlive periods among all periods is low enough.
229 |   assert!(
230 |     (num_unlive_periods as f32) < 0.3 * num_periods as f32,
231 |     "Failed! There were too many unlive periods: {:?} of {:?}.",
232 |     num_unlive_periods,
233 |     num_periods
234 |   );
235 | 
236 |   // Verify that there were Leadership changes.
237 |   verify_leadership_changes(&sim, 5);
238 |   print_stats(&sim);
239 | }
240 | 
--------------------------------------------------------------------------------
/run:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Extract
4 | A1="${1:-}"
5 | A2="${2:-}"
6 | A3="${3:-}"
7 | A4="${4:-}"
8 | 
9 | # The number of Slave Groups to instantiate (e.g. from the `start` subcommand)
10 | readonly NUM_SLAVES_GROUPS=2
11 | readonly NUM_SLAVES=$((5 * NUM_SLAVES_GROUPS))
12 | 
13 | is_help_flag() {
14 |   [[ "$1" = "-h" || "$1" = "--help" ]]
15 | }
16 | 
17 | function masters() {
18 |   # Start the image by running transact, and have it be in the background.
19 |   for ((i = 10; i < 15; i++)); do
20 |     CONTAINER_NAME="runiversal${i}";
21 |     docker run --cap-add=NET_ADMIN -d --name="${CONTAINER_NAME}" --ip 172.20.0."${i}" --network=runiversal-net runiversal scripts/transact -i 172.20.0."${i}" -t masterbootup > /dev/null &
22 |   done
23 |   wait
24 | }
25 | 
26 | function slaves() {
27 |   # Start the image by running transact, and have it be in the background.
28 |   for ((i = 15; i < $((15 + NUM_SLAVES)); i++)); do
29 |     CONTAINER_NAME="runiversal${i}";
30 |     docker run --cap-add=NET_ADMIN -d --name="${CONTAINER_NAME}" --ip 172.20.0."${i}" --network=runiversal-net runiversal scripts/transact -i 172.20.0."${i}" -t freenode -f newslave -e 172.20.0.10 > /dev/null &
31 |   done
32 |   wait
33 | }
34 | 
35 | function client_init () {
36 |   # Start the image by running the client, but only to initialize the Master Group.
37 |   docker run --cap-add=NET_ADMIN -it --name=rclient --ip 172.20.0.2 --network=runiversal-net runiversal scripts/client -i 172.20.0.2 -m '172.20.0.10 172.20.0.11 172.20.0.12 172.20.0.13 172.20.0.14' > /dev/null;
38 |   # After we exit the terminal, clean up the container.
39 |   docker container rm rclient > /dev/null;
40 | }
41 | 
42 | function client() {
43 |   # Start the image by running the client, and start an interactive terminal.
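  # (Here, 172.20.0.10 is assumed to be the current leader of the Master group,
  # matching the Master addresses passed to `client_init` above.)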
44 |   docker run --cap-add=NET_ADMIN -it --name=rclient --ip 172.20.0.2 --network=runiversal-net runiversal scripts/client -i 172.20.0.2 -e 172.20.0.10;
45 |   # After we exit the terminal, clean up the container.
46 |   docker container rm rclient;
47 | }
48 | 
49 | if [[ "$A1" = "setup" ]]; then
50 |   if is_help_flag "$A2"; then
51 |     echo "Usage: ./run setup
52 | 
53 | Setup a bridge network and the initial Docker image."
54 |     exit 0
55 |   fi
56 | 
57 |   # This should be run once after installing docker to create the docker
58 |   # network, the base image, etc.
59 |   docker network create --subnet=172.20.0.0/16 runiversal-net
60 |   docker build -t runiversal -f Dockerfile.init .
61 |   docker build -t runiversal .
62 | 
63 | elif [[ "$A1" = "build" ]]; then
64 |   if is_help_flag "$A2"; then
65 |     echo "Usage: ./run build
66 | 
67 | Do an incremental build based on the prior Docker image."
68 |     exit 0
69 |   fi
70 | 
71 |   docker build -t runiversal .
72 | 
73 | # Spin up a client and turn the current shell into an interactive CLI.
74 | elif [[ "$A1" = "new_client" ]]; then
75 |   if is_help_flag "$A2"; then
76 |     echo "Usage: ./run new_client
77 | 
78 | Create a client container, turning the current shell into an interactive session.
79 | 
80 | E.g. \`./run new_client 3 10\` will create a docker container named \`rclient3\` with
81 | IP \`172.20.0.3\` who will register itself with the system by contacting \`172.20.0.10\`,
82 | which needs to be the current leader of the Master group."
83 |     exit 0
84 |   fi
85 | 
86 |   docker run --cap-add=NET_ADMIN -it --name=rclient"$A2" --ip 172.20.0."$A2" --network=runiversal-net runiversal scripts/client -i 172.20.0."$A2" -e 172.20.0."$A3"
87 | 
88 | # Spin up a slave in the background (starting as a FreeNode)
89 | elif [[ "$A1" = "new_node" ]]; then
90 |   if is_help_flag "$A2"; then
91 |     echo "Usage: ./run new_node
92 | 
93 | Create and register a new node with the system for use as either a replacement
94 | in case an existing node goes down, or to expand the system to scale it up.
95 | 
96 | Values for :
97 |   reconfig     Create a free node that is used to replace an existing node that died.
98 |   new_slave    Create a free node that is used to create a new SlaveGroup if enough
99 |                such nodes have been registered.
100 | 
101 | E.g. \`./run new_node 31 reconfig 10\` will create a docker container named \`runiversal31\` with
102 | IP \`172.20.0.31\` who will register itself with the system as a node meant for reconfiguration by
103 | contacting \`172.20.0.10\`, which needs to be the current leader of the Master group."
104 |     exit 0
105 |   fi
106 | 
107 |   docker run --cap-add=NET_ADMIN -d --name=runiversal"$A2" --ip 172.20.0."$A2" --network=runiversal-net runiversal scripts/transact -i 172.20.0."$A2" -t freenode -f "$A3" -e 172.20.0."$A4"
108 | 
109 | elif [[ "$A1" = "masters" ]]; then
110 |   masters
111 | 
112 | elif [[ "$A1" = "slaves" ]]; then
113 |   slaves
114 | 
115 | elif [[ "$A1" = "client_init" ]]; then
116 |   client_init
117 | 
118 | elif [[ "$A1" = "client" ]]; then
119 |   client
120 | 
121 | # Spin up the Master Group, `NUM_SLAVES_GROUPS` number of Slave Groups, and an interactive client.
122 | elif [[ "$A1" = "start" ]]; then
123 |   if is_help_flag "$A2"; then
124 |     echo "Usage: ./run start
125 | 
126 | Instantiate the system locally using Docker."
127 |     exit 0
128 |   fi
129 | 
130 |   masters
131 |   client_init
132 |   slaves
133 |   client
134 | 
135 | # Clean up only what is allocated by the `start` command.
136 | elif [[ "$A1" = "clean" ]]; then
137 |   CONTAINERS=("rclient");
138 |   for ((i = 10; i < $((15 + NUM_SLAVES)); i++)); do
139 |     CONTAINERS+=("runiversal${i}");
140 |   done
141 |   docker kill "${CONTAINERS[@]}" &> /dev/null;
142 |   docker container rm "${CONTAINERS[@]}" &> /dev/null;
143 | 
144 | # Do a deep clean, where we just kill all `runiversal` and `rclient` nodes that we might have spun up.
145 | elif [[ "$A1" = "dclean" ]]; then
146 |   if is_help_flag "$A2"; then
147 |     echo "Usage: ./run dclean
148 | 
149 | Stop and delete all Docker containers associated with rUniversalDB. 'd' stands for 'deep'."
150 |     exit 0
151 |   fi
152 | 
153 |   CONTAINERS=("rclient");
154 |   for ((i = 2; i < 10; i++)); do
155 |     CONTAINERS+=("rclient${i}");
156 |   done
157 |   for ((i = 10; i < 45; i++)); do
158 |     CONTAINERS+=("runiversal${i}");
159 |   done
160 |   docker kill "${CONTAINERS[@]}" &> /dev/null;
161 |   docker container rm "${CONTAINERS[@]}" &> /dev/null;
162 | 
163 | # Clean up a single `runiversal` node.
164 | elif [[ "$A1" = "nclean" ]]; then
165 |   if is_help_flag "$A2"; then
166 |     echo "Usage: ./run nclean
167 | 
168 | Stop and delete a specific system node.
169 | 
170 | E.g. \`./run nclean 20\` will stop and remove the docker container \`runiversal20\`."
171 |     exit 0
172 |   fi
173 | 
174 |   CONTAINER="runiversal${A2}"
175 |   docker kill "${CONTAINER}" &> /dev/null;
176 |   docker container rm "${CONTAINER}" &> /dev/null;
177 | 
178 | # Clean up a single `rclient` node.
179 | elif [[ "$A1" = "cclean" ]]; then
180 |   if is_help_flag "$A2"; then
181 |     echo "Usage: ./run cclean
182 | 
183 | Stop and delete a specific client node.
184 | 
185 | E.g. \`./run cclean 3\` will stop and remove the docker container \`rclient3\`."
186 |     exit 0
187 |   fi
188 | 
189 |   CONTAINER="rclient${A2}"
190 |   docker kill "${CONTAINER}" &> /dev/null;
191 |   docker container rm "${CONTAINER}" &> /dev/null;
192 | 
193 | elif [[ "$A1" = "test" ]]; then
194 |   if is_help_flag "$A2"; then
195 |     echo "Usage: ./run test
196 | 
197 | Run the main simulation tests."
198 |     exit 0
199 |   fi
200 | 
201 |   cargo run --release --bin simtest -- -i 8 -r 80
202 | 
203 | elif [[ "$A1" = "test_all" ]]; then
204 |   if is_help_flag "$A2"; then
205 |     echo "Usage: ./run test_all
206 | 
207 | Run all simulation tests."
208 |     exit 0
209 |   fi
210 | 
211 |   cargo run --release --bin paxos;
212 |   cargo run --release --bin paxos2pc_sim;
213 |   cargo run --release --bin simtest -- -i 8 -r 80;
214 | 
215 | elif [[ "$A1" = "bench" ]]; then
216 |   if is_help_flag "$A2"; then
217 |     echo "Usage: ./run bench
218 | 
219 | Do a small run of the main simulation tests to gauge performance regressions."
220 |     exit 0
221 |   fi
222 | 
223 |   cargo run --release --bin simtest -- -i 4 -r 8
224 | 
225 | else
226 |   if ! is_help_flag "$A1"; then
227 |     echo "unrecognized command: $A1
228 | "
229 |   fi
230 | 
231 |   echo "Usage: ./run [COMMAND]
232 | 
233 | A tool to help build, run, and test rUniversalDB locally.
234 | 
235 | Commands:
236 |   setup         Setup a bridge network and the initial Docker image.
237 |   build         Do an incremental build based on the prior Docker image.
238 |   new_client    Create a client container, turning the current shell into an interactive session.
239 |   new_node      Create a new node for the system to use if needed.
240 |   start         Instantiate the system locally using Docker.
241 |   dclean        Stop and delete all Docker containers associated with rUniversalDB.
242 |   nclean        Stop and delete a specific system node.
243 |   cclean        Stop and delete a specific client node.
244 |   test          Run the main simulation tests.
245 |   test_all      Run all simulation tests.
246 |   bench         Do a small run of the main simulation tests to gauge performance regressions."
247 | 
248 | fi
249 | 
--------------------------------------------------------------------------------
/src/ms_table_delete_es.rs:
--------------------------------------------------------------------------------
1 | use crate::col_usage::{col_collecting_cb, col_ref_collecting_cb, QueryIterator};
2 | use crate::common::{mk_qid, ColName, CoreIOCtx, OrigP, QueryESResult, WriteRegion};
3 | use crate::common::{
4 |   ColValN, ContextRow, PrimaryKey, QueryId, TablePath, TableView, TransTableName,
5 | };
6 | use crate::expression::is_true;
7 | use crate::gr_query_es::{GRQueryConstructorView, GRQueryES};
8 | use crate::message as msg;
9 | use crate::ms_table_es::{GeneralQueryES, MSTableES, SqlQueryInner};
10 | use crate::server::{evaluate_delete, mk_eval_error, ContextConstructor, GeneralColumnRef};
11 | use crate::sql_ast::proc;
12 | use crate::storage::{GenericTable, MSStorageView};
13 | use crate::table_read_es::compute_read_region;
14 | use crate::tablet::{
15 |   compute_subqueries, MSQueryES, RequestedReadProtected, StorageLocalTable, TPESAction,
16 |   TabletContext,
17 | };
18 | use std::collections::BTreeSet;
19 | use std::iter::FromIterator;
20 | 
21 | // -----------------------------------------------------------------------------------------------
22 | //  MSTableDeleteES
23 | // -----------------------------------------------------------------------------------------------
24 | 
25 | pub type MSTableDeleteES = MSTableES;
26 | 
27 | #[derive(Debug)]
28 | pub struct DeleteInner {
29 |   sql_query: proc::Delete,
30 | }
31 | 
32 | impl DeleteInner {
33 |   pub fn new(sql_query: proc::Delete) -> Self {
34 |     DeleteInner { sql_query }
35 |   }
36 | }
37 | 
38 | impl SqlQueryInner for DeleteInner {
39 |   fn table_path(&self) -> &TablePath {
40 |     &self.sql_query.table.table_path
41 |   }
42 | 
43 |   fn request_region_locks(
44 |     &mut self,
45 |     ctx: &mut TabletContext,
46 |     io_ctx: &mut IO,
47 |     es: &GeneralQueryES,
48 |   ) -> Result {
49 |     // Collect all `ColName`s of this table that the `ColumnRef`s refer to.
50 |     let mut safe_present_cols = Vec::::new();
51 |     QueryIterator::new().iterate_delete(
52 |       &mut col_collecting_cb(&self.sql_query.table.alias, &mut safe_present_cols),
53 |       &self.sql_query,
54 |     );
55 | 
56 |     // Compute the ReadRegion
57 |     let read_region = compute_read_region(
58 |       &ctx.table_schema.key_cols,
59 |       &ctx.this_tablet_key_range,
60 |       &es.context,
61 |       &self.sql_query.selection,
62 |       &self.sql_query.table.alias,
63 |       safe_present_cols,
64 |       vec![],
65 |     );
66 | 
67 |     // Compute the WriteRegion
68 |     let write_region = WriteRegion {
69 |       row_region: read_region.row_region.clone(),
70 |       presence: true,
71 |       val_col_region: vec![],
72 |     };
73 | 
74 |     // Verify that we have WriteRegion Isolation with Subsequent Reads. We abort
75 |     // if we don't, and we amend this MSQuery's VerifyingReadWriteRegions if we do.
76 |     if !ctx.check_write_region_isolation(&write_region, &es.timestamp) {
77 |       Err(msg::QueryError::WriteRegionConflictWithSubsequentRead)
78 |     } else {
79 |       // Move the MSTableDeleteES to the Pending state with the given ReadRegion.
80 |       let protect_qid = mk_qid(io_ctx.rand());
81 | 
82 |       // Add a ReadRegion to the `m_waiting_read_protected` and the
83 |       // WriteRegion into `m_write_protected`.
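      // (Since a Delete removes whole rows, the WriteRegion constructed above covers
      // the same row region as the ReadRegion with `presence: true` and no value
      // columns, rather than naming specific value columns.)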
84 | let verifying = ctx.verifying_writes.get_mut(&es.timestamp).unwrap(); 85 | verifying.m_waiting_read_protected.insert(RequestedReadProtected { 86 | orig_p: OrigP::new(es.query_id.clone()), 87 | query_id: protect_qid.clone(), 88 | read_region, 89 | }); 90 | verifying.m_write_protected.insert(write_region); 91 | 92 | Ok(protect_qid) 93 | } 94 | } 95 | 96 | fn compute_subqueries( 97 | &mut self, 98 | ctx: &mut TabletContext, 99 | io_ctx: &mut IO, 100 | es: &GeneralQueryES, 101 | ms_query_es: &mut MSQueryES, 102 | ) -> Vec { 103 | compute_subqueries( 104 | GRQueryConstructorView { 105 | root_query_path: &es.root_query_path, 106 | timestamp: &es.timestamp, 107 | sql_query: &self.sql_query, 108 | query_plan: &es.query_plan, 109 | query_id: &es.query_id, 110 | context: &es.context, 111 | }, 112 | io_ctx.rand(), 113 | StorageLocalTable::new( 114 | &ctx.table_schema, 115 | &es.timestamp, 116 | &self.sql_query.table, 117 | &ctx.this_tablet_key_range, 118 | &self.sql_query.selection, 119 | MSStorageView::new( 120 | &ctx.storage, 121 | &ctx.table_schema, 122 | &ms_query_es.update_views, 123 | es.tier.clone(), 124 | ), 125 | ), 126 | ) 127 | } 128 | 129 | fn finish( 130 | &mut self, 131 | ctx: &mut TabletContext, 132 | _: &mut IO, 133 | es: &GeneralQueryES, 134 | (children, subquery_results): ( 135 | Vec<(Vec, Vec)>, 136 | Vec>, 137 | ), 138 | ms_query_es: &mut MSQueryES, 139 | ) -> Option { 140 | // Create the ContextConstructor. 141 | let context_constructor = ContextConstructor::new( 142 | es.context.context_schema.clone(), 143 | StorageLocalTable::new( 144 | &ctx.table_schema, 145 | &es.timestamp, 146 | &self.sql_query.table, 147 | &ctx.this_tablet_key_range, 148 | &self.sql_query.selection, 149 | MSStorageView::new( 150 | &ctx.storage, 151 | &ctx.table_schema, 152 | &ms_query_es.update_views, 153 | es.tier.clone(), 154 | ), 155 | ), 156 | children, 157 | ); 158 | 159 | // These are all of the `ColNames` that we need in order to evaluate the Delete. 160 | // This consists of all Top-Level Columns for every expression, as well as all Key 161 | // Columns (since they are included in the resulting table). 162 | let mut top_level_cols_set = BTreeSet::::new(); 163 | let cur_alias = &self.sql_query.table.alias; 164 | top_level_cols_set.extend(ctx.table_schema.get_key_col_refs(cur_alias)); 165 | QueryIterator::new_top_level() 166 | .iterate_delete(&mut col_ref_collecting_cb(&mut top_level_cols_set), &self.sql_query); 167 | let top_level_col_names = Vec::from_iter(top_level_cols_set.into_iter()); 168 | let top_level_extra_col_refs = 169 | Vec::from_iter(top_level_col_names.iter().map(|c| GeneralColumnRef::Named(c.clone()))); 170 | 171 | // Setup the TableView that we are going to return and the UpdateView that we're going 172 | // to hold in the MSQueryES. 173 | let mut res_table_view = TableView::new(); 174 | let mut update_view = GenericTable::new(); 175 | 176 | // Finally, iterate over the Context Rows of the subqueries and compute the final values. 177 | let eval_res = context_constructor.run( 178 | &es.context.context_rows, 179 | top_level_extra_col_refs, 180 | &mut |context_row_idx: usize, 181 | top_level_col_vals: Vec, 182 | contexts: Vec<(ContextRow, usize)>, 183 | count: u64| { 184 | assert_eq!(context_row_idx, 0); // Recall there is only one ContextRow for Updates. 185 | 186 | // First, we extract the subquery values using the child Context indices. 
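        // (Each element of `contexts` pairs a child `ContextRow` with an index into
        // the corresponding subquery's results; the loop below only uses the index.)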
187 | let mut subquery_vals = Vec::::new(); 188 | for (subquery_idx, (_, child_context_idx)) in contexts.iter().enumerate() { 189 | let val = subquery_results.get(subquery_idx).unwrap().get(*child_context_idx).unwrap(); 190 | subquery_vals.push(val.clone()); 191 | } 192 | 193 | // Now, we evaluate all expressions in the SQL query and amend the 194 | // result to this TableView (if the WHERE clause evaluates to true). 195 | let evaluated_delete = evaluate_delete( 196 | &self.sql_query, 197 | &top_level_col_names, 198 | &top_level_col_vals, 199 | &subquery_vals, 200 | )?; 201 | if is_true(&evaluated_delete.selection)? { 202 | // This means that the current row should be selected for the result. 203 | let mut res_row = Vec::::new(); 204 | 205 | // We reconstruct the PrimaryKey 206 | let mut primary_key = PrimaryKey { cols: vec![] }; 207 | let cur_alias = &self.sql_query.table.alias; 208 | for key_col in &ctx.table_schema.get_key_col_refs(cur_alias) { 209 | let idx = top_level_col_names.iter().position(|col| key_col == col).unwrap(); 210 | let col_val = top_level_col_vals.get(idx).unwrap().clone(); 211 | res_row.push(col_val.clone()); 212 | primary_key.cols.push(col_val.unwrap()); 213 | } 214 | 215 | // Amend the UpdateView to delete the PrimaryKey 216 | update_view.insert((primary_key, None), None); 217 | }; 218 | Ok(()) 219 | }, 220 | ); 221 | 222 | match eval_res { 223 | Ok(()) => { 224 | // Amend the `update_view` in the MSQueryES. 225 | ms_query_es.update_views.insert(es.tier.clone() - 1, update_view); 226 | 227 | // Signal Success and return the data. 228 | Some(TPESAction::Success(QueryESResult { 229 | result: vec![res_table_view], 230 | new_rms: es.new_rms.iter().cloned().collect(), 231 | })) 232 | } 233 | Err(eval_error) => Some(TPESAction::QueryError(mk_eval_error(eval_error))), 234 | } 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /src/shard_snapshot_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | CTSubNodePath, CoreIOCtx, PaxosGroupId, PaxosGroupIdTrait, QueryId, RemoteLeaderChangedPLm, 3 | SlaveIOCtx, TNodePath, TabletGroupId, 4 | }; 5 | use crate::expression::range_might_intersect_row_region; 6 | use crate::finish_query_rm_es::FinishQueryRMES; 7 | use crate::message as msg; 8 | use crate::server::ServerContextBase; 9 | use crate::shard_split_tm_es::STRange; 10 | use crate::slave::{SlaveContext, SlavePLm}; 11 | use crate::storage::{compute_range_storage, remove_range, GenericMVTable}; 12 | use crate::tablet::{ShardingSnapshot, TabletConfig, TabletContext, TabletForwardMsg, TabletPLm}; 13 | use serde::{Deserialize, Serialize}; 14 | use std::collections::{BTreeMap, Bound}; 15 | 16 | // ----------------------------------------------------------------------------------------------- 17 | // PLms 18 | // ----------------------------------------------------------------------------------------------- 19 | 20 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 21 | pub struct ShardingConfirmedPLm { 22 | query_id: QueryId, 23 | } 24 | 25 | // ----------------------------------------------------------------------------------------------- 26 | // ShardingSnapshotES 27 | // ----------------------------------------------------------------------------------------------- 28 | 29 | pub enum ShardingSnapshotAction { 30 | Wait, 31 | Exit, 32 | } 33 | 34 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 35 | enum State { 36 | Follower, 37 | 
WaitingPreparedWrites,
38 |   ShardingSnapshotSent,
39 |   InsertingShardingConfirmed,
40 | }
41 | 
42 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
43 | pub struct ShardingSnapshotES {
44 |   pub query_id: QueryId,
45 |   /// The Target to send the `ShardingSnapshot` to.
46 |   target: STRange,
47 |   /// If this is `true`, the snapshot is sent to the Slave to create the `target`. Otherwise,
48 |   /// the `target` already exists.
49 |   is_new: bool,
50 |   state: State,
51 | }
52 | 
53 | impl ShardingSnapshotES {
54 |   pub fn create_split(
55 |     ctx: &mut TabletContext,
56 |     io_ctx: &mut IO,
57 |     finish_query_ess: &BTreeMap,
58 |     query_id: QueryId,
59 |     target: STRange,
60 |   ) -> ShardingSnapshotES {
61 |     let mut es = ShardingSnapshotES { query_id, target, is_new: false, state: State::Follower };
62 |     es.start(ctx, io_ctx, finish_query_ess);
63 |     es
64 |   }
65 | 
66 |   fn start(
67 |     &mut self,
68 |     ctx: &mut TabletContext,
69 |     io_ctx: &mut IO,
70 |     finish_query_ess: &BTreeMap,
71 |   ) -> ShardingSnapshotAction {
72 |     if ctx.is_leader() {
73 |       self.advance_prepared(ctx, io_ctx, finish_query_ess);
74 |     } else {
75 |       self.state = State::Follower;
76 |     }
77 | 
78 |     ShardingSnapshotAction::Wait
79 |   }
80 | 
81 |   fn send_sharding_snapshot(&mut self, ctx: &mut TabletContext, io_ctx: &mut IO) {
82 |     // Construct the ShardingSnapshot
83 |     let snapshot = ShardingSnapshot {
84 |       this_tid: self.target.tid.clone(),
85 |       this_table_path: ctx.this_table_path.clone(),
86 |       this_sharding_gen: ctx.this_sharding_gen.clone(),
87 |       this_table_key_range: self.target.range.clone(),
88 |       storage: compute_range_storage(&ctx.storage, &self.target.range),
89 |       table_schema: ctx.table_schema.clone(),
90 |       presence_timestamp: ctx.presence_timestamp.clone(),
91 |       committed_writes: ctx.committed_writes.clone(),
92 |       read_protected: ctx.read_protected.clone(),
93 |     };
94 | 
95 |     // Send the Snapshot
96 |     let node_path = ctx.mk_node_path();
97 |     ctx.send_to_slave_common(
98 |       io_ctx,
99 |       self.target.sid.clone(),
100 |       msg::SlaveRemotePayload::ShardingMessage(msg::ShardingMessage {
101 |         query_id: self.query_id.clone(),
102 |         node_path,
103 |         snapshot,
104 |       }),
105 |     );
106 |   }
107 | 
108 |   fn advance_prepared(
109 |     &mut self,
110 |     ctx: &mut TabletContext,
111 |     io_ctx: &mut IO,
112 |     finish_query_ess: &BTreeMap,
113 |   ) {
114 |     let ready_to_send = (|| -> bool {
115 |       // We compute if all `FinishQueryESs` with an old `ShardingGen` are done
116 |       for (_, es) in finish_query_ess {
117 |         match es {
118 |           FinishQueryRMES::Committed => {}
119 |           FinishQueryRMES::Aborted => {}
120 |           FinishQueryRMES::Paxos2PCRMExecOuter(es) => {
121 |             if es.inner.sharding_gen < ctx.this_sharding_gen {
122 |               return false;
123 |             }
124 |           }
125 |           _ => {}
126 |         }
127 |       }
128 | 
129 |       // Then, we check that all ReadRegions in `(waiting/inserting)_read_protected` are within
130 |       // the new TabletKeyRange here. To do this, we simply see if the ReadRegion
131 |       // intersects with the part of the TabletKeyRange that is being sent off.
132 |       let unpersisted_read_protected =
133 |         ctx.waiting_read_protected.iter().chain(ctx.inserting_read_protected.iter());
134 |       for (_, reqs) in unpersisted_read_protected {
135 |         for req in reqs {
136 |           if range_might_intersect_row_region(
137 |             &ctx.table_schema.key_cols,
138 |             &self.target.range,
139 |             &req.read_region.row_region,
140 |           ) {
141 |             return false;
142 |           }
143 |         }
144 |       }
145 | 
146 |       true
147 |     })();
148 | 
149 |     // If so, construct and send the snapshot. Either way, advance the state.
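    // (For orientation, the Leader-side states driven here: `WaitingPreparedWrites`
    // -> `ShardingSnapshotSent` -> `InsertingShardingConfirmed`, dropping back to
    // `Follower` whenever Leadership is lost; see `handle_lc` below.)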
150 |     if ready_to_send {
151 |       self.send_sharding_snapshot(ctx, io_ctx);
152 |       self.state = State::ShardingSnapshotSent;
153 |     } else {
154 |       self.state = State::WaitingPreparedWrites;
155 |     }
156 |   }
157 | 
158 |   /// To determine whether all `FinishQueryES`s and `(waiting/inserting)_read_protected` entries
159 |   /// that must finish have finished, we check every time a `TabletBundle` is inserted.
160 |   /// Note: A less wasteful scheme might be possible later.
161 |   pub fn handle_bundle_processed(
162 |     &mut self,
163 |     ctx: &mut TabletContext,
164 |     io_ctx: &mut IO,
165 |     finish_query_ess: &BTreeMap,
166 |   ) -> ShardingSnapshotAction {
167 |     match &self.state {
168 |       State::WaitingPreparedWrites => {
169 |         self.advance_prepared(ctx, io_ctx, finish_query_ess);
170 |       }
171 |       _ => {}
172 |     }
173 |     ShardingSnapshotAction::Wait
174 |   }
175 | 
176 |   pub fn handle_msg(
177 |     &mut self,
178 |     ctx: &mut TabletContext,
179 |     confirm: msg::ShardingConfirmed,
180 |   ) -> ShardingSnapshotAction {
181 |     match &self.state {
182 |       State::ShardingSnapshotSent => {
183 |         ctx
184 |           .tablet_bundle
185 |           .push(TabletPLm::ShardingConfirmedPLm(ShardingConfirmedPLm { query_id: confirm.qid }));
186 |         self.state = State::InsertingShardingConfirmed;
187 |       }
188 |       _ => {
189 |         // TODO: Figure out why this debug_assert can get hit (i.e. why it is okay for it
190 |         // to get hit). Removing it results in the simulation tests still passing, so there
191 |         // was definitely an execution path that caused this to get hit.
192 |         // debug_assert!(false);
193 |       }
194 |     }
195 |     ShardingSnapshotAction::Wait
196 |   }
197 | 
198 |   /// This function returns `ShardingSnapshotAction::Exit` iff this ES is finished.
199 |   pub fn handle_plm(
200 |     &mut self,
201 |     ctx: &mut TabletContext,
202 |     _: ShardingConfirmedPLm,
203 |   ) -> ShardingSnapshotAction {
204 |     match &self.state {
205 |       State::InsertingShardingConfirmed | State::Follower => {
206 |         // Remove all the storage data that this Tablet no longer manages.
207 |         let remaining = remove_range(&mut ctx.storage, &self.target.range);
208 |         debug_assert!(remaining.is_empty());
209 |         ShardingSnapshotAction::Exit
210 |       }
211 |       _ => {
212 |         debug_assert!(false);
213 |         ShardingSnapshotAction::Wait
214 |       }
215 |     }
216 |   }
217 | 
218 |   pub fn handle_lc(
219 |     &mut self,
220 |     ctx: &mut TabletContext,
221 |     io_ctx: &mut IO,
222 |     finish_query_ess: &BTreeMap,
223 |   ) -> ShardingSnapshotAction {
224 |     match &self.state {
225 |       State::Follower => {
226 |         if ctx.is_leader() {
227 |           self.advance_prepared(ctx, io_ctx, finish_query_ess);
228 |         }
229 |       }
230 |       State::WaitingPreparedWrites
231 |       | State::ShardingSnapshotSent
232 |       | State::InsertingShardingConfirmed => self.state = State::Follower,
233 |     }
234 |     ShardingSnapshotAction::Wait
235 |   }
236 | 
237 |   pub fn handle_rlc(
238 |     &mut self,
239 |     ctx: &mut TabletContext,
240 |     io_ctx: &mut IO,
241 |     remote_leader_changed: RemoteLeaderChangedPLm,
242 |   ) -> ShardingSnapshotAction {
243 |     match &self.state {
244 |       State::ShardingSnapshotSent => {
245 |         // If the Leader that changed was of the target SlaveGroupId, we resend the snapshot.
246 |         if remote_leader_changed.gid == self.target.sid.to_gid() {
247 |           self.send_sharding_snapshot(ctx, io_ctx);
248 |         }
249 |       }
250 |       _ => {}
251 |     }
252 |     ShardingSnapshotAction::Wait
253 |   }
254 | 
255 |   /// Construct the version of `ShardingSnapshotES` that would result from losing Leadership.
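  /// (Analogous to `SlaveGroupCreateES::reconfig_snapshot`: everything is cloned
  /// as-is, and only `state` is reset to `Follower`.)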
256 | pub fn reconfig_snapshot(&self) -> ShardingSnapshotES { 257 | let mut es = self.clone(); 258 | es.state = State::Follower; 259 | es 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /src/bin/simtest/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | use crate::advanced_parallel_test::test_all_advanced_parallel; 4 | use crate::basic_serial_test::test_all_basic_serial; 5 | use crate::paxos_parallel_test::{ 6 | test_all_basic_parallel, test_all_paxos_parallel, ParallelTestStats, Writer, 7 | }; 8 | use crate::stats::{format_message_stats, process_stats, Stats}; 9 | use clap::{arg, App}; 10 | use rand::{RngCore, SeedableRng}; 11 | use rand_xorshift::XorShiftRng; 12 | use runiversal::test_utils::mk_seed; 13 | use std::cmp::max; 14 | use std::collections::BTreeMap; 15 | use std::panic::AssertUnwindSafe; 16 | use std::sync::mpsc; 17 | use std::sync::mpsc::Sender; 18 | 19 | #[macro_export] 20 | macro_rules! cast { 21 | ($enum:path, $expr:expr) => {{ 22 | if let $enum(item) = $expr { 23 | Ok(item) 24 | } else { 25 | Err("Could not cast the value to the desired Variant.") 26 | } 27 | }}; 28 | } 29 | 30 | mod advanced_parallel_test; 31 | mod basic_serial_test; 32 | mod paxos_parallel_test; 33 | mod serial_test_utils; 34 | mod simulation; 35 | mod stats; 36 | 37 | /** 38 | * Debugging Tips: 39 | * - We thread a global RNG through all test cases. However, in every test case, we try to 40 | * use it for nothing more than creating a new RNG by creating a random seed. The reason 41 | * for this is so that if a failure happens, we can just print the seed and then quickly 42 | * reproduce by using that seed directly to run the test case. 43 | */ 44 | 45 | fn main() { 46 | // Setup CLI parsing 47 | let matches = App::new("rUniversalDB Tests") 48 | .version("1.0") 49 | .author("Pasindu M. ") 50 | .arg( 51 | arg!(-i --instances ) 52 | .required(false) 53 | .help("Indicates if the simulation tests should be run in parallel."), 54 | ) 55 | .arg( 56 | arg!(-r --rounds ) 57 | .required(false) 58 | .help("The number of rounds to execute the parallel tests."), 59 | ) 60 | .get_matches(); 61 | 62 | // Run Serial tests in just one thread (since these are fast). 63 | let mut rand = XorShiftRng::from_seed([1; 16]); 64 | println!("Basic Serial Tests:"); 65 | test_all_basic_serial(&mut rand); 66 | println!("\n"); 67 | 68 | // Run parallel tests, potentially in multiple threads if requested. 69 | const DEFAULT_NUM_ROUNDS: u32 = 33; 70 | let rounds: u32 = if let Some(rounds) = matches.value_of("rounds") { 71 | rounds.parse().unwrap() 72 | } else { 73 | DEFAULT_NUM_ROUNDS 74 | }; 75 | 76 | if let Some(instances) = matches.value_of("instances") { 77 | let instances: u32 = instances.parse().unwrap(); 78 | execute_multi(instances, rounds); 79 | } else { 80 | execute_once(&mut rand, rounds); 81 | } 82 | } 83 | 84 | // ----------------------------------------------------------------------------------------------- 85 | // Print Utils 86 | // ----------------------------------------------------------------------------------------------- 87 | 88 | /// Trivial implementation just using `println!`. 89 | struct BasicPrintWriter {} 90 | 91 | impl Writer for BasicPrintWriter { 92 | fn println(&mut self, s: String) { 93 | println!("{}", s); 94 | } 95 | 96 | fn flush(&mut self) {} 97 | } 98 | 99 | /// Concurrent Writer for when we want multiple threads writing data. 
This struct allows multiple
100 | /// `println` calls to be batched together and then written to the console atomically with
101 | /// `flush`. We also have `flush_error` so that if the thread errors out before it would normally
102 | /// call `flush`, then we can catch the exception and then call this function explicitly.
103 | struct ConcurrentWriter<'a> {
104 |   sender: &'a Sender,
105 |   print_buffer: Vec,
106 | }
107 | 
108 | impl<'a> ConcurrentWriter<'a> {
109 |   fn create(sender: &Sender) -> ConcurrentWriter {
110 |     ConcurrentWriter { sender, print_buffer: vec![] }
111 |   }
112 | 
113 |   fn mk_text(&mut self) -> String {
114 |     let print_buffer = std::mem::take(&mut self.print_buffer);
115 |     print_buffer.join("")
116 |   }
117 | 
118 |   /// Flushes the currently buffered string as an error, indicating that the sending
119 |   /// thread encountered an error.
120 |   fn flush_error(&mut self) {
121 |     let text = self.mk_text();
122 |     self.sender.send(ParallelTestMessage::Error(text)).unwrap();
123 |   }
124 | }
125 | 
126 | impl<'a> Writer for ConcurrentWriter<'a> {
127 |   fn println(&mut self, s: String) {
128 |     self.print_buffer.push(format!("{}\n", s));
129 |   }
130 | 
131 |   /// Flushes the currently buffered string normally.
132 |   fn flush(&mut self) {
133 |     let text = self.mk_text();
134 |     self.sender.send(ParallelTestMessage::PrintMessage(text)).unwrap();
135 |   }
136 | }
137 | 
138 | // -----------------------------------------------------------------------------------------------
139 | //  Parallel Simulation Tests
140 | // -----------------------------------------------------------------------------------------------
141 | 
142 | /// The message sent from the test executor threads to the coordinator thread
143 | /// (i.e. the main thread).
144 | enum ParallelTestMessage {
145 |   PrintMessage(String),
146 |   Error(String),
147 |   Done((ParallelTestStats, Vec)),
148 | }
149 | 
150 | /// Execute parallel tests in a single thread.
151 | fn execute_once(rand: &mut XorShiftRng, rounds: u32) {
152 |   let mut writer = BasicPrintWriter {};
153 |   println!("Paxos Parallel Tests:");
154 |   test_all_paxos_parallel(rand, &mut writer, rounds);
155 |   println!("\n");
156 |   println!("Basic Parallel Tests:");
157 |   test_all_basic_parallel(rand, &mut writer, rounds);
158 |   println!("\n");
159 | }
160 | 
161 | /// Execute parallel tests in multiple threads.
162 | fn execute_multi(instances: u32, rounds: u32) {
163 |   let (sender, receiver) = mpsc::channel::();
164 | 
165 |   // Create `instances` number of threads to run the test in parallel.
166 |   for i in 0..instances {
167 |     let mut seed: [u8; 16] = [0; 16];
168 |     seed[0] = i as u8;
169 |     let sender = sender.clone();
170 |     std::thread::spawn(move || {
171 |       let mut writer = ConcurrentWriter::create(&sender);
172 |       let mut rand = XorShiftRng::from_seed(seed);
173 | 
174 |       // Catch any panics or errors that happen inside
175 |       let result = std::panic::catch_unwind(AssertUnwindSafe(|| {
176 |         println!("Paxos Parallel Tests:");
177 |         let parallel_stats = test_all_paxos_parallel(&mut rand, &mut writer, rounds);
178 |         println!("\n");
179 |         println!("Basic Parallel Tests:");
180 |         let stats_basic = test_all_basic_parallel(&mut rand, &mut writer, rounds);
181 |         println!("\n");
182 | 
183 |         (parallel_stats, stats_basic)
184 |       }));
185 | 
186 |       // If the above ended with an error, we flush the last of whatever was written
187 |       // as an error. Otherwise, we flush it normally and send off the results.
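      // (The `Err` arm below comes from `catch_unwind`: a panicking test thread never
      // reaches its own `flush`, so `flush_error` forwards the buffered output to the
      // coordinator as an `Error`, which makes the coordinator terminate the run.)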
188 |       match result {
189 |         Ok(done) => {
190 |           writer.flush();
191 |           sender.send(ParallelTestMessage::Done(done)).unwrap();
192 |         }
193 |         Err(_) => writer.flush_error(),
194 |       }
195 |     });
196 |   }
197 | 
198 |   // Drop the original sender to avoid blocking the following `recv` call forever.
199 |   drop(sender);
200 | 
201 |   let mut parallel_stats_acc = Vec::::new();
202 |   let mut basic_stats_acc = Vec::>::new();
203 | 
204 |   // Receive data until there are no more `senders` in existence; i.e. when all
205 |   // threads above have finished.
206 |   while let Ok(result) = receiver.recv() {
207 |     match result {
208 |       ParallelTestMessage::PrintMessage(string) => println!("{}", string),
209 |       ParallelTestMessage::Error(string) => {
210 |         println!("{}", string);
211 |         println!("Terminating...");
212 |         // Terminate all testing.
213 |         return;
214 |       }
215 |       ParallelTestMessage::Done((parallel_stats, basic_stats)) => {
216 |         parallel_stats_acc.push(parallel_stats);
217 |         basic_stats_acc.push(basic_stats);
218 |       }
219 |     }
220 |   }
221 | 
222 |   // Process the basic stats
223 |   {
224 |     let mut all_stats = Vec::::new();
225 | 
226 |     for basic_stats in basic_stats_acc {
227 |       all_stats.extend(basic_stats);
228 |     }
229 | 
230 |     let (avg_duration, avg_message_stats) = process_stats(all_stats);
231 | 
232 |     // Print the stats.
233 |     println!("Avg Basic Duration: {}", avg_duration);
234 |     println!("Avg Basic Statistics: {}", format_message_stats(&avg_message_stats));
235 |   }
236 | 
237 |   // Process the parallel stats
238 |   {
239 |     let mut all_stats = Vec::::new();
240 |     let mut all_reconfig_stats = Vec::::new();
241 |     let mut all_sharding_stats = Vec::::new();
242 | 
243 |     for parallel_stats in parallel_stats_acc {
244 |       all_stats.extend(parallel_stats.all_stats);
245 |       all_reconfig_stats.extend(parallel_stats.all_reconfig_stats);
246 |       all_sharding_stats.extend(parallel_stats.all_sharding_stats);
247 |     }
248 | 
249 |     let (avg_duration, avg_message_stats) = process_stats(all_stats);
250 |     let (avg_reconfig_duration, avg_reconfig_message_stats) = process_stats(all_reconfig_stats);
251 |     let (avg_sharding_duration, avg_sharding_message_stats) = process_stats(all_sharding_stats);
252 | 
253 |     // Print the stats.
254 |     println!("Avg Duration: {}", avg_duration);
255 |     println!("Avg Statistics: {}", format_message_stats(&avg_message_stats));
256 |     println!("Avg Reconfig Duration: {}", avg_reconfig_duration);
257 |     println!("Avg Reconfig Statistics: {}", format_message_stats(&avg_reconfig_message_stats));
258 |     println!("Avg Sharding Duration: {}", avg_sharding_duration);
259 |     println!("Avg Sharding Statistics: {}", format_message_stats(&avg_sharding_message_stats));
260 |   }
261 | }
262 | 
--------------------------------------------------------------------------------