├── .gitignore ├── Cargo.toml ├── README.md ├── src ├── acceptor.rs ├── client.rs ├── lib.rs ├── proposer.rs ├── storage.rs └── udp_transport.rs └── tests └── simulator.rs /.gitignore: -------------------------------------------------------------------------------- 1 | *db 2 | *conf 3 | *snap* 4 | *grind.out* 5 | vgcore* 6 | *.bk 7 | *orig 8 | tags 9 | perf* 10 | *folded 11 | *out 12 | *perf 13 | *svg 14 | *txt 15 | experiments 16 | target 17 | Cargo.lock 18 | *swp 19 | *swo 20 | *.proptest-regressions 21 | corpus 22 | artifacts 23 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "paxos" 3 | version = "0.0.5" 4 | authors = ["Tyler Neely "] 5 | description = "a well-tested implementation of the CASPaxos protocol for the sled lightweight database kit" 6 | license = "MIT/Apache-2.0" 7 | homepage = "https://github.com/spacejam/paxos" 8 | repository = "https://github.com/spacejam/paxos" 9 | keywords = ["database", "embedded", "consensus", "distributed", "paxos"] 10 | documentation = "https://docs.rs/paxos/" 11 | readme = "README.md" 12 | edition = "2018" 13 | 14 | [dependencies] 15 | sled = "0.31.0" 16 | bincode = "1.2.1" 17 | serde = "1.0.111" 18 | serde_derive = "1.0.112" 19 | crc32fast = "1.2.0" 20 | 21 | [dev-dependencies] 22 | quickcheck = "0.9.2" 23 | rand = "0.7.3" 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # paxos 2 | 3 | [![crates.io](https://meritbadge.herokuapp.com/paxos)](https://crates.io/crates/paxos) 4 | [![documentation](https://docs.rs/paxos/badge.svg)](https://docs.rs/paxos) 5 | [![chat](https://img.shields.io/discord/509773073294295082.svg?logo=discord)](https://discord.gg/Z6VsXds) 6 | [![sponsors](https://img.shields.io/opencollective/backers/sled)](https://github.com/sponsors/spacejam) 7 | 8 | Currently, this is an implementation of 9 | [CASPaxos](https://arxiv.org/pdf/1802.07000.pdf) 10 | built on top of the sled lightweight database kit. 11 | 12 | It is being grown into a more featureful replication 13 | library that is mindful of modern consensus research. 14 | 15 | #### why not raft? 
16 | 17 | * raft is great for teaching purposes, but is not very performant 18 | * a replicated log is just the beginning 19 | 20 | # roadmap 21 | 22 | - [ ] simple http API 23 | - [ ] built-in kv 24 | - [ ] membership reconfiguration 25 | - [ ] cheap-paxos storage reduction 26 | - [ ] gossip-based replication of state 27 | - [ ] log reclamation 28 | - [ ] read-only followers 29 | 30 | # References 31 | 32 | * [CASPaxos: Replicated State Machines without logs](https://arxiv.org/pdf/1802.07000.pdf) 33 | * [PigPaxos: Devouring the communication bottlenecks in distributed consensus](https://arxiv.org/abs/2003.07760) 34 | * [SDPaxos: Building Efficient Semi-Decentralized Geo-replicated State Machines](https://www.microsoft.com/en-us/research/uploads/prod/2018/09/172-zhao.pdf) 35 | * [State-Machine Replication for Planet-Scale Systems (Extended Version)](https://arxiv.org/abs/2003.11789) 36 | * [WPaxos: Wide Area Network Flexible Consensus](https://arxiv.org/abs/1703.08905) 37 | * [A Generalised Solution to Distributed Consensus](https://arxiv.org/abs/1902.06776) 38 | * [Cheap Paxos](https://lamport.azurewebsites.net/pubs/web-dsn-submission.pdf) 39 | * [Edelweiss: Automatic Storage Reclamation for Distributed Programming](http://www.neilconway.org/docs/vldb2014_edelweiss.pdf) 40 | -------------------------------------------------------------------------------- /src/acceptor.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[derive(Clone, Debug)] 4 | pub struct Acceptor<S> 5 | where 6 | S: Clone, 7 | { 8 | store: S, 9 | } 10 | 11 | impl<S> Default for Acceptor<S> 12 | where 13 | S: Clone + Default, 14 | { 15 | fn default() -> Acceptor<S> { 16 | Acceptor { 17 | store: S::default(), 18 | } 19 | } 20 | } 21 | 22 | impl<S> Reactor for Acceptor<S> 23 | where 24 | S: storage::Storage + Clone + Sized, 25 | { 26 | type Peer = String; 27 | type Message = Rpc; 28 | 29 | fn receive( 30 | &mut self, 31 | _at: SystemTime, 32 | from: Self::Peer, 33 | msg: Self::Message, 34 | ) -> Vec<(Self::Peer, Self::Message)> { 35 | match msg { 36 | ProposeReq(ballot, key) => { 37 | let current_ballot = self.store.get_highest_seen(key.clone()); 38 | 39 | if ballot > current_ballot { 40 | self.store.set_highest_seen(key.clone(), ballot.clone()); 41 | vec![( 42 | from, 43 | ProposeRes { 44 | req_ballot: ballot, 45 | last_accepted_ballot: self 46 | .store 47 | .get_accepted_ballot(key.clone()), 48 | last_accepted_value: self 49 | .store 50 | .get_accepted_value(key.clone()), 51 | res: Ok(()), 52 | }, 53 | )] 54 | } else { 55 | vec![( 56 | from, 57 | ProposeRes { 58 | req_ballot: ballot, 59 | last_accepted_ballot: self 60 | .store 61 | .get_accepted_ballot(key.clone()), 62 | last_accepted_value: self 63 | .store 64 | .get_accepted_value(key.clone()), 65 | res: Err(Error::ProposalRejected { 66 | last: current_ballot, 67 | }), 68 | }, 69 | )] 70 | } 71 | } 72 | AcceptReq(ballot, key, to) => { 73 | let current_ballot = self.store.get_highest_seen(key.clone()); 74 | if ballot >= current_ballot { 75 | self.store.set_highest_seen(key.clone(), ballot.clone()); 76 | self.store.set_accepted_ballot(key.clone(), ballot.clone()); 77 | self.store.set_accepted_value(key.clone(), to); 78 | vec![(from, AcceptRes(ballot, Ok(())))] 79 | } else { 80 | vec![( 81 | from, 82 | AcceptRes( 83 | ballot, 84 | Err(Error::AcceptRejected { 85 | last: current_ballot, 86 | }), 87 | ), 88 | )] 89 | } 90 | } 91 | _ => panic!("Acceptor got non-propose/accept"), 92 | } 93 | } 94 | } 95 |
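96 | // An added usage sketch (illustrative, not part of the upstream crate): 97 | // an acceptor backed by MemStorage promises a fresh ballot, then rejects 98 | // a replay of the same ballot, since promises only go to strictly greater 99 | // ballots. 100 | #[cfg(test)] 101 | mod acceptor_sketch { 102 | use super::*; 103 | 104 | #[test] 105 | fn promise_then_reject_stale_ballot() { 106 | let mut acceptor: Acceptor<MemStorage> = Acceptor::default(); 107 | let now = SystemTime::now(); 108 | let key = b"k".to_vec(); 109 | 110 | // ballot 1 > the default ballot 0, so it is promised 111 | let out = acceptor.receive(now, "proposer:0".to_string(), ProposeReq(Ballot(1), key.clone())); 112 | assert!(matches!(out[0].1, ProposeRes { res: Ok(()), .. })); 113 | 114 | // ballot 1 is no longer strictly greater than the highest seen, so it is rejected 115 | let out = acceptor.receive(now, "proposer:0".to_string(), ProposeReq(Ballot(1), key)); 116 | assert!(matches!(out[0].1, ProposeRes { res: Err(_), .. })); 117 | } 118 | }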
-------------------------------------------------------------------------------- /src/client.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::collections::HashMap; 4 | 5 | #[derive(Debug, Default, Clone)] 6 | pub struct Client { 7 | proposers: Vec<String>, 8 | req_counter: u64, 9 | in_flight: HashMap<u64, Rpc>, 10 | } 11 | 12 | impl Client { 13 | pub fn new(proposers: Vec<String>) -> Client { 14 | Client { 15 | proposers: proposers, 16 | req_counter: 0, 17 | in_flight: HashMap::new(), 18 | } 19 | } 20 | 21 | pub fn get(&self) -> Result<Option<Value>, Error> { 22 | unimplemented!() 23 | } 24 | 25 | pub fn del(&self) -> Result<Option<Value>, Error> { 26 | unimplemented!() 27 | } 28 | 29 | pub fn set(&self, _value: Value) -> Result<Option<Value>, Error> { 30 | unimplemented!() 31 | } 32 | 33 | pub fn cas( 34 | &self, 35 | _old_value: Value, 36 | _new_value: Value, 37 | ) -> Result<Option<Value>, Error> { 38 | unimplemented!() 39 | } 40 | } 41 | 42 | impl Reactor for Client { 43 | type Peer = String; 44 | type Message = Rpc; 45 | 46 | fn receive( 47 | &mut self, 48 | _at: SystemTime, 49 | _from: Self::Peer, 50 | _msg: Self::Message, 51 | ) -> Vec<(Self::Peer, Self::Message)> { 52 | unimplemented!() 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::time::{Duration, SystemTime}; 3 | 4 | use serde::de::DeserializeOwned; 5 | use serde::Serialize; 6 | use serde_derive::{Deserialize, Serialize}; 7 | 8 | mod acceptor; 9 | mod client; 10 | mod proposer; 11 | mod storage; 12 | mod udp_transport; 13 | 14 | pub use acceptor::Acceptor; 15 | pub use client::Client; 16 | pub use proposer::Proposer; 17 | pub use storage::{MemStorage, SledStorage}; 18 | pub use udp_transport::UdpTransport; 19 | 20 | /// An abstraction over network communication. 21 | /// It is not expected to provide fault tolerance. 22 | /// It may send messages 0 times, or many times. 23 | /// The expectation is that as long as messages 24 | /// are sometimes delivered, forward progress 25 | /// will eventually be made. 26 | pub trait Transport<R: Reactor> { 27 | /// Blocks until the next message is received. 28 | fn next_message(&mut self) -> (R::Peer, R::Message); 29 | 30 | /// Enqueues the message to be sent. May be sent 0-N times with no ordering guarantees. 31 | fn send_message(&mut self, to: R::Peer, msg: R::Message); 32 | 33 | /// Runs a reactor on the transport. 34 | fn run(&mut self, mut reactor: R) { 35 | loop { 36 | let (from, msg) = self.next_message(); 37 | let now = SystemTime::now(); 38 | 39 | let outbound = reactor.receive(now, from, msg); 40 | 41 | for (to, msg) in outbound { 42 | self.send_message(to, msg); 43 | } 44 | } 45 | } 46 | } 47 | 48 | // Reactor is a trait for building simulable systems.
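// (An added gloss:) a reactor is meant to be a deterministic state machine: given the current // time, the sending peer, and one inbound message, receive returns every // outbound (peer, message) pair to send in response. Keeping all effects in // that return value is what lets tests/simulator.rs drive whole clusters // in-process and replay failures deterministically.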
49 | pub trait Reactor: Debug + Clone { 50 | type Peer: std::net::ToSocketAddrs; 51 | type Message: Serialize + DeserializeOwned; 52 | 53 | fn receive( 54 | &mut self, 55 | at: SystemTime, 56 | from: Self::Peer, 57 | msg: Self::Message, 58 | ) -> Vec<(Self::Peer, Self::Message)>; 59 | } 60 | 61 | #[derive( 62 | Default, 63 | Clone, 64 | Debug, 65 | PartialOrd, 66 | PartialEq, 67 | Eq, 68 | Hash, 69 | Ord, 70 | Serialize, 71 | Deserialize, 72 | )] 73 | pub struct Ballot(u64); 74 | 75 | type Key = Vec<u8>; 76 | type Value = Vec<u8>; 77 | 78 | #[derive( 79 | PartialOrd, Ord, Eq, PartialEq, Debug, Clone, Serialize, Deserialize, 80 | )] 81 | pub enum Req { 82 | Get(Key), 83 | Del(Key), 84 | Set(Key, Value), 85 | Cas(Key, Option<Value>, Option<Value>), 86 | } 87 | 88 | impl Req { 89 | fn key(&self) -> Key { 90 | match *self { 91 | Req::Get(ref k) 92 | | Req::Del(ref k) 93 | | Req::Set(ref k, _) 94 | | Req::Cas(ref k, _, _) => k.clone(), 95 | } 96 | } 97 | } 98 | 99 | #[derive( 100 | Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Serialize, Deserialize, 101 | )] 102 | pub enum Rpc { 103 | ClientRequest(u64, Req), 104 | ClientResponse(u64, Result<Option<Value>, Error>), 105 | SetAcceptAcceptors(Vec<String>), 106 | SetProposeAcceptors(Vec<String>), 107 | ProposeReq(Ballot, Key), 108 | ProposeRes { 109 | req_ballot: Ballot, 110 | last_accepted_ballot: Ballot, 111 | last_accepted_value: Option<Value>, 112 | res: Result<(), Error>, 113 | }, 114 | AcceptReq(Ballot, Key, Option<Value>), 115 | AcceptRes(Ballot, Result<(), Error>), 116 | } 117 | use Rpc::*; 118 | 119 | impl Rpc { 120 | pub fn client_req_id(&self) -> Option<u64> { 121 | match *self { 122 | ClientResponse(id, _) | ClientRequest(id, _) => Some(id), 123 | _ => None, 124 | } 125 | } 126 | 127 | pub fn client_req(self) -> Option<Req> { 128 | match self { 129 | ClientRequest(_, req) => Some(req), 130 | _ => None, 131 | } 132 | } 133 | } 134 | 135 | #[derive( 136 | Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Serialize, Deserialize, 137 | )] 138 | pub enum Error { 139 | ProposalRejected { last: Ballot }, 140 | AcceptRejected { last: Ballot }, 141 | CasFailed(Option<Value>), 142 | Timeout, 143 | } 144 | 145 | impl Error { 146 | pub fn is_rejected_accept(&self) -> bool { 147 | match *self { 148 | Error::AcceptRejected { .. } => true, 149 | _ => false, 150 | } 151 | } 152 | 153 | pub fn is_rejected_proposal(&self) -> bool { 154 | match *self { 155 | Error::ProposalRejected { .. } => true,
156 | _ => false, 157 | } 158 | } 159 | 160 | pub fn is_timeout(&self) -> bool { 161 | match *self { 162 | Error::Timeout => true, 163 | _ => false, 164 | } 165 | } 166 | 167 | pub fn is_failed_cas(&self) -> bool { 168 | match *self { 169 | Error::CasFailed(_) => true, 170 | _ => false, 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/proposer.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::collections::HashMap; 4 | 5 | #[derive(Eq, PartialEq, Debug, Clone)] 6 | enum Phase { 7 | Propose, 8 | Accept, 9 | } 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct Pending { 13 | client_addr: String, 14 | id: u64, 15 | req: Req, 16 | new_v: Option<Value>, 17 | phase: Phase, 18 | waiting_for: Vec<String>, 19 | acks_from: Vec<String>, 20 | nacks_from: Vec<String>, 21 | highest_promise_ballot: Ballot, 22 | highest_promise_value: Option<Value>, 23 | received_at: SystemTime, 24 | cas_failed: Result<(), Error>, 25 | has_retried_once: bool, 26 | } 27 | 28 | impl Pending { 29 | fn apply_op(&mut self) { 30 | match self.req { 31 | Req::Get(_) => { 32 | self.new_v = self.highest_promise_value.clone(); 33 | } 34 | Req::Del(_) => { 35 | self.new_v = None; 36 | } 37 | Req::Set(_, ref new_v) => { 38 | self.new_v = Some(new_v.clone()); 39 | } 40 | Req::Cas(_, ref old_v, ref new_v) => { 41 | if *old_v == self.highest_promise_value { 42 | self.new_v = new_v.clone(); 43 | } else { 44 | self.new_v = self.highest_promise_value.clone(); 45 | self.cas_failed = Err(Error::CasFailed( 46 | self.highest_promise_value.clone(), 47 | )); 48 | } 49 | } 50 | } 51 | } 52 | 53 | fn transition_to_accept(&mut self, acceptors: Vec<String>) { 54 | self.phase = Phase::Accept; 55 | self.acks_from = vec![]; 56 | self.nacks_from = vec![]; 57 | self.waiting_for = acceptors; 58 | self.apply_op(); 59 | } 60 | } 61 | 62 | #[derive(Default, Debug, Clone)] 63 | pub struct Proposer { 64 | accept_acceptors: Vec<String>, 65 | propose_acceptors: Vec<String>, 66 | ballot_counter: u64, 67 | in_flight: HashMap<Ballot, Pending>, 68 | timeout: Duration, 69 | } 70 | 71 | impl Proposer { 72 | pub fn new(timeout_ms: u64, acceptors: Vec<String>) -> Proposer { 73 | let mut ret = Proposer::default(); 74 | ret.accept_acceptors = acceptors.clone(); 75 | ret.propose_acceptors = acceptors; 76 | ret.timeout = Duration::from_millis(timeout_ms); 77 | ret 78 | } 79 | 80 | fn bump_ballot(&mut self) -> Ballot { 81 | self.ballot_counter += 1; 82 | Ballot(self.ballot_counter) 83 | } 84 | 85 | fn propose( 86 | &mut self, 87 | at: SystemTime, 88 | from: String, 89 | id: u64, 90 | req: Req, 91 | retry: bool, 92 | ) -> Vec<(String, Rpc)> { 93 | let ballot = self.bump_ballot(); 94 | self.in_flight.insert( 95 | ballot.clone(), 96 | Pending { 97 | client_addr: from, 98 | id: id, 99 | req: req.clone(), 100 | new_v: None, 101 | phase: Phase::Propose, 102 | waiting_for: self.propose_acceptors.clone(), 103 | acks_from: vec![], 104 | nacks_from: vec![], 105 | highest_promise_ballot: Ballot(0), 106 | highest_promise_value: None, 107 | received_at: at, 108 | cas_failed: Ok(()), 109 | has_retried_once: retry, 110 | }, 111 | ); 112 | 113 | self.propose_acceptors 114 | .iter() 115 | .map(|a| (a.clone(), ProposeReq(ballot.clone(), req.key()))) 116 | .collect() 117 | } 118 | } 119 | 120 | impl Reactor for Proposer { 121 | type Peer = String; 122 | type Message = Rpc; 123 | 124 | fn receive( 125 | &mut self, 126 | at: SystemTime, 127 | from: Self::Peer, 128 | msg: Self::Message, 129 | ) -> Vec<(Self::Peer, Self::Message)> {
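// An added overview of the dance below (CASPaxos, two phases): a // ClientRequest starts a PROPOSE round under a freshly bumped ballot; once // a majority of acceptors promise that ballot, the round transitions to // ACCEPT with the op applied on top of the highest previously accepted // value; a majority of accepts answers the client. A majority of propose // nacks retries once under a larger ballot before reporting the rejection, // and requests older than the timeout are answered with Error::Timeout. 130 |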
let mut clear_ballot = None; 131 | let mut retry = None; 132 | let mut res = match msg { 133 | ClientRequest(id, r) => self.propose(at, from, id, r, false), 134 | SetAcceptAcceptors(sas) => { 135 | self.accept_acceptors = sas; 136 | vec![] 137 | } 138 | SetProposeAcceptors(sas) => { 139 | self.propose_acceptors = sas; 140 | vec![] 141 | } 142 | ProposeRes { 143 | req_ballot, 144 | last_accepted_ballot, 145 | last_accepted_value, 146 | res, 147 | } => { 148 | if !self.in_flight.contains_key(&req_ballot) { 149 | // we've already moved on 150 | return vec![]; 151 | } 152 | 153 | let mut pending = self.in_flight.get_mut(&req_ballot).unwrap(); 154 | 155 | if pending.phase != Phase::Propose { 156 | // we've already moved on 157 | return vec![]; 158 | } 159 | 160 | assert!( 161 | !pending.acks_from.contains(&from) 162 | && !pending.nacks_from.contains(&from), 163 | "somehow got a response from this peer already... \ 164 | we don't do retries in this game yet!" 165 | ); 166 | 167 | assert!( 168 | pending.waiting_for.contains(&from), 169 | "somehow got a response from someone we didn't send \ 170 | a request to... maybe the network is funky and we \ 171 | should use a higher level identifier to identify them \ 172 | than their network address." 173 | ); 174 | 175 | let majority = (pending.waiting_for.len() / 2) + 1; 176 | 177 | match res { 178 | Err(Error::ProposalRejected { ref last }) => { 179 | // some nerd didn't like our request... 180 | if self.ballot_counter < last.0 { 181 | self.ballot_counter = last.0; 182 | } 183 | 184 | pending.nacks_from.push(from); 185 | 186 | if pending.nacks_from.len() >= majority { 187 | clear_ballot = Some(req_ballot.clone()); 188 | 189 | if !pending.has_retried_once { 190 | retry = Some(( 191 | pending.received_at, 192 | pending.client_addr.clone(), 193 | pending.id, 194 | pending.req.clone(), 195 | )); 196 | vec![] 197 | } else { 198 | vec![( 199 | pending.client_addr.clone(), 200 | ClientResponse( 201 | pending.id, 202 | Err(Error::ProposalRejected { 203 | last: last.clone(), 204 | }), 205 | ), 206 | )] 207 | } 208 | } else { 209 | // still waiting for a majority of positive responses 210 | vec![] 211 | } 212 | } 213 | Ok(()) => { 214 | assert!( 215 | req_ballot.0 > pending.highest_promise_ballot.0, 216 | "somehow the acceptor promised us a vote for our ballot {:?} \ 217 | even though their highest promise ballot of {:?} \ 218 | is higher than our request...", 219 | req_ballot.0, 220 | pending.highest_promise_ballot.0 221 | ); 222 | pending.acks_from.push(from); 223 | 224 | if last_accepted_ballot > pending.highest_promise_ballot 225 | { 226 | pending.highest_promise_ballot = 227 | last_accepted_ballot; 228 | pending.highest_promise_value = last_accepted_value; 229 | } 230 | 231 | if pending.acks_from.len() >= majority { 232 | // transition to ACCEPT phase 233 | // NB assumption: we use CURRENT acceptor list, 234 | // rather than the acceptor list when we received 235 | // the client request. need to think on this more. 
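// (An added note: with the fixed acceptor set used in the tests, the two // lists are identical and this is safe; it only becomes dangerous under // membership reconfiguration, where mixing quorums drawn from different // acceptor sets can violate safety, hence the reconfiguration item on the // README roadmap.)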
236 | pending.transition_to_accept( 237 | self.accept_acceptors.clone(), 238 | ); 239 | 240 | pending 241 | .waiting_for 242 | .iter() 243 | .map(|a| { 244 | ( 245 | a.clone(), 246 | AcceptReq( 247 | req_ballot.clone(), 248 | pending.req.key(), 249 | pending.new_v.clone(), 250 | ), 251 | ) 252 | }) 253 | .collect() 254 | } else { 255 | // still waiting for promises 256 | vec![] 257 | } 258 | } 259 | other => panic!("got unhandled ProposeRes: {:?}", other), 260 | } 261 | } 262 | AcceptRes(ballot, res) => { 263 | if !self.in_flight.contains_key(&ballot) { 264 | // we've already moved on 265 | return vec![]; 266 | } 267 | 268 | let pending = self.in_flight.get_mut(&ballot).unwrap(); 269 | 270 | assert_eq!( 271 | pending.phase, 272 | Phase::Accept, 273 | "somehow we went back in time and became a proposal..." 274 | ); 275 | 276 | assert!( 277 | !pending.acks_from.contains(&from) 278 | && !pending.nacks_from.contains(&from), 279 | "somehow got a response from this peer already... \ 280 | we don't do retries in this game yet!" 281 | ); 282 | 283 | assert!( 284 | pending.waiting_for.contains(&from), 285 | "somehow got a response from someone we didn't send \ 286 | a request to... maybe the network is funky and we \ 287 | should use a higher level identifier to identify them \ 288 | than their network address." 289 | ); 290 | 291 | let majority = (pending.waiting_for.len() / 2) + 1; 292 | 293 | match res { 294 | Err(Error::AcceptRejected { ref last }) => { 295 | // some nerd didn't like our request... 296 | if self.ballot_counter < last.0 { 297 | self.ballot_counter = last.0; 298 | } 299 | 300 | pending.nacks_from.push(from); 301 | 302 | if pending.nacks_from.len() >= majority { 303 | clear_ballot = Some(ballot); 304 | vec![( 305 | pending.client_addr.clone(), 306 | ClientResponse( 307 | pending.id, 308 | Err(Error::AcceptRejected { 309 | last: last.clone(), 310 | }), 311 | ), 312 | )] 313 | } else { 314 | vec![] 315 | } 316 | } 317 | Ok(()) => { 318 | pending.acks_from.push(from); 319 | 320 | if pending.acks_from.len() >= majority { 321 | // respond favorably to the client and nuke pending 322 | clear_ballot = Some(ballot); 323 | vec![( 324 | pending.client_addr.clone(), 325 | ClientResponse( 326 | pending.id, 327 | pending 328 | .cas_failed 329 | .clone() 330 | .map(|_| pending.new_v.clone()), 331 | ), 332 | )] 333 | } else { 334 | // still waiting for acceptances 335 | vec![] 336 | } 337 | } 338 | other => panic!("got unhandled AcceptRes: {:?}", other), 339 | } 340 | } 341 | other => panic!("proposer got unhandled rpc: {:?}", other), 342 | }; 343 | 344 | if let Some(ballot) = clear_ballot.take() { 345 | self.in_flight.remove(&ballot); 346 | } 347 | 348 | if let Some((received_at, client_addr, id, req)) = retry { 349 | self.propose(received_at, client_addr, id, req, true) 350 | } else { 351 | let mut timeouts: Vec<_> = { 352 | let late = self.in_flight.values().filter(|i| { 353 | at.duration_since(i.received_at).unwrap() > self.timeout 354 | }); 355 | 356 | late.map(|pending| { 357 | ( 358 | pending.client_addr.clone(), 359 | ClientResponse(pending.id, Err(Error::Timeout)), 360 | ) 361 | }) 362 | .collect() 363 | }; 364 | 365 | res.append(&mut timeouts); 366 | 367 | let timeout = self.timeout.clone(); 368 | 369 | self.in_flight.retain(|_, i| { 370 | at.duration_since(i.received_at).unwrap() <= timeout 371 | }); 372 | 373 | res 374 | } 375 | } 376 | } 377 | -------------------------------------------------------------------------------- /src/storage.rs: 
-------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use bincode::{deserialize, serialize}; 4 | 5 | use super::*; 6 | 7 | /// Use Storage to plug this CASPaxos instance into an underlying store. 8 | pub trait Storage: Clone + Debug { 9 | fn get_highest_seen(&mut self, key: Key) -> Ballot; 10 | fn get_accepted_ballot(&mut self, key: Key) -> Ballot; 11 | fn get_accepted_value(&mut self, key: Key) -> Option<Value>; 12 | fn set_highest_seen(&mut self, key: Key, ballot: Ballot); 13 | fn set_accepted_ballot(&mut self, key: Key, ballot: Ballot); 14 | fn set_accepted_value(&mut self, key: Key, value: Option<Value>); 15 | } 16 | 17 | const HIGHEST_SEEN_SUFFIX: u8 = 0; 18 | const LAST_BALLOT_SUFFIX: u8 = 1; 19 | const LAST_VALUE_SUFFIX: u8 = 2; 20 | 21 | #[derive(Default, Clone, Debug)] 22 | pub struct MemStorage { 23 | inner: HashMap<Key, Value>, 24 | } 25 | 26 | impl Storage for MemStorage { 27 | fn get_highest_seen(&mut self, mut k: Key) -> Ballot { 28 | k.push(HIGHEST_SEEN_SUFFIX); 29 | self.inner 30 | .get(&k) 31 | .cloned() 32 | .map(|v| deserialize(&v[..]).unwrap()) 33 | .unwrap_or_else(|| Ballot::default()) 34 | } 35 | fn get_accepted_ballot(&mut self, mut k: Key) -> Ballot { 36 | k.push(LAST_BALLOT_SUFFIX); 37 | self.inner 38 | .get(&k) 39 | .cloned() 40 | .map(|v| deserialize(&v[..]).unwrap()) 41 | .unwrap_or_else(|| Ballot::default()) 42 | } 43 | fn get_accepted_value(&mut self, mut k: Key) -> Option<Value> { 44 | k.push(LAST_VALUE_SUFFIX); 45 | self.inner.get(&k).cloned() 46 | } 47 | fn set_highest_seen(&mut self, mut k: Key, ballot: Ballot) { 48 | k.push(HIGHEST_SEEN_SUFFIX); 49 | let v = serialize(&ballot).unwrap(); 50 | self.inner.insert(k, v); 51 | } 52 | fn set_accepted_ballot(&mut self, mut k: Key, ballot: Ballot) { 53 | k.push(LAST_BALLOT_SUFFIX); 54 | let v = serialize(&ballot).unwrap(); 55 | self.inner.insert(k, v); 56 | } 57 | fn set_accepted_value(&mut self, mut k: Key, value: Option<Value>) { 58 | k.push(LAST_VALUE_SUFFIX); 59 | if let Some(v) = value { 60 | self.inner.insert(k, v); 61 | } else { 62 | self.inner.remove(&k); 63 | } 64 | } 65 | } 66 | 67 | #[derive(Clone, Debug)] 68 | pub struct SledStorage { 69 | inner: sled::Tree, 70 | } 71 | 72 | impl SledStorage { 73 | pub fn new(tree: sled::Tree) -> SledStorage { 74 | SledStorage { inner: tree } 75 | } 76 | } 77 | 78 | impl Storage for SledStorage { 79 | fn get_highest_seen(&mut self, mut k: Key) -> Ballot { 80 | k.push(HIGHEST_SEEN_SUFFIX); 81 | self.inner 82 | .get(&k) 83 | .unwrap() 84 | .map(|v| deserialize(&v[..]).unwrap()) 85 | .unwrap_or_else(|| Ballot::default()) 86 | } 87 | fn get_accepted_ballot(&mut self, mut k: Key) -> Ballot { 88 | k.push(LAST_BALLOT_SUFFIX); 89 | self.inner 90 | .get(&k) 91 | .unwrap() 92 | .map(|v| deserialize(&v[..]).unwrap()) 93 | .unwrap_or_else(|| Ballot::default()) 94 | } 95 | fn get_accepted_value(&mut self, mut k: Key) -> Option<Value> { 96 | k.push(LAST_VALUE_SUFFIX); 97 | self.inner.get(&k).unwrap().map(|iv| iv.to_vec()) 98 | } 99 | fn set_highest_seen(&mut self, mut k: Key, ballot: Ballot) { 100 | k.push(HIGHEST_SEEN_SUFFIX); 101 | let v = serialize(&ballot).unwrap(); 102 | self.inner.insert(k, v).unwrap(); 103 | } 104 | fn set_accepted_ballot(&mut self, mut k: Key, ballot: Ballot) { 105 | k.push(LAST_BALLOT_SUFFIX); 106 | let v = serialize(&ballot).unwrap(); 107 | self.inner.insert(k, v).unwrap(); 108 | } 109 | fn set_accepted_value(&mut self, mut k: Key, value: Option<Value>) { 110 | k.push(LAST_VALUE_SUFFIX); 111 | if let Some(v) = value { 112 |
self.inner.insert(k, v).unwrap(); 113 | } else { 114 | self.inner.remove(&k).unwrap(); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/udp_transport.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | convert::TryInto, 3 | io, 4 | net::{ToSocketAddrs, UdpSocket}, 5 | }; 6 | 7 | use bincode::{deserialize, serialize}; 8 | use crc32fast::Hasher; 9 | 10 | use crate::{Reactor, Transport}; 11 | 12 | const MAX_SZ: usize = 64 * 1024; 13 | 14 | /// A transport that uses UDP and bincode for sending messages 15 | pub struct UdpTransport { 16 | socket: UdpSocket, 17 | } 18 | 19 | impl UdpTransport { 20 | /// Create a new UdpTransport 21 | pub fn new<A: ToSocketAddrs>(addr: A) -> io::Result<UdpTransport> { 22 | let socket = UdpSocket::bind(addr)?; 23 | Ok(UdpTransport { socket }) 24 | } 25 | } 26 | 27 | impl<R: Reactor<Peer = String>> Transport<R> for UdpTransport { 28 | /// Blocks until the next message is received. 29 | fn next_message(&mut self) -> (R::Peer, R::Message) { 30 | let mut buf = [0; MAX_SZ]; 31 | let (n, from) = self.socket.recv_from(&mut buf).unwrap(); 32 | 33 | let crc_sz = std::mem::size_of::<u32>(); 34 | let data_buf = &buf[..n - crc_sz]; 35 | let crc_buf = &buf[n - crc_sz..n]; 36 | 37 | let mut hasher = Hasher::new(); 38 | hasher.update(&data_buf); 39 | let hash = hasher.finalize(); 40 | 41 | let crc_array: [u8; 4] = crc_buf.try_into().unwrap(); 42 | assert_eq!(u32::from_le_bytes(crc_array), hash); 43 | 44 | let msg: R::Message = deserialize(data_buf).unwrap(); 45 | (from.to_string(), msg) 46 | } 47 | 48 | /// Enqueues the message to be sent. May be sent 0-N times with no ordering guarantees. 49 | fn send_message(&mut self, to: R::Peer, msg: R::Message) { 50 | let mut serialized = serialize(&msg).unwrap(); 51 | let mut hasher = Hasher::new(); 52 | hasher.update(&serialized); 53 | let hash = hasher.finalize(); 54 | serialized.extend_from_slice(&hash.to_le_bytes()); 55 | assert!(serialized.len() <= MAX_SZ); 56 | 57 | let n = self.socket.send_to(&serialized, to).unwrap(); 58 | assert_eq!(n, serialized.len()); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /tests/simulator.rs: -------------------------------------------------------------------------------- 1 | /// Simulation for network partitions. Like Jepsen but thousands of times faster.
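/// /// (An added summary of the approach:) quickcheck generates a Cluster /// (proposers, acceptors, timed client requests, and timed partitions); /// Cluster::step pops the earliest ScheduledMessage off a BinaryHeap, drops /// it if a partition is in effect, and otherwise hands it to the destination /// Reactor, pushing any replies back onto the heap; once the heap drains, /// the collected client responses are checked for linearizability.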
2 | use std::cmp::Ordering; 3 | use std::collections::{BinaryHeap, HashMap}; 4 | use std::ops::Add; 5 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; 6 | 7 | use quickcheck::{Arbitrary, Gen, QuickCheck, StdGen}; 8 | use rand::{prelude::SliceRandom, Rng}; 9 | 10 | use paxos::{Acceptor, Error, MemStorage, Proposer, Reactor, Req, Rpc}; 11 | 12 | #[derive(PartialOrd, Ord, Eq, PartialEq, Debug, Clone)] 13 | struct Partition { 14 | at: SystemTime, 15 | duration: Duration, 16 | from: String, 17 | to: String, 18 | } 19 | 20 | impl Partition { 21 | fn generate<G: Gen>( 22 | g: &mut G, 23 | clients: usize, 24 | proposers: usize, 25 | acceptors: usize, 26 | ) -> Self { 27 | static NAMES: [&'static str; 3] = ["client:", "proposer:", "acceptor:"]; 28 | 29 | let from_choice = g.gen_range(0, 3); 30 | let mut to_choice = g.gen_range(0, 3); 31 | 32 | while to_choice == from_choice { 33 | to_choice = g.gen_range(0, 3); 34 | } 35 | 36 | let at = UNIX_EPOCH.add(Duration::new(0, g.gen_range(0, 100))); 37 | let duration = Duration::new(0, g.gen_range(0, 100)); 38 | 39 | let mut n = |choice| match choice { 40 | 0 => g.gen_range(0, clients), 41 | 1 => g.gen_range(0, proposers), 42 | 2 => g.gen_range(0, acceptors), 43 | _ => panic!("too high"), 44 | }; 45 | 46 | let from = format!("{}{}", NAMES[from_choice], n(from_choice)); 47 | let to = format!("{}{}", NAMES[to_choice], n(to_choice)); 48 | Partition { 49 | at: at, 50 | duration: duration, 51 | to: to, 52 | from: from, 53 | } 54 | } 55 | } 56 | 57 | #[derive(PartialOrd, Ord, Eq, PartialEq, Debug, Clone)] 58 | enum ClientRequest { 59 | Get, 60 | Set(Vec<u8>), 61 | Cas(Option<Vec<u8>>, Option<Vec<u8>>), 62 | Del, 63 | } 64 | 65 | impl Arbitrary for ClientRequest { 66 | fn arbitrary<G: Gen>(g: &mut G) -> Self { 67 | // NB ONLY GENERATE CAS 68 | // the linearizability checker can't handle 69 | // anything else! 70 | let choice = g.gen_range(3, 10); 71 | 72 | match choice { 73 | 0 => ClientRequest::Get, 74 | 1 => ClientRequest::Del, 75 | 2 => ClientRequest::Set(vec![g.gen_range(0, 2)]), 76 | _ => ClientRequest::Cas( 77 | if g.gen_ratio(1, 3) { 78 | None 79 | } else { 80 | Some(vec![g.gen_range(0, 2)]) 81 | }, 82 | Some(vec![g.gen_range(0, 2)]), 83 | ), 84 | } 85 | } 86 | } 87 | 88 | #[derive(Eq, PartialEq, Debug, Clone)] 89 | struct ScheduledMessage { 90 | at: SystemTime, 91 | from: String, 92 | to: String, 93 | msg: Rpc, 94 | } 95 | 96 | // we implement Ord and PartialOrd to make the BinaryHeap 97 | // act like a min-heap on time, rather than the default 98 | // max-heap, so time progresses forwards. 99 |
impl Ord for ScheduledMessage { 100 | fn cmp(&self, other: &ScheduledMessage) -> Ordering { 101 | other.at.cmp(&self.at) 102 | } 103 | } 104 | 105 | impl PartialOrd for ScheduledMessage { 106 | fn partial_cmp(&self, other: &ScheduledMessage) -> Option<Ordering> { 107 | Some(self.cmp(other)) 108 | } 109 | } 110 | 111 | #[derive(Debug, Clone)] 112 | enum Node { 113 | Acceptor(Acceptor<MemStorage>), 114 | Proposer(Proposer), 115 | } 116 | 117 | impl Reactor for Node { 118 | type Peer = String; 119 | type Message = Rpc; 120 | 121 | fn receive( 122 | &mut self, 123 | at: SystemTime, 124 | from: Self::Peer, 125 | msg: Self::Message, 126 | ) -> Vec<(Self::Peer, Self::Message)> { 127 | match *self { 128 | Node::Proposer(ref mut inner) => inner.receive(at, from, msg), 129 | Node::Acceptor(ref mut inner) => inner.receive(at, from, msg), 130 | } 131 | } 132 | } 133 | 134 | #[derive(Debug, Clone)] 135 | struct Cluster { 136 | peers: HashMap<String, Node>, 137 | partitions: Vec<Partition>, 138 | in_flight: BinaryHeap<ScheduledMessage>, 139 | client_responses: Vec<ScheduledMessage>, 140 | } 141 | 142 | impl Cluster { 143 | fn step(&mut self) -> Option<()> { 144 | let pop = self.in_flight.pop(); 145 | if let Some(sm) = pop { 146 | if sm.to.starts_with("client:") { 147 | // We'll check linearizability later 148 | // for client responses. 149 | self.client_responses.push(sm); 150 | return Some(()); 151 | } 152 | let mut node = self.peers.remove(&sm.to).unwrap(); 153 | for (to, msg) in node.receive(sm.at, sm.from, sm.msg) { 154 | let from = &*sm.to; 155 | if self.is_partitioned(sm.at, &*to, from) { 156 | // don't push this message on the priority queue 157 | continue; 158 | } 159 | // TODO clock messin' 160 | let new_sm = ScheduledMessage { 161 | at: sm.at.add(Duration::new(0, 1)), 162 | from: sm.to.clone(), 163 | to: to, 164 | msg: msg, 165 | }; 166 | self.in_flight.push(new_sm); 167 | } 168 | self.peers.insert(sm.to, node); 169 | Some(()) 170 | } else { 171 | None 172 | } 173 | } 174 | 175 | fn is_partitioned(&mut self, at: SystemTime, to: &str, from: &str) -> bool { 176 | let mut to_clear = vec![]; 177 | let mut ret = false; 178 | for (i, partition) in self.partitions.iter().enumerate() { 179 | if partition.at > at { 180 | break; 181 | } 182 | 183 | if partition.at <= at && partition.at.add(partition.duration) < at { 184 | to_clear.push(i); 185 | continue; 186 | } 187 | 188 | // the partition is in effect at this time 189 | if &*partition.to == to && &*partition.from == from { 190 | ret = true; 191 | break; 192 | } 193 | } 194 | 195 | // clear partitions that are no longer relevant 196 | for i in to_clear.into_iter().rev() { 197 | self.partitions.remove(i); 198 | } 199 | 200 | ret 201 | } 202 | } 203 | 204 | unsafe impl Send for Cluster {} 205 | 206 | impl Arbitrary for Cluster { 207 | fn arbitrary<G: Gen>(g: &mut G) -> Self { 208 | let n_clients = g.gen_range(1, 4); 209 | let client_addrs: Vec<String> = 210 | (0..n_clients).map(|i| format!("client:{}", i)).collect(); 211 | 212 | let n_proposers = g.gen_range(1, 4); 213 | let proposer_addrs: Vec<String> = (0..n_proposers) 214 | .map(|i| format!("proposer:{}", i)) 215 | .collect(); 216 | 217 | let n_acceptors = g.gen_range(1, 4); 218 | let acceptor_addrs: Vec<String> = (0..n_acceptors) 219 | .map(|i| format!("acceptor:{}", i)) 220 | .collect(); 221 | 222 | let proposers: Vec<(String, Node)> = proposer_addrs 223 | .iter() 224 | .map(|addr| { 225 | let timeout_ms = g.gen_range(0, 10); 226 | ( 227 | addr.clone(), 228 | Node::Proposer(Proposer::new( 229 | timeout_ms, 230 | acceptor_addrs.clone(), 231 | )), 232 | ) 233 | }) 234 | .collect(); 235 | 236 | let acceptors: Vec<(String, Node)> = acceptor_addrs
237 | .iter() 238 | .map(|addr| (addr.clone(), Node::Acceptor(Acceptor::default()))) 239 | .collect(); 240 | 241 | let mut requests = vec![]; 242 | let mut req_counter = 0; 243 | 244 | for client_addr in client_addrs { 245 | let n_requests = g.gen_range(1, 10); 246 | 247 | for _ in 0..n_requests { 248 | req_counter += 1; 249 | let k = g.gen_range(0, 3); 250 | let req = match ClientRequest::arbitrary(g) { 251 | ClientRequest::Get => Req::Get(vec![k]), 252 | ClientRequest::Set(v) => Req::Set(vec![k], v), 253 | ClientRequest::Cas(ov, nv) => Req::Cas(vec![k], ov, nv), 254 | ClientRequest::Del => Req::Del(vec![k]), 255 | }; 256 | 257 | let msg = Rpc::ClientRequest(req_counter, req); 258 | 259 | let at = g.gen_range(0, 100); 260 | 261 | requests.push(ScheduledMessage { 262 | at: UNIX_EPOCH.add(Duration::new(0, at)), 263 | from: client_addr.clone(), 264 | to: SliceRandom::choose(&*proposer_addrs, g) 265 | .unwrap() 266 | .clone(), 267 | msg: msg, 268 | }); 269 | } 270 | } 271 | 272 | let n_partitions = g.gen_range(0, 10); 273 | let mut partitions = vec![]; 274 | for _ in 0..n_partitions { 275 | partitions.push(Partition::generate( 276 | g, 277 | n_clients, 278 | n_proposers, 279 | n_acceptors, 280 | )); 281 | } 282 | partitions.sort(); 283 | 284 | Cluster { 285 | peers: proposers.into_iter().chain(acceptors.into_iter()).collect(), 286 | partitions: partitions, 287 | in_flight: requests.clone().into_iter().collect(), 288 | client_responses: vec![], 289 | } 290 | } 291 | 292 | fn shrink(&self) -> Box<dyn Iterator<Item = Cluster>> { 293 | let mut ret = vec![]; 294 | 295 | for i in 0..self.in_flight.len() { 296 | let mut in_flight: Vec<_> = 297 | self.in_flight.clone().into_iter().collect(); 298 | in_flight.remove(i); 299 | let mut c = self.clone(); 300 | c.in_flight = in_flight.into_iter().collect(); 301 | ret.push(c); 302 | } 303 | 304 | Box::new(ret.into_iter()) 305 | } 306 | } 307 | 308 | #[derive(PartialOrd, Ord, Eq, PartialEq, Debug, Clone)] 309 | enum Act { 310 | Publish(Option<Vec<u8>>), 311 | Observe(Option<Vec<u8>>), 312 | Consume(Option<Vec<u8>>), 313 | } 314 | 315 | #[derive(PartialOrd, Ord, Eq, PartialEq, Debug, Clone)] 316 | struct Event { 317 | at: SystemTime, 318 | act: Act, 319 | client_req_id: u64, 320 | } 321 | 322 | // simple (simplistic, not exhaustive) linearizability checker: 323 | // main properties: 324 | // 1. an effect must NOT be observed before 325 | // its causal operation starts 326 | // 2. after its causal operation ends, 327 | // an effect MUST be observed 328 | // 329 | // for each successful operation end time 330 | // * populate a pending set for possibly 331 | // successful operations that start before 332 | // then (one pass ever, iterate alongside 333 | // the end times by start times) 334 | // * for each successful operation that reads 335 | // a previous write, ensure that it is present 336 | // in the write set. 337 | // * for each successful operation that "consumes" 338 | // a previous write (CAS, Del) we try to pop 339 | // its consumed value out of our pending set 340 | // after filling the pending set with any write 341 | // that could have happened before then. 342 | // if it's not there, we failed linearizability.
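// // an added worked example: if CAS(k, None, Some(vec![1])) returns Ok and // ends at t=5, it publishes Some(vec![1]) into k's value pool; a later // successful CAS(k, Some(vec![1]), Some(vec![2])) must observe Some(vec![1]) // (its pool count must be positive at that end time), consumes it (so no // third CAS can consume the same write again), and publishes Some(vec![2]).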
343 | // 344 | fn check_linearizability( 345 | request_rpcs: Vec<ScheduledMessage>, 346 | response_rpcs: Vec<ScheduledMessage>, 347 | ) -> bool { 348 | use Req::*; 349 | // publishes "happen" as soon as a not-explicitly-failed 350 | // request begins 351 | let mut publishes = vec![]; 352 | // observes "happen" at the end of a successful response or 353 | // cas failure 354 | let mut observes = vec![]; 355 | // consumes "happen" at the end of a successful response 356 | let mut consumes = vec![]; 357 | 358 | let responses: std::collections::BTreeMap<_, _> = response_rpcs 359 | .into_iter() 360 | .filter_map(|r| { 361 | if let Rpc::ClientResponse(id, res) = r.msg { 362 | Some((id, (r.at, res))) 363 | } else { 364 | panic!("non-ClientResponse sent to client") 365 | } 366 | }) 367 | .collect(); 368 | 369 | for r in request_rpcs { 370 | let (id, req) = if let Rpc::ClientRequest(id, req) = r.msg { 371 | (id, req) 372 | } else { 373 | panic!("Cluster started with non-ClientRequest") 374 | }; 375 | 376 | let begin = r.at; 377 | 378 | // reasoning about effects: 379 | // 380 | // OP | res | consumes | publishes | observes 381 | // ----------------------------------------- 382 | // CAS | ok | old | new | old 383 | // CAS | casf | - | - | actual 384 | // CAS | ? | ? | ? | ? 385 | // DEL | ok | ? | None | - 386 | // DEL | ? | ? | None? | - 387 | // SET | ok | ? | value | - 388 | // SET | ? | ? | value? | - 389 | // GET | ok | - | - | value 390 | // GET | ? | - | - | value? 391 | match responses.get(&id) { 392 | None 393 | | Some(&(_, Err(Error::Timeout))) 394 | | Some(&(_, Err(Error::AcceptRejected { .. }))) => { 395 | // not sure if this actually took effect or not. 396 | // NB this is sort of weird, because even if an accept was 397 | // rejected by a majority, its value may still surface in a 398 | // later round. so as a client we have to treat it as being 399 | // in a weird pending state. 400 |
400 | match req { 401 | Cas(k, _old, new) => publishes.push((k, begin, new, id)), 402 | Del(k) => publishes.push((k, begin, None, id)), 403 | Set(k, value) => { 404 | publishes.push((k, begin, Some(value), id)) 405 | } 406 | Get(_k) => {} 407 | } 408 | } 409 | Some(&(end, Ok(ref v))) => match req { 410 | Cas(k, old, new) => { 411 | consumes.push((k.clone(), end, old.clone(), id)); 412 | observes.push((k.clone(), end, old, id)); 413 | publishes.push((k.clone(), begin, new, id)); 414 | } 415 | Get(k) => observes.push((k, end, v.clone(), id)), 416 | Del(k) => publishes.push((k, end, None, id)), 417 | Set(k, value) => publishes.push((k, end, Some(value), id)), 418 | }, 419 | Some(&(end, Err(Error::CasFailed(ref witnessed)))) => match req { 420 | Cas(k, _old, _new) => { 421 | observes.push((k, end, witnessed.clone(), id)); 422 | } 423 | _ => panic!("non-cas request found for CasFailed response"), 424 | }, 425 | _ => { 426 | // propose/accept failure, no actionable info can be derived 427 | } 428 | } 429 | } 430 | 431 | let mut events_per_k = HashMap::new(); 432 | 433 | for (k, time, value, id) in publishes { 434 | let events = events_per_k.entry(k).or_insert(vec![]); 435 | events.push(Event { 436 | at: time, 437 | act: Act::Publish(value), 438 | client_req_id: id, 439 | }); 440 | } 441 | 442 | for (k, time, value, id) in consumes { 443 | let events = events_per_k.entry(k).or_insert(vec![]); 444 | events.push(Event { 445 | at: time, 446 | act: Act::Consume(value), 447 | client_req_id: id, 448 | }); 449 | } 450 | 451 | for (k, time, value, id) in observes { 452 | let events = events_per_k.entry(k).or_insert(vec![]); 453 | events.push(Event { 454 | at: time, 455 | act: Act::Observe(value), 456 | client_req_id: id, 457 | }); 458 | } 459 | 460 | for (_k, mut events) in events_per_k.into_iter() { 461 | events.sort(); 462 | 463 | let mut value_pool = HashMap::new(); 464 | value_pool.insert(None, 1); 465 | 466 | for event in events { 467 | // println!("k: {:?}, event: {:?}", _k, event); 468 | match event.act { 469 | Act::Publish(v) => { 470 | let entry = value_pool.entry(v).or_insert(0); 471 | *entry += 1; 472 | } 473 | Act::Observe(v) => { 474 | let count = value_pool.get(&v).unwrap(); 475 | assert!( 476 | *count > 0, 477 | "expect to be able to witness {:?} at time {:?} for req {}", 478 | v, 479 | event.at, 480 | event.client_req_id 481 | ) 482 | } 483 | Act::Consume(v) => { 484 | let count = value_pool.get_mut(&v).unwrap(); 485 | assert!(*count > 0); 486 | *count -= 1; 487 | } 488 | } 489 | } 490 | } 491 | 492 | true 493 | } 494 | 495 | fn prop_cluster_linearizability(mut cluster: Cluster) -> bool { 496 | let client_requests: Vec<_> = 497 | cluster.in_flight.clone().into_iter().collect(); 498 | 499 | while let Some(_) = cluster.step() {} 500 | 501 | check_linearizability(client_requests, cluster.client_responses) 502 | } 503 | 504 | #[test] 505 | fn test_quickcheck_paxos_linearizes() { 506 | QuickCheck::new() 507 | .gen(StdGen::new(rand::thread_rng(), 100)) 508 | .tests(10000) 509 | .max_tests(1000000) 510 | .quickcheck(prop_cluster_linearizability as fn(Cluster) -> bool); 511 | } 512 | 513 | #[test] 514 | fn linearizability_bug_01() { 515 | // postmortem: was not considering that requests that received 516 | // explicit AcceptRejected messages from a quorum could actually 517 | // be used as the input to a later round, as long as they landed 518 | // a single ACCEPT on any node. 
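// (An added gloss: a CAS explicitly rejected by a majority can still take // effect later, because any single acceptor that accepted it may surface // that value as last_accepted_value in a promise, and the next proposer // adopts it as the current state of the register. this is why // check_linearizability treats AcceptRejected like Timeout: the op is // indeterminate, so its write is published at request start rather than // treated as a no-op.)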
519 | prop_cluster_linearizability(Cluster { 520 | peers: vec![ 521 | ("acceptor:0".to_owned(), Node::Acceptor(Acceptor::default())), 522 | ("acceptor:1".to_owned(), Node::Acceptor(Acceptor::default())), 523 | ("acceptor:2".to_owned(), Node::Acceptor(Acceptor::default())), 524 | ( 525 | "proposer:0".to_owned(), 526 | Node::Proposer(Proposer::new( 527 | 8, 528 | vec![ 529 | "acceptor:0".to_owned(), 530 | "acceptor:1".to_owned(), 531 | "acceptor:2".to_owned(), 532 | ], 533 | )), 534 | ), 535 | ( 536 | "proposer:1".to_owned(), 537 | Node::Proposer(Proposer::new( 538 | 8, 539 | vec![ 540 | "acceptor:0".to_owned(), 541 | "acceptor:1".to_owned(), 542 | "acceptor:2".to_owned(), 543 | ], 544 | )), 545 | ), 546 | ( 547 | "proposer:2".to_owned(), 548 | Node::Proposer(Proposer::new( 549 | 8, 550 | vec![ 551 | "acceptor:0".to_owned(), 552 | "acceptor:1".to_owned(), 553 | "acceptor:2".to_owned(), 554 | ], 555 | )), 556 | ), 557 | ] 558 | .into_iter() 559 | .collect(), 560 | partitions: vec![], 561 | in_flight: vec![ 562 | ScheduledMessage { 563 | at: UNIX_EPOCH.add(Duration::new(0, 6)), 564 | from: "client:1".to_owned(), 565 | to: "proposer:1".to_owned(), 566 | msg: Rpc::ClientRequest( 567 | 9, 568 | Req::Cas(b"k1".to_vec(), Some(vec![1]), Some(vec![1])), 569 | ), 570 | }, 571 | ScheduledMessage { 572 | at: UNIX_EPOCH.add(Duration::new(0, 20)), 573 | from: "client:0".to_owned(), 574 | to: "proposer:2".to_owned(), 575 | msg: Rpc::ClientRequest( 576 | 1, 577 | Req::Cas(b"k1".to_vec(), None, Some(vec![1])), 578 | ), 579 | }, 580 | ScheduledMessage { 581 | at: UNIX_EPOCH.add(Duration::new(0, 22)), 582 | from: "client:1".to_owned(), 583 | to: "proposer:1".to_owned(), 584 | msg: Rpc::ClientRequest( 585 | 11, 586 | Req::Cas(b"k1".to_vec(), Some(vec![1]), Some(vec![1])), 587 | ), 588 | }, 589 | ScheduledMessage { 590 | at: UNIX_EPOCH.add(Duration::new(0, 28)), 591 | from: "client:0".to_owned(), 592 | to: "proposer:0".to_owned(), 593 | msg: Rpc::ClientRequest( 594 | 6, 595 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![1])), 596 | ), 597 | }, 598 | ScheduledMessage { 599 | at: UNIX_EPOCH.add(Duration::new(0, 30)), 600 | from: "client:0".to_owned(), 601 | to: "proposer:2".to_owned(), 602 | msg: Rpc::ClientRequest( 603 | 3, 604 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![1])), 605 | ), 606 | }, 607 | ScheduledMessage { 608 | at: UNIX_EPOCH.add(Duration::new(0, 45)), 609 | from: "client:0".to_owned(), 610 | to: "proposer:2".to_owned(), 611 | msg: Rpc::ClientRequest( 612 | 4, 613 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![0])), 614 | ), 615 | }, 616 | ScheduledMessage { 617 | at: UNIX_EPOCH.add(Duration::new(0, 45)), 618 | from: "client:0".to_owned(), 619 | to: "proposer:2".to_owned(), 620 | msg: Rpc::ClientRequest( 621 | 7, 622 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![1])), 623 | ), 624 | }, 625 | ScheduledMessage { 626 | at: UNIX_EPOCH.add(Duration::new(0, 51)), 627 | from: "client:0".to_owned(), 628 | to: "proposer:2".to_owned(), 629 | msg: Rpc::ClientRequest( 630 | 8, 631 | Req::Cas(b"k1".to_vec(), Some(vec![1]), Some(vec![1])), 632 | ), 633 | }, 634 | ScheduledMessage { 635 | at: UNIX_EPOCH.add(Duration::new(0, 66)), 636 | from: "client:0".to_owned(), 637 | to: "proposer:0".to_owned(), 638 | msg: Rpc::ClientRequest( 639 | 2, 640 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![1])), 641 | ), 642 | }, 643 | ScheduledMessage { 644 | at: UNIX_EPOCH.add(Duration::new(0, 84)), 645 | from: "client:1".to_owned(), 646 | to: "proposer:1".to_owned(), 647 | msg: Rpc::ClientRequest( 648 
| 10, 649 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![0])), 650 | ), 651 | }, 652 | ScheduledMessage { 653 | at: UNIX_EPOCH.add(Duration::new(0, 87)), 654 | from: "client:0".to_owned(), 655 | to: "proposer:1".to_owned(), 656 | msg: Rpc::ClientRequest( 657 | 5, 658 | Req::Cas(b"k1".to_vec(), Some(vec![0]), Some(vec![1])), 659 | ), 660 | }, 661 | ] 662 | .into_iter() 663 | .collect(), 664 | client_responses: vec![], 665 | }); 666 | } 667 | --------------------------------------------------------------------------------