├── .cargo └── config.toml ├── .github └── workflows │ ├── lint.yml │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── benchmarks ├── Cargo.toml ├── README.md └── src │ ├── main.rs │ ├── replication │ ├── datacake_memory.rs │ └── mod.rs │ └── stores │ ├── memstore.rs │ └── mod.rs ├── datacake-crdt ├── Cargo.toml ├── README.md └── src │ ├── lib.rs │ ├── orswot.rs │ └── timestamp.rs ├── datacake-eventual-consistency ├── Cargo.toml ├── README.md ├── src │ ├── core.rs │ ├── error.rs │ ├── keyspace │ │ ├── actor.rs │ │ ├── group.rs │ │ ├── messages.rs │ │ └── mod.rs │ ├── lib.rs │ ├── replication │ │ ├── distributor.rs │ │ ├── mod.rs │ │ └── poller.rs │ ├── rpc │ │ ├── client.rs │ │ ├── mod.rs │ │ └── services │ │ │ ├── consistency_impl.rs │ │ │ ├── mod.rs │ │ │ └── replication_impl.rs │ ├── statistics.rs │ ├── storage.rs │ └── test_utils.rs └── tests │ ├── basic_connect.rs │ ├── dynamic_membership.rs │ ├── multi_node_cluster.rs │ ├── multiple_keyspace.rs │ └── single_node_cluster.rs ├── datacake-lmdb ├── Cargo.toml ├── README.md ├── src │ ├── db.rs │ └── lib.rs └── tests │ └── basic_cluster.rs ├── datacake-node ├── Cargo.toml ├── README.md ├── src │ ├── clock.rs │ ├── error.rs │ ├── extension.rs │ ├── lib.rs │ ├── node.rs │ ├── nodes_selector.rs │ ├── rpc │ │ ├── chitchat_transport.rs │ │ ├── mod.rs │ │ ├── network.rs │ │ └── services │ │ │ ├── chitchat_impl.rs │ │ │ └── mod.rs │ └── statistics.rs └── tests │ └── membership.rs ├── datacake-rpc ├── Cargo.toml ├── README.md ├── src │ ├── body.rs │ ├── client.rs │ ├── handler.rs │ ├── lib.rs │ ├── net │ │ ├── client.rs │ │ ├── mod.rs │ │ ├── server.rs │ │ ├── simulation.rs │ │ └── status.rs │ ├── request.rs │ ├── rkyv_tooling │ │ ├── mod.rs │ │ ├── scratch.rs │ │ └── view.rs │ ├── server.rs │ └── utils.rs └── tests │ ├── basic.rs │ ├── many_messages.rs │ ├── many_services.rs │ ├── passing_headers.rs │ ├── service_error.rs │ ├── stream.rs │ └── unknown_service.rs ├── datacake-sqlite ├── 
Cargo.toml ├── README.md ├── src │ ├── db.rs │ ├── from_row_impl.rs │ └── lib.rs └── tests │ └── basic_cluster.rs ├── examples ├── README.md └── replicated-kv │ ├── Cargo.toml │ ├── README.md │ ├── scripts │ └── store-many-docs.py │ └── src │ ├── main.rs │ └── storage.rs ├── rustfmt.toml ├── simulation-tests ├── Cargo.toml ├── src │ └── lib.rs └── tests │ └── rpc.rs ├── src └── lib.rs └── test-helper ├── Cargo.toml └── src ├── addr.rs └── lib.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | rustflags = ["--cfg", "tokio_unstable"] -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | types: [ opened, synchronize, reopened ] 8 | 9 | jobs: 10 | fmt: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | 15 | - name: Install Rust 16 | uses: actions-rs/toolchain@v1 17 | with: 18 | toolchain: nightly 19 | profile: minimal 20 | components: rustfmt 21 | override: true 22 | - name: Rustfmt Check 23 | uses: actions-rs/cargo@v1 24 | with: 25 | command: fmt 26 | args: --all -- --check 27 | 28 | clippy: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Install Protoc 33 | uses: arduino/setup-protoc@v1 34 | with: 35 | repo-token: ${{ secrets.GITHUB_TOKEN }} 36 | - name: Install cargo-hack 37 | run: cargo install cargo-hack 38 | - uses: arduino/setup-protoc@v1 39 | with: 40 | repo-token: ${{ secrets.GITHUB_TOKEN }} 41 | - name: Install cargo-hack 42 | run: cargo install cargo-hack 43 | - uses: Swatinem/rust-cache@v1 44 | - name: Install Rust 45 | uses: actions-rs/toolchain@v1 46 | with: 47 | toolchain: nightly 48 | profile: minimal 49 | components: clippy 50 | override: true 51 | - name: Clippy Check 52 | uses: 
actions-rs/clippy-check@v1 53 | with: 54 | token: ${{ secrets.GITHUB_TOKEN }} 55 | args: --workspace --all-features --tests --examples --bins -- -Dclippy::todo -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | check: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [ubuntu-latest, macOS-latest, windows-latest] 18 | rust: [stable] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Install Protoc 23 | uses: arduino/setup-protoc@v1 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | - name: Install cargo-hack 27 | run: cargo install cargo-hack 28 | - uses: arduino/setup-protoc@v1 29 | with: 30 | repo-token: ${{ secrets.GITHUB_TOKEN }} 31 | - name: Install cargo-hack 32 | run: cargo install cargo-hack 33 | - uses: Swatinem/rust-cache@v1 34 | - name: Check features 35 | run: cargo hack check --all --ignore-private --each-feature --no-dev-deps 36 | - name: Check all targets 37 | run: cargo check --all --all-targets --all-features 38 | 39 | build: 40 | runs-on: ${{ matrix.os }} 41 | strategy: 42 | matrix: 43 | os: [ubuntu-latest, macOS-latest, windows-latest] 44 | rust: [stable] 45 | 46 | steps: 47 | - uses: actions/checkout@v2 48 | - uses: taiki-e/install-action@nextest 49 | - uses: Swatinem/rust-cache@v2 50 | 51 | - name: Run main tests 52 | run: cargo nextest run --features test-utils --workspace --exclude simulation-tests --retries 3 53 | 54 | - name: Run doc tests 55 | run: cargo test --doc --features test-utils --workspace --exclude simulation-tests 56 | 57 | - name: Run simulation tests 58 | run: cargo nextest run -p simulation-tests --retries 3 -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | **/Cargo.lock 3 | /data 4 | /.idea -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake" 3 | version = "0.8.0" 4 | edition = "2021" 5 | description = "A batteries included framework for building fault-tolerance distributed data systems." 6 | license = "MIT" 7 | keywords = ["crdt", "databases", "distributed", "tokio", "async"] 8 | categories = ["concurrency", "data-structures", "asynchronous"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | datacake-crdt = { version = "0.5", path = "datacake-crdt", optional = true } 16 | datacake-eventual-consistency = { version = "0.6", path = "datacake-eventual-consistency", optional = true } 17 | datacake-sqlite = { version = "0.5", path = "datacake-sqlite", optional = true } 18 | datacake-rpc = { version = "0.6", path = "datacake-rpc", optional = true } 19 | datacake-node = { version = "0.5", path = "datacake-node", optional = true } 20 | datacake-lmdb = { version = "0.3", path = "datacake-lmdb", optional = true } 21 | 22 | [dev-dependencies] 23 | tokio = { version = "1", features = ["rt"] } 24 | anyhow = "1" 25 | 26 | [features] 27 | test-utils = ["datacake-eventual-consistency/test-utils"] 28 | rkyv = ["datacake-crdt/rkyv-support"] 29 | rkyv-validation = ["rkyv", "datacake-crdt/rkyv-validation"] 30 | simulation = ["datacake-rpc/simulation"] 31 | default = [ 32 | "datacake-crdt", 33 | "datacake-rpc", 34 | "datacake-eventual-consistency", 35 | "datacake-node", 36 | ] 37 | 38 | [workspace] 39 | members = [ 40 | "datacake-node", 41 | "datacake-eventual-consistency", 42 | "datacake-crdt", 43 | 
"datacake-sqlite", 44 | "datacake-rpc", 45 | "datacake-lmdb", 46 | 47 | # Utils 48 | "test-helper", 49 | "benchmarks", 50 | "simulation-tests", 51 | 52 | # Example set 53 | "examples/replicated-kv", 54 | ] -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 lnx search 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lnx Datacake 2 | Easy to use tooling for building eventually consistent distributed data systems in Rust. 3 | 4 | > "Oh consistency where art thou?" - CF. 5 | 6 | ### Features ✨ 7 | - **Simple** setup, a cluster can be setup and ready to use with one trait. 
8 | - Adjustable consistency levels when mutating state. 9 | - Data center aware replication prioritisation. 10 | - Pre-built test suite for `Storage` trait implementations to ensure correct functionality. 11 | 12 | ### The packages 13 | Datacake provides several utility libraries as well as some pre-made data store handlers: 14 | 15 | - `datacake-crdt` - A CRDT implementation based on a hybrid logical clock (HLC) 16 | provided in the form of the `HLCTimestamp`. 17 | - `datacake-node` - A cluster membership system and managed RPC built on top of chitchat. 18 | - `datacake-eventual-consistency` - Built on top of `datacake-crdt`, a batteries included framework 19 | for building eventually consistent, replicated systems where you only need to implement a basic 20 | storage trait. 21 | - `datacake-sqlite` - A pre-built and tested implementation of the datacake `Storage` trait built 22 | upon SQLite. 23 | - `datacake-lmdb` - A pre-built and tested implementation of the datacake `Storage` trait built upon LMDB. 24 | - `datacake-rpc` - A fast, zero-copy RPC framework with a familiar actor-like feel to it. 25 | 26 | ### Examples 27 | Check out some pre-built apps we have in the 28 | [example folder](https://github.com/lnx-search/datacake/tree/main/examples) 29 | 30 | You can also look at some heavier integration tests 31 | [here](https://github.com/lnx-search/datacake/tree/main/datacake-eventual-consistency/tests) 32 | 33 | #### Single Node Cluster 34 | Here's an example of a basic cluster with one node that runs on your local network, it uses almost all of the packages 35 | including: 36 | 37 | - `datacake-node` for the core node membership. 38 | - `datacake-crdt` for the HLCTimestamp and CRDT implementations 39 | - `datacake-eventually-consistency` for the eventually consistent replication of state. 40 | - `datacake-rpc` bundled up with everything for managing all the cluster RPC. 
41 | 42 | ```rust 43 | use std::net::SocketAddr; 44 | use datacake::node::{Consistency, ConnectionConfig, DCAwareSelector, DatacakeNodeBuilder}; 45 | use datacake::eventual_consistency::test_utils::MemStore; 46 | use datacake::eventual_consistency::EventuallyConsistentStoreExtension; 47 | 48 | #[tokio::main] 49 | async fn main() -> anyhow::Result<()> { 50 | let addr = "127.0.0.1:8080".parse::().unwrap(); 51 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 52 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 53 | .connect() 54 | .await 55 | .expect("Connect node."); 56 | 57 | let store = node 58 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 59 | .await 60 | .expect("Create store."); 61 | 62 | let handle = store.handle(); 63 | 64 | handle 65 | .put( 66 | "my-keyspace", 67 | 1, 68 | b"Hello, world! From keyspace 1.".to_vec(), 69 | Consistency::All, 70 | ) 71 | .await 72 | .expect("Put doc."); 73 | 74 | Ok(()) 75 | } 76 | ``` 77 | 78 | ### Why does Datacake exist? 79 | 80 | Datacake is the result of my attempts at bringing high-availability to [lnx](https://github.com/lnx-search/lnx) 81 | unlike languages like Erlang or Go, Rust currently has a fairly young ecosystem around distributed 82 | systems. This makes it very hard to build a replicated system in Rust without implementing a lot of things 83 | from scratch and without a lot of research into the area to begin with. 84 | 85 | Currently, the main algorithms available in Rust is [Raft](https://raft.github.io/) which is replication via 86 | consensus, overall it is a very good algorithm, and it's a very simple to understand algorithm however, 87 | I'm not currently satisfied that the current implementations are stable enough or are maintained in order to 88 | choose it. (Also for lnx's particular use case leader-less eventual consistency was more preferable.) 
89 | 90 | Because of the above, I built Datacake with the aim of building a reliable, well tested, eventual consistent system 91 | akin to how Cassandra or more specifically how ScyllaDB behave with eventual consistent replication, but with a few 92 | core differences: 93 | 94 | - Datacake does not require an external source or read repair to clear tombstones. 95 | - The underlying CRDTs which are what actually power Datacake are kept purely in memory. 96 | - Partitioning and sharding is not (currently) supported. 97 | 98 | It's worth noting that Datacake itself does not implement the consensus and membership algorithms from scratch, instead 99 | we use [chitchat](https://github.com/quickwit-oss/chitchat) developed by [Quickwit](https://quickwit.io/) which is an 100 | implementation of the scuttlebutt algorithm. 101 | 102 | ### Inspirations and references 103 | - [CRDTs for Mortals by James Long](https://www.youtube.com/watch?v=iEFcmfmdh2w) 104 | - [Big(ger) Sets: Making CRDT Sets Scale in Riak by Russell Brown](https://www.youtube.com/watch?v=f20882ZSdkU) 105 | - ["CRDTs Illustrated" by Arnout Engelen](https://www.youtube.com/watch?v=9xFfOhasiOE) 106 | - ["Practical data synchronization with CRDTs" by Dmitry Ivanov](https://www.youtube.com/watch?v=veeWamWy8dk) 107 | - [CRDTs and the Quest for Distributed Consistency](https://www.youtube.com/watch?v=B5NULPSiOGw) 108 | - [Logical Physical Clocks and Consistent Snapshots in Globally Distributed Databases](https://cse.buffalo.edu/tech-reports/2014-04.pdf) 109 | 110 | ### Contributing 111 | Contributions are always welcome, although please open an issue for an idea about extending the main cluster system 112 | if you wish to extend or modify it heavily as something's are not always as simple as they seem. 113 | 114 | #### What sort of things could I contribute? 115 | 🧪 Tests! 🧪 Tests! 🧪 Tests! 
Joking aside testing is probably the most important part of the system, extending these 116 | tests in any way you might think of, big or small is a huge help :) 117 | 118 | ### Future Ideas 119 | - Multi-raft framework? 120 | - CASPaxos??? 121 | - More storage implementations? 122 | -------------------------------------------------------------------------------- /benchmarks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "benchmarks" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = "1" 10 | tracing = "0.1.37" 11 | tracing-subscriber = "0.3.16" 12 | tracing-futures = "0.2.5" 13 | humantime = "2.1.0" 14 | thiserror = "1.0.38" 15 | async-trait = "0.1.60" 16 | parking_lot = "0.12.1" 17 | 18 | test-helper = { path = "../test-helper" } 19 | datacake = { path = ".." } 20 | tokio = { version = "1", features = ["full"] } 21 | mimalloc = { version = "0.1.32", default-features = false } -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Datacake Benchmarks 2 | 3 | This is a set of benchmarks used to guide development of the implementations, *IT SHOULD NOT BE USED TO CHOOSE WHAT 4 | SYSTEM TO USE.* -------------------------------------------------------------------------------- /benchmarks/src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate tracing; 3 | 4 | mod replication; 5 | mod stores; 6 | 7 | use std::time::Instant; 8 | 9 | use anyhow::Result; 10 | use datacake::node::Consistency; 11 | use mimalloc::MiMalloc; 12 | 13 | #[global_allocator] 14 | static GLOBAL: MiMalloc = MiMalloc; 15 | 16 | #[tokio::main] 17 | async fn main() -> Result<()> { 18 | 
std::env::set_var( 19 | "RUST_LOG", 20 | "info,datacake_node=error,datacake_eventual_consistency=error", 21 | ); 22 | tracing_subscriber::fmt::init(); 23 | 24 | info!("Beginning eventually consistent replication benchmark..."); 25 | let start = Instant::now(); 26 | 27 | replication::run_datacake(1, Consistency::All).await?; 28 | replication::run_datacake(3, Consistency::All).await?; 29 | replication::run_datacake(5, Consistency::All).await?; 30 | 31 | replication::run_datacake(1, Consistency::None).await?; 32 | replication::run_datacake(3, Consistency::None).await?; 33 | replication::run_datacake(5, Consistency::None).await?; 34 | 35 | // 1 node cluster cannot use Consistency::One - replication::run_datacake(1, Consistency::One).await?; 36 | replication::run_datacake(3, Consistency::One).await?; 37 | replication::run_datacake(5, Consistency::One).await?; 38 | 39 | replication::run_datacake(1, Consistency::EachQuorum).await?; 40 | replication::run_datacake(3, Consistency::EachQuorum).await?; 41 | replication::run_datacake(5, Consistency::EachQuorum).await?; 42 | 43 | replication::run_datacake(1, Consistency::Quorum).await?; 44 | replication::run_datacake(3, Consistency::Quorum).await?; 45 | replication::run_datacake(5, Consistency::Quorum).await?; 46 | 47 | replication::run_datacake(1, Consistency::LocalQuorum).await?; 48 | replication::run_datacake(3, Consistency::LocalQuorum).await?; 49 | replication::run_datacake(5, Consistency::LocalQuorum).await?; 50 | 51 | info!( 52 | "Benchmark Took: {}", 53 | humantime::format_duration(start.elapsed()) 54 | ); 55 | 56 | Ok(()) 57 | } 58 | -------------------------------------------------------------------------------- /benchmarks/src/replication/datacake_memory.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | use std::time::{Duration, Instant}; 3 | 4 | use anyhow::Result; 5 | use datacake::eventual_consistency::{ 6 | EventuallyConsistentStore, 7 | 
EventuallyConsistentStoreExtension, 8 | ReplicatedStoreHandle, 9 | }; 10 | use datacake::node::{ 11 | ConnectionConfig, 12 | Consistency, 13 | DCAwareSelector, 14 | DatacakeNode, 15 | DatacakeNodeBuilder, 16 | }; 17 | 18 | use crate::stores::memstore::MemStore; 19 | 20 | #[instrument(name = "datacake-ec-benchmark")] 21 | pub async fn run_datacake(n_nodes: u8, consistency: Consistency) -> Result<()> { 22 | let nodes = connect_nodes(n_nodes).await?; 23 | let first_node = &nodes[0]; 24 | let handle = first_node.store.handle(); 25 | 26 | let start = Instant::now(); 27 | insert_n_docs(handle.clone(), 0..1_000, consistency).await?; 28 | info!( 29 | "Inserting 1000 docs serially took {}", 30 | humantime::format_duration(start.elapsed()) 31 | ); 32 | 33 | for concurrency in [10, 125, 256, 400, 512, 1024, 2048] { 34 | let start = Instant::now(); 35 | run_insert_concurrently(handle.clone(), concurrency, 0..1_000, consistency) 36 | .await?; 37 | info!( 38 | "Inserting 1000 docs @ {concurrency} took {}", 39 | humantime::format_duration(start.elapsed()) 40 | ); 41 | } 42 | 43 | let start = Instant::now(); 44 | remove_n_docs(handle.clone(), 0..1_000, consistency).await?; 45 | info!( 46 | "Removing 1000 docs serially took {}", 47 | humantime::format_duration(start.elapsed()) 48 | ); 49 | 50 | for concurrency in [10, 125, 256, 400, 512, 1024, 2048] { 51 | let start = Instant::now(); 52 | run_remove_concurrently(handle.clone(), concurrency, 0..1_000, consistency) 53 | .await?; 54 | info!( 55 | "Removing 1000 docs @ {concurrency} took {}", 56 | humantime::format_duration(start.elapsed()) 57 | ); 58 | } 59 | 60 | Ok(()) 61 | } 62 | 63 | async fn insert_n_docs( 64 | handle: ReplicatedStoreHandle, 65 | range: Range, 66 | consistency: Consistency, 67 | ) -> Result<()> { 68 | for id in range { 69 | handle 70 | .put( 71 | "my-keyspace", 72 | id, 73 | b"Hello, world! 
From keyspace 1.".to_vec(), 74 | consistency, 75 | ) 76 | .await?; 77 | } 78 | Ok(()) 79 | } 80 | 81 | async fn remove_n_docs( 82 | handle: ReplicatedStoreHandle, 83 | range: Range, 84 | consistency: Consistency, 85 | ) -> Result<()> { 86 | for id in range { 87 | handle.del("my-keyspace", id, consistency).await?; 88 | } 89 | Ok(()) 90 | } 91 | 92 | async fn run_insert_concurrently( 93 | handle: ReplicatedStoreHandle, 94 | concurrency: usize, 95 | range: Range, 96 | consistency: Consistency, 97 | ) -> Result<()> { 98 | let mut handles = Vec::new(); 99 | for _ in 0..concurrency { 100 | let handle = handle.clone(); 101 | handles.push(tokio::spawn(insert_n_docs( 102 | handle, 103 | range.clone(), 104 | consistency, 105 | ))); 106 | } 107 | 108 | for handle in handles { 109 | handle.await??; 110 | } 111 | 112 | Ok(()) 113 | } 114 | 115 | async fn run_remove_concurrently( 116 | handle: ReplicatedStoreHandle, 117 | concurrency: usize, 118 | range: Range, 119 | consistency: Consistency, 120 | ) -> Result<()> { 121 | let mut handles = Vec::new(); 122 | for _ in 0..concurrency { 123 | let handle = handle.clone(); 124 | handles.push(tokio::spawn(remove_n_docs( 125 | handle, 126 | range.clone(), 127 | consistency, 128 | ))); 129 | } 130 | 131 | for handle in handles { 132 | handle.await??; 133 | } 134 | 135 | Ok(()) 136 | } 137 | 138 | async fn connect_nodes(n: u8) -> Result> { 139 | let mut nodes = Vec::new(); 140 | let mut previous_seeds = Vec::new(); 141 | let mut previous_node_ids = Vec::new(); 142 | for id in 0..n { 143 | let addr = test_helper::get_unused_addr(); 144 | 145 | let connection_cfg = ConnectionConfig::new(addr, addr, &previous_seeds); 146 | let node = DatacakeNodeBuilder::::new(id, connection_cfg) 147 | .connect() 148 | .await 149 | .expect("Connect node."); 150 | 151 | node.wait_for_nodes(&previous_node_ids, Duration::from_secs(30)) 152 | .await?; 153 | 154 | previous_node_ids.push(id); 155 | if previous_seeds.len() >= 2 { 156 | previous_seeds.pop(); 157 | } 
158 | previous_seeds.push(addr.to_string()); 159 | 160 | let store = node 161 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 162 | .await 163 | .expect("Create store."); 164 | 165 | nodes.push(DatacakeSystem { _node: node, store }); 166 | } 167 | 168 | Ok(nodes) 169 | } 170 | 171 | struct DatacakeSystem { 172 | _node: DatacakeNode, 173 | store: EventuallyConsistentStore, 174 | } 175 | -------------------------------------------------------------------------------- /benchmarks/src/replication/mod.rs: -------------------------------------------------------------------------------- 1 | mod datacake_memory; 2 | 3 | pub use datacake_memory::run_datacake; 4 | -------------------------------------------------------------------------------- /benchmarks/src/stores/memstore.rs: -------------------------------------------------------------------------------- 1 | //! Copied from the test_utils file in the EC store. 2 | use std::collections::HashMap; 3 | 4 | use datacake::crdt::{HLCTimestamp, Key}; 5 | use datacake::eventual_consistency::{ 6 | BulkMutationError, 7 | Document, 8 | DocumentMetadata, 9 | Storage, 10 | }; 11 | use parking_lot::RwLock; 12 | 13 | #[derive(Debug, Default)] 14 | /// A in-memory storage implementor. 15 | /// 16 | /// This is not suitable for any sort of real world usage outside of testing. 
17 | pub struct MemStore { 18 | #[allow(clippy::complexity)] 19 | metadata: RwLock>>, 20 | data: RwLock>>, 21 | } 22 | 23 | #[derive(Debug, thiserror::Error)] 24 | #[error("{0}")] 25 | pub struct MemStoreError(#[from] pub anyhow::Error); 26 | 27 | #[async_trait::async_trait] 28 | impl Storage for MemStore { 29 | type Error = MemStoreError; 30 | type DocsIter = std::vec::IntoIter; 31 | type MetadataIter = std::vec::IntoIter<(Key, HLCTimestamp, bool)>; 32 | 33 | async fn get_keyspace_list(&self) -> Result, Self::Error> { 34 | Ok(self.metadata.read().keys().cloned().collect()) 35 | } 36 | 37 | async fn iter_metadata( 38 | &self, 39 | keyspace: &str, 40 | ) -> Result { 41 | if let Some(ks) = self.metadata.read().get(keyspace) { 42 | return Ok(ks 43 | .iter() 44 | .map(|(k, (ts, tombstone))| (*k, *ts, *tombstone)) 45 | .collect::>() 46 | .into_iter()); 47 | }; 48 | 49 | Ok(Vec::new().into_iter()) 50 | } 51 | 52 | async fn remove_tombstones( 53 | &self, 54 | keyspace: &str, 55 | keys: impl Iterator + Send, 56 | ) -> Result<(), BulkMutationError> { 57 | if let Some(ks) = self.metadata.write().get_mut(keyspace) { 58 | for key in keys { 59 | ks.remove(&key); 60 | } 61 | } 62 | 63 | Ok(()) 64 | } 65 | 66 | async fn put(&self, keyspace: &str, document: Document) -> Result<(), Self::Error> { 67 | self.multi_put(keyspace, [document].into_iter()) 68 | .await 69 | .map_err(|e| e.into_inner()) 70 | } 71 | 72 | async fn multi_put( 73 | &self, 74 | keyspace: &str, 75 | documents: impl Iterator + Send, 76 | ) -> Result<(), BulkMutationError> { 77 | let documents = documents.collect::>(); 78 | self.data 79 | .write() 80 | .entry(keyspace.to_string()) 81 | .and_modify(|entries| { 82 | for doc in documents.clone() { 83 | entries.insert(doc.id(), doc); 84 | } 85 | }) 86 | .or_insert_with(|| { 87 | HashMap::from_iter( 88 | documents.clone().into_iter().map(|doc| (doc.id(), doc)), 89 | ) 90 | }); 91 | self.metadata 92 | .write() 93 | .entry(keyspace.to_string()) 94 | .and_modify(|entries| 
{ 95 | for doc in documents.clone() { 96 | entries.insert(doc.id(), (doc.last_updated(), false)); 97 | } 98 | }) 99 | .or_insert_with(|| { 100 | HashMap::from_iter( 101 | documents 102 | .into_iter() 103 | .map(|doc| (doc.id(), (doc.last_updated(), false))), 104 | ) 105 | }); 106 | 107 | Ok(()) 108 | } 109 | 110 | async fn mark_as_tombstone( 111 | &self, 112 | keyspace: &str, 113 | doc_id: Key, 114 | timestamp: HLCTimestamp, 115 | ) -> Result<(), Self::Error> { 116 | self.mark_many_as_tombstone( 117 | keyspace, 118 | [DocumentMetadata { 119 | id: doc_id, 120 | last_updated: timestamp, 121 | }] 122 | .into_iter(), 123 | ) 124 | .await 125 | .map_err(|e| e.into_inner()) 126 | } 127 | 128 | async fn mark_many_as_tombstone( 129 | &self, 130 | keyspace: &str, 131 | documents: impl Iterator + Send, 132 | ) -> Result<(), BulkMutationError> { 133 | let docs = documents.collect::>(); 134 | self.data 135 | .write() 136 | .entry(keyspace.to_string()) 137 | .and_modify(|entries| { 138 | for doc in docs.iter() { 139 | entries.remove(&doc.id); 140 | } 141 | }); 142 | self.metadata 143 | .write() 144 | .entry(keyspace.to_string()) 145 | .and_modify(|entries| { 146 | for doc in docs { 147 | entries.insert(doc.id, (doc.last_updated, true)); 148 | } 149 | }); 150 | 151 | Ok(()) 152 | } 153 | 154 | async fn get( 155 | &self, 156 | keyspace: &str, 157 | doc_id: Key, 158 | ) -> Result, Self::Error> { 159 | Ok(self 160 | .data 161 | .read() 162 | .get(keyspace) 163 | .and_then(|ks| ks.get(&doc_id).cloned())) 164 | } 165 | 166 | async fn multi_get( 167 | &self, 168 | keyspace: &str, 169 | doc_ids: impl Iterator + Send, 170 | ) -> Result { 171 | let mut docs = Vec::new(); 172 | 173 | if let Some(ks) = self.data.read().get(keyspace) { 174 | for doc_id in doc_ids { 175 | if let Some(doc) = ks.get(&doc_id) { 176 | docs.push(doc.clone()); 177 | } 178 | } 179 | } 180 | 181 | Ok(docs.into_iter()) 182 | } 183 | } 184 | 
-------------------------------------------------------------------------------- /benchmarks/src/stores/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod memstore; 2 | -------------------------------------------------------------------------------- /datacake-crdt/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake-crdt" 3 | version = "0.5.0" 4 | edition = "2021" 5 | description = "A conflict free replicated datatype based on a hybrid logical clock implementation for building eventually consistent data stores." 6 | license = "MIT" 7 | keywords = ["crdt", "databases", "distributed"] 8 | categories = ["concurrency", "data-structures"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | thiserror = "1.0.33" 16 | 17 | rkyv = { version = "0.7.42", features = ["strict", "archive_le"], optional = true } 18 | 19 | [features] 20 | # Enables (de)serialization support for all data types. 21 | rkyv-support = ["rkyv"] 22 | rkyv-validation = ["rkyv-support", "rkyv/validation"] -------------------------------------------------------------------------------- /datacake-crdt/README.md: -------------------------------------------------------------------------------- 1 | # Datacake CRDT 2 | 3 | An implementation of Riak's ORSWOT CRDT which is a CRDT which allows for removal of old 4 | tombstones once a new event has been observed. 5 | 6 | The set is built upon the second supported structure `HLCTimestamp` which is a Hybrid Logical Clock 7 | which guarantees the timestamp will always be unique and monotonic (providing it's used correctly.) 
8 | 9 | ### Basic Example 10 | ```rust 11 | use std::time::Duration; 12 | use datacake_crdt::{OrSWotSet, HLCTimestamp}; 13 | 14 | fn main() { 15 | let mut node_a = HLCTimestamp::now(0, 0); 16 | 17 | // Simulating a node begin slightly ahead in time. 18 | let mut node_b = HLCTimestamp::new(node_a.datacake_timestamp() + Duration::from_secs(5), 0, 1); 19 | 20 | let mut node_a_set = OrSWotSet::default(); 21 | let mut node_b_set = OrSWotSet::default(); 22 | 23 | // Insert a new key with a new timestamp in set A. 24 | node_a_set.insert(1, node_a.send().unwrap()); 25 | 26 | // Insert a new entry in set B. 27 | node_b_set.insert(2, node_b.send().unwrap()); 28 | 29 | // Let some time pass for demonstration purposes. 30 | std::thread::sleep(Duration::from_millis(500)); 31 | 32 | // Set A has key `1` removed. 33 | node_a_set.delete(1, node_a.send().unwrap()); 34 | 35 | // Merging set B with set A and vice versa. 36 | // Our sets are now aligned without conflicts. 37 | node_b_set.merge(node_a_set.clone()); 38 | node_a_set.merge(node_b_set.clone()); 39 | 40 | // Set A and B should both see that key `1` has been deleted. 41 | assert!(node_a_set.get(&1).is_none(), "Key should be correctly removed."); 42 | assert!(node_b_set.get(&1).is_none(), "Key should be correctly removed."); 43 | } 44 | ``` 45 | 46 | ### Inspirations 47 | - [CRDTs for Mortals by James Long](https://www.youtube.com/watch?v=iEFcmfmdh2w) 48 | - [Big(ger) Sets: Making CRDT Sets Scale in Riak by Russell Brown](https://www.youtube.com/watch?v=f20882ZSdkU) 49 | - ["CRDTs Illustrated" by Arnout Engelen](https://www.youtube.com/watch?v=9xFfOhasiOE) -------------------------------------------------------------------------------- /datacake-crdt/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Datacake CRDT 2 | //! 3 | //! An implementation of Riak's ORSWOT CRDT which is a CRDT which allows for removal of old 4 | //! tombstones once a new event has been observed. 
5 | //! 6 | //! The set is built upon the second supported structure `HLCTimestamp` which is a Hybrid Logical Clock 7 | //! which guarantees the timestamp will always be unique and monotonic (providing it's used correctly.) 8 | //! 9 | //! ### Basic Example 10 | //! ```rust 11 | //! use std::time::Duration; 12 | //! use datacake_crdt::{OrSWotSet, HLCTimestamp}; 13 | //! 14 | //! let mut node_a = HLCTimestamp::now(0, 0); 15 | //! 16 | //! // Simulating a node begin slightly ahead. 17 | //! let mut node_b = HLCTimestamp::new(node_a.datacake_timestamp() + Duration::from_secs(5), 0, 1); 18 | //! 19 | //! // We only have one effective source here. 20 | //! let mut node_a_set = OrSWotSet::<1>::default(); 21 | //! let mut node_b_set = OrSWotSet::<1>::default(); 22 | //! 23 | //! // Insert a new key with a new timestamp in set A. 24 | //! node_a_set.insert(1, node_a.send().unwrap()); 25 | //! 26 | //! // Insert a new entry in set B. 27 | //! node_b_set.insert(2, node_b.send().unwrap()); 28 | //! 29 | //! // Let some time pass for demonstration purposes. 30 | //! std::thread::sleep(Duration::from_millis(500)); 31 | //! 32 | //! // Set A has key `1` removed. 33 | //! node_a_set.delete(1, node_a.send().unwrap()); 34 | //! 35 | //! // Merging set B with set A and vice versa. 36 | //! // Our sets are now aligned without conflicts. 37 | //! node_b_set.merge(node_a_set.clone()); 38 | //! node_a_set.merge(node_b_set.clone()); 39 | //! 40 | //! // Set A and B should both see that key `1` has been deleted. 41 | //! assert!(node_a_set.get(&1).is_none(), "Key should be correctly removed."); 42 | //! assert!(node_b_set.get(&1).is_none(), "Key should be correctly removed."); 43 | //! ``` 44 | //! 45 | //! ### Inspirations 46 | //! - [CRDTs for Mortals by James Long](https://www.youtube.com/watch?v=iEFcmfmdh2w) 47 | //! - [Big(ger) Sets: Making CRDT Sets Scale in Riak by Russell Brown](https://www.youtube.com/watch?v=f20882ZSdkU) 48 | //! 
- ["CRDTs Illustrated" by Arnout Engelen](https://www.youtube.com/watch?v=9xFfOhasiOE) 49 | 50 | mod orswot; 51 | mod timestamp; 52 | 53 | #[cfg(feature = "rkyv-support")] 54 | pub use orswot::BadState; 55 | pub use orswot::{Key, OrSWotSet, StateChanges}; 56 | pub use timestamp::{ 57 | get_datacake_timestamp, 58 | get_unix_timestamp_ms, 59 | HLCTimestamp, 60 | InvalidFormat, 61 | TimestampError, 62 | DATACAKE_EPOCH, 63 | TIMESTAMP_MAX, 64 | }; 65 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake-eventual-consistency" 3 | version = "0.6.0" 4 | edition = "2021" 5 | description = "Eventually consistent state replication as a library (consensus, RPC and conflict resolution) for building your own eventually consistent databases." 6 | license = "MIT" 7 | keywords = ["crdt", "databases", "distributed", "tokio", "async"] 8 | categories = ["concurrency", "data-structures", "asynchronous"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | tracing = "0.1.36" 16 | tokio-stream = "0.1.9" 17 | flume = "0.10.14" 18 | futures = "0.3.23" 19 | itertools = "0.10.3" 20 | thiserror = "1" 21 | parking_lot = "0.12.1" 22 | crc32fast = "1.3.2" 23 | crossbeam-channel = "0.5.6" 24 | crossbeam-utils = "0.8.14" 25 | async-trait = "0.1.58" 26 | anyhow = "1" 27 | rand = "0.8.5" 28 | puppet = "0.4.0" 29 | smallvec = "1" 30 | 31 | chitchat = { version = "0.5.1", package = "datacake-chitchat-fork" } 32 | tokio = { version = "1", default-features = false, features = ["sync", "time"] } 33 | rkyv = { version = "0.7.42", features = ["strict", "validation", "smallvec"] } 34 | 35 | datacake-rpc = { path = "../datacake-rpc", version = "0.6" } 36 | 
datacake-node = { path = "../datacake-node", version = "0.5" } 37 | datacake-crdt = { path = "../datacake-crdt", version = "0.5", features = ["rkyv-support"] } 38 | 39 | [features] 40 | test-utils = [] 41 | test-suite = [] 42 | 43 | [dev-dependencies] 44 | anyhow = "1" 45 | tracing-subscriber = "0.3.15" 46 | bytes = "1.2.1" 47 | test-helper = { path = "../test-helper" } 48 | datacake-rpc = { path = "../datacake-rpc", version = "0.6", features = ["test-utils"] } 49 | 50 | 51 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/README.md: -------------------------------------------------------------------------------- 1 | # Datacake Cluster 2 | A batteries included library for building your own distributed data stores or replicated state. 3 | 4 | This library is largely based on the same concepts as Riak and Cassandra. Consensus, membership and failure 5 | detection are managed by [Quickwit's Chitchat](https://github.com/quickwit-oss/chitchat) while state alignment 6 | and replication is managed by [Datacake CRDT](https://github.com/lnx-search/datacake/tree/main/datacake-crdt). 7 | 8 | This library is focused around providing a simple and easy to build framework for your distributed apps without 9 | being overwhelming. In fact, you can be up and running just by implementing 1 async trait. 
10 | 11 | ## Basic Example 12 | 13 | ```rust 14 | use std::net::SocketAddr; 15 | use datacake_node::{Consistency, ConnectionConfig, DCAwareSelector, DatacakeNodeBuilder}; 16 | use datacake_eventual_consistency::test_utils::MemStore; 17 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 18 | 19 | async fn main() -> anyhow::Result<()> { 20 | let addr = "127.0.0.1:8080".parse::().unwrap(); 21 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 22 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 23 | .connect() 24 | .await 25 | .expect("Connect node."); 26 | 27 | let store = node 28 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 29 | .await 30 | .expect("Create store."); 31 | 32 | let handle = store.handle(); 33 | 34 | handle 35 | .put( 36 | "my-keyspace", 37 | 1, 38 | b"Hello, world! From keyspace 1.".to_vec(), 39 | Consistency::All, 40 | ) 41 | .await 42 | .expect("Put doc."); 43 | 44 | Ok(()) 45 | } 46 | ``` 47 | 48 | ## Complete Examples 49 | Indepth examples [can be found here](https://github.com/lnx-search/datacake/tree/main/examples). 50 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/core.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Formatter}; 2 | use std::hash::{Hash, Hasher}; 3 | use std::sync::Arc; 4 | 5 | use datacake_crdt::{HLCTimestamp, Key}; 6 | use rkyv::with::CopyOptimize; 7 | use rkyv::{Archive, Deserialize, Serialize}; 8 | use smallvec::SmallVec; 9 | 10 | pub(crate) type DocVec = SmallVec<[T; 4]>; 11 | 12 | #[repr(C)] 13 | #[derive(Serialize, Deserialize, Archive, Copy, Clone, Debug, PartialEq)] 14 | #[archive(check_bytes)] 15 | #[archive_attr(repr(C))] 16 | /// The metadata attached to each document. 17 | pub struct DocumentMetadata { 18 | /// The unique id of the document. 
19 | pub id: Key, 20 | 21 | /// The timestamp of when the document was last updated. 22 | pub last_updated: HLCTimestamp, 23 | } 24 | 25 | impl DocumentMetadata { 26 | /// Creates a new metadata block with the given unique key and last updated 27 | /// timestamp. 28 | pub fn new(id: Key, last_updated: HLCTimestamp) -> Self { 29 | Self { id, last_updated } 30 | } 31 | } 32 | 33 | #[repr(C)] 34 | #[derive(Serialize, Deserialize, Archive, Clone)] 35 | #[archive(check_bytes)] 36 | #[archive_attr(repr(C))] 37 | /// A single document managed by the store. 38 | pub struct Document { 39 | /// The metadata associated with the document. 40 | pub metadata: DocumentMetadata, 41 | 42 | /// The raw binary data of the document's value. 43 | data: Arc, 44 | } 45 | 46 | impl Document { 47 | /// A convenience method for passing data values which can be sent as bytes. 48 | pub fn new(id: Key, last_updated: HLCTimestamp, data: impl Into>) -> Self { 49 | Self { 50 | metadata: DocumentMetadata { id, last_updated }, 51 | data: Arc::new(Bytes { 52 | buffer: data.into(), 53 | }), 54 | } 55 | } 56 | 57 | #[inline] 58 | /// The binary data of the document. 59 | pub fn data(&self) -> &[u8] { 60 | &self.data.buffer 61 | } 62 | 63 | #[inline] 64 | /// The unique id of the document. 65 | pub fn id(&self) -> Key { 66 | self.metadata.id 67 | } 68 | 69 | #[inline] 70 | /// The timestamp of when the document was last updated. 
71 | pub fn last_updated(&self) -> HLCTimestamp { 72 | self.metadata.last_updated 73 | } 74 | } 75 | 76 | impl Eq for Document {} 77 | 78 | impl PartialEq for Document { 79 | fn eq(&self, other: &Self) -> bool { 80 | self.metadata.id == other.metadata.id 81 | && self.metadata.last_updated == other.metadata.last_updated 82 | && self.data() == other.data() 83 | } 84 | } 85 | 86 | impl Hash for Document { 87 | fn hash(&self, state: &mut H) { 88 | self.metadata.id.hash(state) 89 | } 90 | } 91 | 92 | impl Debug for Document { 93 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 94 | let mut f = f.debug_struct("Document"); 95 | f.field("id", &self.id()); 96 | f.field("last_updated", &self.last_updated()); 97 | 98 | #[cfg(any(test, feature = "test-utils"))] 99 | { 100 | f.field("data", &String::from_utf8_lossy(self.data())); 101 | } 102 | 103 | f.finish() 104 | } 105 | } 106 | 107 | #[repr(C)] 108 | #[derive(Serialize, Deserialize, Archive, PartialEq, Clone)] 109 | #[archive(check_bytes)] 110 | #[archive_attr(repr(C))] 111 | /// A new type wrapper around a `Vec` to implement the 112 | /// [CopyOptimize] optimisations from [rkyv]. 113 | pub struct Bytes { 114 | #[with(CopyOptimize)] 115 | buffer: Vec, 116 | } 117 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fmt::Debug; 3 | use std::io; 4 | use std::net::SocketAddr; 5 | 6 | use datacake_node::ConsistencyError; 7 | use datacake_rpc::Status; 8 | use thiserror::Error; 9 | 10 | use crate::storage::BulkMutationError; 11 | 12 | #[derive(Debug, Error)] 13 | /// A wrapping error for the store which can potentially fail under situations. 14 | pub enum StoreError { 15 | #[error("{0}")] 16 | /// An error has occurred within Chitchat. 
17 | ChitChatError(String), 18 | 19 | #[error("An unknown error occurred during the operation: {0}")] 20 | /// An error has occurred which datacake was not expecting nad from 21 | /// an unknown source. 22 | UnknownError(anyhow::Error), 23 | 24 | #[error( 25 | "A failure occurred within the user provided `DataStore` implementation: {0}" 26 | )] 27 | /// A failure has occurred within the provided storage system causing the operation 28 | /// to fail. 29 | StorageError(#[from] E), 30 | 31 | #[error( 32 | "A failure occurred within the user provided `DataStore` implementation on a bulk operation: {0}" 33 | )] 34 | /// A failure has occurred within the provided storage system causing the operation 35 | /// to fail. 36 | /// 37 | /// This error however, includes the set of doc ids which *were* successfully completed, 38 | /// this can be used to maintain partial and incremental updates despite and error, otherwise 39 | /// bulk storage operations must be entirely atomic if they do not specify the successful IDs. 40 | BulkStorageError(#[from] BulkMutationError), 41 | 42 | #[error("Failed to complete operation due to consistency level failure: {0}")] 43 | /// The operation succeeded on the local node but failed to meet the required 44 | /// consistency level within the timeout period. (2 seconds) 45 | ConsistencyError(ConsistencyError), 46 | 47 | #[error("Transport Error: ({0}) - {1}")] 48 | /// An error occurred when attempting to open a connection or listen on a given address. 49 | TransportError(SocketAddr, io::Error), 50 | 51 | #[error("Rpc Error: ({0}) - {1}")] 52 | /// An error occurred during RPC communication with other nodes. 
53 | RpcError(SocketAddr, Status), 54 | } 55 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/keyspace/messages.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use datacake_crdt::{HLCTimestamp, OrSWotSet, StateChanges}; 4 | use puppet::{derive_message, Message}; 5 | 6 | use crate::core::DocumentMetadata; 7 | use crate::storage::BulkMutationError; 8 | use crate::{DocVec, Document, PutContext, Storage}; 9 | 10 | #[derive(Debug, thiserror::Error)] 11 | #[error("Failed to (de)serialize state.")] 12 | pub struct CorruptedState; 13 | 14 | pub const NUM_SOURCES: usize = 2; 15 | 16 | pub struct Set { 17 | pub source: usize, 18 | pub doc: Document, 19 | pub ctx: Option, 20 | pub _marker: PhantomData, 21 | } 22 | impl Message for Set { 23 | type Output = Result<(), S::Error>; 24 | } 25 | 26 | pub struct MultiSet { 27 | pub source: usize, 28 | pub docs: DocVec, 29 | pub ctx: Option, 30 | pub _marker: PhantomData, 31 | } 32 | impl Message for MultiSet { 33 | type Output = Result<(), BulkMutationError>; 34 | } 35 | 36 | pub struct Del { 37 | pub source: usize, 38 | pub doc: DocumentMetadata, 39 | pub _marker: PhantomData, 40 | } 41 | impl Message for Del { 42 | type Output = Result<(), S::Error>; 43 | } 44 | 45 | pub struct MultiDel { 46 | pub source: usize, 47 | pub docs: DocVec, 48 | pub _marker: PhantomData, 49 | } 50 | impl Message for MultiDel { 51 | type Output = Result<(), BulkMutationError>; 52 | } 53 | 54 | #[derive(Copy, Clone)] 55 | pub struct Serialize; 56 | derive_message!(Serialize, Result, CorruptedState>); 57 | 58 | #[derive(Copy, Clone)] 59 | pub struct LastUpdated; 60 | derive_message!(LastUpdated, HLCTimestamp); 61 | 62 | #[derive(Clone)] 63 | pub struct Diff(pub OrSWotSet); 64 | derive_message!(Diff, (StateChanges, StateChanges)); 65 | 66 | #[derive(Clone)] 67 | pub struct SymDiff(pub OrSWotSet); 68 | 
derive_message!(SymDiff, (StateChanges, StateChanges)); 69 | 70 | #[derive(Copy, Clone)] 71 | pub struct PurgeDeletes(pub PhantomData); 72 | impl Message for PurgeDeletes { 73 | type Output = Result<(), S::Error>; 74 | } 75 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/keyspace/mod.rs: -------------------------------------------------------------------------------- 1 | mod actor; 2 | mod group; 3 | mod messages; 4 | 5 | pub use actor::{spawn_keyspace, KeyspaceActor}; 6 | pub use group::{KeyspaceGroup, KeyspaceInfo, KeyspaceTimestamps}; 7 | pub use messages::{ 8 | Del, 9 | Diff, 10 | LastUpdated, 11 | MultiDel, 12 | MultiSet, 13 | Serialize, 14 | Set, 15 | NUM_SOURCES, 16 | }; 17 | 18 | pub const CONSISTENCY_SOURCE_ID: usize = 0; 19 | pub const READ_REPAIR_SOURCE_ID: usize = 1; 20 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/replication/distributor.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::collections::BTreeMap; 3 | use std::net::SocketAddr; 4 | use std::sync::atomic::{AtomicBool, Ordering}; 5 | use std::sync::Arc; 6 | use std::time::Duration; 7 | 8 | use crossbeam_channel::{unbounded, Receiver, Sender}; 9 | use datacake_node::{Clock, MembershipChange, NodeId, RpcNetwork}; 10 | use tokio::sync::Semaphore; 11 | use tokio::time::{interval, MissedTickBehavior}; 12 | 13 | use crate::core::DocumentMetadata; 14 | use crate::replication::MAX_CONCURRENT_REQUESTS; 15 | use crate::rpc::services::consistency_impl::{ 16 | BatchPayload, 17 | Context, 18 | MultiPutPayload, 19 | MultiRemovePayload, 20 | }; 21 | use crate::{ConsistencyClient, DocVec, Document, Storage}; 22 | 23 | const BATCHING_INTERVAL: Duration = Duration::from_secs(1); 24 | 25 | pub struct TaskServiceContext { 26 | /// The global cluster clock. 
27 | pub(crate) clock: Clock, 28 | /// The network handle which contains all RPC connections. 29 | pub(crate) network: RpcNetwork, 30 | /// The unique ID of the node running. 31 | pub(crate) local_node_id: NodeId, 32 | /// The public RPC address of the node running. 33 | pub(crate) public_node_addr: SocketAddr, 34 | } 35 | 36 | #[derive(Clone)] 37 | /// A handle to the task distributor service. 38 | /// 39 | /// This handle is cheap to clone. 40 | pub(crate) struct TaskDistributor { 41 | tx: Sender, 42 | kill_switch: Arc, 43 | } 44 | 45 | impl TaskDistributor { 46 | /// Marks that the cluster has had a membership change. 47 | pub(crate) fn membership_change(&self, changes: MembershipChange) { 48 | let _ = self.tx.send(Op::MembershipChange(changes)); 49 | } 50 | 51 | /// Marks that the cluster has mutated some data. 52 | pub(crate) fn mutation(&self, mutation: Mutation) { 53 | let _ = self.tx.send(Op::Mutation(mutation)); 54 | } 55 | 56 | /// Kills the distributor service. 57 | pub(crate) fn kill(&self) { 58 | self.kill_switch.store(true, Ordering::Relaxed); 59 | } 60 | } 61 | 62 | /// A enqueued event/operation for the distributor to handle next tick. 63 | enum Op { 64 | MembershipChange(MembershipChange), 65 | Mutation(Mutation), 66 | } 67 | 68 | /// Represents an operation on the store, mutating the data. 69 | pub enum Mutation { 70 | Put { 71 | keyspace: Cow<'static, str>, 72 | doc: Document, 73 | }, 74 | MultiPut { 75 | keyspace: Cow<'static, str>, 76 | docs: DocVec, 77 | }, 78 | Del { 79 | keyspace: Cow<'static, str>, 80 | doc: DocumentMetadata, 81 | }, 82 | MultiDel { 83 | keyspace: Cow<'static, str>, 84 | docs: DocVec, 85 | }, 86 | } 87 | 88 | /// Starts the task distributor service. 89 | /// 90 | /// The distributor service is responsible for batching mutation requests 91 | /// which are not part of the node broadcast when mutating data. 92 | /// 93 | /// This service will send the events to the remaining nodes in a single batch. 
94 | pub(crate) async fn start_task_distributor_service( 95 | ctx: TaskServiceContext, 96 | ) -> TaskDistributor 97 | where 98 | S: Storage, 99 | { 100 | let kill_switch = Arc::new(AtomicBool::new(false)); 101 | let (tx, rx) = unbounded(); 102 | 103 | tokio::spawn(task_distributor_service::(ctx, rx, kill_switch.clone())); 104 | 105 | TaskDistributor { tx, kill_switch } 106 | } 107 | 108 | async fn task_distributor_service( 109 | ctx: TaskServiceContext, 110 | rx: Receiver, 111 | kill_switch: Arc, 112 | ) where 113 | S: Storage, 114 | { 115 | info!("Task distributor service is running."); 116 | 117 | let mut live_members = BTreeMap::new(); 118 | let mut interval = interval(BATCHING_INTERVAL); 119 | interval.set_missed_tick_behavior(MissedTickBehavior::Skip); 120 | loop { 121 | interval.tick().await; 122 | 123 | if kill_switch.load(Ordering::Relaxed) { 124 | break; 125 | } 126 | 127 | let mut put_payloads = BTreeMap::new(); 128 | let mut del_payloads = BTreeMap::new(); 129 | while let Ok(task) = rx.try_recv() { 130 | match task { 131 | Op::MembershipChange(changes) => { 132 | for member in changes.left { 133 | live_members.remove(&member.node_id); 134 | } 135 | 136 | for member in changes.joined { 137 | live_members.insert(member.node_id, member.public_addr); 138 | } 139 | }, 140 | Op::Mutation(mutation) => { 141 | register_mutation(&mut put_payloads, &mut del_payloads, mutation); 142 | }, 143 | } 144 | } 145 | 146 | if !put_payloads.is_empty() || !del_payloads.is_empty() { 147 | let timestamp = ctx.clock.get_time().await; 148 | let batch = BatchPayload { 149 | timestamp, 150 | modified: put_payloads 151 | .into_iter() 152 | .map(|(keyspace, payloads)| MultiPutPayload { 153 | keyspace: keyspace.to_string(), 154 | ctx: Some(Context { 155 | node_id: ctx.local_node_id, 156 | node_addr: ctx.public_node_addr, 157 | }), 158 | documents: payloads, 159 | timestamp, 160 | }) 161 | .collect(), 162 | removed: del_payloads 163 | .into_iter() 164 | .map(|(keyspace, payloads)| 
MultiRemovePayload { 165 | keyspace: keyspace.to_string(), 166 | documents: payloads, 167 | timestamp, 168 | }) 169 | .collect(), 170 | }; 171 | 172 | if let Err(e) = execute_batch::(&ctx, &live_members, batch).await { 173 | error!(error = ?e, "Failed to execute synchronisation batch."); 174 | } 175 | } 176 | } 177 | } 178 | 179 | fn register_mutation( 180 | put_payloads: &mut BTreeMap, DocVec>, 181 | del_payloads: &mut BTreeMap, DocVec>, 182 | mutation: Mutation, 183 | ) { 184 | match mutation { 185 | Mutation::Put { keyspace, doc } => { 186 | put_payloads.entry(keyspace).or_default().push(doc); 187 | }, 188 | Mutation::MultiPut { keyspace, docs } => { 189 | put_payloads.entry(keyspace).or_default().extend(docs); 190 | }, 191 | Mutation::Del { keyspace, doc } => { 192 | del_payloads.entry(keyspace).or_default().push(doc); 193 | }, 194 | Mutation::MultiDel { keyspace, docs } => { 195 | del_payloads.entry(keyspace).or_default().extend(docs); 196 | }, 197 | } 198 | } 199 | 200 | async fn execute_batch( 201 | ctx: &TaskServiceContext, 202 | live_members: &BTreeMap, 203 | batch: BatchPayload, 204 | ) -> anyhow::Result<()> 205 | where 206 | S: Storage, 207 | { 208 | let batch = Arc::new(batch); 209 | let limiter = Arc::new(Semaphore::new(MAX_CONCURRENT_REQUESTS)); 210 | let mut tasks = Vec::with_capacity(live_members.len()); 211 | for (node_id, &addr) in live_members { 212 | let node_id = *node_id; 213 | let limiter = limiter.clone(); 214 | let batch = batch.clone(); 215 | let channel = ctx.network.get_or_connect(addr); 216 | let mut client = ConsistencyClient::::new(ctx.clock.clone(), channel); 217 | 218 | let task = tokio::spawn(async move { 219 | let _permit = limiter.acquire().await; 220 | let resp = client.apply_batch(&batch).await; 221 | (node_id, addr, resp) 222 | }); 223 | tasks.push(task); 224 | } 225 | 226 | for task in tasks { 227 | let (node_id, addr, res) = task.await.expect("Join task."); 228 | if let Err(e) = res { 229 | error!( 230 | error = ?e, 231 | 
target_node_id = %node_id, 232 | target_addr = %addr, 233 | "Failed to synchronise node with batch events. This will resolved when the next replication cycle occurs.", 234 | ); 235 | } 236 | } 237 | 238 | Ok(()) 239 | } 240 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/replication/mod.rs: -------------------------------------------------------------------------------- 1 | mod distributor; 2 | mod poller; 3 | 4 | pub const MAX_CONCURRENT_REQUESTS: usize = 10; 5 | 6 | pub(crate) use distributor::{ 7 | start_task_distributor_service, 8 | Mutation, 9 | TaskDistributor, 10 | TaskServiceContext, 11 | }; 12 | pub(crate) use poller::{ 13 | start_replication_cycle, 14 | ReplicationCycleContext, 15 | ReplicationHandle, 16 | }; 17 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/rpc/client.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::net::SocketAddr; 3 | 4 | use datacake_crdt::{HLCTimestamp, Key, OrSWotSet}; 5 | use datacake_node::{Clock, NodeId}; 6 | use datacake_rpc::{Channel, RpcClient, Status}; 7 | 8 | use crate::core::{Document, DocumentMetadata}; 9 | use crate::rpc::services::consistency_impl::{ 10 | BatchPayload, 11 | ConsistencyService, 12 | Context, 13 | MultiPutPayload, 14 | MultiRemovePayload, 15 | PutPayload, 16 | RemovePayload, 17 | }; 18 | use crate::rpc::services::replication_impl::{ 19 | FetchDocs, 20 | GetState, 21 | PollKeyspace, 22 | ReplicationService, 23 | }; 24 | use crate::{DocVec, Storage}; 25 | 26 | /// A high level wrapper around the consistency GRPC service. 
27 | pub struct ConsistencyClient 28 | where 29 | S: Storage, 30 | { 31 | clock: Clock, 32 | inner: RpcClient>, 33 | } 34 | 35 | impl ConsistencyClient 36 | where 37 | S: Storage, 38 | { 39 | pub fn new(clock: Clock, channel: Channel) -> Self { 40 | Self { 41 | clock, 42 | inner: RpcClient::new(channel), 43 | } 44 | } 45 | } 46 | 47 | impl ConsistencyClient 48 | where 49 | S: Storage, 50 | { 51 | /// Adds a document to the remote node's state. 52 | pub async fn put( 53 | &mut self, 54 | keyspace: impl Into, 55 | document: Document, 56 | node_id: NodeId, 57 | node_addr: SocketAddr, 58 | ) -> Result<(), Status> { 59 | let timestamp = self.clock.get_time().await; 60 | let ts = self 61 | .inner 62 | .send(&PutPayload { 63 | keyspace: keyspace.into(), 64 | document, 65 | ctx: Some(Context { node_id, node_addr }), 66 | timestamp, 67 | }) 68 | .await? 69 | .cast(); 70 | self.clock.register_ts(ts).await; 71 | Ok(()) 72 | } 73 | 74 | /// Adds a set of documents to the remote node's state. 75 | pub async fn multi_put( 76 | &mut self, 77 | keyspace: impl Into, 78 | documents: impl Iterator, 79 | node_id: NodeId, 80 | node_addr: SocketAddr, 81 | ) -> Result<(), Status> { 82 | let timestamp = self.clock.get_time().await; 83 | let ts = self 84 | .inner 85 | .send(&MultiPutPayload { 86 | keyspace: keyspace.into(), 87 | documents: documents.collect(), 88 | ctx: Some(Context { node_id, node_addr }), 89 | timestamp, 90 | }) 91 | .await? 92 | .cast(); 93 | self.clock.register_ts(ts).await; 94 | Ok(()) 95 | } 96 | 97 | /// Removes a document from the remote node's state. 98 | pub async fn del( 99 | &mut self, 100 | keyspace: impl Into, 101 | id: Key, 102 | ts: HLCTimestamp, 103 | ) -> Result<(), Status> { 104 | let timestamp = self.clock.get_time().await; 105 | let ts = self 106 | .inner 107 | .send(&RemovePayload { 108 | keyspace: keyspace.into(), 109 | document: DocumentMetadata::new(id, ts), 110 | timestamp, 111 | }) 112 | .await? 
113 | .cast(); 114 | self.clock.register_ts(ts).await; 115 | Ok(()) 116 | } 117 | 118 | /// Removes a set of documents from the remote node's state. 119 | pub async fn multi_del( 120 | &mut self, 121 | keyspace: impl Into, 122 | documents: DocVec, 123 | ) -> Result<(), Status> { 124 | let timestamp = self.clock.get_time().await; 125 | let ts = self 126 | .inner 127 | .send(&MultiRemovePayload { 128 | keyspace: keyspace.into(), 129 | documents, 130 | timestamp, 131 | }) 132 | .await? 133 | .cast(); 134 | self.clock.register_ts(ts).await; 135 | Ok(()) 136 | } 137 | 138 | pub async fn apply_batch(&mut self, batch: &BatchPayload) -> Result<(), Status> { 139 | let ts = self.inner.send(batch).await?.cast(); 140 | self.clock.register_ts(ts).await; 141 | Ok(()) 142 | } 143 | } 144 | 145 | /// A high level wrapper around the replication GRPC service. 146 | pub struct ReplicationClient 147 | where 148 | S: Storage, 149 | { 150 | clock: Clock, 151 | inner: RpcClient>, 152 | } 153 | 154 | impl ReplicationClient 155 | where 156 | S: Storage, 157 | { 158 | pub fn new(clock: Clock, channel: Channel) -> Self { 159 | Self { 160 | clock, 161 | inner: RpcClient::new(channel), 162 | } 163 | } 164 | } 165 | 166 | impl ReplicationClient 167 | where 168 | S: Storage, 169 | { 170 | /// Fetches the newest version of the node's keyspace timestamps. 171 | pub async fn poll_keyspace( 172 | &mut self, 173 | ) -> Result, Status> { 174 | let timestamp = self.clock.get_time().await; 175 | let inner = self 176 | .inner 177 | .send(&PollKeyspace(timestamp)) 178 | .await? 179 | .deserialize_view() 180 | .map_err(Status::internal)?; 181 | 182 | self.clock.register_ts(inner.timestamp).await; 183 | Ok(inner.keyspace_timestamps) 184 | } 185 | 186 | /// Fetches the node's current state for a given keyspace and returns 187 | /// the last time the keyspace was modified. 
188 | /// 189 | /// The returned timestamp must only be used when compared against timestamps produced 190 | /// by the remote node itself. This is mostly provided to reduce unnecessary IO if the state 191 | /// has changed between when the keyspace was polled, and when the state was requested. 192 | pub async fn get_state( 193 | &mut self, 194 | keyspace: impl Into, 195 | ) -> Result<(HLCTimestamp, OrSWotSet<{ crate::keyspace::NUM_SOURCES }>), Status> 196 | { 197 | let timestamp = self.clock.get_time().await; 198 | let inner = self 199 | .inner 200 | .send(&GetState { 201 | timestamp, 202 | keyspace: keyspace.into(), 203 | }) 204 | .await?; 205 | 206 | self.clock.register_ts(inner.timestamp.cast()).await; 207 | 208 | // SAFETY: 209 | // Although this may seem very unsafe, we can rely on the parent type (`KeyspaceOrSwotSet`) 210 | // to satisfy our guarantees when performing this operation. 211 | // - Internally datacake-rpc has already validated and checked the checksum of the overall 212 | // payload of the message when it originally deserialized `KeyspaceOrSwotSet` this ensures 213 | // the actual layout and original data is intact. 214 | // - The alignment issues are solved by the the fact the DataView maintains a 16 byte aligned 215 | // buffer which the parent type maintains in its view form. 216 | let state = unsafe { 217 | rkyv::from_bytes_unchecked(&inner.set).map_err(|_| Status::invalid())? 218 | }; 219 | 220 | Ok((inner.last_updated.cast(), state)) 221 | } 222 | 223 | /// Fetches a set of documents with the provided IDs belonging to the given keyspace. 
224 | pub async fn fetch_docs( 225 | &mut self, 226 | keyspace: impl Into, 227 | doc_ids: Vec, 228 | ) -> Result, Status> { 229 | let timestamp = self.clock.get_time().await; 230 | let inner = self 231 | .inner 232 | .send(&FetchDocs { 233 | timestamp, 234 | keyspace: keyspace.into(), 235 | doc_ids, 236 | }) 237 | .await?; 238 | 239 | let payload = inner.deserialize_view().unwrap(); 240 | 241 | self.clock.register_ts(payload.timestamp).await; 242 | Ok(payload.documents) 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/rpc/mod.rs: -------------------------------------------------------------------------------- 1 | mod client; 2 | pub mod services; 3 | 4 | pub use client::{ConsistencyClient, ReplicationClient}; 5 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/rpc/services/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod consistency_impl; 2 | pub(crate) mod replication_impl; 3 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/src/statistics.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Deref; 2 | use std::sync::atomic::{AtomicU64, Ordering}; 3 | use std::sync::Arc; 4 | 5 | pub type Counter = AtomicU64; 6 | 7 | #[derive(Debug, Clone, Default)] 8 | /// Live metrics around the cluster system. 9 | pub struct SystemStatistics(Arc); 10 | 11 | impl Deref for SystemStatistics { 12 | type Target = SystemStatisticsInner; 13 | 14 | fn deref(&self) -> &Self::Target { 15 | &self.0 16 | } 17 | } 18 | 19 | #[derive(Debug, Default)] 20 | pub struct SystemStatisticsInner { 21 | /// The number of synchronisation tasks that are currently running concurrently. 
22 | pub(crate) num_ongoing_sync_tasks: Counter, 23 | /// The number of synchronisation tasks that took longer than the selected timeout. 24 | pub(crate) num_slow_sync_tasks: Counter, 25 | /// The number of sync tasks that failed to complete due to an error. 26 | pub(crate) num_failed_sync_tasks: Counter, 27 | /// The number of times the node has observed a remote keyspace change. 28 | pub(crate) num_keyspace_changes: Counter, 29 | } 30 | 31 | impl SystemStatisticsInner { 32 | /// The number of synchronisation tasks that are currently running concurrently. 33 | pub fn num_ongoing_sync_tasks(&self) -> u64 { 34 | self.num_ongoing_sync_tasks.load(Ordering::Relaxed) 35 | } 36 | 37 | /// The number of synchronisation tasks that took longer than the selected timeout. 38 | pub fn num_slow_sync_tasks(&self) -> u64 { 39 | self.num_slow_sync_tasks.load(Ordering::Relaxed) 40 | } 41 | 42 | /// The number of sync tasks that failed to complete due to an error. 43 | pub fn num_failed_sync_tasks(&self) -> u64 { 44 | self.num_failed_sync_tasks.load(Ordering::Relaxed) 45 | } 46 | 47 | /// The number of times the node has observed a remote keyspace change. 
48 | pub fn num_keyspace_changes(&self) -> u64 { 49 | self.num_keyspace_changes.load(Ordering::Relaxed) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/tests/basic_connect.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use datacake_eventual_consistency::test_utils::MemStore; 4 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 5 | use datacake_node::{ConnectionConfig, DCAwareSelector, DatacakeNodeBuilder}; 6 | 7 | #[tokio::test] 8 | async fn test_basic_connect() -> anyhow::Result<()> { 9 | let _ = tracing_subscriber::fmt::try_init(); 10 | 11 | let addr = test_helper::get_unused_addr(); 12 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 13 | 14 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 15 | .connect() 16 | .await?; 17 | let _store = node 18 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 19 | .await?; 20 | 21 | tokio::time::sleep(Duration::from_secs(1)).await; 22 | 23 | node.shutdown().await; 24 | 25 | Ok(()) 26 | } 27 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/tests/dynamic_membership.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use datacake_eventual_consistency::test_utils::MemStore; 4 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 5 | use datacake_node::{ 6 | ConnectionConfig, 7 | Consistency, 8 | DCAwareSelector, 9 | DatacakeNodeBuilder, 10 | }; 11 | 12 | #[tokio::test] 13 | pub async fn test_member_join() -> anyhow::Result<()> { 14 | let _ = tracing_subscriber::fmt::try_init(); 15 | 16 | let node_1_addr = test_helper::get_unused_addr(); 17 | let node_2_addr = test_helper::get_unused_addr(); 18 | let node_3_addr = 
test_helper::get_unused_addr(); 19 | let node_1_connection_cfg = ConnectionConfig::new( 20 | node_1_addr, 21 | node_1_addr, 22 | [node_2_addr.to_string(), node_3_addr.to_string()], 23 | ); 24 | let node_2_connection_cfg = ConnectionConfig::new( 25 | node_2_addr, 26 | node_2_addr, 27 | [node_1_addr.to_string(), node_3_addr.to_string()], 28 | ); 29 | let node_3_connection_cfg = ConnectionConfig::new( 30 | node_3_addr, 31 | node_3_addr, 32 | [node_1_addr.to_string(), node_2_addr.to_string()], 33 | ); 34 | 35 | let node_1 = DatacakeNodeBuilder::::new(1, node_1_connection_cfg) 36 | .connect() 37 | .await?; 38 | let node_2 = DatacakeNodeBuilder::::new(2, node_2_connection_cfg) 39 | .connect() 40 | .await?; 41 | 42 | node_1 43 | .wait_for_nodes(&[2], Duration::from_secs(30)) 44 | .await 45 | .expect("Nodes should connect within timeout."); 46 | node_2 47 | .wait_for_nodes(&[1], Duration::from_secs(30)) 48 | .await 49 | .expect("Nodes should connect within timeout."); 50 | let store_1 = node_1 51 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 52 | .await?; 53 | let store_2 = node_2 54 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 55 | .await?; 56 | 57 | let node_1_handle = store_1.handle_with_keyspace("my-keyspace"); 58 | let node_2_handle = store_2.handle_with_keyspace("my-keyspace"); 59 | 60 | node_1_handle 61 | .put(1, b"Hello, world from node-1".to_vec(), Consistency::All) 62 | .await 63 | .expect("Put value."); 64 | node_2_handle 65 | .put(2, b"Hello, world from node-2".to_vec(), Consistency::All) 66 | .await 67 | .expect("Put value."); 68 | 69 | let doc = node_1_handle 70 | .get(1) 71 | .await 72 | .expect("Get value.") 73 | .expect("Document should not be none"); 74 | assert_eq!(doc.id(), 1); 75 | assert_eq!(doc.data(), b"Hello, world from node-1"); 76 | let doc = node_1_handle 77 | .get(2) 78 | .await 79 | .expect("Get value.") 80 | .expect("Document should not be none"); 81 | assert_eq!(doc.id(), 2); 
82 | assert_eq!(doc.data(), b"Hello, world from node-2"); 83 | 84 | let doc = node_2_handle 85 | .get(1) 86 | .await 87 | .expect("Get value.") 88 | .expect("Document should not be none"); 89 | assert_eq!(doc.id(), 1); 90 | assert_eq!(doc.data(), b"Hello, world from node-1"); 91 | let doc = node_2_handle 92 | .get(2) 93 | .await 94 | .expect("Get value.") 95 | .expect("Document should not be none"); 96 | assert_eq!(doc.id(), 2); 97 | assert_eq!(doc.data(), b"Hello, world from node-2"); 98 | 99 | // Node-3 joins the cluster. 100 | let node_3 = DatacakeNodeBuilder::::new(3, node_3_connection_cfg) 101 | .connect() 102 | .await?; 103 | node_3 104 | .wait_for_nodes(&[2, 1], Duration::from_secs(60)) 105 | .await 106 | .expect("Nodes should connect within timeout."); 107 | let store_3 = node_3 108 | .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 109 | .await?; 110 | let node_3_handle = store_3.handle_with_keyspace("my-keyspace"); 111 | 112 | node_3_handle 113 | .put(3, b"Hello, world from node-3".to_vec(), Consistency::All) 114 | .await 115 | .expect("Put value."); 116 | 117 | let doc = node_3_handle.get(1).await.expect("Get value."); 118 | assert!(doc.is_none()); 119 | let doc = node_3_handle.get(2).await.expect("Get value."); 120 | assert!(doc.is_none()); 121 | 122 | node_3 123 | .wait_for_nodes(&[1, 2], Duration::from_secs(30)) 124 | .await 125 | .expect("Nodes should connect within timeout."); 126 | 127 | // Let state propagate 128 | tokio::time::sleep(Duration::from_secs(10)).await; 129 | 130 | let doc = node_3_handle 131 | .get(1) 132 | .await 133 | .expect("Get value.") 134 | .expect("Document should not be none"); 135 | assert_eq!(doc.id(), 1); 136 | assert_eq!(doc.data(), b"Hello, world from node-1"); 137 | let doc = node_3_handle 138 | .get(2) 139 | .await 140 | .expect("Get value.") 141 | .expect("Document should not be none"); 142 | assert_eq!(doc.id(), 2); 143 | assert_eq!(doc.data(), b"Hello, world from node-2"); 144 | 145 | let 
doc = node_1_handle 146 | .get(3) 147 | .await 148 | .expect("Get value.") 149 | .expect("Document should not be none"); 150 | assert_eq!(doc.id(), 3); 151 | assert_eq!(doc.data(), b"Hello, world from node-3"); 152 | let doc = node_2_handle 153 | .get(3) 154 | .await 155 | .expect("Get value.") 156 | .expect("Document should not be none"); 157 | assert_eq!(doc.id(), 3); 158 | assert_eq!(doc.data(), b"Hello, world from node-3"); 159 | 160 | Ok(()) 161 | } 162 | -------------------------------------------------------------------------------- /datacake-eventual-consistency/tests/single_node_cluster.rs: -------------------------------------------------------------------------------- 1 | use datacake_eventual_consistency::test_utils::MemStore; 2 | use datacake_eventual_consistency::{ 3 | EventuallyConsistentStore, 4 | EventuallyConsistentStoreExtension, 5 | }; 6 | use datacake_node::{ 7 | ConnectionConfig, 8 | Consistency, 9 | DCAwareSelector, 10 | DatacakeNodeBuilder, 11 | }; 12 | 13 | static KEYSPACE: &str = "my-keyspace"; 14 | 15 | #[tokio::test] 16 | async fn test_single_node_cluster() -> anyhow::Result<()> { 17 | let _ = tracing_subscriber::fmt::try_init(); 18 | 19 | let store = create_store().await; 20 | let handle = store.handle(); 21 | 22 | // Test reading 23 | let doc = handle.get(KEYSPACE, 1).await.expect("Get value."); 24 | assert!(doc.is_none(), "No document should not exist!"); 25 | 26 | // Test writing 27 | handle 28 | .put(KEYSPACE, 1, b"Hello, world".to_vec(), Consistency::All) 29 | .await 30 | .expect("Put value."); 31 | 32 | let doc = handle 33 | .get(KEYSPACE, 1) 34 | .await 35 | .expect("Get value.") 36 | .expect("Document should not be none"); 37 | assert_eq!(doc.id(), 1); 38 | assert_eq!(doc.data(), b"Hello, world"); 39 | 40 | handle 41 | .del(KEYSPACE, 1, Consistency::All) 42 | .await 43 | .expect("Del value."); 44 | let doc = handle.get(KEYSPACE, 1).await.expect("Get value."); 45 | assert!(doc.is_none(), "No document should not exist!"); 46 | 47 
| handle 48 | .del(KEYSPACE, 2, Consistency::All) 49 | .await 50 | .expect("Del value which doesnt exist locally."); 51 | let doc = handle.get(KEYSPACE, 2).await.expect("Get value."); 52 | assert!(doc.is_none(), "No document should not exist!"); 53 | 54 | Ok(()) 55 | } 56 | 57 | #[tokio::test] 58 | async fn test_single_node_cluster_with_keyspace_handle() -> anyhow::Result<()> { 59 | let _ = tracing_subscriber::fmt::try_init(); 60 | 61 | let store = create_store().await; 62 | let handle = store.handle_with_keyspace(KEYSPACE); 63 | 64 | // Test reading 65 | let doc = handle.get(1).await.expect("Get value."); 66 | assert!(doc.is_none(), "No document should not exist!"); 67 | 68 | // Test writing 69 | handle 70 | .put(1, b"Hello, world".to_vec(), Consistency::All) 71 | .await 72 | .expect("Put value."); 73 | 74 | let doc = handle 75 | .get(1) 76 | .await 77 | .expect("Get value.") 78 | .expect("Document should not be none"); 79 | assert_eq!(doc.id(), 1); 80 | assert_eq!(doc.data(), b"Hello, world"); 81 | 82 | handle.del(1, Consistency::All).await.expect("Del value."); 83 | let doc = handle.get(1).await.expect("Get value."); 84 | assert!(doc.is_none(), "No document should not exist!"); 85 | 86 | handle 87 | .del(2, Consistency::All) 88 | .await 89 | .expect("Del value which doesnt exist locally."); 90 | let doc = handle.get(2).await.expect("Get value."); 91 | assert!(doc.is_none(), "No document should not exist!"); 92 | 93 | Ok(()) 94 | } 95 | 96 | #[tokio::test] 97 | async fn test_single_node_cluster_bulk_op() -> anyhow::Result<()> { 98 | let _ = tracing_subscriber::fmt::try_init(); 99 | 100 | let store = create_store().await; 101 | let handle = store.handle(); 102 | 103 | // Test reading 104 | let num_docs = handle 105 | .get_many(KEYSPACE, [1]) 106 | .await 107 | .expect("Get value.") 108 | .count(); 109 | assert_eq!(num_docs, 0, "No document should not exist!"); 110 | 111 | // Test writing 112 | handle 113 | .put_many(KEYSPACE, [(1, b"Hello, world".to_vec())], 
Consistency::All) 114 | .await 115 | .expect("Put value."); 116 | 117 | let docs = handle 118 | .get_many(KEYSPACE, [1]) 119 | .await 120 | .expect("Get value.") 121 | .collect::>(); 122 | assert_eq!(docs[0].id(), 1); 123 | assert_eq!(docs[0].data(), b"Hello, world"); 124 | 125 | handle 126 | .del_many(KEYSPACE, [1], Consistency::All) 127 | .await 128 | .expect("Del value."); 129 | let num_docs = handle 130 | .get_many(KEYSPACE, [1]) 131 | .await 132 | .expect("Get value.") 133 | .count(); 134 | assert_eq!(num_docs, 0, "No document should not exist!"); 135 | 136 | handle 137 | .del_many(KEYSPACE, [2, 3, 1, 5], Consistency::All) 138 | .await 139 | .expect("Del value which doesnt exist locally."); 140 | let num_docs = handle 141 | .get_many(KEYSPACE, [2, 3, 5, 1]) 142 | .await 143 | .expect("Get value.") 144 | .count(); 145 | assert_eq!(num_docs, 0, "No document should not exist!"); 146 | 147 | Ok(()) 148 | } 149 | 150 | #[tokio::test] 151 | async fn test_single_node_cluster_bulk_op_with_keyspace_handle() -> anyhow::Result<()> { 152 | let _ = tracing_subscriber::fmt::try_init(); 153 | 154 | let store = create_store().await; 155 | let handle = store.handle_with_keyspace(KEYSPACE); 156 | 157 | // Test reading 158 | let num_docs = handle.get_many([1]).await.expect("Get value.").count(); 159 | assert_eq!(num_docs, 0, "No document should not exist!"); 160 | 161 | // Test writing 162 | handle 163 | .put_many([(1, b"Hello, world".to_vec())], Consistency::All) 164 | .await 165 | .expect("Put value."); 166 | 167 | let docs = handle 168 | .get_many([1]) 169 | .await 170 | .expect("Get value.") 171 | .collect::>(); 172 | assert_eq!(docs[0].id(), 1); 173 | assert_eq!(docs[0].data(), b"Hello, world"); 174 | 175 | handle 176 | .del_many([1], Consistency::All) 177 | .await 178 | .expect("Del value."); 179 | let num_docs = handle.get_many([1]).await.expect("Get value.").count(); 180 | assert_eq!(num_docs, 0, "No document should not exist!"); 181 | 182 | handle 183 | .del_many([2, 3, 
1, 5], Consistency::All) 184 | .await 185 | .expect("Del value which doesnt exist locally."); 186 | let num_docs = handle 187 | .get_many([2, 3, 5, 1]) 188 | .await 189 | .expect("Get value.") 190 | .count(); 191 | assert_eq!(num_docs, 0, "No document should not exist!"); 192 | 193 | Ok(()) 194 | } 195 | 196 | async fn create_store() -> EventuallyConsistentStore { 197 | let addr = test_helper::get_unused_addr(); 198 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 199 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 200 | .connect() 201 | .await 202 | .expect("Connect node."); 203 | 204 | node.add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 205 | .await 206 | .expect("Create store.") 207 | } 208 | -------------------------------------------------------------------------------- /datacake-lmdb/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake-lmdb" 3 | version = "0.3.0" 4 | edition = "2021" 5 | description = "A pre-built implementation of datacake's Storage trait using LMDB." 
6 | license = "MIT" 7 | keywords = ["databases", "distributed"] 8 | categories = ["concurrency", "data-structures"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | async-trait = "0.1" 16 | futures = "0.3" 17 | flume = "0.10" 18 | thiserror = "1" 19 | 20 | heed = { version = "=0.20.0-alpha.9", default-features = false } 21 | tokio = { version = "1", default-features = false, features = ["rt"] } 22 | 23 | datacake-crdt = { version = "0.5", path = "../datacake-crdt" } 24 | datacake-eventual-consistency = { version = "0.6", path = "../datacake-eventual-consistency" } 25 | 26 | [dev-dependencies] 27 | anyhow = "1" 28 | tracing = "0.1.37" 29 | tracing-subscriber = "0.3.16" 30 | 31 | test-helper = { path = "../test-helper" } 32 | 33 | uuid = { version = "1", features = ["v4"] } 34 | datacake-node = { version = "0.5", path = "../datacake-node" } 35 | datacake-eventual-consistency = { version = "0.6", path = "../datacake-eventual-consistency", features = ["test-utils"] } 36 | -------------------------------------------------------------------------------- /datacake-lmdb/README.md: -------------------------------------------------------------------------------- 1 | # Datacake LMDB 2 | 3 | A pre-built implementation of the datacake-eventual-consistency `Storage` trait, this allows you to set up 4 | a persistent cluster immediately without any hassle of implementing a correct store. 
5 | 6 | For more info see https://github.com/lnx-search/datacake 7 | 8 | ## Example 9 | 10 | ```rust 11 | use std::env::temp_dir; 12 | use anyhow::Result; 13 | use uuid::Uuid; 14 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 15 | use datacake_node::{ 16 | ConnectionConfig, 17 | Consistency, 18 | DCAwareSelector, 19 | DatacakeNodeBuilder, 20 | }; 21 | use datacake_lmdb::LmdbStorage; 22 | 23 | static KEYSPACE: &str = "lmdb-store"; 24 | 25 | #[tokio::main] 26 | async fn main() -> Result<()> { 27 | tracing_subscriber::fmt::init(); 28 | 29 | let temp_dir = temp_dir().join(Uuid::new_v4().to_string()); 30 | std::fs::create_dir_all(&temp_dir)?; 31 | 32 | let store = LmdbStorage::open(temp_dir).await?; 33 | 34 | let addr = test_helper::get_unused_addr(); 35 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 36 | 37 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 38 | .connect() 39 | .await?; 40 | let store = node 41 | .add_extension(EventuallyConsistentStoreExtension::new(store)) 42 | .await?; 43 | 44 | let handle = store.handle(); 45 | 46 | handle.put(KEYSPACE, 1, b"Hello, world".to_vec(), Consistency::All).await?; 47 | 48 | let doc = handle 49 | .get(KEYSPACE, 1) 50 | .await? 51 | .expect("Document should not be none"); 52 | assert_eq!(doc.id(), 1); 53 | assert_eq!(doc.data(), b"Hello, world"); 54 | 55 | handle.del(KEYSPACE, 1, Consistency::All).await?; 56 | let doc = handle.get(KEYSPACE, 1).await?; 57 | assert!(doc.is_none(), "No document should not exist!"); 58 | 59 | handle.del(KEYSPACE, 2, Consistency::All).await?; 60 | let doc = handle.get(KEYSPACE, 2).await?; 61 | assert!(doc.is_none(), "No document should not exist!"); 62 | 63 | node.shutdown().await; 64 | 65 | Ok(()) 66 | } 67 | 68 | 69 | 70 | 71 | 72 | 73 | ``` -------------------------------------------------------------------------------- /datacake-lmdb/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
# Datacake LMDB 2 | //! 3 | //! A pre-built implementation of the datacake-eventual-consistency `Storage` trait, this allows you to set up 4 | //! a persistent cluster immediately without any hassle of implementing a correct store. 5 | //! 6 | //! For more info see 7 | //! 8 | //! ## Example 9 | //! 10 | //! ```rust 11 | //! use std::env::temp_dir; 12 | //! use anyhow::Result; 13 | //! use uuid::Uuid; 14 | //! use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 15 | //! use datacake_node::{ 16 | //! ConnectionConfig, 17 | //! Consistency, 18 | //! DCAwareSelector, 19 | //! DatacakeNodeBuilder, 20 | //! }; 21 | //! use datacake_lmdb::LmdbStorage; 22 | //! 23 | //! static KEYSPACE: &str = "lmdb-store"; 24 | //! 25 | //! #[tokio::main] 26 | //! async fn main() -> Result<()> { 27 | //! tracing_subscriber::fmt::init(); 28 | //! 29 | //! let temp_dir = temp_dir().join(Uuid::new_v4().to_string()); 30 | //! std::fs::create_dir_all(&temp_dir)?; 31 | //! 32 | //! let store = LmdbStorage::open(temp_dir).await?; 33 | //! 34 | //! let addr = test_helper::get_unused_addr(); 35 | //! let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 36 | //! 37 | //! let node = DatacakeNodeBuilder::::new(1, connection_cfg) 38 | //! .connect() 39 | //! .await?; 40 | //! let store = node 41 | //! .add_extension(EventuallyConsistentStoreExtension::new(store)) 42 | //! .await?; 43 | //! 44 | //! let handle = store.handle(); 45 | //! 46 | //! handle.put(KEYSPACE, 1, b"Hello, world".to_vec(), Consistency::All).await?; 47 | //! 48 | //! let doc = handle 49 | //! .get(KEYSPACE, 1) 50 | //! .await? 51 | //! .expect("Document should not be none"); 52 | //! assert_eq!(doc.id(), 1); 53 | //! assert_eq!(doc.data(), b"Hello, world"); 54 | //! 55 | //! handle.del(KEYSPACE, 1, Consistency::All).await?; 56 | //! let doc = handle.get(KEYSPACE, 1).await?; 57 | //! assert!(doc.is_none(), "No document should not exist!"); 58 | //! 59 | //! 
handle.del(KEYSPACE, 2, Consistency::All).await?; 60 | //! let doc = handle.get(KEYSPACE, 2).await?; 61 | //! assert!(doc.is_none(), "No document should not exist!"); 62 | //! 63 | //! node.shutdown().await; 64 | //! 65 | //! Ok(()) 66 | //! } 67 | //! ``` 68 | 69 | mod db; 70 | 71 | use std::path::Path; 72 | 73 | use async_trait::async_trait; 74 | use datacake_crdt::{HLCTimestamp, Key}; 75 | use datacake_eventual_consistency::{ 76 | BulkMutationError, 77 | Document, 78 | DocumentMetadata, 79 | Storage, 80 | }; 81 | pub use db::StorageHandle; 82 | pub use heed; 83 | pub use heed::Error; 84 | 85 | pub struct LmdbStorage { 86 | db: StorageHandle, 87 | } 88 | 89 | impl LmdbStorage { 90 | /// Connects to the LMDB database. 91 | /// This spawns 1 background thread with actions being executed within that thread. 92 | /// This approach reduces the effect of writes blocking reads and vice-versa. 93 | pub async fn open(path: impl AsRef) -> heed::Result { 94 | let db = StorageHandle::open(path).await?; 95 | 96 | Ok(Self { db }) 97 | } 98 | 99 | /// Access to the LMDB storage handle. 100 | /// 101 | /// This allows you to access the LMDB db directly 102 | /// including its environment, but it does 103 | /// not provide any access to the KV databases used 104 | /// by the datacake storage layer. 
105 | pub fn handle(&self) -> &StorageHandle { 106 | &self.db 107 | } 108 | } 109 | 110 | #[async_trait] 111 | impl Storage for LmdbStorage { 112 | type Error = heed::Error; 113 | type DocsIter = Box>; 114 | type MetadataIter = Box>; 115 | 116 | async fn get_keyspace_list(&self) -> Result, Self::Error> { 117 | self.handle().keyspace_list().await 118 | } 119 | 120 | async fn iter_metadata( 121 | &self, 122 | keyspace: &str, 123 | ) -> Result { 124 | self.handle() 125 | .get_metadata(keyspace) 126 | .await 127 | .map(|v| Box::new(v.into_iter()) as Self::MetadataIter) 128 | } 129 | 130 | async fn remove_tombstones( 131 | &self, 132 | keyspace: &str, 133 | keys: impl Iterator + Send, 134 | ) -> Result<(), BulkMutationError> { 135 | self.handle() 136 | .remove_tombstones(keyspace, keys) 137 | .await 138 | .map_err(BulkMutationError::empty_with_error) 139 | } 140 | 141 | async fn put(&self, keyspace: &str, document: Document) -> Result<(), Self::Error> { 142 | self.handle().put_kv(keyspace, document).await 143 | } 144 | 145 | async fn multi_put( 146 | &self, 147 | keyspace: &str, 148 | documents: impl Iterator + Send, 149 | ) -> Result<(), BulkMutationError> { 150 | self.handle() 151 | .put_many_kv(keyspace, documents) 152 | .await 153 | .map_err(BulkMutationError::empty_with_error) 154 | } 155 | 156 | async fn mark_as_tombstone( 157 | &self, 158 | keyspace: &str, 159 | doc_id: Key, 160 | timestamp: HLCTimestamp, 161 | ) -> Result<(), Self::Error> { 162 | self.handle() 163 | .mark_tombstone(keyspace, doc_id, timestamp) 164 | .await 165 | } 166 | 167 | async fn mark_many_as_tombstone( 168 | &self, 169 | keyspace: &str, 170 | documents: impl Iterator + Send, 171 | ) -> Result<(), BulkMutationError> { 172 | self.handle() 173 | .mark_many_as_tombstone(keyspace, documents) 174 | .await 175 | .map_err(BulkMutationError::empty_with_error) 176 | } 177 | 178 | async fn get( 179 | &self, 180 | keyspace: &str, 181 | doc_id: Key, 182 | ) -> Result, Self::Error> { 183 | 
self.handle().get(keyspace, doc_id).await 184 | } 185 | 186 | async fn multi_get( 187 | &self, 188 | keyspace: &str, 189 | doc_ids: impl Iterator + Send, 190 | ) -> Result { 191 | self.handle() 192 | .get_many(keyspace, doc_ids) 193 | .await 194 | .map(|v| Box::new(v.into_iter()) as Self::DocsIter) 195 | } 196 | } 197 | 198 | #[cfg(test)] 199 | mod tests { 200 | use std::env::temp_dir; 201 | 202 | use datacake_eventual_consistency::test_suite; 203 | use uuid::Uuid; 204 | 205 | use crate::LmdbStorage; 206 | 207 | #[tokio::test] 208 | async fn test_storage_logic() { 209 | let path = temp_dir().join(Uuid::new_v4().to_string()); 210 | std::fs::create_dir_all(&path).unwrap(); 211 | 212 | let storage = LmdbStorage::open(path).await.expect("Open DB"); 213 | test_suite::run_test_suite(storage).await; 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /datacake-lmdb/tests/basic_cluster.rs: -------------------------------------------------------------------------------- 1 | use std::env::temp_dir; 2 | 3 | use anyhow::Result; 4 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 5 | use datacake_lmdb::LmdbStorage; 6 | use datacake_node::{ 7 | ConnectionConfig, 8 | Consistency, 9 | DCAwareSelector, 10 | DatacakeNodeBuilder, 11 | }; 12 | use uuid::Uuid; 13 | 14 | static KEYSPACE: &str = "lmdb-store"; 15 | 16 | #[tokio::test] 17 | async fn test_basic_lmdb_cluster() -> Result<()> { 18 | let _ = tracing_subscriber::fmt::try_init(); 19 | 20 | let temp_dir = temp_dir().join(Uuid::new_v4().to_string()); 21 | std::fs::create_dir_all(&temp_dir)?; 22 | 23 | let store = LmdbStorage::open(temp_dir).await?; 24 | 25 | let addr = test_helper::get_unused_addr(); 26 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 27 | 28 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 29 | .connect() 30 | .await?; 31 | let store = node 32 | .add_extension(EventuallyConsistentStoreExtension::new(store)) 
33 | .await?; 34 | 35 | let handle = store.handle(); 36 | 37 | handle 38 | .put(KEYSPACE, 1, b"Hello, world".to_vec(), Consistency::All) 39 | .await 40 | .expect("Put value."); 41 | 42 | let doc = handle 43 | .get(KEYSPACE, 1) 44 | .await 45 | .expect("Get value.") 46 | .expect("Document should not be none"); 47 | assert_eq!(doc.id(), 1); 48 | assert_eq!(doc.data(), b"Hello, world"); 49 | 50 | handle 51 | .del(KEYSPACE, 1, Consistency::All) 52 | .await 53 | .expect("Del value."); 54 | let doc = handle.get(KEYSPACE, 1).await.expect("Get value."); 55 | assert!(doc.is_none(), "No document should not exist!"); 56 | 57 | handle 58 | .del(KEYSPACE, 2, Consistency::All) 59 | .await 60 | .expect("Del value which doesnt exist locally."); 61 | let doc = handle.get(KEYSPACE, 2).await.expect("Get value."); 62 | assert!(doc.is_none(), "No document should not exist!"); 63 | 64 | node.shutdown().await; 65 | 66 | Ok(()) 67 | } 68 | -------------------------------------------------------------------------------- /datacake-node/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake-node" 3 | version = "0.5.0" 4 | edition = "2021" 5 | description = "The core cluster membership system built ontop of Quickwit's chitchat." 
6 | license = "MIT" 7 | keywords = ["crdt", "databases", "distributed", "tokio", "async"] 8 | categories = ["concurrency", "data-structures", "asynchronous"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | anyhow = "1" 16 | thiserror = "1" 17 | flume = "0.10.14" 18 | async-trait = "0.1.59" 19 | tracing = "0.1" 20 | tokio-stream = "0.1.9" 21 | parking_lot = "0.12.1" 22 | rand = "0.8.5" 23 | futures = "0.3" 24 | crc32fast = "1.3.2" 25 | smallvec = "1" 26 | 27 | datacake-rpc = { version = "0.6", path = "../datacake-rpc" } 28 | datacake-crdt = { version = "0.5", path = "../datacake-crdt", features = ["rkyv-support"] } 29 | chitchat = { version = "0.5.1", package = "datacake-chitchat-fork" } 30 | tokio = { version = "1", default-features = false, features = ["sync", "time"] } 31 | rkyv = { version = "0.7.42", features = ["strict", "validation"] } 32 | 33 | [dev-dependencies] 34 | tracing-subscriber = "0.3.16" 35 | tokio = { version = "1", features = ["full"] } 36 | test-helper = { path = "../test-helper" } 37 | datacake-rpc = { version = "0.6", path = "../datacake-rpc", features = ["test-utils"] } 38 | 39 | -------------------------------------------------------------------------------- /datacake-node/README.md: -------------------------------------------------------------------------------- 1 | # Datacake Node 2 | 3 | The core membership system used within Datacake. 4 | 5 | This system allows you to build cluster extensions on top of this core functionality giving you access to 6 | the live membership watchers, node selectors, cluster clock, etc... 7 | 8 | A good example of this is the `datacake-eventual-consistency` crate; it simply implements the `ClusterExtension` trait 9 | which lets it be added at runtime without issue. 
10 | 11 | ## Features 12 | - Zero-copy RPC framework which allows for runtime adding and removing of services. 13 | - Changeable node selector used for picking nodes out of a live membership to handle tasks. 14 | - Pre-built data-center aware node selector for prioritisation of nodes in other availability zones. 15 | - Distributed clock used for keeping an effective wall clock which respects causality. 16 | 17 | ## Getting Started 18 | 19 | To get started we'll begin by creating our cluster: 20 | 21 | ```rust 22 | use std::net::SocketAddr; 23 | 24 | use datacake_node::{ConnectionConfig, DCAwareSelector, DatacakeNodeBuilder}; 25 | 26 | #[tokio::main] 27 | async fn main() -> anyhow::Result<()> { 28 | let bind_addr = "127.0.0.1:8000".parse::().unwrap(); 29 | 30 | // We setup our connection config for the node passing in the bind address, public address and seed nodes. 31 | // Here we're just using the bind address as our public address with no seed, but in the real world 32 | // this will be a different value when deployed across several servers with seeds to contact. 33 | let connection_cfg = ConnectionConfig::new(bind_addr, bind_addr, Vec::::new()); 34 | 35 | // Our builder lets us configure the node. 36 | // 37 | // We can configure the node selector, data center of the node, cluster ID, etc... 38 | let my_node = DatacakeNodeBuilder::::new(1, connection_cfg).connect().await?; 39 | 40 | // Now we're connected we can add any extensions at runtime, our RPC server will already be 41 | // running and setup. 42 | // 43 | // Check out the `datacake-eventual-consistency` implementation for a demo. 
44 | 45 | Ok(()) 46 | } 47 | ``` 48 | 49 | #### Creating A Extension 50 | 51 | Creating a cluster extension is really simple, it's one trait and it can do just about anything: 52 | 53 | ```rust 54 | use datacake_node::{ClusterExtension, DatacakeNode}; 55 | use async_trait::async_trait; 56 | 57 | pub struct MyExtension; 58 | 59 | #[async_trait] 60 | impl ClusterExtension for MyExtension { 61 | type Output = (); 62 | type Error = MyError; 63 | 64 | async fn init_extension( 65 | self, 66 | node: &DatacakeNode, 67 | ) -> Result { 68 | // In here we can setup our system using the live node. 69 | // This gives us things like the cluster clock and RPC server: 70 | 71 | println!("Creating my extension!"); 72 | 73 | let timestamp = node.clock().get_time().await; 74 | println!("My timestamp: {timestamp}"); 75 | 76 | Ok(()) 77 | } 78 | } 79 | 80 | pub struct MyError; 81 | ``` 82 | -------------------------------------------------------------------------------- /datacake-node/src/clock.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use datacake_crdt::HLCTimestamp; 4 | use tokio::sync::oneshot; 5 | 6 | use crate::NodeId; 7 | 8 | const CLOCK_BACKPRESSURE_LIMIT: u16 = u16::MAX - 10; 9 | 10 | #[derive(Clone)] 11 | pub struct Clock { 12 | node_id: NodeId, 13 | tx: flume::Sender, 14 | } 15 | 16 | impl Clock { 17 | pub fn new(node_id: NodeId) -> Self { 18 | let ts = HLCTimestamp::now(0, node_id); 19 | let (tx, rx) = flume::bounded(1000); 20 | 21 | tokio::spawn(run_clock(ts, rx)); 22 | 23 | Self { node_id, tx } 24 | } 25 | 26 | pub async fn register_ts(&self, ts: HLCTimestamp) { 27 | if ts.node() == self.node_id { 28 | return; 29 | } 30 | 31 | self.tx 32 | .send_async(Event::Register(ts)) 33 | .await 34 | .expect("Clock actor should never die"); 35 | } 36 | 37 | pub async fn get_time(&self) -> HLCTimestamp { 38 | let (tx, rx) = oneshot::channel(); 39 | 40 | self.tx 41 | .send_async(Event::Get(tx)) 42 | .await 
43 | .expect("Clock actor should never die"); 44 | 45 | rx.await.expect("Responder should not be dropped") 46 | } 47 | } 48 | 49 | pub enum Event { 50 | Get(oneshot::Sender), 51 | Register(HLCTimestamp), 52 | } 53 | 54 | async fn run_clock(mut clock: HLCTimestamp, reqs: flume::Receiver) { 55 | while let Ok(event) = reqs.recv_async().await { 56 | match event { 57 | Event::Get(tx) => { 58 | let ts = clock.send().expect("Clock counter should not overflow"); 59 | 60 | if clock.counter() >= CLOCK_BACKPRESSURE_LIMIT { 61 | tokio::time::sleep(Duration::from_millis(1)).await; 62 | } 63 | 64 | let _ = tx.send(ts); 65 | }, 66 | Event::Register(remote_ts) => { 67 | let _ = clock.recv(&remote_ts); 68 | 69 | if clock.counter() >= CLOCK_BACKPRESSURE_LIMIT { 70 | tokio::time::sleep(Duration::from_millis(1)).await; 71 | } 72 | }, 73 | } 74 | } 75 | } 76 | 77 | #[cfg(test)] 78 | mod tests { 79 | use super::*; 80 | 81 | #[tokio::test] 82 | async fn test_clock() { 83 | let clock = Clock::new(0); 84 | 85 | let ts1 = clock.get_time().await; 86 | clock.register_ts(ts1).await; 87 | let ts2 = clock.get_time().await; 88 | assert!(ts1 < ts2); 89 | 90 | let ts1 = clock.get_time().await; 91 | let ts2 = clock.get_time().await; 92 | let ts3 = clock.get_time().await; 93 | assert!(ts1 < ts2); 94 | assert!(ts2 < ts3); 95 | 96 | let drift_ts = 97 | HLCTimestamp::new(ts3.datacake_timestamp() + Duration::from_secs(50), 0, 1); 98 | clock.register_ts(drift_ts).await; 99 | let ts = clock.get_time().await; 100 | assert!( 101 | drift_ts < ts, 102 | "New timestamp should be monotonic relative to drifted ts." 
103 | ); 104 | 105 | let old_ts = 106 | HLCTimestamp::new(ts3.datacake_timestamp() + Duration::from_secs(5), 0, 1); 107 | clock.register_ts(old_ts).await; 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /datacake-node/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::io; 3 | 4 | use thiserror::Error; 5 | 6 | use crate::nodes_selector::ConsistencyError; 7 | 8 | #[derive(Debug, Error)] 9 | pub enum NodeError { 10 | #[error("{0}")] 11 | /// An error has occurred within Chitchat. 12 | ChitChat(String), 13 | 14 | #[error("{0}")] 15 | /// An IO error has occurred. 16 | IO(#[from] io::Error), 17 | 18 | #[error("{0}")] 19 | /// An RPC error has occurred. 20 | Rpc(#[from] datacake_rpc::Error), 21 | 22 | #[error("Failed to complete operation due to consistency level failure: {0}")] 23 | /// The operation succeeded on the local node but failed to meet the required 24 | /// consistency level within the timeout period. (2 seconds) 25 | Consistency(ConsistencyError), 26 | 27 | #[error("Failed to initialise cluster extension: {0}")] 28 | Extension(anyhow::Error), 29 | } 30 | -------------------------------------------------------------------------------- /datacake-node/src/extension.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | 3 | use crate::DatacakeNode; 4 | 5 | #[async_trait] 6 | /// An extension of the base node/cluster. 7 | /// 8 | /// This can be used to extend a base node to include additional 9 | /// functionality, like the eventually consistent store, multi-raft cluster 10 | /// or anything else which may want to use the membership, rpc and clock system.
11 | pub trait ClusterExtension { 12 | type Output; 13 | type Error; 14 | 15 | async fn init_extension( 16 | self, 17 | node: &DatacakeNode, 18 | ) -> Result; 19 | } 20 | -------------------------------------------------------------------------------- /datacake-node/src/rpc/chitchat_transport.rs: -------------------------------------------------------------------------------- 1 | use std::net::SocketAddr; 2 | use std::ops::Deref; 3 | use std::sync::Arc; 4 | 5 | use anyhow::bail; 6 | use async_trait::async_trait; 7 | use chitchat::serialize::Serializable; 8 | use chitchat::transport::{Socket, Transport}; 9 | use chitchat::ChitchatMessage; 10 | use datacake_rpc::RpcClient; 11 | use tracing::trace; 12 | 13 | use crate::rpc::network::RpcNetwork; 14 | use crate::rpc::services::chitchat_impl::{ChitchatRpcMessage, ChitchatService}; 15 | use crate::Clock; 16 | 17 | #[derive(Clone)] 18 | /// Chitchat compatible transport built on top of an existing RPC connection. 19 | /// 20 | /// This allows us to maintain a single connection rather than both a UDP and TCP connection. 21 | pub struct ChitchatTransport(Arc); 22 | 23 | impl ChitchatTransport { 24 | /// Creates a new GRPC transport instances. 25 | pub fn new( 26 | rpc_listen_addr: SocketAddr, 27 | clock: Clock, 28 | network: RpcNetwork, 29 | messages: flume::Receiver<(SocketAddr, ChitchatMessage)>, 30 | ) -> Self { 31 | Self(Arc::new(ChitchatTransportInner { 32 | rpc_listen_addr, 33 | clock, 34 | network, 35 | messages, 36 | })) 37 | } 38 | } 39 | 40 | impl Deref for ChitchatTransport { 41 | type Target = ChitchatTransportInner; 42 | 43 | fn deref(&self) -> &Self::Target { 44 | &self.0 45 | } 46 | } 47 | 48 | #[async_trait] 49 | impl Transport for ChitchatTransport { 50 | async fn open( 51 | &self, 52 | listen_addr: SocketAddr, 53 | ) -> Result, anyhow::Error> { 54 | if listen_addr != self.rpc_listen_addr { 55 | bail!( 56 | "Listen addr does not match RPC server address. 
{listen_addr} != {}", 57 | self.rpc_listen_addr 58 | ); 59 | } 60 | 61 | Ok(Box::new(GrpcConnection { 62 | clock: self.clock.clone(), 63 | self_addr: self.rpc_listen_addr, 64 | network: self.network.clone(), 65 | messages: self.messages.clone(), 66 | })) 67 | } 68 | } 69 | 70 | pub struct ChitchatTransportInner { 71 | /// The socket address the RPC server is listening on. 72 | rpc_listen_addr: SocketAddr, 73 | 74 | /// The node clock. 75 | clock: Clock, 76 | 77 | /// The RPC network of clients. 78 | network: RpcNetwork, 79 | 80 | /// Received messages to be sent to the Chitchat cluster. 81 | messages: flume::Receiver<(SocketAddr, ChitchatMessage)>, 82 | } 83 | 84 | pub struct GrpcConnection { 85 | clock: Clock, 86 | self_addr: SocketAddr, 87 | network: RpcNetwork, 88 | messages: flume::Receiver<(SocketAddr, ChitchatMessage)>, 89 | } 90 | 91 | #[async_trait] 92 | impl Socket for GrpcConnection { 93 | async fn send( 94 | &mut self, 95 | to: SocketAddr, 96 | msg: ChitchatMessage, 97 | ) -> Result<(), anyhow::Error> { 98 | trace!(to = %to, msg = ?msg, "Gossip send"); 99 | let data = msg.serialize_to_vec(); 100 | 101 | let channel = self.network.get_or_connect(to); 102 | 103 | let timestamp = self.clock.get_time().await; 104 | let msg = ChitchatRpcMessage { 105 | data, 106 | source: self.self_addr, 107 | timestamp, 108 | }; 109 | 110 | let client = RpcClient::::new(channel); 111 | client.send(&msg).await?; 112 | 113 | Ok(()) 114 | } 115 | 116 | async fn recv(&mut self) -> Result<(SocketAddr, ChitchatMessage), anyhow::Error> { 117 | let msg = self.messages.recv_async().await?; 118 | Ok(msg) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /datacake-node/src/rpc/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod chitchat_transport; 2 | pub(crate) mod network; 3 | pub(crate) mod services; 4 | 
-------------------------------------------------------------------------------- /datacake-node/src/rpc/network.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::net::SocketAddr; 3 | use std::sync::Arc; 4 | 5 | use datacake_rpc::Channel; 6 | use parking_lot::RwLock; 7 | use tracing::trace; 8 | 9 | #[derive(Clone, Default)] 10 | /// A collection of RPC client connections which can be reused and multiplexed. 11 | pub struct RpcNetwork { 12 | clients: Arc>>, 13 | } 14 | 15 | impl RpcNetwork { 16 | /// Attempts to get an already existing connection or creates a new connection. 17 | pub fn get_or_connect(&self, addr: SocketAddr) -> Channel { 18 | { 19 | let guard = self.clients.read(); 20 | if let Some(channel) = guard.get(&addr) { 21 | return channel.clone(); 22 | } 23 | } 24 | 25 | trace!(addr = %addr, "Connect client to network."); 26 | self.connect(addr) 27 | } 28 | 29 | /// Connects to a given address and adds it to the clients. 30 | pub fn connect(&self, addr: SocketAddr) -> Channel { 31 | let channel = Channel::connect(addr); 32 | 33 | { 34 | let mut guard = self.clients.write(); 35 | guard.insert(addr, channel.clone()); 36 | } 37 | 38 | channel 39 | } 40 | 41 | /// Removes a client from the network. 
42 | pub fn disconnect(&self, addr: SocketAddr) { 43 | let mut guard = self.clients.write(); 44 | guard.remove(&addr); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /datacake-node/src/rpc/services/chitchat_impl.rs: -------------------------------------------------------------------------------- 1 | use std::net::SocketAddr; 2 | 3 | use chitchat::serialize::Serializable; 4 | use chitchat::ChitchatMessage; 5 | use datacake_crdt::HLCTimestamp; 6 | use datacake_rpc::{Handler, Request, RpcService, ServiceRegistry, Status}; 7 | use rkyv::{Archive, Deserialize, Serialize}; 8 | 9 | use crate::Clock; 10 | 11 | #[repr(C)] 12 | #[derive(Serialize, Deserialize, Archive)] 13 | #[archive(check_bytes)] 14 | pub struct ChitchatRpcMessage { 15 | pub data: Vec, 16 | pub source: SocketAddr, 17 | pub timestamp: HLCTimestamp, 18 | } 19 | 20 | pub struct ChitchatService { 21 | clock: Clock, 22 | messages: flume::Sender<(SocketAddr, ChitchatMessage)>, 23 | } 24 | 25 | impl ChitchatService { 26 | pub fn new( 27 | clock: Clock, 28 | messages: flume::Sender<(SocketAddr, ChitchatMessage)>, 29 | ) -> Self { 30 | Self { clock, messages } 31 | } 32 | } 33 | 34 | impl RpcService for ChitchatService { 35 | fn register_handlers(registry: &mut ServiceRegistry) { 36 | registry.add_handler::(); 37 | } 38 | } 39 | 40 | #[datacake_rpc::async_trait] 41 | impl Handler for ChitchatService { 42 | type Reply = HLCTimestamp; 43 | 44 | async fn on_message( 45 | &self, 46 | request: Request, 47 | ) -> Result { 48 | let msg = request.deserialize_view().map_err(Status::internal)?; 49 | 50 | let from = msg.source; 51 | self.clock.register_ts(msg.timestamp).await; 52 | 53 | let mut buffer = msg.data.as_slice(); 54 | let msg = ::deserialize(&mut buffer) 55 | .map_err(|e| Status::internal(e.to_string()))?; 56 | 57 | let _ = self.messages.try_send((from, msg)); 58 | 59 | Ok(self.clock.get_time().await) 60 | } 61 | } 62 | 63 | #[cfg(test)] 64 | mod tests { 65 | use 
std::net::{IpAddr, Ipv4Addr}; 66 | 67 | use super::*; 68 | 69 | #[tokio::test] 70 | async fn test_chitchat_service() { 71 | let clock = Clock::new(0); 72 | let (tx, rx) = flume::bounded(10); 73 | let service = ChitchatService::new(clock.clone(), tx); 74 | 75 | let source = SocketAddr::new(IpAddr::V4(Ipv4Addr::from([127, 0, 0, 1])), 80); 76 | let message = ChitchatMessage::BadCluster; 77 | let timestamp = clock.get_time().await; 78 | 79 | let msg_req = Request::using_owned(ChitchatRpcMessage { 80 | timestamp, 81 | data: message.serialize_to_vec(), 82 | source, 83 | }) 84 | .await; 85 | 86 | service.on_message(msg_req).await.expect("Send message"); 87 | 88 | let (addr, msg) = rx.try_recv().expect("Message should be registered"); 89 | assert_eq!(addr, source); 90 | assert_eq!(msg, message); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /datacake-node/src/rpc/services/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod chitchat_impl; 2 | -------------------------------------------------------------------------------- /datacake-node/src/statistics.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Deref; 2 | use std::sync::atomic::{AtomicU64, Ordering}; 3 | use std::sync::Arc; 4 | 5 | pub type Counter = AtomicU64; 6 | 7 | #[derive(Debug, Clone, Default)] 8 | /// Live metrics around the cluster system. 9 | pub struct ClusterStatistics(Arc); 10 | 11 | impl Deref for ClusterStatistics { 12 | type Target = ClusterStatisticsInner; 13 | 14 | fn deref(&self) -> &Self::Target { 15 | &self.0 16 | } 17 | } 18 | 19 | #[derive(Debug, Default)] 20 | pub struct ClusterStatisticsInner { 21 | /// The number of currently alive members the node is aware of. 22 | pub(crate) num_live_members: Counter, 23 | /// The number of members the node currently believes is dead. 
24 | pub(crate) num_dead_members: Counter, 25 | /// The number of data centers/availability zones the cluster belongs to. 26 | pub(crate) num_data_centers: Counter, 27 | } 28 | 29 | impl ClusterStatisticsInner { 30 | /// The number of currently alive members the node is aware of. 31 | pub fn num_live_members(&self) -> u64 { 32 | self.num_live_members.load(Ordering::Relaxed) 33 | } 34 | 35 | /// The number of members the node currently believes is dead. 36 | pub fn num_dead_members(&self) -> u64 { 37 | self.num_dead_members.load(Ordering::Relaxed) 38 | } 39 | 40 | /// The number of data centers/availability zones the cluster belongs to. 41 | pub fn num_data_centers(&self) -> u64 { 42 | self.num_data_centers.load(Ordering::Relaxed) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /datacake-node/tests/membership.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use datacake_node::{ConnectionConfig, DCAwareSelector, DatacakeNodeBuilder}; 4 | 5 | #[tokio::test] 6 | // TODO: Improve this test so it's not really flaky 7 | pub async fn test_member_join() -> anyhow::Result<()> { 8 | let _ = tracing_subscriber::fmt::try_init(); 9 | 10 | let node_1_addr = test_helper::get_unused_addr(); 11 | let node_2_addr = test_helper::get_unused_addr(); 12 | let node_3_addr = test_helper::get_unused_addr(); 13 | let node_1_connection_cfg = 14 | ConnectionConfig::new(node_1_addr, node_1_addr, [node_2_addr.to_string()]); 15 | let node_2_connection_cfg = 16 | ConnectionConfig::new(node_2_addr, node_2_addr, [node_1_addr.to_string()]); 17 | let node_3_connection_cfg = ConnectionConfig::new( 18 | node_3_addr, 19 | node_3_addr, 20 | [node_1_addr.to_string(), node_2_addr.to_string()], 21 | ); 22 | 23 | let node_1 = DatacakeNodeBuilder::::new(1, node_1_connection_cfg) 24 | .connect() 25 | .await?; 26 | let node_2 = DatacakeNodeBuilder::::new(2, node_2_connection_cfg) 27 | 
.connect() 28 | .await?; 29 | 30 | node_1 31 | .wait_for_nodes(&[2], Duration::from_secs(30)) 32 | .await 33 | .expect("Nodes should connect within timeout."); 34 | node_2 35 | .wait_for_nodes(&[1], Duration::from_secs(30)) 36 | .await 37 | .expect("Nodes should connect within timeout."); 38 | 39 | let stats = node_1.statistics(); 40 | assert_eq!(stats.num_data_centers(), 1); 41 | assert_eq!(stats.num_live_members(), 2); 42 | assert_eq!(stats.num_dead_members(), 0); 43 | 44 | let stats = node_2.statistics(); 45 | assert_eq!(stats.num_data_centers(), 1); 46 | assert_eq!(stats.num_live_members(), 2); 47 | assert_eq!(stats.num_dead_members(), 0); 48 | 49 | let node_3 = DatacakeNodeBuilder::::new(3, node_3_connection_cfg) 50 | .connect() 51 | .await?; 52 | 53 | node_3 54 | .wait_for_nodes(&[2, 1], Duration::from_secs(30)) 55 | .await 56 | .expect("Nodes should connect within timeout."); 57 | 58 | tokio::time::sleep(Duration::from_secs(5)).await; 59 | 60 | let stats = node_3.statistics(); 61 | assert_eq!(stats.num_data_centers(), 1); 62 | assert_eq!(stats.num_live_members(), 3); 63 | 64 | let stats = node_1.statistics(); 65 | assert_eq!(stats.num_data_centers(), 1); 66 | assert_eq!(stats.num_live_members(), 3); 67 | 68 | let stats = node_2.statistics(); 69 | assert_eq!(stats.num_data_centers(), 1); 70 | assert_eq!(stats.num_live_members(), 3); 71 | 72 | Ok(()) 73 | } 74 | 75 | #[tokio::test] 76 | // TODO: Improve this test so it's not really flaky 77 | pub async fn test_member_leave() -> anyhow::Result<()> { 78 | let _ = tracing_subscriber::fmt::try_init(); 79 | 80 | let node_1_addr = test_helper::get_unused_addr(); 81 | let node_2_addr = test_helper::get_unused_addr(); 82 | let node_3_addr = test_helper::get_unused_addr(); 83 | let node_1_connection_cfg = ConnectionConfig::new( 84 | node_1_addr, 85 | node_1_addr, 86 | [node_2_addr.to_string(), node_3_addr.to_string()], 87 | ); 88 | let node_2_connection_cfg = ConnectionConfig::new( 89 | node_2_addr, 90 | node_2_addr, 
91 | [node_1_addr.to_string(), node_3_addr.to_string()], 92 | ); 93 | let node_3_connection_cfg = ConnectionConfig::new( 94 | node_3_addr, 95 | node_3_addr, 96 | [node_1_addr.to_string(), node_2_addr.to_string()], 97 | ); 98 | 99 | let node_1 = DatacakeNodeBuilder::::new(1, node_1_connection_cfg) 100 | .connect() 101 | .await?; 102 | let node_2 = DatacakeNodeBuilder::::new(2, node_2_connection_cfg) 103 | .connect() 104 | .await?; 105 | let node_3 = DatacakeNodeBuilder::::new(3, node_3_connection_cfg) 106 | .connect() 107 | .await?; 108 | 109 | node_1 110 | .wait_for_nodes(&[2, 3], Duration::from_secs(30)) 111 | .await 112 | .expect("Nodes should connect within timeout."); 113 | node_2 114 | .wait_for_nodes(&[1, 3], Duration::from_secs(30)) 115 | .await 116 | .expect("Nodes should connect within timeout."); 117 | node_3 118 | .wait_for_nodes(&[2, 1], Duration::from_secs(30)) 119 | .await 120 | .expect("Nodes should connect within timeout."); 121 | 122 | let stats = node_1.statistics(); 123 | assert_eq!(stats.num_data_centers(), 1); 124 | assert_eq!(stats.num_live_members(), 3); 125 | assert_eq!(stats.num_dead_members(), 0); 126 | 127 | let stats = node_2.statistics(); 128 | assert_eq!(stats.num_data_centers(), 1); 129 | assert_eq!(stats.num_live_members(), 3); 130 | assert_eq!(stats.num_dead_members(), 0); 131 | 132 | let stats = node_3.statistics(); 133 | assert_eq!(stats.num_data_centers(), 1); 134 | assert_eq!(stats.num_live_members(), 3); 135 | assert_eq!(stats.num_dead_members(), 0); 136 | 137 | node_3.shutdown().await; 138 | 139 | // Let the cluster sort itself out. 140 | // It's a long time because the system tries to give the node time to become apart of the system again. 
141 | tokio::time::sleep(Duration::from_secs(90)).await; 142 | 143 | let stats = node_1.statistics(); 144 | assert_eq!(stats.num_data_centers(), 1); 145 | assert_eq!(stats.num_live_members(), 2); 146 | 147 | let stats = node_2.statistics(); 148 | assert_eq!(stats.num_data_centers(), 1); 149 | assert_eq!(stats.num_live_members(), 2); 150 | 151 | Ok(()) 152 | } 153 | -------------------------------------------------------------------------------- /datacake-rpc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake-rpc" 3 | version = "0.6.0" 4 | edition = "2021" 5 | description = "A zero-copy, actor-like RPC framework using rkyv." 6 | license = "MIT" 7 | keywords = ["tokio", "rpc", "zero-copy"] 8 | categories = ["concurrency", "asynchronous"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | http = "0.2.8" 16 | bytes = "1.3.0" 17 | anyhow = "1" 18 | async-trait = "0.1.60" 19 | thiserror = "1" 20 | parking_lot = "0.12.1" 21 | tracing = "0.1.37" 22 | crc32fast = "1.3.2" 23 | 24 | hyper = { version = "0.14.23", features = ["full"] } 25 | rkyv = { version = "0.7.42", features = ["strict"] } 26 | tokio = { version = "1", default-features = false, features = ["rt"] } 27 | 28 | # Used for simulation 29 | turmoil = { version = "0.4.0", optional = true } 30 | async-stream = { version = "0.3.3", optional = true } 31 | 32 | [dev-dependencies] 33 | tokio = { version = "1", features = ["full"] } 34 | test-helper = { path = "../test-helper" } 35 | rkyv = { version = "0.7.42", features = ["strict", "validation"] } 36 | 37 | [features] 38 | test-utils = [] 39 | 40 | # Enable turmoil simulation for testing.
41 | simulation = ["turmoil", "async-stream"] 42 | 43 | -------------------------------------------------------------------------------- /datacake-rpc/README.md: -------------------------------------------------------------------------------- 1 | # An actor-like RPC framework built for true zero-copy message handling. 2 | 3 | An actor-like RPC framework built for true zero-copy message handling. 4 | 5 | This framework is inspired by tonic but is not a GRPC framework. 6 | Instead, it makes use of the incredible rkyv (de)serialization framework which provides us 7 | with lightning fast (de)serialization and also lets us perform true zero-copy deserialization 8 | which can lead to massive performance improvements when processing lots of big messages at once. 9 | 10 | ### Features 11 | - Fast (de)serialization of owned types. 12 | - True zero-copy deserialization avoiding heavy allocations. 13 | - Dynamic adding and removing of message handlers/services. 14 | 15 | ### Basic example 16 | ```rust 17 | use std::net::SocketAddr; 18 | 19 | use datacake_rpc::{ 20 | Channel, 21 | Handler, 22 | Request, 23 | RpcClient, 24 | RpcService, 25 | Server, 26 | ServiceRegistry, 27 | Status, 28 | }; 29 | use rkyv::{Archive, Deserialize, Serialize}; 30 | 31 | // The framework accepts any messages which implement `Archive` and `Serialize` along 32 | // with the archived values implementing `CheckBytes` from the `bytecheck` crate. 33 | // This is to ensure safe, validated deserialization of the values. 34 | // 35 | // Checkout rkyv for more information! 36 | #[repr(C)] 37 | #[derive(Serialize, Deserialize, Archive, PartialEq, Debug)] 38 | #[archive(compare(PartialEq), check_bytes)] 39 | #[archive_attr(derive(PartialEq, Debug))] 40 | pub struct MyMessage { 41 | name: String, 42 | age: u32, 43 | } 44 | 45 | pub struct MyService; 46 | 47 | impl RpcService for MyService { 48 | // The `register_handlers` is used to mark messages as something 49 | // the given service can handle and process.
50 | // 51 | // Messages which are not registered will not be dispatched to the handler. 52 | fn register_handlers(registry: &mut ServiceRegistry) { 53 | registry.add_handler::(); 54 | } 55 | } 56 | 57 | #[datacake_rpc::async_trait] 58 | impl Handler for MyService { 59 | type Reply = String; 60 | 61 | // Our `Request` gives us a zero-copy view to our message, this doesn't actually 62 | // allocate the message type. 63 | async fn on_message(&self, msg: Request) -> Result { 64 | Ok(msg.to_owned().unwrap().name) 65 | } 66 | } 67 | 68 | #[tokio::main] 69 | async fn main() -> anyhow::Result<()> { 70 | let address = "127.0.0.1:8000".parse::()?; 71 | 72 | let server = Server::listen(address).await?; 73 | // Services can be added and removed at runtime once the server is started. 74 | server.add_service(MyService); 75 | println!("Listening to address {}!", address); 76 | 77 | // Channels are cheap to clone similar to tonic. 78 | let client = Channel::connect(address); 79 | println!("Connected to address {}!", address); 80 | 81 | let rpc_client = RpcClient::::new(client); 82 | 83 | let msg1 = MyMessage { 84 | name: "Bobby".to_string(), 85 | age: 12, 86 | }; 87 | 88 | // Clients only need references to the message which helps 89 | // reduce allocations. 90 | let resp = rpc_client.send(&msg1).await?; 91 | assert_eq!(resp, msg1.name); 92 | Ok(()) 93 | } 94 | ``` 95 | -------------------------------------------------------------------------------- /datacake-rpc/src/body.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Deref, DerefMut}; 2 | 3 | use rkyv::{Archive, Serialize}; 4 | 5 | use crate::rkyv_tooling::DatacakeSerializer; 6 | use crate::Status; 7 | 8 | /// A wrapper type around the internal [hyper::Body] 9 | pub struct Body(pub(crate) hyper::Body); 10 | 11 | impl Body { 12 | /// Creates a new body. 
13 | pub fn new(inner: hyper::Body) -> Self { 14 | Self(inner) 15 | } 16 | 17 | /// Consumes the body returning the inner hyper object. 18 | pub fn into_inner(self) -> hyper::Body { 19 | self.0 20 | } 21 | } 22 | 23 | impl From for Body 24 | where 25 | T: Into, 26 | { 27 | fn from(value: T) -> Self { 28 | Self(value.into()) 29 | } 30 | } 31 | 32 | impl Deref for Body { 33 | type Target = hyper::Body; 34 | 35 | fn deref(&self) -> &Self::Target { 36 | &self.0 37 | } 38 | } 39 | 40 | impl DerefMut for Body { 41 | fn deref_mut(&mut self) -> &mut Self::Target { 42 | &mut self.0 43 | } 44 | } 45 | 46 | /// The serializer trait converting replies into hyper bodies. 47 | /// 48 | /// This is a light abstraction to allow users to be able to 49 | /// stream data across the RPC system which may not fit in memory. 50 | /// 51 | /// Any types which implement [TryAsBody] will implement this type. 52 | pub trait TryIntoBody { 53 | /// Try convert the reply into a body or return an error 54 | /// status. 55 | fn try_into_body(self) -> Result; 56 | } 57 | 58 | /// The serializer trait for converting replies into hyper bodies 59 | /// using a reference to self. 60 | /// 61 | /// This will work for most implementations but if you want to stream 62 | /// hyper bodies for example, you cannot implement this trait. 63 | pub trait TryAsBody { 64 | /// Try convert the reply into a body or return an error 65 | /// status. 
66 | fn try_as_body(&self) -> Result; 67 | } 68 | 69 | impl TryAsBody for T 70 | where 71 | T: Archive + Serialize, 72 | { 73 | #[inline] 74 | fn try_as_body(&self) -> Result { 75 | crate::rkyv_tooling::to_view_bytes(self) 76 | .map(|v| Body::from(v.to_vec())) 77 | .map_err(|e| Status::internal(e.to_string())) 78 | } 79 | } 80 | 81 | impl TryIntoBody for T 82 | where 83 | T: TryAsBody, 84 | { 85 | #[inline] 86 | fn try_into_body(self) -> Result { 87 | ::try_as_body(&self) 88 | } 89 | } 90 | 91 | impl TryIntoBody for Body { 92 | #[inline] 93 | fn try_into_body(self) -> Result { 94 | Ok(self) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /datacake-rpc/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! An actor-like RPC framework built for true zero-copy message handling. 2 | //! 3 | //! This framework is inspired by tonic but is *not* a GRPC framework. Instead, 4 | //! it makes use of the incredible [rkyv] (de)serialization framework which provides 5 | //! us with lightning fast (de)serialization and also lets us perform true zero-copy 6 | //! deserialization which can lead to massive performance improvements when processing 7 | //! lots of big messages at once. 8 | //! 9 | //! ### Features 10 | //! - Fast (de)serialization of owned types. 11 | //! - True zero-copy deserialization avoiding heavy allocations. 12 | //! - Dynamic adding and removing of message handlers/services. 13 | //! 14 | //! ### Basic example 15 | //! ```rust 16 | //! use std::net::SocketAddr; 17 | //! 18 | //! use datacake_rpc::{ 19 | //! Channel, 20 | //! Handler, 21 | //! Request, 22 | //! RpcClient, 23 | //! RpcService, 24 | //! Server, 25 | //! ServiceRegistry, 26 | //! Status, 27 | //! }; 28 | //! use rkyv::{Archive, Deserialize, Serialize}; 29 | //! 30 | //! // The framework accepts any messages which implement `Archive` and `Serialize` along 31 | //! 
// with the archived values implementing `CheckBytes` from the `bytecheck` crate. 32 | //! // This is to ensure safe, validated deserialization of the values. 33 | //! // 34 | //! // Checkout rkyv for more information! 35 | //! #[repr(C)] 36 | //! #[derive(Serialize, Deserialize, Archive, PartialEq, Debug)] 37 | //! #[archive(compare(PartialEq), check_bytes)] 38 | //! #[archive_attr(derive(PartialEq, Debug))] 39 | //! pub struct MyMessage { 40 | //! name: String, 41 | //! age: u32, 42 | //! } 43 | //! 44 | //! pub struct MyService; 45 | //! 46 | //! impl RpcService for MyService { 47 | //! // The `register_handlers` is used to mark messages as something 48 | //! // the given service can handle and process. 49 | //! // 50 | //! // Messages which are not registered will not be dispatched to the handler. 51 | //! fn register_handlers(registry: &mut ServiceRegistry) { 52 | //! registry.add_handler::(); 53 | //! } 54 | //! } 55 | //! 56 | //! #[datacake_rpc::async_trait] 57 | //! impl Handler for MyService { 58 | //! type Reply = String; 59 | //! 60 | //! // Our `Request` gives us a zero-copy view to our message, this doesn't actually 61 | //! // allocate the message type. 62 | //! async fn on_message(&self, msg: Request) -> Result { 63 | //! Ok(msg.deserialize_view().unwrap().name) 64 | //! } 65 | //! } 66 | //! 67 | //! #[tokio::main] 68 | //! async fn main() -> anyhow::Result<()> { 69 | //! let address = "127.0.0.1:8000".parse::()?; 70 | //! 71 | //! let server = Server::listen(address).await?; 72 | //! // Services can be added and removed at runtime once the server is started. 73 | //! server.add_service(MyService); 74 | //! println!("Listening to address {}!", address); 75 | //! 76 | //! // Channels are cheap to clone similar to tonic. 77 | //! let client = Channel::connect(address); 78 | //! println!("Connected to address {}!", address); 79 | //! 80 | //! let rpc_client = RpcClient::::new(client); 81 | //! 82 | //! let msg1 = MyMessage { 83 | //! 
name: "Bobby".to_string(), 84 | //! age: 12, 85 | //! }; 86 | //! 87 | //! // Clients only need references to the message which helps 88 | //! // reduce allocations. 89 | //! let resp = rpc_client.send(&msg1).await?; 90 | //! assert_eq!(resp, msg1.name); 91 | //! Ok(()) 92 | //! } 93 | //! ``` 94 | 95 | #[macro_use] 96 | extern crate tracing; 97 | 98 | mod body; 99 | mod client; 100 | mod handler; 101 | mod net; 102 | mod request; 103 | mod rkyv_tooling; 104 | mod server; 105 | mod utils; 106 | 107 | use std::collections::hash_map::DefaultHasher; 108 | use std::hash::{Hash, Hasher}; 109 | 110 | /// A re-export of the async-trait macro. 111 | pub use async_trait::async_trait; 112 | pub use http; 113 | 114 | pub use self::body::{Body, TryAsBody, TryIntoBody}; 115 | pub use self::client::{MessageReply, RpcClient}; 116 | pub use self::handler::{Handler, RpcService, ServiceRegistry}; 117 | pub use self::net::{ 118 | ArchivedErrorCode, 119 | ArchivedStatus, 120 | Channel, 121 | Error, 122 | ErrorCode, 123 | Status, 124 | }; 125 | pub use self::request::{Request, RequestContents}; 126 | pub use self::rkyv_tooling::{to_view_bytes, DataView, InvalidView}; 127 | pub use self::server::Server; 128 | 129 | pub(crate) fn hash(v: &H) -> u64 { 130 | let mut hasher = DefaultHasher::new(); 131 | v.hash(&mut hasher); 132 | hasher.finish() 133 | } 134 | 135 | pub(crate) fn to_uri_path(service: &str, path: &str) -> String { 136 | format!("/{}/{}", sanitise(service), sanitise(path)) 137 | } 138 | 139 | fn sanitise(parameter: &str) -> String { 140 | parameter.replace(['<', '>'], "-") 141 | } 142 | -------------------------------------------------------------------------------- /datacake-rpc/src/net/client.rs: -------------------------------------------------------------------------------- 1 | use std::net::SocketAddr; 2 | 3 | use http::{HeaderMap, Method, Request, Response}; 4 | 5 | #[cfg(feature = "simulation")] 6 | use super::simulation::LazyClient; 7 | use crate::body::Body; 8 | use 
crate::net::Error; 9 | use crate::request::MessageMetadata; 10 | 11 | #[derive(Clone)] 12 | /// A raw client connection which can produce multiplexed streams. 13 | pub struct Channel { 14 | #[cfg(not(feature = "simulation"))] 15 | connection: hyper::Client, 16 | 17 | #[cfg(feature = "simulation")] 18 | connection: LazyClient, 19 | 20 | remote_addr: SocketAddr, 21 | } 22 | 23 | impl Channel { 24 | #[cfg(not(feature = "simulation"))] 25 | /// Connects to a remote RPC server. 26 | pub fn connect(remote_addr: SocketAddr) -> Self { 27 | let mut http = hyper::client::HttpConnector::new(); 28 | http.enforce_http(false); 29 | http.set_nodelay(true); 30 | http.set_connect_timeout(Some(std::time::Duration::from_secs(2))); 31 | 32 | let client = hyper::Client::builder() 33 | .http2_keep_alive_while_idle(true) 34 | .http2_only(true) 35 | .http2_adaptive_window(true) 36 | .build(http); 37 | 38 | Self { 39 | connection: client, 40 | remote_addr, 41 | } 42 | } 43 | 44 | #[cfg(feature = "simulation")] 45 | /// Connects to a remote RPC server with turmoil simulation enabled. 46 | pub fn connect(remote_addr: SocketAddr) -> Self { 47 | let client = LazyClient::connect(remote_addr); 48 | 49 | Self { 50 | connection: client, 51 | remote_addr, 52 | } 53 | } 54 | 55 | /// Sends a message payload the remote server and gets the response 56 | /// data back. 
57 | pub(crate) async fn send_parts( 58 | &self, 59 | metadata: MessageMetadata, 60 | headers: HeaderMap, 61 | body: Body, 62 | ) -> Result, Error> { 63 | let uri = format!("http://{}{}", self.remote_addr, metadata.to_uri_path(),); 64 | let mut request = Request::builder() 65 | .method(Method::POST) 66 | .uri(uri) 67 | .body(body.into_inner()) 68 | .unwrap(); 69 | 70 | (*request.headers_mut()) = headers; 71 | 72 | #[cfg(not(feature = "simulation"))] 73 | let resp = self.connection.request(request).await?; 74 | #[cfg(feature = "simulation")] 75 | let resp = { 76 | let conn = self.connection.get_or_init().await?; 77 | conn.lock().await.send_request(request).await? 78 | }; 79 | 80 | Ok(resp) 81 | } 82 | 83 | #[inline] 84 | /// The address of the remote connection. 85 | pub fn remote_addr(&self) -> SocketAddr { 86 | self.remote_addr 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /datacake-rpc/src/net/mod.rs: -------------------------------------------------------------------------------- 1 | mod client; 2 | mod server; 3 | mod status; 4 | 5 | #[cfg(feature = "simulation")] 6 | mod simulation; 7 | 8 | use std::io; 9 | 10 | pub use client::Channel; 11 | pub(crate) use server::start_rpc_server; 12 | pub use status::{ArchivedErrorCode, ArchivedStatus, ErrorCode, Status}; 13 | 14 | #[derive(Debug, thiserror::Error)] 15 | /// A failure in an RPC operation. 16 | pub enum Error { 17 | #[error("IO Error: {0}")] 18 | /// The system failed to complete operation due to an IO error. 19 | Io(#[from] io::Error), 20 | #[error("Hyper Error: {0}")] 21 | /// The operation failed due an error originating in hyper. 
22 | Hyper(#[from] hyper::Error), 23 | } 24 | -------------------------------------------------------------------------------- /datacake-rpc/src/net/server.rs: -------------------------------------------------------------------------------- 1 | use std::convert::Infallible; 2 | use std::io; 3 | use std::net::SocketAddr; 4 | use std::time::Duration; 5 | 6 | use http::{Request, Response, StatusCode}; 7 | use hyper::server::conn::Http; 8 | use hyper::service::service_fn; 9 | use rkyv::AlignedVec; 10 | use tokio::sync::oneshot; 11 | use tokio::task::JoinHandle; 12 | 13 | use crate::body::Body; 14 | use crate::server::ServerState; 15 | use crate::Status; 16 | 17 | /// Starts the RPC server. 18 | /// 19 | /// This takes a binding socket address and server state. 20 | pub(crate) async fn start_rpc_server( 21 | bind_addr: SocketAddr, 22 | state: ServerState, 23 | ) -> io::Result> { 24 | #[cfg(not(feature = "simulation"))] 25 | let listener = tokio::net::TcpListener::bind(bind_addr).await?; 26 | #[cfg(feature = "simulation")] 27 | let listener = turmoil::net::TcpListener::bind(bind_addr).await?; 28 | 29 | let (ready, waiter) = oneshot::channel(); 30 | let handle = tokio::spawn(async move { 31 | let _ = ready.send(()); 32 | 33 | loop { 34 | let (io, remote_addr) = match listener.accept().await { 35 | Ok(accepted) => accepted, 36 | Err(e) => { 37 | warn!(error = ?e, "Failed to accept client."); 38 | continue; 39 | }, 40 | }; 41 | 42 | let state = state.clone(); 43 | tokio::task::spawn(async move { 44 | let state = state.clone(); 45 | let handler = service_fn(move |req| { 46 | handle_connection(req, state.clone(), remote_addr) 47 | }); 48 | 49 | let connection = Http::new() 50 | .http2_only(true) 51 | .http2_adaptive_window(true) 52 | .http2_keep_alive_timeout(Duration::from_secs(10)) 53 | .serve_connection(io, handler); 54 | 55 | if let Err(e) = connection.await { 56 | error!(error = ?e, "Error while serving HTTP connection."); 57 | } 58 | }); 59 | } 60 | }); 61 | 62 | let _ 
= waiter.await; 63 | 64 | Ok(handle) 65 | } 66 | 67 | /// A single connection handler. 68 | /// 69 | /// This accepts new streams being created and spawns concurrent tasks to handle 70 | /// them. 71 | async fn handle_connection( 72 | req: Request, 73 | state: ServerState, 74 | remote_addr: SocketAddr, 75 | ) -> Result, Infallible> { 76 | match handle_message(req, state, remote_addr).await { 77 | Ok(r) => Ok(r), 78 | Err(e) => { 79 | let mut response = Response::new(e.to_string().into()); 80 | (*response.status_mut()) = StatusCode::INTERNAL_SERVER_ERROR; 81 | Ok(response) 82 | }, 83 | } 84 | } 85 | 86 | async fn handle_message( 87 | req: Request, 88 | state: ServerState, 89 | remote_addr: SocketAddr, 90 | ) -> anyhow::Result> { 91 | let reply = try_handle_request(req, state, remote_addr).await; 92 | 93 | match reply { 94 | Ok(body) => { 95 | let mut response = Response::new(body.into_inner()); 96 | (*response.status_mut()) = StatusCode::OK; 97 | Ok(response) 98 | }, 99 | Err(status) => Ok(create_bad_request(&status)), 100 | } 101 | } 102 | 103 | async fn try_handle_request( 104 | req: Request, 105 | state: ServerState, 106 | remote_addr: SocketAddr, 107 | ) -> Result { 108 | let (req, body) = req.into_parts(); 109 | let uri = req.uri.path(); 110 | let headers = req.headers; 111 | 112 | let handler = state 113 | .get_handler(uri) 114 | .ok_or_else(|| Status::unavailable(format!("Unknown service {uri}")))?; 115 | 116 | handler 117 | .try_handle(remote_addr, headers, Body::new(body)) 118 | .await 119 | } 120 | 121 | fn create_bad_request(status: &Status) -> Response { 122 | // This should be infallible. 
123 | let buffer = 124 | crate::rkyv_tooling::to_view_bytes(status).unwrap_or_else(|_| AlignedVec::new()); 125 | 126 | let mut response = Response::new(buffer.to_vec().into()); 127 | (*response.status_mut()) = StatusCode::BAD_REQUEST; 128 | 129 | response 130 | } 131 | -------------------------------------------------------------------------------- /datacake-rpc/src/net/simulation.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::ErrorKind; 3 | use std::net::SocketAddr; 4 | use std::sync::Arc; 5 | use std::time::Duration; 6 | 7 | use hyper::client::conn::SendRequest; 8 | use hyper::Body; 9 | use tokio::sync::{Mutex, OnceCell}; 10 | use tokio::time::timeout; 11 | 12 | use crate::net::Error; 13 | 14 | #[derive(Clone)] 15 | /// A client used for simulation testing via turmoil. 16 | /// 17 | /// This is not a production grade client and is only really meant for testing not 18 | /// performance. 19 | pub struct LazyClient { 20 | addr: SocketAddr, 21 | client: Arc>>>, 22 | } 23 | 24 | impl LazyClient { 25 | /// Creates a new lazy client. 26 | pub fn connect(socket: SocketAddr) -> Self { 27 | Self { 28 | addr: socket, 29 | client: Arc::new(OnceCell::new()), 30 | } 31 | } 32 | 33 | /// Ensures the connection is initialised and ready to handle events. 
34 | pub async fn get_or_init(&self) -> Result<&Mutex>, Error> { 35 | if let Some(existing) = self.client.get() { 36 | return Ok(existing); 37 | } 38 | 39 | let io = timeout( 40 | Duration::from_secs(2), 41 | turmoil::net::TcpStream::connect(self.addr), 42 | ) 43 | .await 44 | .map_err(|_| { 45 | Error::Io(io::Error::new( 46 | ErrorKind::TimedOut, 47 | "Failed to connect within deadline", 48 | )) 49 | })??; 50 | 51 | let (sender, connection) = hyper::client::conn::Builder::new() 52 | .http2_keep_alive_while_idle(true) 53 | .http2_only(true) 54 | .http2_adaptive_window(true) 55 | .handshake(io) 56 | .await?; 57 | 58 | tokio::spawn(async move { 59 | if let Err(e) = connection.await { 60 | error!(error = ?e, "Error in client connection"); 61 | } 62 | }); 63 | 64 | self.client.set(Mutex::new(sender)).unwrap(); 65 | Ok(self.client.get().unwrap()) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /datacake-rpc/src/net/status.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fmt::{Debug, Display, Formatter}; 3 | 4 | use rkyv::{Archive, Deserialize, Serialize}; 5 | 6 | #[repr(C)] 7 | #[derive(Serialize, Deserialize, Archive, PartialEq, Eq)] 8 | #[archive(compare(PartialEq))] 9 | #[archive_attr(derive(PartialEq, Eq, Debug))] 10 | /// Status information around the cause of a message request failing. 11 | /// 12 | /// This includes a generic status code and message. 13 | pub struct Status { 14 | /// The generic error code of the request. 15 | pub code: ErrorCode, 16 | /// The display message for the error. 17 | pub message: String, 18 | } 19 | 20 | impl Status { 21 | /// The server is running but the specified service does not exist 22 | /// or cannot handle messages at this time. 
23 | pub fn unavailable(msg: impl Display) -> Self { 24 | Self { 25 | code: ErrorCode::ServiceUnavailable, 26 | message: msg.to_string(), 27 | } 28 | } 29 | 30 | /// An internal error occurred while processing the message. 31 | pub fn internal(msg: impl Display) -> Self { 32 | Self { 33 | code: ErrorCode::InternalError, 34 | message: msg.to_string(), 35 | } 36 | } 37 | 38 | /// The provided message data is invalid or unable to be deserialized 39 | /// by the server processing it. 40 | pub fn invalid() -> Self { 41 | Self { 42 | code: ErrorCode::InvalidPayload, 43 | message: "Invalid message payload was provided to be deserialized." 44 | .to_string(), 45 | } 46 | } 47 | 48 | /// The connection is closed or interrupted during the operation. 49 | pub fn connection(msg: impl Display) -> Self { 50 | Self { 51 | code: ErrorCode::ConnectionError, 52 | message: msg.to_string(), 53 | } 54 | } 55 | 56 | /// The operation took too long to be completed and was aborted. 57 | pub fn timeout() -> Self { 58 | Self { 59 | code: ErrorCode::Timeout, 60 | message: "The operation took to long to be completed.".to_string(), 61 | } 62 | } 63 | } 64 | 65 | impl Display for Status { 66 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 67 | write!(f, "{:?}: {}", self.code, self.message) 68 | } 69 | } 70 | 71 | impl Debug for Status { 72 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 73 | f.debug_struct("Status") 74 | .field("code", &self.code) 75 | .field("message", &self.message) 76 | .finish() 77 | } 78 | } 79 | 80 | impl Error for Status {} 81 | 82 | #[repr(C)] 83 | #[derive(Serialize, Deserialize, Archive, PartialEq, Eq, Debug)] 84 | #[archive(compare(PartialEq))] 85 | #[archive_attr(derive(Debug, PartialEq, Eq))] 86 | /// A generic error code describing the high level reason why the request failed. 87 | pub enum ErrorCode { 88 | /// The server is running but the specified service does not exist 89 | /// or cannot handle messages at this time. 
90 | ServiceUnavailable, 91 | /// An internal error occurred while processing the message. 92 | InternalError, 93 | /// The provided message data is invalid or unable to be deserialized 94 | /// by the server processing it. 95 | InvalidPayload, 96 | /// The connection is closed or interrupted during the operation. 97 | ConnectionError, 98 | /// The operation took too long to be completed and was aborted. 99 | Timeout, 100 | } 101 | 102 | #[cfg(test)] 103 | mod tests { 104 | use super::*; 105 | 106 | fn test_status_variant(status: Status) { 107 | println!("Testing: {:?}", &status); 108 | let bytes = crate::rkyv_tooling::to_view_bytes(&status).expect("Serialize OK"); 109 | let view = 110 | crate::rkyv_tooling::DataView::::using(bytes).expect("Archive OK"); 111 | assert_eq!( 112 | view, status, 113 | "Archived value and original value should match" 114 | ); 115 | 116 | let copy: Status = view.deserialize_view().expect("Deserialize OK"); 117 | assert_eq!( 118 | copy, status, 119 | "Deserialized value and original value should match" 120 | ); 121 | } 122 | 123 | #[test] 124 | fn test_variants() { 125 | test_status_variant(Status::invalid()); 126 | test_status_variant(Status::connection("Test connection failed.")); 127 | test_status_variant(Status::unavailable("Test unavailable.")); 128 | test_status_variant(Status::internal("Test internal error.")); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /datacake-rpc/src/request.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Formatter}; 2 | use std::net::SocketAddr; 3 | use std::ops::Deref; 4 | 5 | use async_trait::async_trait; 6 | use http::HeaderMap; 7 | use rkyv::Archive; 8 | 9 | use crate::rkyv_tooling::DataView; 10 | use crate::{Body, Status}; 11 | 12 | #[async_trait] 13 | /// The deserializer trait for converting the request body into 14 | /// the desired type specified by [Self::Content]. 
15 | /// 16 | /// This trait is automatically implemented for the [Body] type 17 | /// and any type implementing [rkyv]'s (de)serializer traits. 18 | pub trait RequestContents { 19 | /// The deserialized message type. 20 | type Content: Send + Sized + 'static; 21 | 22 | async fn from_body(body: Body) -> Result; 23 | } 24 | 25 | #[async_trait] 26 | impl RequestContents for Body { 27 | type Content = Self; 28 | 29 | async fn from_body(body: Body) -> Result { 30 | Ok(body) 31 | } 32 | } 33 | 34 | #[async_trait] 35 | impl RequestContents for Msg 36 | where 37 | Msg: Archive + Send + Sync + 'static, 38 | Msg::Archived: Send + Sync + 'static, 39 | { 40 | type Content = DataView; 41 | 42 | async fn from_body(body: Body) -> Result { 43 | let bytes = crate::utils::to_aligned(body.0) 44 | .await 45 | .map_err(Status::internal)?; 46 | 47 | DataView::using(bytes).map_err(|_| Status::invalid()) 48 | } 49 | } 50 | 51 | #[derive(PartialEq)] 52 | #[cfg_attr(test, derive(Debug))] 53 | pub struct MessageMetadata { 54 | /// The name of the service being targeted. 55 | pub(crate) service_name: &'static str, 56 | /// The message name/path. 57 | pub(crate) path: &'static str, 58 | } 59 | 60 | impl MessageMetadata { 61 | #[inline] 62 | /// Produces a uri path for the metadata. 63 | pub(crate) fn to_uri_path(&self) -> String { 64 | crate::to_uri_path(self.service_name, self.path) 65 | } 66 | } 67 | 68 | /// A zero-copy view of the message data and any additional metadata provided 69 | /// by the RPC system. 70 | /// 71 | /// The request contains the original request buffer which is used to create 72 | /// the 'view' of the given message type. 73 | pub struct Request 74 | where 75 | Msg: RequestContents, 76 | { 77 | pub(crate) remote_addr: SocketAddr, 78 | pub(crate) headers: HeaderMap, 79 | 80 | // A small hack to stop linters miss-guiding users 81 | // into thinking their messages are `!Sized` when in fact they are. 82 | // We don't want to box in release mode however. 
83 | #[cfg(debug_assertions)] 84 | pub(crate) view: Box, 85 | #[cfg(not(debug_assertions))] 86 | pub(crate) view: Msg::Content, 87 | } 88 | 89 | impl Debug for Request 90 | where 91 | Msg: RequestContents, 92 | Msg::Content: Debug, 93 | { 94 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 95 | f.debug_struct("Request") 96 | .field("view", &self.view) 97 | .field("remote_addr", &self.remote_addr) 98 | .finish() 99 | } 100 | } 101 | 102 | impl Deref for Request 103 | where 104 | Msg: RequestContents, 105 | { 106 | type Target = Msg::Content; 107 | 108 | fn deref(&self) -> &Self::Target { 109 | &self.view 110 | } 111 | } 112 | 113 | impl Request 114 | where 115 | Msg: RequestContents, 116 | { 117 | pub(crate) fn new( 118 | remote_addr: SocketAddr, 119 | headers: HeaderMap, 120 | view: Msg::Content, 121 | ) -> Self { 122 | Self { 123 | remote_addr, 124 | headers, 125 | #[cfg(debug_assertions)] 126 | view: Box::new(view), 127 | #[cfg(not(debug_assertions))] 128 | view, 129 | } 130 | } 131 | 132 | #[cfg(debug_assertions)] 133 | #[inline] 134 | /// Consumes the request into the value of the message. 135 | pub fn into_inner(self) -> Msg::Content { 136 | *self.view 137 | } 138 | 139 | #[cfg(not(debug_assertions))] 140 | /// Consumes the request into the value of the message. 141 | pub fn into_inner(self) -> Msg::Content { 142 | self.view 143 | } 144 | 145 | #[cfg(debug_assertions)] 146 | #[inline] 147 | /// Consumes the request and returns the headers and message contents. 148 | pub fn into_parts(self) -> (HeaderMap, Msg::Content) { 149 | (self.headers, *self.view) 150 | } 151 | 152 | #[cfg(not(debug_assertions))] 153 | #[inline] 154 | /// Consumes the request and returns the headers and message contents. 155 | pub fn into_parts(self) -> (HeaderMap, Msg::Content) { 156 | (self.headers, self.view) 157 | } 158 | 159 | #[inline] 160 | /// The request headers. 
161 | pub fn headers(&self) -> &HeaderMap { 162 | &self.headers 163 | } 164 | 165 | #[inline] 166 | /// The remote address of the incoming message. 167 | pub fn remote_addr(&self) -> SocketAddr { 168 | self.remote_addr 169 | } 170 | } 171 | 172 | #[cfg(feature = "test-utils")] 173 | impl Request 174 | where 175 | Msg: RequestContents + rkyv::Serialize, 176 | { 177 | /// A test utility for creating a mocked request. 178 | /// 179 | /// This takes the owned value of the msg and acts like the target request. 180 | /// 181 | /// This should be used for testing only. 182 | pub async fn using_owned(msg: Msg) -> Self { 183 | let bytes = crate::rkyv_tooling::to_view_bytes(&msg).unwrap(); 184 | let contents = Msg::from_body(Body::from(bytes.to_vec())).await.unwrap(); 185 | 186 | use std::net::{Ipv4Addr, SocketAddrV4}; 187 | 188 | let addr = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::from([127, 0, 0, 1]), 80)); 189 | Self::new(addr, HeaderMap::new(), contents) 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /datacake-rpc/src/rkyv_tooling/mod.rs: -------------------------------------------------------------------------------- 1 | use rkyv::ser::serializers::{ 2 | AlignedSerializer, 3 | CompositeSerializer, 4 | SharedSerializeMap, 5 | }; 6 | use rkyv::ser::Serializer; 7 | use rkyv::{AlignedVec, Fallible, Serialize}; 8 | 9 | mod scratch; 10 | mod view; 11 | 12 | use self::scratch::LazyScratch; 13 | pub use self::view::{DataView, InvalidView}; 14 | 15 | pub(crate) type DatacakeSerializer = 16 | CompositeSerializer, LazyScratch, SharedSerializeMap>; 17 | 18 | #[inline] 19 | /// Produces an aligned buffer of the serialized data with a CRC32 checksum attached 20 | /// to the last 4 bytes of the buffer. 
21 | pub fn to_view_bytes( 22 | value: &T, 23 | ) -> Result::Error> 24 | where 25 | T: Serialize, 26 | { 27 | let mut serializer = DatacakeSerializer::new( 28 | AlignedSerializer::new(AlignedVec::with_capacity(512)), 29 | LazyScratch::default(), 30 | SharedSerializeMap::new(), 31 | ); 32 | 33 | serializer.serialize_value(value)?; 34 | 35 | let serializer = serializer.into_serializer(); 36 | let mut buffer = serializer.into_inner(); 37 | 38 | let checksum = crc32fast::hash(&buffer); 39 | buffer.extend_from_slice(&checksum.to_le_bytes()); 40 | 41 | Ok(buffer) 42 | } 43 | 44 | #[cfg(test)] 45 | mod tests { 46 | use std::collections::HashMap; 47 | 48 | use rkyv::{Archive, Serialize}; 49 | 50 | use super::*; 51 | 52 | #[derive(Archive, Serialize)] 53 | #[archive(check_bytes)] 54 | struct FixedSize { 55 | a: u32, 56 | b: f64, 57 | c: i64, 58 | buf: [u8; 12], 59 | } 60 | 61 | #[derive(Archive, Serialize)] 62 | #[archive(check_bytes)] 63 | struct AllocatedSize { 64 | a: u32, 65 | b: f64, 66 | c: HashMap, 67 | buf: Vec, 68 | } 69 | 70 | #[test] 71 | fn test_static_size_serialize() { 72 | let val = FixedSize { 73 | a: 123, 74 | b: 1.23, 75 | c: -123, 76 | buf: Default::default(), 77 | }; 78 | 79 | let buffer = to_view_bytes(&val).expect("Serialize struct"); 80 | 81 | let end = buffer.len(); 82 | 83 | rkyv::check_archived_root::(&buffer[..end - 4]) 84 | .expect("Get archived value"); 85 | 86 | let checksum_bytes = buffer[end - 4..].try_into().unwrap(); 87 | let expected_checksum = u32::from_le_bytes(checksum_bytes); 88 | let actual_checksum = crc32fast::hash(&buffer[..end - 4]); 89 | 90 | assert_eq!(expected_checksum, actual_checksum, "Checksums should match"); 91 | } 92 | 93 | #[test] 94 | fn test_heap_size_serialize() { 95 | let val = AllocatedSize { 96 | a: 123, 97 | b: 1.23, 98 | c: { 99 | let mut map = HashMap::new(); 100 | map.insert("hello".to_string(), 3); 101 | map 102 | }, 103 | buf: vec![4; 10], 104 | }; 105 | 106 | let buffer = 
to_view_bytes(&val).expect("Serialize struct"); 107 | 108 | let end = buffer.len(); 109 | 110 | rkyv::check_archived_root::(&buffer[..end - 4]) 111 | .expect("Get archived value"); 112 | 113 | let checksum_bytes = buffer[end - 4..].try_into().unwrap(); 114 | let expected_checksum = u32::from_le_bytes(checksum_bytes); 115 | let actual_checksum = crc32fast::hash(&buffer[..end - 4]); 116 | 117 | assert_eq!(expected_checksum, actual_checksum, "Checksums should match"); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /datacake-rpc/src/rkyv_tooling/scratch.rs: -------------------------------------------------------------------------------- 1 | use std::alloc::Layout; 2 | use std::ptr::NonNull; 3 | 4 | use rkyv::ser::serializers::{AllocScratch, BufferScratch, HeapScratch}; 5 | use rkyv::ser::ScratchSpace; 6 | use rkyv::{AlignedBytes, Fallible}; 7 | 8 | const STACK_SCRATCH_SIZE: usize = 1024; 9 | const HEAP_SCRATCH_SIZE: usize = 16 << 10; 10 | 11 | #[derive(Debug)] 12 | /// Allocates scratch space with a stack, fallback heap scratch, and then an alloc scratch. 13 | /// 14 | /// The fallback heap scratch is lazily allocated. 
15 | pub struct LazyScratch { 16 | stack_scratch: StackScratch, 17 | heap_scratch: Option>, 18 | alloc_scratch: AllocScratch, 19 | } 20 | 21 | impl Default for LazyScratch { 22 | fn default() -> Self { 23 | Self { 24 | stack_scratch: StackScratch::default(), 25 | heap_scratch: None, 26 | alloc_scratch: AllocScratch::default(), 27 | } 28 | } 29 | } 30 | 31 | impl Fallible for LazyScratch { 32 | type Error = ::Error; 33 | } 34 | 35 | impl ScratchSpace for LazyScratch { 36 | #[inline] 37 | unsafe fn push_scratch( 38 | &mut self, 39 | layout: Layout, 40 | ) -> Result, Self::Error> { 41 | if let Ok(buf) = self.stack_scratch.push_scratch(layout) { 42 | return Ok(buf); 43 | } 44 | 45 | let heap_scratch = self.heap_scratch.get_or_insert_with(HeapScratch::new); 46 | 47 | if let Ok(buf) = heap_scratch.push_scratch(layout) { 48 | return Ok(buf); 49 | } 50 | 51 | self.alloc_scratch.push_scratch(layout) 52 | } 53 | 54 | #[inline] 55 | unsafe fn pop_scratch( 56 | &mut self, 57 | ptr: NonNull, 58 | layout: Layout, 59 | ) -> Result<(), Self::Error> { 60 | if self.stack_scratch.pop_scratch(ptr, layout).is_ok() { 61 | return Ok(()); 62 | } 63 | 64 | let heap_scratch = self.heap_scratch.get_or_insert_with(HeapScratch::new); 65 | 66 | if heap_scratch.pop_scratch(ptr, layout).is_ok() { 67 | return Ok(()); 68 | } 69 | 70 | self.alloc_scratch.pop_scratch(ptr, layout) 71 | } 72 | } 73 | 74 | #[derive(Debug)] 75 | /// A stack allocated scratch space. 
76 | struct StackScratch { 77 | inner: BufferScratch>, 78 | } 79 | 80 | impl Default for StackScratch { 81 | #[inline] 82 | fn default() -> Self { 83 | Self { 84 | inner: BufferScratch::new(AlignedBytes::default()), 85 | } 86 | } 87 | } 88 | 89 | impl Fallible for StackScratch { 90 | type Error = > as Fallible>::Error; 91 | } 92 | 93 | impl ScratchSpace for StackScratch { 94 | #[inline] 95 | unsafe fn push_scratch( 96 | &mut self, 97 | layout: Layout, 98 | ) -> Result, Self::Error> { 99 | self.inner.push_scratch(layout) 100 | } 101 | 102 | #[inline] 103 | unsafe fn pop_scratch( 104 | &mut self, 105 | ptr: NonNull, 106 | layout: Layout, 107 | ) -> Result<(), Self::Error> { 108 | self.inner.pop_scratch(ptr, layout) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /datacake-rpc/src/rkyv_tooling/view.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Formatter}; 2 | use std::mem; 3 | use std::ops::Deref; 4 | 5 | use rkyv::de::deserializers::SharedDeserializeMap; 6 | use rkyv::{AlignedVec, Archive, Deserialize}; 7 | 8 | #[derive(Debug, thiserror::Error)] 9 | #[error("View cannot be made for type with provided data.")] 10 | /// The data provided is unable to be presented as the archived version 11 | /// of the view type. 12 | pub struct InvalidView; 13 | 14 | #[repr(C)] 15 | /// A block of data that can be accessed as if it is the archived value `T`. 16 | /// 17 | /// This allows for safe, true zero-copy deserialization avoiding unnecessary 18 | /// allocations if the situation does not require having an owned version of the value. 19 | pub struct DataView 20 | where 21 | T: Archive, 22 | T::Archived: 'static, 23 | { 24 | /// The view reference which lives as long as `data: D`. 25 | view: &'static rkyv::Archived, 26 | /// The owned buffer itself. 27 | /// 28 | /// This must live as long as the view derived from it. 
29 | data: AlignedVec, 30 | } 31 | 32 | impl DataView 33 | where 34 | T: Archive, 35 | T::Archived: 'static, 36 | { 37 | /// Creates a new view using a provided buffer. 38 | pub fn using(data: AlignedVec) -> Result { 39 | // SAFETY: 40 | // This is safe as we own the data and keep it apart 41 | // of the view itself. 42 | let extended_buf = 43 | unsafe { mem::transmute::<&[u8], &'static [u8]>(data.as_slice()) }; 44 | 45 | if extended_buf.len() < 4 { 46 | return Err(InvalidView); 47 | } 48 | 49 | let end = extended_buf.len(); 50 | let checksum_bytes = extended_buf[end - 4..] 51 | .try_into() 52 | .map_err(|_| InvalidView)?; 53 | let expected_checksum = u32::from_le_bytes(checksum_bytes); 54 | 55 | let data_bytes = &extended_buf[..end - 4]; 56 | let actual_checksum = crc32fast::hash(data_bytes); 57 | 58 | if expected_checksum != actual_checksum { 59 | return Err(InvalidView); 60 | } 61 | 62 | let view = unsafe { rkyv::archived_root::(data_bytes) }; 63 | 64 | Ok(Self { data, view }) 65 | } 66 | 67 | #[inline] 68 | /// Gets the bytes representation of the dataview. 69 | pub fn as_bytes(&self) -> &[u8] { 70 | &self.data 71 | } 72 | 73 | #[inline] 74 | /// Consumes the bytes representation of the dataview. 75 | pub fn into_data(self) -> AlignedVec { 76 | self.data 77 | } 78 | } 79 | 80 | impl DataView 81 | where 82 | T: Archive, 83 | T::Archived: Deserialize + 'static, 84 | { 85 | #[inline] 86 | /// Deserializes the view into it's owned value T. 
87 | pub fn deserialize_view(&self) -> Result { 88 | self.view 89 | .deserialize(&mut SharedDeserializeMap::default()) 90 | .map_err(|_| InvalidView) 91 | } 92 | } 93 | 94 | impl Clone for DataView 95 | where 96 | T: Archive, 97 | T::Archived: Debug + 'static, 98 | { 99 | fn clone(&self) -> Self { 100 | Self::using(self.data.clone()).expect("BUG: Valid data has become invalid?") 101 | } 102 | } 103 | 104 | impl Debug for DataView 105 | where 106 | T: Archive, 107 | T::Archived: Debug, 108 | { 109 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 110 | self.view.fmt(f) 111 | } 112 | } 113 | 114 | impl Deref for DataView 115 | where 116 | T: Archive, 117 | { 118 | type Target = T::Archived; 119 | 120 | fn deref(&self) -> &Self::Target { 121 | self.view 122 | } 123 | } 124 | 125 | impl PartialEq for DataView 126 | where 127 | T: Archive, 128 | T::Archived: PartialEq, 129 | { 130 | fn eq(&self, other: &Self) -> bool { 131 | self.view == other.view 132 | } 133 | } 134 | 135 | impl PartialEq for DataView 136 | where 137 | T: Archive, 138 | T::Archived: PartialEq, 139 | { 140 | fn eq(&self, other: &T) -> bool { 141 | self.view == other 142 | } 143 | } 144 | 145 | #[cfg(test)] 146 | mod tests { 147 | use rkyv::{Archive, Deserialize, Serialize}; 148 | 149 | use super::*; 150 | 151 | #[repr(C)] 152 | #[derive(Serialize, Deserialize, Archive, PartialEq, Eq, Debug)] 153 | #[archive(compare(PartialEq), check_bytes)] 154 | #[archive_attr(derive(Debug, PartialEq, Eq))] 155 | struct Demo { 156 | a: String, 157 | b: u64, 158 | } 159 | 160 | #[test] 161 | fn test_view() { 162 | let demo = Demo { 163 | a: "Jello".to_string(), 164 | b: 133, 165 | }; 166 | 167 | let bytes = crate::rkyv_tooling::to_view_bytes(&demo).unwrap(); 168 | let view: DataView = DataView::using(bytes).unwrap(); 169 | assert!(view == demo, "Original and view must match."); 170 | } 171 | 172 | #[test] 173 | fn test_view_missing_checksum() { 174 | let demo = Demo { 175 | a: "Jello".to_string(), 176 | b: 
133, 177 | }; 178 | 179 | let bytes = rkyv::to_bytes::<_, 1024>(&demo).unwrap(); 180 | DataView::::using(bytes).expect_err("System should return invalid view."); 181 | } 182 | 183 | #[test] 184 | fn test_invalid_view() { 185 | let mut data = AlignedVec::new(); 186 | data.extend_from_slice(b"Hello, world!"); 187 | let res = DataView::::using(data); 188 | assert!(res.is_err(), "View should be rejected"); 189 | } 190 | 191 | #[test] 192 | fn test_deserialize() { 193 | let demo = Demo { 194 | a: "Jello".to_string(), 195 | b: 133, 196 | }; 197 | 198 | let bytes = crate::rkyv_tooling::to_view_bytes(&demo).unwrap(); 199 | let view: DataView = DataView::using(bytes).unwrap(); 200 | assert!(view == demo, "Original and view must match."); 201 | 202 | let value = view.deserialize_view().unwrap(); 203 | assert_eq!(value, demo, "Deserialized and original value should match.") 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /datacake-rpc/src/server.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeMap, BTreeSet}; 2 | use std::io; 3 | use std::net::SocketAddr; 4 | use std::sync::Arc; 5 | 6 | use parking_lot::{Mutex, RwLock}; 7 | use tokio::task::JoinHandle; 8 | 9 | use crate::handler::{HandlerKey, OpaqueMessageHandler, RpcService, ServiceRegistry}; 10 | 11 | /// A RPC server instance. 12 | /// 13 | /// This is used for listening for inbound connections and handling any RPC messages 14 | /// coming from clients. 
15 | /// 16 | /// ```rust 17 | /// use rkyv::{Archive, Deserialize, Serialize}; 18 | /// use datacake_rpc::{Server, Handler, Request, RpcService, ServiceRegistry, Status}; 19 | /// use std::net::SocketAddr; 20 | /// 21 | /// #[repr(C)] 22 | /// #[derive(Serialize, Deserialize, Archive, PartialEq, Debug)] 23 | /// #[archive(compare(PartialEq), check_bytes)] 24 | /// #[archive_attr(derive(PartialEq, Debug))] 25 | /// pub struct MyMessage { 26 | /// name: String, 27 | /// age: u32, 28 | /// } 29 | /// 30 | /// pub struct EchoService; 31 | /// 32 | /// impl RpcService for EchoService { 33 | /// fn register_handlers(registry: &mut ServiceRegistry) { 34 | /// registry.add_handler::(); 35 | /// } 36 | /// } 37 | /// 38 | /// #[datacake_rpc::async_trait] 39 | /// impl Handler for EchoService { 40 | /// type Reply = MyMessage; 41 | /// 42 | /// async fn on_message(&self, msg: Request) -> Result { 43 | /// Ok(msg.deserialize_view().unwrap()) 44 | /// } 45 | /// } 46 | /// 47 | /// # #[tokio::main] 48 | /// # async fn main() -> anyhow::Result<()> { 49 | /// let bind = "127.0.0.1:8000".parse::()?; 50 | /// // Start the RPC server listening on our bind address. 51 | /// let server = Server::listen(bind).await?; 52 | /// 53 | /// // Once our server is running we can add or remove services. 54 | /// // Once a service is added it is able to begin handling RPC messages. 55 | /// server.add_service(EchoService); 56 | /// 57 | /// // Once a service is removed the server will reject messages for the 58 | /// // service that is no longer registered, 59 | /// server.remove_service(EchoService::service_name()); 60 | /// 61 | /// // We can add wait() here if we want to listen for messages forever. 62 | /// // server.wait().await; 63 | /// # Ok(()) 64 | /// # } 65 | /// ``` 66 | pub struct Server { 67 | state: ServerState, 68 | handle: JoinHandle<()>, 69 | } 70 | 71 | impl Server { 72 | /// Spawns the RPC server task and returns the server handle. 
73 | pub async fn listen(addr: SocketAddr) -> io::Result { 74 | let state = ServerState::default(); 75 | let handle = crate::net::start_rpc_server(addr, state.clone()).await?; 76 | 77 | Ok(Self { state, handle }) 78 | } 79 | 80 | /// Adds a new service to the live RPC server. 81 | pub fn add_service(&self, service: Svc) 82 | where 83 | Svc: RpcService + Send + Sync + 'static, 84 | { 85 | let mut registry = ServiceRegistry::new(service); 86 | Svc::register_handlers(&mut registry); 87 | let handlers = registry.into_handlers(); 88 | self.state.add_handlers(Svc::service_name(), handlers); 89 | } 90 | 91 | /// Removes all handlers linked with the given service name. 92 | pub fn remove_service(&self, service_name: &str) { 93 | self.state.remove_handlers(service_name); 94 | } 95 | 96 | /// Signals the server to shutdown. 97 | pub fn shutdown(self) { 98 | self.handle.abort(); 99 | } 100 | 101 | /// Waits until the server exits. 102 | /// 103 | /// This typically is just a future that pends forever as the server 104 | /// will not exit unless an external force triggers it. 105 | pub async fn wait(self) { 106 | self.handle.await.expect("Wait for server handle."); 107 | } 108 | } 109 | 110 | #[derive(Clone, Default)] 111 | /// Represents the shared state of the RPC server. 112 | pub(crate) struct ServerState { 113 | services: Arc>>>, 114 | handlers: Arc>>>, 115 | } 116 | 117 | impl ServerState { 118 | /// Adds a new set of handlers to the server state. 119 | /// 120 | /// Handlers newly added will then be able to handle messages received by 121 | /// the already running RPC system. 
122 | pub(crate) fn add_handlers( 123 | &self, 124 | service_name: &str, 125 | handlers: BTreeMap>, 126 | ) { 127 | { 128 | let mut lock = self.services.lock(); 129 | for key in handlers.keys() { 130 | lock.entry(service_name.to_string()) 131 | .or_default() 132 | .insert(*key); 133 | } 134 | } 135 | 136 | let mut lock = self.handlers.write(); 137 | lock.extend(handlers); 138 | } 139 | 140 | /// Removes a new set of handlers from the server state. 141 | pub(crate) fn remove_handlers(&self, service: &str) { 142 | let uris = { 143 | match self.services.lock().remove(service) { 144 | None => return, 145 | Some(uris) => uris, 146 | } 147 | }; 148 | 149 | let mut lock = self.handlers.write(); 150 | lock.retain(|key, _| uris.contains(key)); 151 | } 152 | 153 | /// Attempts to get the message handler for a specific service and message. 154 | pub(crate) fn get_handler( 155 | &self, 156 | uri: &str, 157 | ) -> Option> { 158 | let lock = self.handlers.read(); 159 | lock.get(&crate::hash(uri)).cloned() 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /datacake-rpc/src/utils.rs: -------------------------------------------------------------------------------- 1 | use bytes::Buf; 2 | use hyper::body::HttpBody; 3 | use hyper::Body; 4 | use rkyv::AlignedVec; 5 | 6 | pub async fn to_aligned( 7 | mut body: Body, 8 | ) -> Result::Error> { 9 | // If there's only 1 chunk, we can just return Buf::to_bytes() 10 | let first = if let Some(buf) = body.data().await { 11 | buf? 12 | } else { 13 | return Ok(AlignedVec::new()); 14 | }; 15 | 16 | let second = if let Some(buf) = body.data().await { 17 | buf? 18 | } else { 19 | let mut vec = AlignedVec::with_capacity(first.len()); 20 | vec.extend_from_slice(&first); 21 | return Ok(vec); 22 | }; 23 | 24 | // With more than 1 buf, we gotta flatten into a Vec first. 
25 | let cap = first.remaining() + second.remaining() + body.size_hint().lower() as usize; 26 | let mut vec = AlignedVec::with_capacity(cap); 27 | vec.extend_from_slice(&first); 28 | vec.extend_from_slice(&second); 29 | 30 | while let Some(buf) = body.data().await { 31 | vec.extend_from_slice(&buf?); 32 | } 33 | 34 | Ok(vec) 35 | } 36 | -------------------------------------------------------------------------------- /datacake-rpc/tests/basic.rs: -------------------------------------------------------------------------------- 1 | use datacake_rpc::{ 2 | Channel, 3 | Handler, 4 | Request, 5 | RpcClient, 6 | RpcService, 7 | Server, 8 | ServiceRegistry, 9 | Status, 10 | }; 11 | use rkyv::{Archive, Deserialize, Serialize}; 12 | 13 | #[repr(C)] 14 | #[derive(Serialize, Deserialize, Archive, Debug)] 15 | #[archive(check_bytes)] 16 | #[archive_attr(derive(Debug))] 17 | pub struct MyMessage { 18 | name: String, 19 | age: u32, 20 | buffer: Vec, 21 | } 22 | 23 | pub struct MyService; 24 | 25 | impl RpcService for MyService { 26 | fn register_handlers(registry: &mut ServiceRegistry) { 27 | registry.add_handler::(); 28 | } 29 | } 30 | 31 | #[datacake_rpc::async_trait] 32 | impl Handler for MyService { 33 | type Reply = String; 34 | 35 | async fn on_message(&self, msg: Request) -> Result { 36 | Ok(msg.deserialize_view().unwrap().name) 37 | } 38 | } 39 | 40 | #[tokio::test] 41 | async fn test_basic() { 42 | let addr = test_helper::get_unused_addr(); 43 | 44 | let server = Server::listen(addr).await.unwrap(); 45 | server.add_service(MyService); 46 | println!("Listening to address {}!", addr); 47 | 48 | let client = Channel::connect(addr); 49 | println!("Connected to address {}!", addr); 50 | 51 | let rpc_client = RpcClient::::new(client); 52 | 53 | let msg1 = MyMessage { 54 | name: "Bobby".to_string(), 55 | age: 12, 56 | buffer: vec![0u8; 32 << 10], 57 | }; 58 | 59 | let resp = rpc_client.send(&msg1).await.unwrap(); 60 | assert_eq!(resp, msg1.name); 61 | 62 | server.shutdown(); 63 
| } 64 | -------------------------------------------------------------------------------- /datacake-rpc/tests/many_messages.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use datacake_rpc::{ 4 | Channel, 5 | Handler, 6 | Request, 7 | RpcClient, 8 | RpcService, 9 | Server, 10 | ServiceRegistry, 11 | Status, 12 | }; 13 | use parking_lot::Mutex; 14 | use rkyv::{Archive, Deserialize, Serialize}; 15 | 16 | #[repr(C)] 17 | #[derive(Serialize, Deserialize, Archive, Debug)] 18 | #[archive(check_bytes)] 19 | #[archive_attr(derive(Debug))] 20 | pub struct IncCounter { 21 | name: String, 22 | value: u64, 23 | } 24 | 25 | #[repr(C)] 26 | #[derive(Serialize, Deserialize, Archive, Debug)] 27 | #[archive(check_bytes)] 28 | #[archive_attr(derive(Debug))] 29 | pub struct DecCounter { 30 | name: String, 31 | value: u64, 32 | } 33 | 34 | #[derive(Default)] 35 | pub struct CountingService { 36 | counters: Mutex>, 37 | } 38 | 39 | impl RpcService for CountingService { 40 | fn register_handlers(registry: &mut ServiceRegistry) { 41 | registry.add_handler::(); 42 | registry.add_handler::(); 43 | } 44 | } 45 | 46 | #[datacake_rpc::async_trait] 47 | impl Handler for CountingService { 48 | type Reply = u64; 49 | 50 | async fn on_message(&self, msg: Request) -> Result { 51 | let counter = msg.deserialize_view().expect("Get owned value."); 52 | 53 | let mut lock = self.counters.lock(); 54 | let value = lock.entry(counter.name).or_default(); 55 | (*value) += counter.value; 56 | 57 | Ok(*value) 58 | } 59 | } 60 | 61 | #[datacake_rpc::async_trait] 62 | impl Handler for CountingService { 63 | type Reply = u64; 64 | 65 | async fn on_message(&self, msg: Request) -> Result { 66 | let counter = msg.deserialize_view().expect("Get owned value."); 67 | 68 | let mut lock = self.counters.lock(); 69 | let value = lock.entry(counter.name).or_default(); 70 | (*value) -= counter.value; 71 | 72 | Ok(*value) 73 | } 74 | } 75 | 76 | 
#[tokio::test] 77 | async fn test_multiple_msgs() { 78 | let addr = test_helper::get_unused_addr(); 79 | 80 | let server = Server::listen(addr).await.unwrap(); 81 | server.add_service(CountingService::default()); 82 | println!("Listening to address {}!", addr); 83 | 84 | let client = Channel::connect(addr); 85 | println!("Connected to address {}!", addr); 86 | 87 | let rpc_client = RpcClient::::new(client); 88 | 89 | let msg = IncCounter { 90 | name: "Bobby".to_string(), 91 | value: 5, 92 | }; 93 | 94 | let resp = rpc_client.send(&msg).await.unwrap(); 95 | assert_eq!(resp, 5); 96 | 97 | let msg = DecCounter { 98 | name: "Bobby".to_string(), 99 | value: 3, 100 | }; 101 | let resp = rpc_client.send(&msg).await.unwrap(); 102 | assert_eq!(resp, 2); 103 | 104 | server.shutdown(); 105 | } 106 | -------------------------------------------------------------------------------- /datacake-rpc/tests/many_services.rs: -------------------------------------------------------------------------------- 1 | use datacake_rpc::{ 2 | Channel, 3 | Handler, 4 | Request, 5 | RpcClient, 6 | RpcService, 7 | Server, 8 | ServiceRegistry, 9 | Status, 10 | }; 11 | use rkyv::{Archive, Deserialize, Serialize}; 12 | 13 | #[repr(C)] 14 | #[derive(Serialize, Deserialize, Archive, Debug)] 15 | #[archive(check_bytes)] 16 | #[archive_attr(derive(Debug))] 17 | pub struct Payload { 18 | value: u64, 19 | } 20 | 21 | pub struct Add5Service; 22 | 23 | impl RpcService for Add5Service { 24 | fn register_handlers(registry: &mut ServiceRegistry) { 25 | registry.add_handler::(); 26 | } 27 | } 28 | 29 | #[datacake_rpc::async_trait] 30 | impl Handler for Add5Service { 31 | type Reply = u64; 32 | 33 | async fn on_message(&self, msg: Request) -> Result { 34 | dbg!(&msg); 35 | let counter = msg.deserialize_view().expect("Get owned value."); 36 | Ok(counter.value.saturating_add(5)) 37 | } 38 | } 39 | 40 | pub struct Sub5Service; 41 | 42 | impl RpcService for Sub5Service { 43 | fn register_handlers(registry: &mut 
ServiceRegistry) { 44 | registry.add_handler::(); 45 | } 46 | } 47 | 48 | #[datacake_rpc::async_trait] 49 | impl Handler for Sub5Service { 50 | type Reply = u64; 51 | 52 | async fn on_message(&self, msg: Request) -> Result { 53 | dbg!(&msg); 54 | let counter = msg.deserialize_view().expect("Get owned value."); 55 | Ok(counter.value.saturating_sub(5)) 56 | } 57 | } 58 | 59 | #[tokio::test] 60 | async fn test_multiple_services() { 61 | let addr = test_helper::get_unused_addr(); 62 | 63 | let server = Server::listen(addr).await.unwrap(); 64 | server.add_service(Add5Service); 65 | server.add_service(Sub5Service); 66 | println!("Listening to address {}!", addr); 67 | 68 | let client = Channel::connect(addr); 69 | println!("Connected to address {}!", addr); 70 | 71 | let msg = Payload { value: 5 }; 72 | 73 | let add_client = RpcClient::::new(client.clone()); 74 | let subtract_client = RpcClient::::new(client); 75 | 76 | let resp = add_client.send(&msg).await.unwrap(); 77 | assert_eq!(resp, 10); 78 | 79 | let resp = subtract_client.send(&msg).await.unwrap(); 80 | assert_eq!(resp, 0); 81 | 82 | let subtract_client = add_client.new_client::(); 83 | let resp = subtract_client.send(&msg).await.unwrap(); 84 | assert_eq!(resp, 0); 85 | 86 | server.shutdown(); 87 | } 88 | -------------------------------------------------------------------------------- /datacake-rpc/tests/passing_headers.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use datacake_rpc::{ 4 | Channel, 5 | Handler, 6 | Request, 7 | RpcClient, 8 | RpcService, 9 | Server, 10 | ServiceRegistry, 11 | Status, 12 | }; 13 | use http::HeaderValue; 14 | use rkyv::{Archive, Deserialize, Serialize}; 15 | 16 | #[derive(Serialize, Deserialize, Archive, Debug)] 17 | #[archive_attr(derive(Debug))] 18 | pub struct SingleHeader; 19 | 20 | #[derive(Serialize, Deserialize, Archive, Debug)] 21 | #[archive_attr(derive(Debug))] 22 | pub struct ManyHeaders; 
23 | 24 | pub struct MyService; 25 | 26 | impl RpcService for MyService { 27 | fn register_handlers(registry: &mut ServiceRegistry) { 28 | registry.add_handler::(); 29 | registry.add_handler::(); 30 | } 31 | } 32 | 33 | #[datacake_rpc::async_trait] 34 | impl Handler for MyService { 35 | type Reply = Option; 36 | 37 | async fn on_message( 38 | &self, 39 | msg: Request, 40 | ) -> Result { 41 | let header = msg 42 | .headers() 43 | .get("hello") 44 | .map(|h| h.to_str().unwrap().to_string()); 45 | 46 | Ok(header) 47 | } 48 | } 49 | 50 | #[datacake_rpc::async_trait] 51 | impl Handler for MyService { 52 | type Reply = BTreeMap; 53 | 54 | async fn on_message( 55 | &self, 56 | msg: Request, 57 | ) -> Result { 58 | let mut headers = BTreeMap::new(); 59 | 60 | for (key, value) in msg.headers() { 61 | let value = value.to_str().unwrap().to_string(); 62 | 63 | headers.insert(key.to_string(), value); 64 | } 65 | 66 | Ok(headers) 67 | } 68 | } 69 | 70 | #[tokio::test] 71 | async fn test_sending_headers() { 72 | let addr = test_helper::get_unused_addr(); 73 | 74 | let server = Server::listen(addr).await.unwrap(); 75 | server.add_service(MyService); 76 | println!("Listening to address {}!", addr); 77 | 78 | let client = Channel::connect(addr); 79 | println!("Connected to address {}!", addr); 80 | 81 | let rpc_client = RpcClient::::new(client); 82 | 83 | let response = rpc_client 84 | .create_rpc_context() 85 | .set_header("hello", HeaderValue::from_static("world")) 86 | .send(&SingleHeader) 87 | .await 88 | .expect("Send RPC message"); 89 | 90 | assert_eq!( 91 | response, 92 | Some("world".to_string()), 93 | "Header should be received" 94 | ); 95 | 96 | server.shutdown(); 97 | } 98 | 99 | #[tokio::test] 100 | async fn test_sending_many_headers() { 101 | let addr = test_helper::get_unused_addr(); 102 | 103 | let server = Server::listen(addr).await.unwrap(); 104 | server.add_service(MyService); 105 | println!("Listening to address {}!", addr); 106 | 107 | let client = 
Channel::connect(addr); 108 | println!("Connected to address {}!", addr); 109 | 110 | let rpc_client = RpcClient::::new(client); 111 | 112 | let response = rpc_client 113 | .create_rpc_context() 114 | .set_headers([ 115 | ("name", HeaderValue::from_static("bobby")), 116 | ("trace-id", HeaderValue::from_static("1234")), 117 | ("tag", HeaderValue::from_static("food")), 118 | ]) 119 | .send(&ManyHeaders) 120 | .await 121 | .expect("Send RPC message"); 122 | 123 | let mut headers = BTreeMap::new(); 124 | headers.insert("name".to_string(), "bobby".to_string()); 125 | headers.insert("trace-id".to_string(), "1234".to_string()); 126 | headers.insert("tag".to_string(), "food".to_string()); 127 | headers.insert("content-length".to_string(), "4".to_string()); 128 | 129 | assert_eq!(response, headers, "Headers should be received",); 130 | 131 | server.shutdown(); 132 | } 133 | -------------------------------------------------------------------------------- /datacake-rpc/tests/service_error.rs: -------------------------------------------------------------------------------- 1 | use datacake_rpc::{ 2 | Channel, 3 | Handler, 4 | Request, 5 | RpcClient, 6 | RpcService, 7 | Server, 8 | ServiceRegistry, 9 | Status, 10 | }; 11 | use rkyv::{Archive, Deserialize, Serialize}; 12 | 13 | #[repr(C)] 14 | #[derive(Serialize, Deserialize, Archive, Debug)] 15 | #[archive(check_bytes)] 16 | #[archive_attr(derive(Debug))] 17 | pub struct MyMessage { 18 | name: String, 19 | age: u32, 20 | buffer: Vec, 21 | } 22 | 23 | pub struct MyService; 24 | 25 | impl RpcService for MyService { 26 | fn register_handlers(registry: &mut ServiceRegistry) { 27 | registry.add_handler::(); 28 | } 29 | } 30 | 31 | #[datacake_rpc::async_trait] 32 | impl Handler for MyService { 33 | type Reply = String; 34 | 35 | async fn on_message(&self, _msg: Request) -> Result { 36 | Err(Status::internal("Oops! 
Something went wrong!")) 37 | } 38 | } 39 | 40 | #[tokio::test] 41 | async fn test_service_error() { 42 | let addr = test_helper::get_unused_addr(); 43 | 44 | let server = Server::listen(addr).await.unwrap(); 45 | server.add_service(MyService); 46 | println!("Listening to address {}!", addr); 47 | 48 | let client = Channel::connect(addr); 49 | println!("Connected to address {}!", addr); 50 | 51 | let rpc_client = RpcClient::::new(client); 52 | 53 | let msg1 = MyMessage { 54 | name: "Bobby".to_string(), 55 | age: 12, 56 | buffer: vec![0u8; 32 << 10], 57 | }; 58 | 59 | let resp = rpc_client.send(&msg1).await; 60 | assert_eq!( 61 | resp, 62 | Err(Status::internal("Oops! Something went wrong!")), 63 | "Results should match." 64 | ); 65 | 66 | server.shutdown(); 67 | } 68 | -------------------------------------------------------------------------------- /datacake-rpc/tests/stream.rs: -------------------------------------------------------------------------------- 1 | use datacake_rpc::{ 2 | Body, 3 | Channel, 4 | Handler, 5 | Request, 6 | RpcClient, 7 | RpcService, 8 | Server, 9 | ServiceRegistry, 10 | Status, 11 | }; 12 | use rkyv::{Archive, Deserialize, Serialize}; 13 | 14 | #[repr(C)] 15 | #[derive(Serialize, Deserialize, Archive, Debug)] 16 | #[archive(check_bytes)] 17 | #[archive_attr(derive(Debug))] 18 | pub struct MyMessage { 19 | name: String, 20 | age: u32, 21 | buffer: Vec, 22 | } 23 | 24 | pub struct MyService; 25 | 26 | impl RpcService for MyService { 27 | fn register_handlers(registry: &mut ServiceRegistry) { 28 | registry.add_handler::(); 29 | } 30 | } 31 | 32 | #[datacake_rpc::async_trait] 33 | impl Handler for MyService { 34 | type Reply = Body; 35 | 36 | async fn on_message(&self, msg: Request) -> Result { 37 | let bytes = msg.buffer.as_ref().to_vec(); 38 | Ok(Body::from(bytes)) 39 | } 40 | } 41 | 42 | #[tokio::test] 43 | async fn test_stream_body() { 44 | let addr = test_helper::get_unused_addr(); 45 | 46 | let server = 
Server::listen(addr).await.unwrap(); 47 | server.add_service(MyService); 48 | println!("Listening to address {}!", addr); 49 | 50 | let client = Channel::connect(addr); 51 | println!("Connected to address {}!", addr); 52 | 53 | let rpc_client = RpcClient::::new(client); 54 | 55 | let msg1 = MyMessage { 56 | name: "Bobby".to_string(), 57 | age: 12, 58 | buffer: vec![0u8; 32 << 10], 59 | }; 60 | 61 | let resp = rpc_client.send(&msg1).await.unwrap(); 62 | let body = hyper::body::to_bytes(resp.into_inner()).await.unwrap(); 63 | assert_eq!(msg1.buffer, body.as_ref()); 64 | 65 | server.shutdown(); 66 | } 67 | -------------------------------------------------------------------------------- /datacake-rpc/tests/unknown_service.rs: -------------------------------------------------------------------------------- 1 | use datacake_rpc::{ 2 | Channel, 3 | Handler, 4 | Request, 5 | RpcClient, 6 | RpcService, 7 | Server, 8 | ServiceRegistry, 9 | Status, 10 | }; 11 | use rkyv::{Archive, Deserialize, Serialize}; 12 | 13 | #[repr(C)] 14 | #[derive(Serialize, Deserialize, Archive, Debug)] 15 | #[archive(check_bytes)] 16 | #[archive_attr(derive(Debug))] 17 | pub struct Payload { 18 | value: u64, 19 | } 20 | 21 | pub struct Add5Service; 22 | 23 | impl RpcService for Add5Service { 24 | fn register_handlers(registry: &mut ServiceRegistry) { 25 | registry.add_handler::(); 26 | } 27 | } 28 | 29 | #[datacake_rpc::async_trait] 30 | impl Handler for Add5Service { 31 | type Reply = u64; 32 | 33 | async fn on_message(&self, msg: Request) -> Result { 34 | dbg!(&msg); 35 | let counter = msg.deserialize_view().expect("Get owned value."); 36 | Ok(counter.value.saturating_add(5)) 37 | } 38 | } 39 | 40 | pub struct Sub5Service; 41 | 42 | impl RpcService for Sub5Service { 43 | fn register_handlers(_registry: &mut ServiceRegistry) {} 44 | } 45 | 46 | #[datacake_rpc::async_trait] 47 | impl Handler for Sub5Service { 48 | type Reply = u64; 49 | 50 | async fn on_message(&self, msg: Request) -> Result { 51 | 
dbg!(&msg); 52 | let counter = msg.deserialize_view().expect("Get owned value."); 53 | Ok(counter.value.saturating_sub(5)) 54 | } 55 | } 56 | 57 | #[tokio::test] 58 | async fn test_unknown_service() { 59 | let addr = test_helper::get_unused_addr(); 60 | 61 | let server = Server::listen(addr).await.unwrap(); 62 | server.add_service(Add5Service); 63 | println!("Listening to address {}!", addr); 64 | 65 | let client = Channel::connect(addr); 66 | println!("Connected to address {}!", addr); 67 | 68 | let msg = Payload { value: 5 }; 69 | 70 | let add_client = RpcClient::::new(client.clone()); 71 | let subtract_client = RpcClient::::new(client); 72 | 73 | let resp = add_client.send(&msg).await.unwrap(); 74 | assert_eq!(resp, 10); 75 | 76 | let res = subtract_client 77 | .send(&msg) 78 | .await 79 | .expect_err("Server should reject unknown service"); 80 | assert_eq!( 81 | res, 82 | Status::unavailable(format!( 83 | "Unknown service /{}/{}", 84 | Sub5Service::service_name(), 85 | >::path(), 86 | )), 87 | "Server should reject unknown service with message." 
88 | ); 89 | 90 | server.shutdown(); 91 | } 92 | 93 | #[tokio::test] 94 | async fn test_unknown_message() { 95 | let addr = test_helper::get_unused_addr(); 96 | 97 | let server = Server::listen(addr).await.unwrap(); 98 | server.add_service(Add5Service); 99 | println!("Listening to address {}!", addr); 100 | 101 | let client = Channel::connect(addr); 102 | println!("Connected to address {}!", addr); 103 | 104 | let msg = Payload { value: 5 }; 105 | 106 | let add_client = RpcClient::::new(client.clone()); 107 | let subtract_client = RpcClient::::new(client); 108 | 109 | let resp = add_client.send(&msg).await.unwrap(); 110 | assert_eq!(resp, 10); 111 | 112 | let res = subtract_client 113 | .send(&msg) 114 | .await 115 | .expect_err("Server should reject unknown message"); 116 | assert_eq!( 117 | res, 118 | Status::unavailable(format!( 119 | "Unknown service /{}/{}", 120 | Sub5Service::service_name(), 121 | >::path(), 122 | )), 123 | "Server should reject unknown message with message." 124 | ); 125 | 126 | server.shutdown(); 127 | } 128 | -------------------------------------------------------------------------------- /datacake-sqlite/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datacake-sqlite" 3 | version = "0.5.0" 4 | edition = "2021" 5 | description = "A pre-built implementation of datacake's Storage trait using SQLite." 
6 | license = "MIT" 7 | keywords = ["databases", "distributed"] 8 | categories = ["concurrency", "data-structures"] 9 | repository = "https://github.com/lnx-search/datacake" 10 | readme = "README.md" 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | anyhow = "1" 16 | async-trait = "0.1.59" 17 | futures = "0.3.25" 18 | flume = "0.10.14" 19 | rusqlite = "0.28.0" 20 | thiserror = "1" 21 | 22 | tokio = { version = "1", default-features = false, features = ["rt"] } 23 | 24 | datacake-crdt = { version = "0.5", path = "../datacake-crdt" } 25 | datacake-eventual-consistency = { version = "0.6", path = "../datacake-eventual-consistency" } 26 | 27 | [features] 28 | bundled = ["rusqlite/bundled"] 29 | default = ["bundled"] 30 | 31 | [dev-dependencies] 32 | tracing = "0.1.37" 33 | tracing-subscriber = "0.3.16" 34 | 35 | test-helper = { path = "../test-helper" } 36 | uuid = { version = "1", features = ["v4"] } 37 | datacake-node = { version = "0.5", path = "../datacake-node" } 38 | datacake-eventual-consistency = { version = "0.6", path = "../datacake-eventual-consistency", features = ["test-utils"] } -------------------------------------------------------------------------------- /datacake-sqlite/README.md: -------------------------------------------------------------------------------- 1 | # Datacake SQLite 2 | 3 | A pre-built implementation of the datacake-eventual-consistency `Storage` trait, this allows you to set up 4 | a persistent cluster immediately without any hassle of implementing a correct store. 5 | 6 | For more info see https://github.com/lnx-search/datacake 7 | 8 | ## Setup 9 | It's important to note that this crate does bundle SQLite with it but it can be disabled by passing 10 | `default-features = false`. 
11 | 12 | ## Example 13 | 14 | ```rust 15 | use anyhow::Result; 16 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 17 | use datacake_node::{ 18 | ConnectionConfig, 19 | Consistency, 20 | DCAwareSelector, 21 | DatacakeNodeBuilder, 22 | }; 23 | use datacake_sqlite::SqliteStorage; 24 | 25 | static KEYSPACE: &str = "sqlite-store"; 26 | 27 | #[tokio::test] 28 | async fn test_basic_sqlite_cluster() -> Result<()> { 29 | let _ = tracing_subscriber::fmt::try_init(); 30 | 31 | let store = SqliteStorage::open_in_memory().await?; 32 | 33 | let addr = test_helper::get_unused_addr(); 34 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 35 | 36 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 37 | .connect() 38 | .await?; 39 | let store = node 40 | .add_extension(EventuallyConsistentStoreExtension::new(store)) 41 | .await?; 42 | 43 | let handle = store.handle(); 44 | 45 | handle 46 | .put(KEYSPACE, 1, b"Hello, world".to_vec(), Consistency::All) 47 | .await 48 | .expect("Put value."); 49 | 50 | let doc = handle 51 | .get(KEYSPACE, 1) 52 | .await 53 | .expect("Get value.") 54 | .expect("Document should not be none"); 55 | assert_eq!(doc.id(), 1); 56 | assert_eq!(doc.data(), b"Hello, world"); 57 | 58 | handle 59 | .del(KEYSPACE, 1, Consistency::All) 60 | .await 61 | .expect("Del value."); 62 | let doc = handle.get(KEYSPACE, 1).await.expect("Get value."); 63 | assert!(doc.is_none(), "No document should not exist!"); 64 | 65 | handle 66 | .del(KEYSPACE, 2, Consistency::All) 67 | .await 68 | .expect("Del value which doesnt exist locally."); 69 | let doc = handle.get(KEYSPACE, 2).await.expect("Get value."); 70 | assert!(doc.is_none(), "No document should not exist!"); 71 | 72 | node.shutdown().await; 73 | 74 | Ok(()) 75 | } 76 | ``` -------------------------------------------------------------------------------- /datacake-sqlite/src/from_row_impl.rs: -------------------------------------------------------------------------------- 
1 | use rusqlite::types::FromSql; 2 | use rusqlite::Row; 3 | 4 | use crate::db::FromRow; 5 | 6 | fn inc(n: &mut usize) -> usize { 7 | let v = *n; 8 | (*n) = v + 1; 9 | v 10 | } 11 | 12 | macro_rules! derive_tuple { 13 | ($($field:ident)*) => { 14 | impl<$($field: FromSql,)*> FromRow for ($($field,)*) { 15 | fn from_row(row: &Row) -> rusqlite::Result { 16 | let mut cursor = 0; 17 | Ok(( 18 | $( 19 | { 20 | let _name = stringify!($field); 21 | row.get(inc(&mut cursor))? 22 | }, 23 | )* 24 | )) 25 | } 26 | } 27 | }; 28 | } 29 | 30 | macro_rules! derive_common_tuples { 31 | () => {}; 32 | ($first:ident $($rest:ident)*) => { 33 | derive_tuple!($first $($rest)*); 34 | derive_common_tuples!($($rest)*); 35 | }; 36 | } 37 | 38 | derive_common_tuples!(T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15 T16); 39 | -------------------------------------------------------------------------------- /datacake-sqlite/tests/basic_cluster.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use datacake_eventual_consistency::EventuallyConsistentStoreExtension; 3 | use datacake_node::{ 4 | ConnectionConfig, 5 | Consistency, 6 | DCAwareSelector, 7 | DatacakeNodeBuilder, 8 | }; 9 | use datacake_sqlite::SqliteStorage; 10 | 11 | static KEYSPACE: &str = "sqlite-store"; 12 | 13 | #[tokio::test] 14 | async fn test_basic_sqlite_cluster() -> Result<()> { 15 | let _ = tracing_subscriber::fmt::try_init(); 16 | 17 | let store = SqliteStorage::open_in_memory().await?; 18 | 19 | let addr = test_helper::get_unused_addr(); 20 | let connection_cfg = ConnectionConfig::new(addr, addr, Vec::::new()); 21 | 22 | let node = DatacakeNodeBuilder::::new(1, connection_cfg) 23 | .connect() 24 | .await?; 25 | let store = node 26 | .add_extension(EventuallyConsistentStoreExtension::new(store)) 27 | .await?; 28 | 29 | let handle = store.handle(); 30 | 31 | handle 32 | .put(KEYSPACE, 1, b"Hello, world".to_vec(), Consistency::All) 33 | .await 34 | .expect("Put 
value."); 35 | 36 | let doc = handle 37 | .get(KEYSPACE, 1) 38 | .await 39 | .expect("Get value.") 40 | .expect("Document should not be none"); 41 | assert_eq!(doc.id(), 1); 42 | assert_eq!(doc.data(), b"Hello, world"); 43 | 44 | handle 45 | .del(KEYSPACE, 1, Consistency::All) 46 | .await 47 | .expect("Del value."); 48 | let doc = handle.get(KEYSPACE, 1).await.expect("Get value."); 49 | assert!(doc.is_none(), "No document should not exist!"); 50 | 51 | handle 52 | .del(KEYSPACE, 2, Consistency::All) 53 | .await 54 | .expect("Del value which doesnt exist locally."); 55 | let doc = handle.get(KEYSPACE, 2).await.expect("Get value."); 56 | assert!(doc.is_none(), "No document should not exist!"); 57 | 58 | node.shutdown().await; 59 | 60 | Ok(()) 61 | } 62 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Datacake Examples 2 | 3 | Here we have some nice examples of what you can do with datacake, feel free to contribute some of your own. 4 | 5 | - [replicated-kv](https://github.com/lnx-search/datacake/tree/main/examples/replicated-kv) A distributed key value store built on SQLite! 
And it's speedy 🚄 -------------------------------------------------------------------------------- /examples/replicated-kv/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "replicated-kv" 3 | version = "0.3.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = "1" 10 | axum = "0.6.1" 11 | num_cpus = "1.14.0" 12 | crc32fast = "1.3.2" 13 | tracing = "0.1.37" 14 | tracing-subscriber = "0.3.16" 15 | futures = "0.3" 16 | serde_json = "1.0.89" 17 | 18 | serde = { version = "1", features = ["derive"] } 19 | clap = { version = "4", features = ["derive"] } 20 | tokio = { version = "1", features = ["full"] } 21 | rusqlite = { version = "0.28.0", features = ["bundled"] } 22 | datacake = { version = "0.8", path = "../..", features = ["datacake-sqlite"] } 23 | 24 | [dev-dependencies] 25 | uuid = { version = "1", features = ["v4"] } 26 | datacake = { version = "0.8", path = "../..", features = ["test-utils"] } 27 | -------------------------------------------------------------------------------- /examples/replicated-kv/README.md: -------------------------------------------------------------------------------- 1 | # Replicated KV store. 2 | 3 | This is a nice little example of a KV store implemented as two basic HTTP endpoints. 4 | 5 | ## Running 6 | ### Single node cluster 7 | ```shell 8 | cargo run --release -- --node-id 1 --data-dir "./my-data" 9 | ``` 10 | 11 | This will spawn a single node with an ID of `node-1` and store the data in the `my-data` directory which will be 12 | created if it doesn't already exist. 
This will start a local 3 node cluster. For simplicity we've set each node's seeds to be its peers, although the seed 23 | list does not need to include every node in the cluster; normally 2 or 3 seeds are sufficient for any cluster larger than 3 nodes. 24 | 25 | ## Sending Requests 26 | Now that we have a running cluster we can send requests with the HTTP client of our choice; 27 | be sure to use the node's `rest-listen-addr` when sending the API calls. 28 | 29 | The API has the following endpoints: 30 | - GET `/:keyspace/:key` - Get an existing document from the store for the given keyspace and key.
36 | -------------------------------------------------------------------------------- /examples/replicated-kv/scripts/store-many-docs.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | 4 | import requests 5 | 6 | 7 | 8 | start = time.perf_counter() 9 | session = requests.session() 10 | for i in range(100_000): 11 | if (i / 1_000).is_integer(): 12 | print(f"Added {i} docs") 13 | 14 | r = session.post(f"http://127.0.0.1:8000/my-keyspace/{random.randint(1, 12345678791231)}", data=b'{"name": "tod"}') 15 | r.raise_for_status() 16 | 17 | total_time = time.perf_counter() - start 18 | print(f"Took {total_time:.2f} seconds!") 19 | -------------------------------------------------------------------------------- /examples/replicated-kv/src/main.rs: -------------------------------------------------------------------------------- 1 | mod storage; 2 | 3 | #[macro_use] 4 | extern crate tracing; 5 | 6 | use std::net::SocketAddr; 7 | 8 | use anyhow::Result; 9 | use axum::body::Bytes; 10 | use axum::extract::{Path, State}; 11 | use axum::http::StatusCode; 12 | use axum::routing::get; 13 | use axum::{Json, Router}; 14 | use clap::Parser; 15 | use datacake::eventual_consistency::{ 16 | EventuallyConsistentStoreExtension, 17 | ReplicatedStoreHandle, 18 | }; 19 | use datacake::node::{ 20 | ConnectionConfig, 21 | Consistency, 22 | DCAwareSelector, 23 | DatacakeNodeBuilder, 24 | }; 25 | use serde_json::json; 26 | 27 | use crate::storage::ShardedStorage; 28 | 29 | #[tokio::main] 30 | async fn main() -> Result<()> { 31 | tracing_subscriber::fmt::init(); 32 | let args: Args = Args::parse(); 33 | 34 | let storage = ShardedStorage::open_in_dir(&args.data_dir).await?; 35 | let connection_cfg = ConnectionConfig::new( 36 | args.cluster_listen_addr, 37 | args.public_addr.unwrap_or(args.cluster_listen_addr), 38 | args.seeds.into_iter(), 39 | ); 40 | 41 | let node = DatacakeNodeBuilder::::new(args.node_id, connection_cfg) 42 | 
.connect() 43 | .await?; 44 | let store = node 45 | .add_extension(EventuallyConsistentStoreExtension::new(storage)) 46 | .await?; 47 | 48 | let handle = store.handle(); 49 | 50 | let app = Router::new() 51 | .route("/:keyspace/:key", get(get_value).post(set_value)) 52 | .with_state(handle); 53 | 54 | info!("listening on {}", args.rest_listen_addr); 55 | let _ = axum::Server::bind(&args.rest_listen_addr) 56 | .serve(app.into_make_service()) 57 | .await; 58 | 59 | node.shutdown().await; 60 | 61 | Ok(()) 62 | } 63 | 64 | #[derive(Parser, Debug)] 65 | #[command(version, about, long_about = None)] 66 | pub struct Args { 67 | #[arg(long)] 68 | /// The unique ID of the node. 69 | node_id: u8, 70 | 71 | #[arg(long = "seed")] 72 | /// The set of seed nodes. 73 | /// 74 | /// This is used to kick start the auto-discovery of nodes within the cluster. 75 | seeds: Vec, 76 | 77 | #[arg(long, default_value = "127.0.0.1:8000")] 78 | /// The address for the REST server to listen on. 79 | /// 80 | /// This is what will serve the API. 81 | rest_listen_addr: SocketAddr, 82 | 83 | #[arg(long, default_value = "127.0.0.1:8001")] 84 | /// The address for the cluster RPC system to listen on. 85 | cluster_listen_addr: SocketAddr, 86 | 87 | #[arg(long)] 88 | /// The public address for the node to broadcast to other nodes. 89 | /// 90 | /// If not provided the `cluster_listen_addr` is used which will only 91 | /// work when running a cluster on the same local network. 92 | public_addr: Option, 93 | 94 | #[arg(long)] 95 | /// The path to store the data. 96 | data_dir: String, 97 | } 98 | 99 | #[derive(Debug, serde::Deserialize, serde::Serialize)] 100 | struct Params { 101 | keyspace: String, 102 | key: u64, 103 | } 104 | 105 | async fn get_value( 106 | Path(params): Path, 107 | State(handle): State>, 108 | ) -> Result { 109 | info!( 110 | doc_id = params.key, 111 | keyspace = params.keyspace, 112 | "Getting document!" 
113 | ); 114 | 115 | let doc = handle 116 | .get(¶ms.keyspace, params.key) 117 | .await 118 | .map_err(|e| { 119 | error!(error = ?e, doc_id = params.key, "Failed to fetch doc."); 120 | StatusCode::INTERNAL_SERVER_ERROR 121 | })?; 122 | 123 | match doc { 124 | None => Err(StatusCode::NOT_FOUND), 125 | Some(doc) => Ok(Bytes::copy_from_slice(doc.data())), 126 | } 127 | } 128 | 129 | async fn set_value( 130 | Path(params): Path, 131 | State(handle): State>, 132 | data: Bytes, 133 | ) -> Result, StatusCode> { 134 | info!( 135 | doc_id = params.key, 136 | keyspace = params.keyspace, 137 | "Storing document!" 138 | ); 139 | 140 | handle 141 | .put(¶ms.keyspace, params.key, data, Consistency::EachQuorum) 142 | .await 143 | .map_err(|e| { 144 | error!(error = ?e, doc_id = params.key, "Failed to fetch doc."); 145 | StatusCode::INTERNAL_SERVER_ERROR 146 | })?; 147 | 148 | Ok(Json(json!({ 149 | "key": params.key, 150 | "keyspace": params.keyspace, 151 | }))) 152 | } 153 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | unstable_features = true 2 | combine_control_expr = false 3 | imports_layout = "HorizontalVertical" 4 | match_block_trailing_comma = true 5 | imports_granularity = "Module" 6 | group_imports = "StdExternalCrate" 7 | max_width = 89 8 | 9 | ignore = [ 10 | "datacake-eventual-consistency/src/rpc/chitchat_transport_api.rs.rs", 11 | "datacake-eventual-consistency/src/rpc/datacake_api.rs.rs", 12 | ] -------------------------------------------------------------------------------- /simulation-tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "simulation-tests" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = "1" 10 | turmoil = 
"0.4.0" 11 | 12 | datacake = { version = "0.8.0", path = "..", features = ["simulation", "test-utils"] } 13 | tokio = { version = "1", features = ["full"] } 14 | rkyv = { version = "0.7.42", features = ["strict", "validation"] } 15 | -------------------------------------------------------------------------------- /simulation-tests/src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # lnx Datacake 2 | //! Easy to use tooling for building eventually consistent distributed data systems in Rust. 3 | //! 4 | //! > "Oh consistency where art thou?" - CF. 5 | //! 6 | //! ### Features ✨ 7 | //! - **Simple** setup, a cluster can be setup and ready to use with one trait. 8 | //! - Adjustable consistency levels when mutating state. 9 | //! - Data center aware replication prioritisation. 10 | //! - Pre-built test suite for `Storage` trait implementations to ensure correct functionality. 11 | //! 12 | //! ### The packages 13 | //! Datacake provides several utility libraries as well as some pre-made data store handlers: 14 | //! 15 | //! - `datacake-crdt` - A CRDT implementation based on a hybrid logical clock (HLC) 16 | //! provided in the form of the `HLCTimestamp`. 17 | //! - `datacake-node` - A cluster membership system and managed RPC built on top of chitchat. 18 | //! - `datacake-eventual-consistency` - Built on top of `datacake-crdt`, a batteries included framework 19 | //! for building eventually consistent, replicated systems where you only need to implement a basic 20 | //! storage trait. 21 | //! - `datacake-sqlite` - A pre-built and tested implementation of the datacake `Storage` trait built 22 | //! upon SQLite. 23 | //! - `datacake-lmdb` - A pre-built and tested implementation of the datacake `Storage` trait built 24 | //! upon LMDB. 
25 | //! - `datacake-rpc` - A fast, zero-copy RPC framework with a familiar actor-like feel to it. 26 | //! 27 | //! ### Examples 28 | //! Check out some pre-built apps we have in the 29 | //! [example folder](https://github.com/lnx-search/datacake/tree/main/examples) 30 | //! 31 | //! You can also look at some heavier integration tests 32 | //! [here](https://github.com/lnx-search/datacake/tree/main/datacake-eventual-consistency/tests) 33 | //! 34 | //! #### Single Node Cluster 35 | //! Here's an example of a basic cluster with one node that runs on your local network, it uses almost all of the packages 36 | //! including: 37 | //! 38 | //! - `datacake-node` for the core node membership. 39 | //! - `datacake-crdt` for the HLCTimestamp and CRDT implementations 40 | //! - `datacake-eventual-consistency` for the eventually consistent replication of state. 41 | //! - `datacake-rpc` bundled up with everything for managing all the cluster RPC. 42 | //! 43 | //! ```rust 44 | //! use std::net::SocketAddr; 45 | //! use datacake::node::{Consistency, ConnectionConfig, DCAwareSelector, DatacakeNodeBuilder}; 46 | //! use datacake::eventual_consistency::test_utils::MemStore; 47 | //! use datacake::eventual_consistency::EventuallyConsistentStoreExtension; 48 | //! 49 | //! #[tokio::main] 50 | //! async fn main() -> anyhow::Result<()> { 51 | //!     let addr = "127.0.0.1:8080".parse::<SocketAddr>().unwrap(); 52 | //!     let connection_cfg = ConnectionConfig::new(addr, addr, Vec::<SocketAddr>::new()); 53 | //!     let node = DatacakeNodeBuilder::<DCAwareSelector>::new(1, connection_cfg) 54 | //!         .connect() 55 | //!         .await 56 | //!         .expect("Connect node."); 57 | //! 58 | //!     let store = node 59 | //!         .add_extension(EventuallyConsistentStoreExtension::new(MemStore::default())) 60 | //!         .await 61 | //!         .expect("Create store."); 62 | //! 63 | //!     let handle = store.handle(); 64 | //! 65 | //!     handle 66 | //!         .put( 67 | //!             "my-keyspace", 68 | //!             1, 69 | //!             b"Hello, world! From keyspace 1.".to_vec(), 70 | //!
Consistency::All, 71 | //!         ) 72 | //!         .await 73 | //!         .expect("Put doc."); 74 | //! 75 | //!     Ok(()) 76 | //! } 77 | //! ``` 78 | //! 79 | //! ### Why does Datacake exist? 80 | //! 81 | //! Datacake is the result of my attempts at bringing high-availability to [lnx](https://github.com/lnx-search/lnx) 82 | //! unlike languages like Erlang or Go, Rust currently has a fairly young ecosystem around distributed 83 | //! systems. This makes it very hard to build a replicated system in Rust without implementing a lot of things 84 | //! from scratch and without a lot of research into the area to begin with. 85 | //! 86 | //! Currently, the main algorithm available in Rust is [Raft](https://raft.github.io/) which is replication via 87 | //! consensus, overall it is a very good algorithm, and it's a very simple algorithm to understand, however, 88 | //! I'm not currently satisfied that the current implementations are stable enough or are maintained in order to 89 | //! choose it. (Also for lnx's particular use case leader-less eventual consistency was preferable.) 90 | //! 91 | //! Because of the above, I built Datacake with the aim of building a reliable, well tested, eventually consistent system 92 | //! akin to how Cassandra or more specifically how ScyllaDB behave with eventually consistent replication, but with a few 93 | //! core differences: 94 | //! 95 | //! - Datacake does not require an external source or read repair to clear tombstones. 96 | //! - The underlying CRDTs which are what actually power Datacake are kept purely in memory. 97 | //! - Partitioning and sharding is not (currently) supported. 98 | //! 99 | //! It's worth noting that Datacake itself does not implement the consensus and membership algorithms from scratch, instead 100 | //! we use [chitchat](https://github.com/quickwit-oss/chitchat) developed by [Quickwit](https://quickwit.io/) which is an 101 | //! implementation of the scuttlebutt algorithm. 102 | //! 103 | //!
### Inspirations and references 104 | //! - [CRDTs for Mortals by James Long](https://www.youtube.com/watch?v=iEFcmfmdh2w) 105 | //! - [Big(ger) Sets: Making CRDT Sets Scale in Riak by Russell Brown](https://www.youtube.com/watch?v=f20882ZSdkU) 106 | //! - ["CRDTs Illustrated" by Arnout Engelen](https://www.youtube.com/watch?v=9xFfOhasiOE) 107 | //! - ["Practical data synchronization with CRDTs" by Dmitry Ivanov](https://www.youtube.com/watch?v=veeWamWy8dk) 108 | //! - [CRDTs and the Quest for Distributed Consistency](https://www.youtube.com/watch?v=B5NULPSiOGw) 109 | //! - [Logical Physical Clocks and Consistent Snapshots in Globally Distributed Databases](https://cse.buffalo.edu/tech-reports/2014-04.pdf) 110 | //! 111 | //! ### Contributing 112 | //! Contributions are always welcome, although please open an issue for an idea about extending the main cluster system 113 | //! if you wish to extend or modify it heavily as some things are not always as simple as they seem. 114 | //! 115 | //! #### What sort of things could I contribute? 116 | //! 🧪 Tests! 🧪 Tests! 🧪 Tests! Joking aside testing is probably the most important part of the system, extending these 117 | //! tests in any way you might think of, big or small is a huge help :) 118 | //! 119 | //! ### Future Ideas 120 | //! - Multi-raft framework? 121 | //! - CASPaxos??? 122 | //! - More storage implementations? 123 | //! 124 | 125 | #[cfg(feature = "datacake-crdt")] 126 | /// A re-export of the `datacake-crdt` package, providing all of the 127 | /// hybrid logical clock and CRDT implementation. 128 | pub use datacake_crdt as crdt; 129 | #[cfg(feature = "datacake-eventual-consistency")] 130 | /// A re-export of the `datacake-eventual-consistency` package, a pre-built cluster extension that 131 | /// provides you with an eventually consistent store with automatic tombstone removal.
132 | pub use datacake_eventual_consistency as eventual_consistency; 133 | #[cfg(feature = "datacake-lmdb")] 134 | /// A re-export of the `datacake-lmdb` package, giving you a pre-built and tested storage 135 | /// implementation for the eventually consistent store, although this may not be suited for 136 | /// all applications, it is useful for most. 137 | pub use datacake_lmdb as lmdb; 138 | #[cfg(feature = "datacake-node")] 139 | /// A re-export of the `datacake-node` package, the core membership system for building 140 | /// your own cluster system. 141 | pub use datacake_node as node; 142 | #[cfg(feature = "datacake-rpc")] 143 | /// A re-export of the `datacake-rpc` package, this is built upon HTTP/2 in a similar fashion 144 | /// to tonic, except it uses Rust's `rkyv` package and supports zero-copy deserialization 145 | /// with an actor-like feel. 146 | pub use datacake_rpc as rpc; 147 | #[cfg(feature = "datacake-sqlite")] 148 | /// A re-export of the `datacake-sqlite` package, giving you a pre-built and tested storage 149 | /// implementation for the eventually consistent store, although this may not be suited for 150 | /// all applications, it is useful for testing and small datasets. 151 | pub use datacake_sqlite as sqlite; -------------------------------------------------------------------------------- /test-helper/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "test-helper" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | -------------------------------------------------------------------------------- /test-helper/src/addr.rs: -------------------------------------------------------------------------------- 1 | use std::net::{SocketAddr, TcpListener}; 2 | 3 | /// Gets a new socket address allocated by the OS.
/// Binds to `127.0.0.1:0` so the OS picks a free ephemeral port, then
/// returns that address; the listener itself is dropped on return.
///
/// # Panics
/// Panics if the loopback bind fails or the local address cannot be read.
///
/// NOTE(review): because the listener is closed before the caller rebinds,
/// another process may grab the port in between (TOCTOU race). That is
/// acceptable for tests, which is this helper's only intended use.
pub fn get_unused_addr() -> SocketAddr {
    let listener = TcpListener::bind("127.0.0.1:0")
        .expect("bind an OS-assigned port on 127.0.0.1");
    listener
        .local_addr()
        .expect("read local address of freshly bound listener")
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_unused_addr() {
        let addr = get_unused_addr();
        // The handed-out address must be immediately bindable by the caller.
        TcpListener::bind(addr).expect("Connect to allocated address");
    }
}