├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build_docker.sh ├── design ├── storage.md └── treewalker.md ├── rdb-analyzer ├── Cargo.toml ├── build.rs └── src │ ├── data │ ├── kv.rs │ ├── mod.rs │ ├── pathwalker.rs │ ├── pathwalker_test.rs │ ├── treewalker │ │ ├── asm │ │ │ ├── asm_test.rs │ │ │ ├── ast.rs │ │ │ ├── codegen.rs │ │ │ ├── language.lalrpop │ │ │ ├── mod.rs │ │ │ └── state.rs │ │ ├── bytecode.rs │ │ ├── exec.rs │ │ ├── exec_test.rs │ │ ├── mod.rs │ │ ├── serialize.rs │ │ ├── typeck.rs │ │ ├── typeck_test.rs │ │ ├── vm.rs │ │ └── vm_value.rs │ └── value.rs │ ├── kv_backend │ ├── foundationdb.rs │ ├── mock_kv.rs │ ├── mod.rs │ └── sqlite.rs │ ├── lib.rs │ ├── schema │ ├── compile.rs │ ├── compile_test.rs │ ├── grammar │ │ ├── ast.rs │ │ ├── error.rs │ │ ├── mod.rs │ │ └── parser.lalrpop │ └── mod.rs │ ├── storage_plan │ ├── conversion.rs │ ├── mod.rs │ ├── planner.rs │ └── planner_test.rs │ ├── test_util.rs │ └── util.rs ├── rdb-pgsvc ├── .cargo │ └── config ├── Cargo.toml └── src │ ├── dfvis.rs │ ├── main.rs │ ├── memkv.rs │ └── query.rs ├── rdb-proto ├── Cargo.toml ├── build.rs └── src │ ├── lib.rs │ └── proto │ ├── mod.rs │ └── rdbrpc.proto ├── rdb-server.Dockerfile ├── rdb-server ├── Cargo.toml └── src │ ├── exec.rs │ ├── exec_core.rs │ ├── httpapi.rs │ ├── main.rs │ ├── opt.rs │ ├── query_cache.rs │ ├── server.rs │ ├── state.rs │ ├── sys.rasm │ ├── sysquery.rs │ ├── system.rs │ ├── system_schema.rschema │ └── util.rs ├── rdbctl ├── Cargo.toml └── src │ ├── diff.rs │ └── main.rs ├── rust-toolchain └── rustfmt.toml /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | name: Build and test 9 | steps: 10 | - name: Checkout 11 | uses: actions/checkout@v2 12 | with: 13 | submodules: 'true' 14 | - name: Install Rust 15 | uses: actions-rs/toolchain@v1 16 | with: 17 | profile: minimal 18 | toolchain: stable 19 | - name: Install FoundationDB 20 | run: | 21 | wget https://www.foundationdb.org/downloads/6.3.15/ubuntu/installers/foundationdb-clients_6.3.15-1_amd64.deb 22 | sudo dpkg -i foundationdb-clients_6.3.15-1_amd64.deb 23 | wget https://www.foundationdb.org/downloads/6.3.15/ubuntu/installers/foundationdb-server_6.3.15-1_amd64.deb 24 | sudo dpkg -i foundationdb-server_6.3.15-1_amd64.deb 25 | - name: Test rdb-analyzer 26 | run: | 27 | cd rdb-analyzer 28 | cargo test 29 | cargo test --features test-with-fdb 30 | cargo test --features test-with-sqlite 31 | build-docker: 32 | runs-on: ubuntu-latest 33 | name: Build docker image 34 | steps: 35 | - name: Checkout 36 | uses: actions/checkout@v2 37 | with: 38 | submodules: 'true' 39 | - name: Install Rust 40 | uses: actions-rs/toolchain@v1 41 | with: 42 | profile: minimal 43 | toolchain: stable 44 | - name: Install rustfmt 45 | run: rustup component add rustfmt 46 | - name: Install FoundationDB client 47 | run: | 48 | wget https://www.foundationdb.org/downloads/6.3.15/ubuntu/installers/foundationdb-clients_6.3.15-1_amd64.deb 49 | sudo dpkg -i foundationdb-clients_6.3.15-1_amd64.deb 50 | - name: Build rdb-server 51 | run: | 52 | cd rdb-server 53 | cargo build --release 54 | - name: Build image 55 | run: ./build_docker.sh 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "rdb-server", 4 | "rdb-analyzer", 5 | "rdbctl", 6 | "rdb-proto", 7 | "rdb-pgsvc", 8 | ] 9 | 10 | [profile.release] 11 | debug = true 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Heyang Zhou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RefineDB 2 | 3 | ![Build status](https://github.com/losfair/RefineDB/actions/workflows/ci.yml/badge.svg) 4 | 5 | A strongly-typed document database that runs on any transactional key-value store. 6 | 7 | Currently supported backends are: 8 | 9 | - [FoundationDB](https://github.com/apple/foundationdb) for distributed deployment. 10 | - [SQLite](https://www.sqlite.org/index.html) for single-machine deployment. 11 | - A simple in-memory key-value store for the web playground. 12 | 13 | Try RefineDB on the [Web Playground](https://playground.rdb.univalence.me/)! 14 | 15 | **Warning: Not ready for production.** 16 | 17 | ## Motivation 18 | 19 | Databases should be more scalable than popular SQL databases, more structured than popular NoSQL databases, and support stronger 20 | static type checking than any of the current databases. So I decided to build RefineDB as "the kind of database that I want to use myself". 21 | 22 | RefineDB will be used as the database service of [rusty-workers](https://github.com/losfair/rusty-workers). 23 | 24 | ## Architecture 25 | 26 | ![Architecture](https://univalence.me/i/d32378a2042ef32d15bef3dd6dc1b73c_5100183c11cb7b6aa2a8049c00d80ffc.svg) 27 | 28 | ## Getting started 29 | 30 | Examples are a TODO but rdb-analyzer's [tests](https://github.com/losfair/RefineDB/blob/main/rdb-analyzer/src/data/treewalker/asm/asm_test.rs) and `rdb-server` (which uses RefineDB itself to store metadata) should give some basic insight on how the system works. 31 | 32 | ## Schemas and the type system 33 | 34 | In RefineDB, schemas are defined with types. 
For example, a part of a schema for a simple blog would look like:

```
type SiteConfig {
  site_name: string,
  registration_open: int64,
}

type BlogPost {
  @primary
  id: string,
  author_email: string,
  author_name: string,
  title: string,
  content: string,
  access_time: AccessTime,
}

type AccessTime {
  create_time: int64,
  update_time: int64,
}

export SiteConfig site_config;
export set<BlogPost> posts;
```

The primitive types are:

- `int64`: 64-bit signed integer.
- `double`: IEEE 754 double-precision floating point number.
- `string`: UTF-8 string.
- `bytes`: Byte array.
- `set<T>`: A set with element type `T`.

Sum types would be nice to have too, but I haven't implemented them yet.

## Queries: the TreeWalker VM and RefineAsm

Queries in RefineDB are encoded as *data flow graphs*, and query execution is graph reduction.

The TreeWalker VM is a massively concurrent data flow virtual machine for running the queries, but I haven't written documentation on its internals yet.

RefineAsm is the textual representation of the query graph, with some syntactic sugar to make writing it easier.

An example RefineAsm script for adding a post to the above blog schema:

```
type PostMap = map {
  id: string,
  author_email: string,
  author_name: string,
  title: string,
  content: string,
  access_time: map {
    create_time: int64,
    update_time: int64,
  },
};
export graph add_post(root: schema, post: PostMap) {
  s_insert root.posts $ call(build_post) [post];
}
graph build_post(post: PostMap): BlogPost {
  return build_table(BlogPost)
    $ m_insert(access_time) (build_table(AccessTime) post.access_time) post;
}
```

## Storage plan and schema migration

A storage plan describes how a schema maps to entries in the key-value store. By separating schemas from storage plans, RefineDB's schemas are just "views" of the underlying keyspace, and schema changes are fast.

During a migration, added fields are automatically assigned new storage keys, and removed fields are not auto-deleted from storage. This allows multiple schema versions to co-exist, lets the client choose which schema version to use, and prevents unintended data deletion.
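To make that concrete, here is a hypothetical next version of the `BlogPost` type above (the `summary` field is invented for illustration):

```
type BlogPost {
  @primary
  id: string,
  author_email: string,
  author_name: string,
  title: string,
  summary: string,
  content: string,
  access_time: AccessTime,
}
```

Migrating to this schema assigns `summary` a fresh storage key while every existing field keeps its old key, so data written under the previous schema version stays readable as-is.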
[Storage design doc](design/storage.md)

## License

MIT

--------------------------------------------------------------------------------
/build_docker.sh:
--------------------------------------------------------------------------------

#!/bin/bash

set -euo pipefail

TMP="/tmp/rdb-docker-build-`uuidgen`"
mkdir "$TMP"

cleanup()
{
  echo "Cleaning up temporary directory $TMP"
  rm -r "$TMP"
}

trap cleanup EXIT

cd "`dirname $0`"

cp ./target/release/rdb-server "$TMP/"
cp ./rdb-server.Dockerfile "$TMP/Dockerfile"
docker build --build-arg http_proxy --build-arg https_proxy -t "losfair/rdb-server" "$TMP"

echo "Build done."

--------------------------------------------------------------------------------
/design/storage.md:
--------------------------------------------------------------------------------

# Storage

## Types

RefineDB has three classes of types:

- Primitives

  `int64`, `double`, `bytes`, `string`

- Sets

  Each *set* contains many *tables* of the same type, distinguished by a primary key.

- Tables

  A *table* is similar to a *struct* in other programming languages.

  Tables can be recursive.

  ```
  type SomeTable {
    field_1: int64,
    field_2: string,
    recursive: SomeTable?,
  }
  ```

## Exports

Data in a schema is organized as a tree. The root node is the schema itself, and *exports* are first-level child nodes of the root node.

```
export SomeTable t;
export set<SomeTable> s;
```

## Storage plan

A storage plan is an assignment of keyspaces to nodes, and is generated automatically from the schema on initial creation and migration.

An example storage plan for the following schema:

```
type Item<T> {
  @primary
  id: string,
  value: T,
}
type RecursiveItem<T> {
  @primary
  id: string,
  value: T?,
  recursive: RecursiveItem<T>?,
}
type Duration<T> {
  start: T,
  end: T,
}
export set<Item<Duration<int64>>> items;
export set<RecursiveItem<Duration<int64>>> recursive_items;
```

is:

```yaml
nodes:
  items:
    key: AXotdfGWgwakktB+
    flattened: false
    subspace_reference: ~
    packed: false
    set:
      key: AXotdfGWKeLiE7Fz
      flattened: true
      subspace_reference: ~
      packed: false
      set: ~
      children:
        id:
          key: AXotdfGWA87Wm+Ur
          flattened: false
          subspace_reference: ~
          packed: false
          set: ~
          children: {}
        value:
          key: AXotdfGWs5GgKdZm
          flattened: true
          subspace_reference: ~
          packed: false
          set: ~
          children:
            end:
              key: AXotdfGWbEWE3EGj
              flattened: false
              subspace_reference: ~
              packed: false
              set: ~
              children: {}
            start:
              key: AXotdfGWIeQU3Q0Y
              flattened: false
              subspace_reference: ~
              packed: false
              set: ~
              children: {}
    children: {}
  recursive_items:
    key: AXotdfGW1VpbW1gG
    flattened: false
    subspace_reference: ~
    packed: false
    set:
      key: AXotdfGWGyKKcTOs
      flattened: true
      subspace_reference: ~
      packed: false
      set: ~
      children:
        id:
          key: AXotdfGWf7wisfyA
          flattened: false
          subspace_reference: ~
          packed: false
          set: ~
          children: {}
        recursive:
          key: AXotdfGWTYDkb+12
          flattened: false
          subspace_reference: AXotdfGWGyKKcTOs
          packed: false
          set: ~
          children: {}
        value:
          key: AXotdfGWoLaa9Rub
          flattened: true
          subspace_reference: ~
          packed: false
          set: ~
          children:
            end:
              key: AXotdfGWpTkrojrV
              flattened: false
              subspace_reference: ~
              packed: false
              set: ~
              children: {}
            start:
              key: AXotdfGWY2xf+e24
              flattened: false
              subspace_reference: ~
              packed: false
              set: ~
              children: {}
    children: {}
```

The storage plan specifies the paths to all the fields reachable from exports.
Some example paths for the storage plan above (generated 162 | by the `data::pathwalker_test::basic` test): 163 | 164 | ``` 165 | items -> [AXotdfGWgwakktB+] 166 | items[id == String("hello")] -> [AXotdfGWgwakktB+] [AmhlbGxvAA==] [AXotdfGWKeLiE7Fz] 167 | items[id == String("hello")].id -> [AXotdfGWgwakktB+] [AmhlbGxvAA==] [AXotdfGWA87Wm+Ur] 168 | items[id == String("hello")].value -> [AXotdfGWgwakktB+] [AmhlbGxvAA==] [AXotdfGWs5GgKdZm] 169 | items[id == String("hello")].value.end -> [AXotdfGWgwakktB+] [AmhlbGxvAA==] [AXotdfGWbEWE3EGj] 170 | items[id == String("hello")].value.start -> [AXotdfGWgwakktB+] [AmhlbGxvAA==] [AXotdfGWIeQU3Q0Y] 171 | recursive_items -> [AXotdfGW1VpbW1gG] 172 | recursive_items[id == String("hello")] -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWGyKKcTOs] 173 | recursive_items[id == String("hello")].id -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWf7wisfyA] 174 | recursive_items[id == String("hello")].recursive -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] 175 | recursive_items[id == String("hello")].recursive! -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] 176 | recursive_items[id == String("hello")].recursive!.id -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] [AXotdfGWf7wisfyA] 177 | recursive_items[id == String("hello")].recursive!.value -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] [AXotdfGWoLaa9Rub] 178 | recursive_items[id == String("hello")].recursive!.value! -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] [AXotdfGWoLaa9Rub] 179 | recursive_items[id == String("hello")].recursive!.value!.end -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] [AXotdfGWpTkrojrV] 180 | recursive_items[id == String("hello")].recursive!.value!.start -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWTYDkb+12] [AXotdfGWY2xf+e24] 181 | recursive_items[id == String("hello")].value -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWoLaa9Rub] 182 | recursive_items[id == String("hello")].value! -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWoLaa9Rub] 183 | recursive_items[id == String("hello")].value!.end -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWpTkrojrV] 184 | recursive_items[id == String("hello")].value!.start -> [AXotdfGW1VpbW1gG] [AmhlbGxvAA==] [AXotdfGWY2xf+e24] 185 | ``` 186 | 187 | The *set key* is derived from the value of the primary key of the member table type. 188 | 189 | Note that fields are *flattened* when doing so does not lead to ambiguity: `.value.start` and 190 | `.value.end` have the same key length as `.value` and `.id`. Currently two kinds of types are 191 | not flattened, for obvious reasons: 192 | 193 | - Sets. 194 | - Recursive tables. 195 | -------------------------------------------------------------------------------- /design/treewalker.md: -------------------------------------------------------------------------------- 1 | # TreeWalker 2 | 3 | Underlying RefineAsm, the TreeWalker VM is a massively concurrent data flow virtual machine for querying RefineDB. 4 | 5 | Implementation is at `rdb-analyzer/src/data/treewalker`. 6 | 7 | Documentation TODO. 
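Until that documentation exists, a rough mental model may help: a query graph is a list of topologically sorted nodes, and a node can fire once all of its inputs are available, which is what makes independent subtrees evaluable concurrently. Below is a deliberately simplified, sequential sketch of that evaluation order (the `Node` struct and `eval` callback are invented for illustration; the real node type is `TwGraph` in `rdb-analyzer/src/data/treewalker/bytecode.rs`):

```rust
/// A node in a toy data flow graph: it consumes the outputs of the
/// nodes listed in `in_edges`, all of which precede it in the list.
struct Node {
  in_edges: Vec<usize>,
}

/// Evaluate a topologically sorted graph. A real engine would fire
/// ready nodes concurrently instead of walking the list in order.
fn run(nodes: &[Node], eval: impl Fn(usize, &[i64]) -> i64) -> Vec<i64> {
  let mut outputs: Vec<i64> = Vec::with_capacity(nodes.len());
  for (i, node) in nodes.iter().enumerate() {
    // Topological order guarantees every input is already computed.
    let inputs: Vec<i64> = node.in_edges.iter().map(|&j| outputs[j]).collect();
    outputs.push(eval(i, &inputs));
  }
  outputs
}
```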
--------------------------------------------------------------------------------
/rdb-analyzer/Cargo.toml:
--------------------------------------------------------------------------------

[package]
name = "rdb-analyzer"
version = "0.1.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lalrpop-util = "0.19.6"
regex = "1"
anyhow = "1"
thiserror = "1"
serde = { version = "1", features = ["derive", "rc"] }
serde_json = "1"
bumpalo = { version = "3.7", features = ["collections", "boxed"] }
log = "0.4"
pretty_env_logger = "0.4"
indexmap = "1.6"
phf = { version = "0.8", features = ["macros"] }
rand = "0.8"
hex = "0.4"
rmp-serde = "0.15"
snap = "1"
serde_yaml = "0.8"
base64 = "0.13"
byteorder = "1"
similar = { version = "1", features = ["inline"] }
smallvec = { version = "1", features = ["serde"] }
rpds = { version = "0.9", features = ["serde"] }
async-trait = "0.1"
futures = "0.3"
async-recursion = "0.3.2"
petgraph = "0.5"
foundationdb = { version = "0.5", optional = true }
rusqlite = { version = "0.25", optional = true }
r2d2 = { version = "0.8", optional = true }
r2d2_sqlite = { version = "0.18", optional = true }
tokio = { version = "1", optional = true, features = ["full"] }

[build-dependencies]
lalrpop = "0.19.6"

[dev-dependencies]
console = "0.14.0"
tokio = { version = "1", features = ["full"] }
lazy_static = "1.4"

[features]
default = ["fdb-backend", "sqlite-backend"]
fdb-backend = ["foundationdb", "tokio"]
sqlite-backend = ["rusqlite", "r2d2", "r2d2_sqlite", "tokio"]
test-with-fdb = ["fdb-backend"]
test-with-sqlite = ["sqlite-backend"]

--------------------------------------------------------------------------------
/rdb-analyzer/build.rs:
--------------------------------------------------------------------------------

fn main() {
  lalrpop::process_root().unwrap();
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/kv.rs:
--------------------------------------------------------------------------------

use anyhow::Result;
use async_trait::async_trait;
use thiserror::Error;

#[async_trait]
pub trait KeyValueStore: Send + Sync {
  async fn begin_transaction(&self) -> Result<Box<dyn KvTransaction>>;
}

#[async_trait]
pub trait KvTransaction: Send + Sync {
  async fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>>;
  async fn put(&self, key: &[u8], value: &[u8]) -> Result<()>;
  async fn delete(&self, key: &[u8]) -> Result<()>;
  async fn delete_range(&self, start: &[u8], end: &[u8]) -> Result<()>;
  async fn scan_keys(&self, start: &[u8], end: &[u8]) -> Result<Box<dyn KvKeyIterator>>;
  async fn commit(self: Box<Self>) -> Result<(), KvError>;
}

#[async_trait]
pub trait KvKeyIterator: Send + Sync {
  async fn next(&mut self) -> Result<Option<Vec<u8>>>;
}

#[derive(Error, Debug)]
pub enum KvError {
  #[error("conflict")]
  Conflict,

  #[error("commit state unknown")]
  CommitStateUnknown,
}
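A quick sketch of how these traits compose (editorial example, not part of the original source; `kv_round_trip` is a hypothetical helper written against the trait methods above):

```rust
// Sketch: one optimistic round-trip through any backend. A
// `KvError::Conflict` from `commit` means the transaction lost a
// race and the caller should retry.
use crate::data::kv::KeyValueStore;

async fn kv_round_trip(store: &dyn KeyValueStore) -> anyhow::Result<()> {
  let txn = store.begin_transaction().await?;
  txn.put(b"greeting", b"hello").await?;
  assert_eq!(txn.get(b"greeting").await?, Some(b"hello".to_vec()));
  // `commit` consumes the boxed transaction.
  txn.commit().await?;
  Ok(())
}
```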
--------------------------------------------------------------------------------
/rdb-analyzer/src/data/mod.rs:
--------------------------------------------------------------------------------

pub mod kv;
pub mod pathwalker;
pub mod treewalker;
pub mod value;

#[cfg(test)]
mod pathwalker_test;

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/pathwalker.rs:
--------------------------------------------------------------------------------

use std::{ops::Deref, sync::Arc};

use anyhow::Result;

use crate::storage_plan::{StorageNode, StoragePlan};
use thiserror::Error;

use super::value::PrimitiveValue;

#[derive(Error, Debug)]
pub enum PathWalkerError {
  #[error("field not found: `{0}`")]
  FieldNotFound(String),

  #[error("enter_field called on set")]
  EnterFieldOnSet,

  #[error("cannot find referenced node for subspace reference")]
  ReferenceNodeNotFound,

  #[error("enter_set called on a non-set node")]
  NotSet,

  #[error("path too deep")]
  PathTooDeep,
}

const MAX_DEPTH: usize = 64;

#[derive(Debug)]
pub struct PathWalker<'a> {
  /// The "actual" storage node, with subspace references resolved.
  node: &'a StorageNode,

  /// The current key component.
  key: KeyCow<'a>,

  /// Link to the parent node.
  link: Option<Arc<PathWalker<'a>>>,

  /// Current nesting depth.
  depth: usize,

  /// Whether this node should be flattened.
  ///
  /// False if:
  /// - `node.flattened == false`.
  /// - This is a subspace reference.
  ///
  /// True otherwise.
  should_flatten: bool,

  /// Whether this node is an intermediate node that is not actually present in the key-value
  /// store.
  ///
  /// Used when detecting path integrity.
  is_intermediate: bool,

  path_segment: Option<&'a str>,
}

#[derive(Clone, Debug)]
enum KeyCow<'a> {
  Borrowed(&'a [u8]),
  Owned(Arc<[u8]>),
}

impl<'a> Deref for KeyCow<'a> {
  type Target = [u8];
  fn deref(&self) -> &Self::Target {
    match self {
      KeyCow::Borrowed(x) => *x,
      KeyCow::Owned(x) => &**x,
    }
  }
}

impl<'a> PathWalker<'a> {
  pub fn from_export(plan: &'a StoragePlan, export_name: &str) -> Result<Arc<Self>> {
    let (export_name, export) = plan
      .nodes
      .get_key_value(export_name)
      .ok_or_else(|| PathWalkerError::FieldNotFound(export_name.to_string()))?;

    Ok(Arc::new(Self {
      node: export,
      key: KeyCow::Borrowed(&export.key),
      link: None,
      depth: 1,
      should_flatten: export.flattened,
      is_intermediate: false,
      path_segment: Some(&**export_name),
    }))
  }
}

impl<'a> PartialEq for PathWalker<'a> {
  fn eq(&self, other: &Self) -> bool {
    self.generate_key_raw() == other.generate_key_raw()
  }
}

impl<'a> PathWalker<'a> {
  fn generate_key_raw(&self) -> Vec<&[u8]> {
    let mut components: Vec<&[u8]> = vec![];

    // The leaf node should always have its key component appended.
    components.push(&self.key);

    let mut link = self.link.as_ref();

    while let Some(x) = link {
      if !x.should_flatten {
        components.push(&x.key);
      }
      link = x.link.as_ref();
    }
    components.reverse();
    components
  }
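  // Worked example (editorial, not in the original source): with the plan in
  // design/storage.md, `items[id == "hello"].value.start` produces the
  // components [items key] [set element key] [start key]. `value` is marked
  // `flattened`, so the loop above skips its component and descending into
  // `value` does not make the final key any longer.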
  fn check_and_add_depth(&self) -> Result<usize> {
    if self.depth >= MAX_DEPTH {
      Err(PathWalkerError::PathTooDeep.into())
    } else {
      Ok(self.depth + 1)
    }
  }

  fn collect_non_intermediate_path_segments_on_path_including_self(&self) -> Vec<&'a str> {
    let mut link = Some(self);
    let mut result = vec![];
    while let Some(x) = link {
      if !x.is_intermediate {
        if let Some(segment) = x.path_segment {
          result.push(segment);
        } else {
          result.push("(selector)");
        }
      }
      link = x.link.as_ref().map(|x| &**x);
    }

    result.reverse();

    result
  }

  pub fn all_non_intermediate_keys_on_path_excluding_self(&self) -> Vec<(Vec<u8>, Vec<&'a str>)> {
    let mut link = self.link.as_ref();
    let mut result = vec![];

    while let Some(x) = link {
      if !x.is_intermediate {
        let path_segments = x.collect_non_intermediate_path_segments_on_path_including_self();
        result.push((x.generate_key(), path_segments));
      }
      link = x.link.as_ref();
    }
    result
  }

  pub fn node(&self) -> &'a StorageNode {
    self.node
  }

  pub fn generate_key(&self) -> Vec<u8> {
    let components = self.generate_key_raw();
    let len = components.iter().fold(0, |a, b| a + b.len());
    let mut key = Vec::with_capacity(len);
    for c in components.iter() {
      key.extend_from_slice(*c);
    }
    assert_eq!(key.len(), len);
    key
  }

  pub fn generate_key_pretty(&self) -> String {
    return self
      .generate_key_raw()
      .iter()
      .map(|x| format!("[{}]", base64::encode(x)))
      .collect::<Vec<_>>()
      .join(" ");
  }

  pub fn enter_field(self: &Arc<Self>, field_name: &str) -> Result<Arc<Self>> {
    // This check is not necessary for correctness but let's optimize our error message.
    if self.node.set.is_some() {
      return Err(PathWalkerError::EnterFieldOnSet.into());
    }

    let (field_name, node) = self
      .node
      .children
      .get_key_value(field_name)
      .ok_or_else(|| PathWalkerError::FieldNotFound(field_name.to_string()))?;

    if let Some(subspace_reference) = node.subspace_reference {
      // Walk up the list.
      let mut me = Some(self);
      while let Some(link) = me {
        // Here we use `link.node.key` instead of `link.key` to avoid conflicting with set keys.
        if link.node.key == subspace_reference {
          // Use the referenced node, with our own key.
          // And do not flatten.
          return Ok(Arc::new(Self {
            node: link.node,
            key: KeyCow::Borrowed(&node.key),
            link: Some(self.clone()),
            depth: self.check_and_add_depth()?,
            should_flatten: false,
            is_intermediate: false,
            path_segment: Some(&**field_name),
          }));
        }
        me = link.link.as_ref();
      }
      return Err(PathWalkerError::ReferenceNodeNotFound.into());
    } else {
      Ok(Arc::new(Self {
        node,
        key: KeyCow::Borrowed(&node.key),
        link: Some(self.clone()),
        depth: self.check_and_add_depth()?,
        should_flatten: node.flattened,
        is_intermediate: false,
        path_segment: Some(&**field_name),
      }))
    }
  }
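  // Editorial note: the `subspace_reference` branch above is what makes
  // recursive table types workable — instead of materializing an infinite
  // subtree in the storage plan, a recursive field points back at an
  // ancestor's key, and the walk-up loop resolves that ancestor so the
  // generated keys stay bounded (`MAX_DEPTH` is the hard limit).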
  pub fn set_fast_scan_prefix(&self) -> Result<Vec<u8>> {
    self
      .node
      .set
      .as_ref()
      .ok_or_else(|| PathWalkerError::NotSet)?;

    let mut key = self.generate_key();
    key.push(0x01u8);
    Ok(key)
  }

  pub fn set_data_prefix(&self) -> Result<Vec<u8>> {
    self
      .node
      .set
      .as_ref()
      .ok_or_else(|| PathWalkerError::NotSet)?;

    let mut key = self.generate_key();
    key.push(0x00u8);
    Ok(key)
  }

  pub fn enter_set_raw(self: &Arc<Self>, primary_key: &[u8]) -> Result<Arc<Self>> {
    let set = &**self
      .node
      .set
      .as_ref()
      .ok_or_else(|| PathWalkerError::NotSet)?;

    // 0x00 - data
    // 0x01 - key only
    // 0x02 - index
    let mut dynamic_key_bytes = vec![0x00u8];
    dynamic_key_bytes.extend_from_slice(primary_key);
    dynamic_key_bytes.push(0x00u8);

    let dynamic_key = KeyCow::Owned(Arc::from(dynamic_key_bytes.as_slice()));

    // The set key.
    let intermediate = Arc::new(Self {
      node: set,
      key: dynamic_key.clone(),
      link: Some(self.clone()),
      depth: self.check_and_add_depth()?,
      should_flatten: false,
      is_intermediate: true,
      path_segment: None,
    });

    // And the table key.
    Ok(Arc::new(Self {
      node: set,
      key: KeyCow::Borrowed(&set.key),
      link: Some(intermediate.clone()),
      depth: intermediate.check_and_add_depth()?,
      should_flatten: true,
      is_intermediate: false,
      path_segment: None,
    }))
  }
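  // Key layout recap (editorial comment): under a set's key, entries are
  // namespaced by the one-byte tag documented above — 0x00 for data rows,
  // 0x01 for the key-only fast-scan space, 0x02 reserved for indexes —
  // followed by the serialized primary key and a 0x00 terminator, which is
  // exactly what `enter_set_raw` builds into `dynamic_key_bytes`.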
  pub fn enter_set(self: &Arc<Self>, primary_key: &PrimitiveValue) -> Result<Arc<Self>> {
    self.enter_set_raw(&primary_key.serialize_for_key_component())
  }
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/pathwalker_test.rs:
--------------------------------------------------------------------------------

use std::{collections::HashSet, sync::Arc};

use bumpalo::Bump;

use crate::{
  data::value::PrimitiveValue,
  schema::{
    compile::{compile, CompiledSchema, FieldAnnotationList, FieldType},
    grammar::parse,
  },
  storage_plan::{planner::generate_plan_for_schema, StorageNode, StoragePlan},
};

use super::pathwalker::PathWalker;

fn print_path_examples(
  schema: &CompiledSchema,
  field: &FieldType,
  node: &StorageNode,
  walker: Arc<PathWalker>,
  path: &String,
  recursion_set: &mut HashSet<usize>,
) {
  println!("{} -> {}", path, walker.generate_key_pretty());
  match field {
    FieldType::Table(x) => {
      let specialized_ty = schema.types.get(x).unwrap();
      for (name, (field, _)) in &specialized_ty.fields {
        if recursion_set.contains(&(field as *const _ as usize)) {
          continue;
        }
        recursion_set.insert(field as *const _ as usize);
        let path = format!("{}.{}", path, name);
        let walker = walker.enter_field(&**name).unwrap();
        let node = walker.node();
        print_path_examples(schema, field, node, walker, &path, recursion_set);
        recursion_set.remove(&(field as *const _ as usize));
      }
    }
    FieldType::Primitive(_) => {}
    FieldType::Set(ty) => {
      let specialized_ty = match &**ty {
        FieldType::Table(x) => schema.types.get(x).unwrap(),
        _ => unreachable!(),
      };
      let (primary_key_name, (primary_key_ty, _)) = specialized_ty
        .fields
        .iter()
        .find(|(_, (_, ann))| ann.as_slice().is_primary())
        .unwrap();
      let primary_key_example = match primary_key_ty {
        FieldType::Primitive(x) => PrimitiveValue::example_value_for_type(*x),
        _ => unreachable!(),
      };
      let walker = walker.enter_set(&primary_key_example).unwrap();
      let path = format!(
        "{}[{} == {:?}]",
        path, primary_key_name, primary_key_example
      );
      let node = node.set.as_ref().unwrap();
      print_path_examples(schema, &**ty, &**node, walker, &path, recursion_set);
    }
  }
}

#[test]
fn basic() {
  let _ = pretty_env_logger::try_init();
  let alloc = Bump::new();
  let ast = parse(
    &alloc,
    r#"
  type Item<T> {
    @primary
    id: string,
    value: T,
  }
  type RecursiveItem<T> {
    @primary
    id: string,
    value: T,
    recursive: RecursiveItem<T>,
  }
  type Duration<T> {
    start: T,
    end: T,
  }
  export set<Item<Duration<int64>>> items;
  export set<RecursiveItem<Duration<int64>>> recursive_items;
  "#,
  )
  .unwrap();
  let schema = compile(&ast).unwrap();
  drop(ast);
  drop(alloc);
  let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap();
  println!(
    "{}",
    serde_yaml::to_string(&StoragePlan::<String>::from(&plan)).unwrap()
  );
  assert_eq!(
    PathWalker::from_export(&plan, "items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .generate_key()
      .len(),
    31
  );
  assert_eq!(
    PathWalker::from_export(&plan, "items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .enter_field("id")
      .unwrap()
      .generate_key()
      .len(),
    31
  );
  assert_eq!(
    PathWalker::from_export(&plan, "recursive_items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .enter_field("value")
      .unwrap()
      .generate_key()
      .len(),
    31
  );
  assert_eq!(
    PathWalker::from_export(&plan, "recursive_items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .enter_field("recursive")
      .unwrap()
      .generate_key()
      .len(),
    31
  );
  assert_eq!(
    PathWalker::from_export(&plan, "recursive_items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .enter_field("recursive")
      .unwrap()
      .enter_field("value")
      .unwrap()
      .generate_key()
      .len(),
    43
  );
  assert_eq!(
    PathWalker::from_export(&plan, "recursive_items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .enter_field("recursive")
      .unwrap()
      .enter_field("value")
      .unwrap()
      .enter_field("start")
      .unwrap()
      .generate_key()
      .len(),
    43
  );
  assert_eq!(
    PathWalker::from_export(&plan, "recursive_items")
      .unwrap()
      .enter_set(&PrimitiveValue::String("test".into()))
      .unwrap()
      .enter_field("recursive")
      .unwrap()
      .enter_field("recursive")
      .unwrap()
      .enter_field("value")
      .unwrap()
      .enter_field("start")
      .unwrap()
      .generate_key()
      .len(),
    55
  );
  for (export_name, export_ty) in &schema.exports {
    print_path_examples(
      &schema,
      export_ty,
      plan.nodes.get(&**export_name).unwrap(),
      PathWalker::from_export(&plan, &**export_name).unwrap(),
      &format!("{}", export_name),
      &mut HashSet::new(),
    );
  }
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/treewalker/asm/ast.rs:
--------------------------------------------------------------------------------

use bumpalo::collections::vec::Vec;

use crate::schema::compile::PrimitiveType;

pub struct Root<'a> {
  pub graphs: Vec<'a, &'a Graph<'a>>,
  pub type_aliases: Vec<'a, &'a TypeAlias<'a>>,
}

pub struct TypeAlias<'a> {
  pub name: &'a str,
  pub ty: Type<'a>,
}

pub enum Item<'a> {
  Graph(&'a Graph<'a>),
  TypeAlias(&'a TypeAlias<'a>),
}

pub struct Graph<'a> {
  pub name: &'a str,
  pub exported: bool,
  pub params: Vec<'a, (&'a str, Option<Type<'a>>)>,
  pub return_type: Option<Type<'a>>,
  pub stmts: Vec<'a, Stmt<'a>>,
}

pub struct Stmt<'a> {
  pub location: usize,
  pub kind: StmtKind<'a>,
}

pub enum StmtKind<'a> {
  Return {
    value: Expr<'a>,
  },
  Node {
    name: Option<&'a str>,
    value: Expr<'a>,
  },
  If {
    precondition: Expr<'a>,
    if_body: Vec<'a, Stmt<'a>>,
    else_body: Option<Vec<'a, Stmt<'a>>>,
  },
  Throw {
    value: Expr<'a>,
  },
}

pub struct Expr<'a> {
  pub location_start:
usize, 53 | pub location_end: usize, 54 | pub kind: ExprKind<'a>, 55 | } 56 | 57 | pub enum Type<'a> { 58 | Table { 59 | name: &'a str, 60 | params: Vec<'a, Type<'a>>, 61 | }, 62 | Primitive(PrimitiveType), 63 | Set(&'a Type<'a>), 64 | List(&'a Type<'a>), 65 | Map(Vec<'a, (&'a str, Type<'a>)>), 66 | Bool, 67 | Schema, 68 | } 69 | 70 | pub enum ExprKind<'a> { 71 | LoadConst(Literal<'a>), 72 | BuildTable(Type<'a>, &'a Expr<'a>), 73 | BuildSet(&'a Expr<'a>), 74 | CreateMap, 75 | GetField(&'a str, &'a Expr<'a>), 76 | GetSetElement(&'a Expr<'a>, &'a Expr<'a>), 77 | InsertIntoMap(&'a str, &'a Expr<'a>, &'a Expr<'a>), 78 | InsertIntoTable(&'a str, &'a Expr<'a>, &'a Expr<'a>), 79 | InsertIntoSet(&'a Expr<'a>, &'a Expr<'a>), 80 | DeleteFromSet(&'a Expr<'a>, &'a Expr<'a>), 81 | DeleteFromMap(&'a str, &'a Expr<'a>), 82 | Eq(&'a Expr<'a>, &'a Expr<'a>), 83 | Ne(&'a Expr<'a>, &'a Expr<'a>), 84 | And(&'a Expr<'a>, &'a Expr<'a>), 85 | Or(&'a Expr<'a>, &'a Expr<'a>), 86 | Not(&'a Expr<'a>), 87 | Select(&'a Expr<'a>, &'a Expr<'a>), 88 | Node(&'a str), 89 | IsPresent(&'a Expr<'a>), 90 | IsNull(&'a Expr<'a>), 91 | OrElse(&'a Expr<'a>, &'a Expr<'a>), 92 | Call(&'a str, Vec<'a, Expr<'a>>), 93 | Add(&'a Expr<'a>, &'a Expr<'a>), 94 | Sub(&'a Expr<'a>, &'a Expr<'a>), 95 | CreateList(Type<'a>), 96 | Reduce(&'a str, &'a Expr<'a>, &'a Expr<'a>, &'a Expr<'a>), 97 | RangeReduce( 98 | &'a str, 99 | &'a Expr<'a>, 100 | &'a Expr<'a>, 101 | &'a Expr<'a>, 102 | &'a Expr<'a>, 103 | &'a Expr<'a>, 104 | ), 105 | Prepend(&'a Expr<'a>, &'a Expr<'a>), 106 | Pop(&'a Expr<'a>), 107 | Head(&'a Expr<'a>), 108 | } 109 | 110 | pub enum Literal<'a> { 111 | Null(Type<'a>), 112 | Bool(bool), 113 | Integer(i64), 114 | HexBytes(&'a [u8]), 115 | String(&'a str), 116 | EmptySet(Type<'a>), 117 | } 118 | -------------------------------------------------------------------------------- /rdb-analyzer/src/data/treewalker/asm/language.lalrpop: -------------------------------------------------------------------------------- 1 | use super::ast::*; 2 | use super::TwAsmError; 3 | use super::state::State; 4 | use lalrpop_util::ParseError; 5 | use bumpalo::collections::vec::Vec as Bvec; 6 | use crate::schema::compile::PrimitiveType; 7 | 8 | grammar(state: &mut State<'input>); 9 | 10 | extern { 11 | type Error = TwAsmError; 12 | } 13 | 14 | pub Root: Root<'input> = { 15 | Comment* => Root { 16 | graphs: Bvec::from_iter_in(items.iter().filter_map(|x| match x { 17 | Item::Graph(x) => Some(*x), 18 | _ => None, 19 | }), &state.alloc), 20 | type_aliases: Bvec::from_iter_in(items.iter().filter_map(|x| match x { 21 | Item::TypeAlias(x) => Some(*x), 22 | _ => None, 23 | }), &state.alloc), 24 | } 25 | } 26 | 27 | Item: Item<'input> = { 28 | => Item::Graph(state.alloc.alloc(g)), 29 | => Item::TypeAlias(state.alloc.alloc(t)), 30 | } 31 | 32 | TypeAlias: TypeAlias<'input> = { 33 | Token<"type"> Token<"="> Token<";"> => TypeAlias { name, ty }, 34 | } 35 | 36 | Graph: Graph<'input> = { 37 | ?> Token<"graph"> 38 | Token<"("> )?), ",">> Token<")"> 39 | )?> 40 | Token<"{"> Token<"}"> => Graph { 41 | name, 42 | exported: exp.is_some(), 43 | params: Bvec::from_iter_in(params.into_iter().map(|x| (x.0, x.1)), &state.alloc), 44 | return_type, 45 | stmts: Bvec::from_iter_in(stmts.into_iter().map(|x| Stmt { 46 | location: x.0, 47 | kind: x.1, 48 | }), &state.alloc) 49 | } 50 | } 51 | 52 | Type: Type<'input> = { 53 | Token<"schema"> => Type::Schema, 54 | Token<"int64"> => Type::Primitive(PrimitiveType::Int64), 55 | Token<"string"> => Type::Primitive(PrimitiveType::String), 56 | 
Token<"bytes"> => Type::Primitive(PrimitiveType::Bytes), 57 | Token<"bool"> => Type::Bool, 58 | Token<"set"> Token<"<"> Token<">"> => Type::Set(state.alloc.alloc(ty)), 59 | Token<"list"> Token<"<"> Token<">"> => Type::List(state.alloc.alloc(ty)), 60 | Token<"map"> Token<"{"> Type), Token<",">>> Token<"}"> => Type::Map(Bvec::from_iter_in( 61 | members.into_iter().map(|x| (x.0, x.2)), 62 | &state.alloc 63 | )), 64 | >> Token<">">)?> => Type::Table { 65 | name, 66 | params: Bvec::from_iter_in(params.unwrap_or_default().into_iter(), &state.alloc), 67 | } 68 | } 69 | 70 | Stmt: StmtKind<'input> = { 71 | Token<"="> Token<";"> => StmtKind::Node { 72 | name: Some(name), 73 | value, 74 | }, 75 | Token<"return"> Token<";"> => StmtKind::Return { 76 | value, 77 | }, 78 | Token<"throw"> Token<";"> => StmtKind::Throw { 79 | value, 80 | }, 81 | Token<";"> => StmtKind::Node { 82 | name: None, 83 | value, 84 | }, 85 | Token<"if"> 86 | Token<"{"> Token<"}"> 87 | Token<"{"> Token<"}">)?> => StmtKind::If { 88 | precondition, 89 | if_body, 90 | else_body, 91 | } 92 | } 93 | 94 | StmtList: Bvec<'input, Stmt<'input>> = { 95 | => Bvec::from_iter_in(stmts.into_iter().map(|x| Stmt { 96 | location: x.0, 97 | kind: x.1, 98 | }), &state.alloc), 99 | } 100 | 101 | ExprRef: &'input Expr<'input> = { 102 | => state.alloc.alloc(e), 103 | } 104 | 105 | Expr: Expr<'input> = { 106 | => Expr { location_start, location_end, kind }, 107 | ExprL2, 108 | } 109 | 110 | ExprKind: ExprKind<'input> = { 111 | Token<"&&"> => ExprKind::And(x, y), 112 | Token<"||"> => ExprKind::Or(x, y), 113 | } 114 | 115 | ExprL2Ref: &'input Expr<'input> = { 116 | => state.alloc.alloc(e), 117 | } 118 | 119 | ExprL2: Expr<'input> = { 120 | => Expr { location_start, location_end, kind }, 121 | ExprL3, 122 | } 123 | 124 | ExprKindL2: ExprKind<'input> = { 125 | Token<"=="> => ExprKind::Eq(x, y), 126 | Token<"!="> => ExprKind::Ne(x, y), 127 | } 128 | 129 | ExprL3Ref: &'input Expr<'input> = { 130 | => state.alloc.alloc(e), 131 | } 132 | 133 | ExprL3: Expr<'input> = { 134 | => Expr { location_start, location_end, kind }, 135 | ExprL3Right, 136 | } 137 | 138 | ExprKindL3: ExprKind<'input> = { 139 | Token<"+"> => ExprKind::Add(x, y), 140 | Token<"-"> => ExprKind::Sub(x, y), 141 | Token<"??"> => ExprKind::OrElse(x, y), 142 | } 143 | 144 | ExprL3RightRef: &'input Expr<'input> = { 145 | => state.alloc.alloc(e), 146 | } 147 | 148 | ExprL3Right: Expr<'input> = { 149 | => Expr { location_start, location_end, kind }, 150 | ExprL4, 151 | } 152 | 153 | ExprKindL3Right: ExprKind<'input> = { 154 | Token<":"> => ExprKind::Prepend(x, y), 155 | } 156 | 157 | ExprL4Ref: &'input Expr<'input> = { 158 | => state.alloc.alloc(e), 159 | } 160 | 161 | ExprL4: Expr<'input> = { 162 | => Expr { location_start, location_end, kind }, 163 | ExprL5, 164 | } 165 | 166 | TrailingExprRef: &'input Expr<'input> = { 167 | => e, 168 | Token<"$"> => e, 169 | } 170 | 171 | ExprKindL4: ExprKind<'input> = { 172 | Token<"build_table"> Token<"("> Token<")"> => ExprKind::BuildTable(x, y), 173 | Token<"build_set"> => ExprKind::BuildSet(x), 174 | Token<"point_get"> => ExprKind::GetSetElement(x, y), 175 | Token<"m_insert"> Token<"("> Token<")"> => ExprKind::InsertIntoMap(x, y, z), 176 | Token<"t_insert"> Token<"("> Token<")"> => ExprKind::InsertIntoTable(x, y, z), 177 | Token<"s_insert"> => ExprKind::InsertIntoSet(y, z), 178 | Token<"m_delete"> Token<"("> Token<")"> => ExprKind::DeleteFromMap(x, y), 179 | Token<"s_delete"> => ExprKind::DeleteFromSet(y, z), 180 | Token<"select"> => ExprKind::Select(x, y), 181 
| Token<"!"> => ExprKind::Not(x), 182 | Token<"is_present"> => ExprKind::IsPresent(x), 183 | Token<"is_null"> => ExprKind::IsNull(x), 184 | Token<"call"> Token<"("> Token<")"> Token<"["> > Token<"]"> => ExprKind::Call(name, Bvec::from_iter_in(params.into_iter(), &state.alloc)), 185 | Token<"reduce"> Token<"("> Token<")"> 186 | Token<"to"> )?> 187 | => if let Some(range) = range { 188 | ExprKind::RangeReduce( 189 | name, range.0, range.1, subgraph_param, reduce_init, list_or_set, 190 | ) 191 | } else { 192 | ExprKind::Reduce( 193 | name, subgraph_param, reduce_init, list_or_set, 194 | ) 195 | }, 196 | Token<"pop"> => ExprKind::Pop(x), 197 | Token<"head"> => ExprKind::Head(x), 198 | } 199 | 200 | ExprL5Ref: &'input Expr<'input> = { 201 | => state.alloc.alloc(e), 202 | } 203 | 204 | ExprL5: Expr<'input> = { 205 | => Expr { location_start, location_end, kind }, 206 | Token<"("> Token<")"> => e, 207 | } 208 | 209 | ExprKindL5: ExprKind<'input> = { 210 | => ExprKind::LoadConst(x), 211 | Token<"create_map"> => ExprKind::CreateMap, 212 | Token<"create_list"> Token<"("> Token<")"> => ExprKind::CreateList(ty), 213 | => ExprKind::Node(x), 214 | Token<"."> => ExprKind::GetField(x, y), 215 | } 216 | 217 | Identifier: &'input str = { 218 | > => state.resolve_str(s), 219 | 220 | // Workaround for identifiers that conflict with keywords 221 | > => state.resolve_str(s.strip_prefix("`").unwrap().strip_suffix("`").unwrap()), 222 | } 223 | 224 | Literal: Literal<'input> = { 225 | > =>? s.parse().map(Literal::Integer).map_err(|_| ParseError::User { 226 | error: TwAsmError::InvalidLiteral, 227 | }), 228 | > =>? i64::from_str_radix(s.strip_prefix("0x").unwrap(), 16).map(Literal::Integer).map_err(|_| ParseError::User { 229 | error: TwAsmError::InvalidLiteral, 230 | }), 231 | > =>? i64::from_str_radix(s.strip_prefix("0o").unwrap(), 8).map(Literal::Integer).map_err(|_| ParseError::User { 232 | error: TwAsmError::InvalidLiteral, 233 | }), 234 | > =>? i64::from_str_radix(s.strip_prefix("0b").unwrap(), 2).map(Literal::Integer).map_err(|_| ParseError::User { 235 | error: TwAsmError::InvalidLiteral, 236 | }), 237 | => Literal::String(state.resolve_str(&s)), 238 | => Literal::HexBytes(s), 239 | Token<"null"> Token<"<"> Token<">"> => Literal::Null(ty), 240 | Token<"true"> => Literal::Bool(true), 241 | Token<"false"> => Literal::Bool(false), 242 | Token<"empty_set"> Token<"<"> Token<">"> => Literal::EmptySet(member_ty), 243 | } 244 | 245 | StringLit: String = { 246 | > =>? serde_json::from_str::(s) 247 | .map_err(|_| ParseError::User { 248 | error: TwAsmError::InvalidLiteral, 249 | }), 250 | } 251 | 252 | HexBytesLit: &'input [u8] = { 253 | > =>? serde_json::from_str::(s.strip_prefix("h\"").unwrap().strip_suffix("\"").unwrap()) 254 | .map_err(|_| ParseError::User { 255 | error: TwAsmError::InvalidLiteral, 256 | }) 257 | .and_then(|x| hex::decode(&x) 258 | .map_err(|_| ParseError::User { 259 | error: TwAsmError::InvalidLiteral, 260 | }) 261 | .map(|x| state.alloc.alloc_slice_copy(&x) as &[u8]) 262 | ), 263 | } 264 | 265 | ZeroOrMore: Vec = { 266 | ?> => x.unwrap_or_default() 267 | } 268 | 269 | OneOrMore: Vec = { 270 | Delim? 
=> {
    let mut items = vec![i1];
    items.extend(i2.into_iter().map(|e| e.1));
    items
  }
}

Token: I = {
  => s,
}

Comment: () = {
  r"//[^\n\r]*[\n\r]*" => { },
  r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { },
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/treewalker/asm/mod.rs:
--------------------------------------------------------------------------------

use lalrpop_util::lalrpop_mod;

mod ast;
pub mod codegen;
mod state;

#[cfg(test)]
mod asm_test;

lalrpop_mod!(language, "/data/treewalker/asm/language.rs");

use thiserror::Error;

#[derive(Error, Debug)]
pub enum TwAsmError {
  #[error("invalid literal")]
  InvalidLiteral,

  #[error("type unsupported in table")]
  TypeUnsupportedInTable,

  #[error("node not found: {0}")]
  NodeNotFound(String),

  #[error("identifier not found: {0}")]
  IdentifierNotFound(String),

  #[error("duplicate return")]
  DuplicateReturn,

  #[error("param not found: {0}")]
  ParamNotFound(String),

  #[error("duplicate param: {0}")]
  DuplicateParam(String),

  #[error("duplicate graph: {0}")]
  DuplicateGraph(String),

  #[error("duplicate node name: {0}")]
  DuplicateNodeName(String),

  #[error("duplicate type alias: {0}")]
  DuplicateTypeAlias(String),

  #[error("graph not found: {0}")]
  GraphNotFound(String),
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/treewalker/asm/state.rs:
--------------------------------------------------------------------------------

use std::collections::HashSet;

use bumpalo::Bump;

pub struct State<'a> {
  pub alloc: &'a Bump,
  pub string_table: HashSet<&'a str>,
}

impl<'a> State<'a> {
  pub fn resolve_str(&mut self, s: &str) -> &'a str {
    match self.string_table.get(s) {
      Some(x) => x,
      None => {
        let s = self.alloc.alloc_str(s);
        self.string_table.insert(s);
        s
      }
    }
  }
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/treewalker/bytecode.rs:
--------------------------------------------------------------------------------

use serde::{Deserialize, Serialize};
use smallvec::{smallvec, SmallVec};

use super::vm_value::{VmConst, VmType};

#[derive(Serialize, Deserialize, Default, Debug)]
pub struct TwScript {
  pub graphs: Vec<TwGraph>,
  pub entry: u32,
  pub consts: Vec<VmConst>,
  pub idents: Vec<String>,
  pub types: Vec<VmType<String>>,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct TwGraph {
  /// Name.
  pub name: String,

  /// Whether this is exported.
  pub exported: bool,

  /// Topologically sorted nodes.
  ///
  /// (node, in_edges, precondition)
  pub nodes: Vec<(TwGraphNode, Vec<u32>, Option<u32>)>,

  /// The output value of this graph.
  pub output: Option<u32>,

  /// Param types.
  pub param_types: Vec<u32>,

  /// Output type.
  pub output_type: Option<u32>,
}
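// Editorial example: one entry in the `nodes` vector above, as built by the
// tests in exec_test.rs —
//
//   (TwGraphNode::GetField(1), vec![0], None)
//
// reads as "apply `GetField` with ident #1 to the output of node 0, with no
// precondition". In-edge indices always point at earlier nodes, which is
// what keeps the vector topologically sorted.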
#[derive(Copy, Clone, Serialize, Deserialize, Debug)]
pub enum TwGraphNode {
  /// T
  ///
  /// Const param: param_index
  LoadParam(u32),

  /// T
  ///
  /// Const param: const_index
  LoadConst(u32),

  /// Map -> Table
  ///
  /// Const param: ident (table_type)
  BuildTable(u32),

  /// List -> Set
  BuildSet,

  /// Map
  CreateMap,

  /// List
  ///
  /// Const param: ident (table_type)
  CreateList(u32),

  /// T -> List -> List
  PrependToList,

  /// List -> List
  PopFromList,

  /// List -> T
  ListHead,

  /// If has_range: U -> P -> T::PrimaryKeyValue (start_inclusive) -> T::PrimaryKeyValue (end_exclusive) -> (List | Set) -> P
  /// Otherwise: U -> P -> (List | Set) -> P
  ///
  /// Subgraph: (U, P, T) -> P
  ///
  /// Const param: (subgraph_index, has_range)
  Reduce(u32, bool),

  /// (Map | Table) -> T
  ///
  /// Const param: ident
  GetField(u32),

  /// T::PrimaryKeyValue -> Set -> T
  ///
  /// Point-get on a set.
  GetSetElement,

  /// U (subgraph parameter) -> Set -> T
  ///
  /// Filter the set with the given subgraph.
  ///
  /// Const param: subgraph_index
  FilterSet(u32),

  /// T -> Map -> Map
  ///
  /// Const param: ident
  InsertIntoMap(u32),

  /// T -> Table -> ()
  ///
  /// This is an effect node.
  ///
  /// Const param: ident
  InsertIntoTable(u32),

  /// T -> Set -> ()
  ///
  /// This is an effect node.
  InsertIntoSet,

  /// T::PrimaryKeyValue -> Set -> ()
  ///
  /// Point-delete on a set.
  /// This is an effect node.
  DeleteFromSet,

  /// Map -> Map
  ///
  /// Const param: ident
  DeleteFromMap(u32),

  /// T -> T -> Bool
  Eq,

  /// T -> T -> Bool
  Ne,

  /// Bool -> Bool -> Bool
  And,

  /// Bool -> Bool -> Bool
  Or,

  /// Bool -> Bool
  Not,

  /// Fires if either of its parameters is satisfied.
  ///
  /// T -> T -> T
  Select,

  /// True if this table or set is actually present.
  ///
  /// Always true for fresh values, and true for resident values if its storage key exists.
  ///
  /// T -> Bool
  IsPresent,

  /// Whether this value is null.
  ///
  /// T -> Bool
  IsNull,

  /// T -> T
  Nop,

  /// Call subgraph.
  ///
  /// T* -> R
  ///
  /// Const param: subgraph index
  Call(u32),

  /// (int64 -> int64 -> int64) | (double -> double -> double) | (string -> string -> string)
  Add,

  /// (int64 -> int64 -> int64) | (double -> double -> double)
  Sub,

  /// string -> !
  Throw,
}

impl TwGraphNode {
  pub fn is_select(&self) -> bool {
    match self {
      Self::Select => true,
      _ => false,
    }
  }

  pub fn subgraph_references(&self) -> SmallVec<[u32; 1]> {
    match self {
      Self::FilterSet(x) => smallvec![*x],
      Self::Call(x) => smallvec![*x],
      Self::Reduce(x, _) => smallvec![*x],
      _ => smallvec![],
    }
  }

  pub fn is_optional_chained(&self) -> bool {
    match self {
      TwGraphNode::IsNull
      | TwGraphNode::Nop
      | TwGraphNode::InsertIntoMap(_)
      | TwGraphNode::DeleteFromMap(_)
      | TwGraphNode::Reduce(_, _)
      | TwGraphNode::Throw => false,
      _ => true,
    }
  }
}

--------------------------------------------------------------------------------
/rdb-analyzer/src/data/treewalker/exec_test.rs:
--------------------------------------------------------------------------------

use std::sync::Arc;

use bumpalo::Bump;

use crate::{
  data::{
    treewalker::{
      bytecode::{TwGraph, TwGraphNode, TwScript},
      exec::{generate_root_map, Executor},
      typeck::GlobalTyckContext,
      vm::TwVm,
      vm_value::{VmConst, VmType},
    },
    value::PrimitiveValue,
  },
  schema::{
    compile::{compile, PrimitiveType},
    grammar::parse,
  },
  storage_plan::{planner::generate_plan_for_schema, StoragePlan},
  test_util::create_kv,
};

use super::vm_value::VmValue;

#[tokio::test]
async fn basic_exec() {
  let _ = pretty_env_logger::try_init();
  let alloc = Bump::new();
  let ast = parse(
    &alloc,
    r#"
  type Item {
    id: string,
    name: string,
    duration: Duration<int64>,
  }
  type Duration<T> {
    start: T,
    end: T,
  }
  export Item some_item;
  "#,
  )
  .unwrap();
  let schema = compile(&ast).unwrap();
  drop(ast);
  drop(alloc);
  let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap();
  let script = TwScript {
    graphs: vec![TwGraph {
      name: "".into(),
      exported: false,
      nodes: vec![
        (TwGraphNode::LoadParam(0), vec![], None),           // 0
        (TwGraphNode::GetField(0), vec![0], None),           // 1
        (TwGraphNode::GetField(1), vec![1], None),           // 2
        (TwGraphNode::GetField(2), vec![1], None),           // 3
        (TwGraphNode::GetField(3), vec![3], None),           // 4
        (TwGraphNode::CreateMap, vec![], None),              // 5
        (TwGraphNode::InsertIntoMap(4), vec![2, 5], None),   // 6
        (TwGraphNode::InsertIntoMap(5), vec![4, 6], None),   // 7
        (TwGraphNode::LoadConst(0), vec![], None),           // 8
        (TwGraphNode::InsertIntoTable(1), vec![8, 1], None), // 9
      ],
      output: Some(7),
      output_type: Some(1),
      param_types: vec![0],
    }],
    entry: 0,
    consts: vec![VmConst::Primitive(PrimitiveValue::String(
      "test_name".into(),
    ))],
    idents: vec![
      "some_item".into(),
      "name".into(),
      "duration".into(),
      "start".into(),
      "field_1".into(),
      "field_2".into(),
    ],
    types: vec![
      VmType::Schema,
      VmType::Map(
        vec![
          (
            "field_1".to_string(),
            VmType::Primitive(PrimitiveType::String),
          ),
          (
            "field_2".to_string(),
            VmType::Primitive(PrimitiveType::Int64),
          ),
        ]
        .into_iter()
        .collect(),
      ),
      VmType::Primitive(PrimitiveType::Int64),
    ],
  };
  let vm = TwVm::new(&schema, &plan, &script).unwrap();
  let type_info = GlobalTyckContext::new(&vm).unwrap().typeck().unwrap();
  let kv = create_kv();
  let mut executor = Executor::new(&vm, &*kv, &type_info);
  let output = executor
    .run_graph(0, &[Arc::new(generate_root_map(&schema, &plan).unwrap())])
    .await
    .unwrap();
  println!("{:?}", output);
  let output = output.unwrap();
  match &*output {
    VmValue::Map(x) => {
      match &**x.elements.get("field_1").unwrap() {
        VmValue::Null(VmType::Primitive(PrimitiveType::String)) => {}
        _ => unreachable!(),
      }
      match &**x.elements.get("field_2").unwrap() {
        VmValue::Null(VmType::Primitive(PrimitiveType::Int64)) => {}
        _ => unreachable!(),
      }
    }
    _ => unreachable!(),
  }

  let script = TwScript {
    graphs: vec![TwGraph {
      name: "".into(),
      exported: false,
      nodes: vec![
        (TwGraphNode::LoadParam(0), vec![], None), // 0
        (TwGraphNode::GetField(0), vec![0], None), // 1
        (TwGraphNode::GetField(1), vec![1], None), // 2
      ],
      output: Some(2),
      output_type: Some(1),
      param_types: vec![0],
    }],
    entry: 0,
    consts: vec![],
    idents: vec!["some_item".into(), "name".into()],
    types: vec![VmType::Schema, VmType::Primitive(PrimitiveType::String)],
  };
  let vm = TwVm::new(&schema, &plan, &script).unwrap();
  let type_info = GlobalTyckContext::new(&vm).unwrap().typeck().unwrap();
  let mut executor = Executor::new(&vm, &*kv, &type_info);
  let output = executor
    .run_graph(0, &[Arc::new(generate_root_map(&schema, &plan).unwrap())])
    .await
    .unwrap();
  println!("{:?}", output);
  match &*output.unwrap() {
    VmValue::Primitive(PrimitiveValue::String(x)) if x == "test_name" => {}
    _ => unreachable!(),
  };
}

#[tokio::test]
async fn set_queries() {
  let _ = pretty_env_logger::try_init();
  let alloc = Bump::new();
  let ast = parse(
    &alloc,
    r#"
  type Item {
    @primary
    id: string,
    name: string,
  }
  export set<Item> some_item;
  "#,
  )
  .unwrap();
  let schema = compile(&ast).unwrap();
  drop(ast);
  drop(alloc);
  let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap();
  println!(
    "{}",
    serde_yaml::to_string(&StoragePlan::<String>::from(&plan)).unwrap()
  );
  let script = TwScript {
    graphs: vec![TwGraph {
      name: "".into(),
      exported: false,
      nodes: vec![
        (TwGraphNode::LoadParam(0), vec![], None),         // 0
        (TwGraphNode::LoadConst(0), vec![], None),         // 1
        (TwGraphNode::LoadConst(1), vec![], None),         // 2
        (TwGraphNode::CreateMap, vec![], None),            // 3
        (TwGraphNode::InsertIntoMap(1), vec![1, 3], None), // 4
        (TwGraphNode::InsertIntoMap(2), vec![2, 4], None), // 5
        (TwGraphNode::BuildTable(3), vec![5], None),       // 6
        (TwGraphNode::GetField(0), vec![0], None),         // 7
        (TwGraphNode::InsertIntoSet, vec![6, 7], None),    // 8
      ],
      output: None,
      output_type: None,
      param_types: vec![0],
    }],
    entry: 0,
    consts: vec![
      VmConst::Primitive(PrimitiveValue::String("test_id".into())),
      VmConst::Primitive(PrimitiveValue::String("test_name".into())),
    ],
    idents: vec![
      "some_item".into(),
      "id".into(),
      "name".into(),
      "Item<>".into(),
    ],
    types: vec![VmType::Schema],
  };
  let vm = TwVm::new(&schema, &plan, &script).unwrap();
TwVm::new(&schema, &plan, &script).unwrap(); 215 | let type_info = GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 216 | 217 | let kv = create_kv(); 218 | let mut executor = Executor::new(&vm, &*kv, &type_info); 219 | executor 220 | .run_graph(0, &[Arc::new(generate_root_map(&schema, &plan).unwrap())]) 221 | .await 222 | .unwrap(); 223 | 224 | let script = TwScript { 225 | graphs: vec![TwGraph { 226 | name: "".into(), 227 | exported: false, 228 | nodes: vec![ 229 | (TwGraphNode::LoadParam(0), vec![], None), // 0 230 | (TwGraphNode::LoadConst(0), vec![], None), // 1 231 | (TwGraphNode::GetField(0), vec![0], None), // 2 232 | (TwGraphNode::GetSetElement, vec![1, 2], None), // 3 233 | (TwGraphNode::GetField(1), vec![3], None), // 4 234 | ], 235 | output: Some(4), 236 | output_type: Some(1), 237 | param_types: vec![0], 238 | }], 239 | entry: 0, 240 | consts: vec![VmConst::Primitive(PrimitiveValue::String("test_id".into()))], 241 | idents: vec!["some_item".into(), "name".into()], 242 | types: vec![VmType::Schema, VmType::Primitive(PrimitiveType::String)], 243 | }; 244 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 245 | let type_info = GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 246 | let mut executor = Executor::new(&vm, &*kv, &type_info); 247 | let output = executor 248 | .run_graph(0, &[Arc::new(generate_root_map(&schema, &plan).unwrap())]) 249 | .await 250 | .unwrap(); 251 | println!("{:?}", output); 252 | match &*output.unwrap() { 253 | VmValue::Primitive(PrimitiveValue::String(x)) if x == "test_name" => {} 254 | _ => unreachable!(), 255 | }; 256 | 257 | let script = TwScript { 258 | graphs: vec![TwGraph { 259 | name: "".into(), 260 | exported: false, 261 | nodes: vec![ 262 | (TwGraphNode::LoadParam(0), vec![], None), // 0 263 | (TwGraphNode::LoadConst(0), vec![], None), // 1 264 | (TwGraphNode::GetField(0), vec![0], None), // 2 265 | (TwGraphNode::DeleteFromSet, vec![1, 2], None), // 3 266 | ], 267 | output: None, 268 | output_type: None, 269 | param_types: vec![0], 270 | }], 271 | entry: 0, 272 | consts: vec![VmConst::Primitive(PrimitiveValue::String("test_id".into()))], 273 | idents: vec!["some_item".into(), "name".into()], 274 | types: vec![ 275 | VmType::::from(&schema), 276 | VmType::Primitive(PrimitiveType::String), 277 | ], 278 | }; 279 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 280 | let type_info = GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 281 | let mut executor = Executor::new(&vm, &*kv, &type_info); 282 | executor 283 | .run_graph(0, &[Arc::new(generate_root_map(&schema, &plan).unwrap())]) 284 | .await 285 | .unwrap(); 286 | 287 | let script = TwScript { 288 | graphs: vec![TwGraph { 289 | name: "".into(), 290 | exported: false, 291 | nodes: vec![ 292 | (TwGraphNode::LoadParam(0), vec![], None), // 0 293 | (TwGraphNode::LoadConst(0), vec![], None), // 1 294 | (TwGraphNode::GetField(0), vec![0], None), // 2 295 | (TwGraphNode::GetSetElement, vec![1, 2], None), // 3 296 | (TwGraphNode::GetField(1), vec![3], None), // 4 297 | ], 298 | output: Some(4), 299 | output_type: Some(1), 300 | param_types: vec![0], 301 | }], 302 | entry: 0, 303 | consts: vec![VmConst::Primitive(PrimitiveValue::String("test_id".into()))], 304 | idents: vec!["some_item".into(), "name".into()], 305 | types: vec![VmType::Schema, VmType::Primitive(PrimitiveType::String)], 306 | }; 307 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 308 | let type_info = GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 309 | let mut executor = 
Executor::new(&vm, &*kv, &type_info); 310 | let output = executor 311 | .run_graph(0, &[Arc::new(generate_root_map(&schema, &plan).unwrap())]) 312 | .await 313 | .unwrap(); 314 | println!("{:?}", output); 315 | match &*output.unwrap() { 316 | VmValue::Null(_) => {} 317 | _ => unreachable!(), 318 | }; 319 | } 320 | -------------------------------------------------------------------------------- /rdb-analyzer/src/data/treewalker/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod asm; 2 | pub mod bytecode; 3 | pub mod exec; 4 | pub mod serialize; 5 | pub mod typeck; 6 | pub mod vm; 7 | pub mod vm_value; 8 | 9 | #[cfg(test)] 10 | mod typeck_test; 11 | 12 | #[cfg(test)] 13 | mod exec_test; 14 | -------------------------------------------------------------------------------- /rdb-analyzer/src/data/treewalker/serialize.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::BTreeMap, sync::Arc}; 2 | 3 | use anyhow::Result; 4 | 5 | use crate::{ 6 | data::{ 7 | treewalker::vm_value::{VmListValue, VmMapValue}, 8 | value::PrimitiveValue, 9 | }, 10 | schema::compile::PrimitiveType, 11 | }; 12 | 13 | use super::vm_value::{VmType, VmValue}; 14 | use serde::{Deserialize, Serialize}; 15 | use thiserror::Error; 16 | 17 | #[derive(Error, Debug)] 18 | pub enum SerializeError { 19 | #[error("unserializable value")] 20 | Unserializable, 21 | 22 | #[error("type mismatch")] 23 | TypeMismatch, 24 | 25 | #[error("type mismatch during unwrapping")] 26 | UnwrapTypeMismatch, 27 | 28 | #[error("unexpected null value")] 29 | UnexpectedNullValue, 30 | 31 | #[error("missing required field: `{0}`")] 32 | MissingRequiredField(String), 33 | } 34 | 35 | #[derive(Serialize, Deserialize, Debug)] 36 | #[serde(untagged)] 37 | pub enum SerializedVmValue { 38 | String(String), 39 | Bool(bool), 40 | Bytes(Vec), 41 | Int64(i64), 42 | Double(f64), 43 | Null(Option), 44 | Tagged(TaggedVmValue), 45 | } 46 | 47 | #[derive(Serialize, Deserialize, Debug)] 48 | pub enum TaggedVmValue { 49 | M(BTreeMap), 50 | L(Vec), 51 | } 52 | 53 | #[derive(Default, Debug, Clone)] 54 | pub struct VmValueEncodeConfig { 55 | pub enable_bytes: bool, 56 | pub enable_int64: bool, 57 | pub enable_double: bool, 58 | } 59 | 60 | #[derive(Serialize, Deserialize, Debug)] 61 | pub enum Never {} 62 | 63 | impl SerializedVmValue { 64 | pub fn try_unwrap_bool(&self) -> Result { 65 | match self { 66 | Self::Bool(x) => Ok(*x), 67 | _ => Err(SerializeError::UnwrapTypeMismatch.into()), 68 | } 69 | } 70 | pub fn try_unwrap_list(&self) -> Result<&Vec> { 71 | match self { 72 | Self::Tagged(TaggedVmValue::L(x)) => Ok(x), 73 | _ => Err(SerializeError::UnwrapTypeMismatch.into()), 74 | } 75 | } 76 | pub fn check_nonnull(&self) -> Result<()> { 77 | match self { 78 | Self::Null(_) => Err(SerializeError::UnexpectedNullValue.into()), 79 | _ => Ok(()), 80 | } 81 | } 82 | pub fn try_unwrap_map( 83 | &self, 84 | required_fields: &[&str], 85 | ) -> Result<&BTreeMap> { 86 | match self { 87 | Self::Tagged(TaggedVmValue::M(x)) => { 88 | for f in required_fields { 89 | if !x.contains_key(*f) { 90 | return Err(SerializeError::MissingRequiredField(f.to_string()).into()); 91 | } 92 | } 93 | Ok(x) 94 | } 95 | _ => Err(SerializeError::UnwrapTypeMismatch.into()), 96 | } 97 | } 98 | pub fn try_unwrap_string(&self) -> Result<&String> { 99 | match self { 100 | Self::String(x) => Ok(x), 101 | _ => Err(SerializeError::UnwrapTypeMismatch.into()), 102 | } 103 | } 104 | pub fn try_unwrap_bytes(&self) 
-> Result<&Vec> { 105 | match self { 106 | Self::Bytes(x) => Ok(x), 107 | _ => Err(SerializeError::UnwrapTypeMismatch.into()), 108 | } 109 | } 110 | pub fn try_unwrap_int64(&self) -> Result { 111 | match self { 112 | Self::Int64(x) => Ok(*x), 113 | _ => Err(SerializeError::UnwrapTypeMismatch.into()), 114 | } 115 | } 116 | 117 | pub fn encode(v: &VmValue, config: &VmValueEncodeConfig) -> Result { 118 | match v { 119 | VmValue::Map(x) => Ok(Self::Tagged(TaggedVmValue::M( 120 | x.elements 121 | .iter() 122 | .map(|(k, v)| Self::encode(&**v, config).map(|x| (k.to_string(), x))) 123 | .collect::>()?, 124 | ))), 125 | VmValue::Null(_) => Ok(Self::Null(None)), 126 | VmValue::Bool(x) => Ok(Self::Bool(*x)), 127 | VmValue::Primitive(x) => match x { 128 | PrimitiveValue::Bytes(x) => { 129 | if config.enable_bytes { 130 | Ok(Self::Bytes(x.clone())) 131 | } else { 132 | Ok(Self::String(base64::encode(x))) 133 | } 134 | } 135 | PrimitiveValue::Double(x) => { 136 | if config.enable_double { 137 | Ok(Self::Double(f64::from_bits(*x))) 138 | } else { 139 | Ok(Self::String(format!("{}", f64::from_bits(*x)))) 140 | } 141 | } 142 | PrimitiveValue::Int64(x) => { 143 | if config.enable_int64 { 144 | Ok(Self::Int64(*x)) 145 | } else { 146 | Ok(Self::String(format!("{}", x))) 147 | } 148 | } 149 | PrimitiveValue::String(x) => Ok(Self::String(x.clone())), 150 | }, 151 | VmValue::List(x) => { 152 | let out = x 153 | .node 154 | .iter() 155 | .map(|x| Self::encode(&**x, config)) 156 | .collect::>()?; 157 | Ok(Self::Tagged(TaggedVmValue::L(out))) 158 | } 159 | _ => { 160 | log::debug!("encode: unserializable: {:?}", v); 161 | Err(SerializeError::Unserializable.into()) 162 | } 163 | } 164 | } 165 | 166 | pub fn decode<'a>(&self, ty: &VmType<&'a str>) -> Result> { 167 | use SerializedVmValue as S; 168 | match (self, ty) { 169 | (S::Tagged(TaggedVmValue::M(x)), VmType::Map(map_ty)) => { 170 | let mut res = VmMapValue { 171 | elements: Default::default(), 172 | }; 173 | for (k, field_ty) in map_ty { 174 | if let Some(v) = x.get(*k) { 175 | res.elements.insert_mut(*k, Arc::new(v.decode(field_ty)?)); 176 | } else { 177 | res 178 | .elements 179 | .insert_mut(*k, Arc::new(VmValue::Null(field_ty.clone()))); 180 | } 181 | } 182 | Ok(VmValue::Map(res)) 183 | } 184 | (S::Tagged(TaggedVmValue::L(x)), VmType::List(list_ty)) => { 185 | let res = VmListValue { 186 | member_ty: (*list_ty.ty).clone(), 187 | node: x 188 | .iter() 189 | .map(|x| x.decode(&*list_ty.ty).map(Arc::new)) 190 | .collect::>()?, 191 | }; 192 | Ok(VmValue::List(res)) 193 | } 194 | (S::Null(None), _) => Ok(VmValue::Null(ty.clone())), 195 | (S::Bool(x), VmType::Bool) => Ok(VmValue::Bool(*x)), 196 | (S::String(x), VmType::Primitive(PrimitiveType::String)) => { 197 | Ok(VmValue::Primitive(PrimitiveValue::String(x.clone()))) 198 | } 199 | (S::Bytes(x), VmType::Primitive(PrimitiveType::String)) => Ok(VmValue::Primitive( 200 | PrimitiveValue::String(String::from_utf8_lossy(x).to_string()), 201 | )), 202 | (S::String(x), VmType::Primitive(PrimitiveType::Int64)) => { 203 | Ok(VmValue::Primitive(PrimitiveValue::Int64(x.parse()?))) 204 | } 205 | (S::Int64(x), VmType::Primitive(PrimitiveType::Int64)) => { 206 | Ok(VmValue::Primitive(PrimitiveValue::Int64(*x))) 207 | } 208 | (S::Double(x), VmType::Primitive(PrimitiveType::Int64)) => { 209 | Ok(VmValue::Primitive(PrimitiveValue::Int64(*x as i64))) 210 | } 211 | (S::String(x), VmType::Primitive(PrimitiveType::Double)) => { 212 | Ok(VmValue::Primitive(PrimitiveValue::Double(x.parse()?))) 213 | } 214 | (S::Int64(x), 
VmType::Primitive(PrimitiveType::Double)) => Ok(VmValue::Primitive( 215 | PrimitiveValue::Double((*x as f64).to_bits()), 216 | )), 217 | (S::Double(x), VmType::Primitive(PrimitiveType::Double)) => { 218 | Ok(VmValue::Primitive(PrimitiveValue::Double(x.to_bits()))) 219 | } 220 | (S::String(x), VmType::Primitive(PrimitiveType::Bytes)) => Ok(VmValue::Primitive( 221 | PrimitiveValue::Bytes(base64::decode(x)?), 222 | )), 223 | (S::Bytes(x), VmType::Primitive(PrimitiveType::Bytes)) => { 224 | Ok(VmValue::Primitive(PrimitiveValue::Bytes(x.clone()))) 225 | } 226 | _ => { 227 | log::debug!("decode: type mismatch: `{:?}`, `{}`", self, ty); 228 | Err(SerializeError::TypeMismatch.into()) 229 | } 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /rdb-analyzer/src/data/treewalker/typeck_test.rs: -------------------------------------------------------------------------------- 1 | use bumpalo::Bump; 2 | use rpds::RedBlackTreeMapSync; 3 | 4 | use crate::{ 5 | data::{ 6 | treewalker::{ 7 | bytecode::{TwGraph, TwGraphNode}, 8 | typeck::GlobalTyckContext, 9 | vm::TwVm, 10 | vm_value::{VmConst, VmType}, 11 | }, 12 | value::PrimitiveValue, 13 | }, 14 | schema::{ 15 | compile::{compile, PrimitiveType}, 16 | grammar::parse, 17 | }, 18 | storage_plan::planner::generate_plan_for_schema, 19 | }; 20 | 21 | use super::{bytecode::TwScript, vm_value::VmTableType}; 22 | 23 | const SIMPLE_SCHEMA: &str = r#" 24 | type Item { 25 | inner: T, 26 | inner2: T, 27 | @primary 28 | something_else: string, 29 | } 30 | type Duration { 31 | start: T, 32 | end: T, 33 | } 34 | type Recursive { 35 | inner: Recursive, 36 | } 37 | type BinaryTree { 38 | left: BinaryTree, 39 | right: BinaryTree, 40 | value: T, 41 | } 42 | 43 | type TrinaryTree { 44 | left: TrinaryTree, 45 | middle: TrinaryTree, 46 | right: TrinaryTree, 47 | value: T, 48 | } 49 | 50 | type InternalSet { 51 | @primary 52 | key: bytes, 53 | s: set>, 54 | } 55 | 56 | type Wrapper { 57 | @primary 58 | value: T, 59 | } 60 | 61 | export set>> items; 62 | export Recursive item; 63 | export BinaryTree a_binary_tree; 64 | export InternalSet an_internal_set; 65 | export set nested_internal_sets; 66 | export TrinaryTree a_trinary_tree; 67 | "#; 68 | 69 | /* 70 | fn root_map<'a>(schema: &'a CompiledSchema, plan: &'a StoragePlan) -> VmValue<'a> { 71 | let mut m = RedBlackTreeMapSync::new_sync(); 72 | m.insert_mut( 73 | "a_trinary_tree", 74 | Arc::new(VmValue::Table(VmTableValue { 75 | ty: "TrinaryTree", 76 | kind: VmTableValueKind::Resident(Arc::new(VmResidentPath { 77 | storage_key: VmResidentStorageKey::Fixed( 78 | &plan.nodes.get("TrinaryTree").unwrap().key, 79 | ), 80 | prev: None, 81 | })), 82 | })), 83 | ); 84 | VmValue::Map(VmMapValue { elements: m }) 85 | } 86 | */ 87 | 88 | #[test] 89 | fn basic_typeck() { 90 | let _ = pretty_env_logger::try_init(); 91 | let alloc = Bump::new(); 92 | let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap(); 93 | let schema = compile(&ast).unwrap(); 94 | drop(ast); 95 | drop(alloc); 96 | let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap(); 97 | let script = TwScript { 98 | graphs: vec![TwGraph { 99 | name: "".into(), 100 | exported: false, 101 | nodes: vec![ 102 | (TwGraphNode::LoadParam(0), vec![], None), // 0 103 | (TwGraphNode::GetField(0), vec![0], None), // 1 104 | (TwGraphNode::GetField(1), vec![1], None), // 2 105 | (TwGraphNode::GetField(2), vec![2], None), // 3 106 | (TwGraphNode::GetField(3), vec![3], None), // 4 107 | ], 108 | output: 
Some(4), 109 | output_type: Some(1), 110 | param_types: vec![0], 111 | }], 112 | entry: 0, 113 | consts: vec![], 114 | idents: vec![ 115 | "a_trinary_tree".into(), 116 | "middle".into(), 117 | "left".into(), 118 | "value".into(), 119 | ], 120 | types: vec![VmType::Schema, VmType::Primitive(PrimitiveType::Int64)], 121 | }; 122 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 123 | GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 124 | } 125 | 126 | #[test] 127 | fn filter_set() { 128 | let _ = pretty_env_logger::try_init(); 129 | let alloc = Bump::new(); 130 | let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap(); 131 | let schema = compile(&ast).unwrap(); 132 | drop(ast); 133 | drop(alloc); 134 | let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap(); 135 | let script = TwScript { 136 | graphs: vec![ 137 | TwGraph { 138 | name: "".into(), 139 | exported: false, 140 | nodes: vec![ 141 | (TwGraphNode::LoadParam(0), vec![], None), // 0 142 | (TwGraphNode::GetField(0), vec![0], None), // 1 143 | (TwGraphNode::LoadConst(1), vec![], None), // 2 144 | (TwGraphNode::FilterSet(1), vec![2, 1], None), // 3 145 | ], 146 | output: Some(3), 147 | output_type: Some(1), 148 | param_types: vec![0], 149 | }, 150 | TwGraph { 151 | name: "".into(), 152 | exported: false, 153 | nodes: vec![ 154 | (TwGraphNode::LoadConst(0), vec![], None), // 0 155 | ], 156 | output: Some(0), 157 | output_type: Some(2), 158 | param_types: vec![3, 3], 159 | }, 160 | ], 161 | entry: 0, 162 | consts: vec![ 163 | VmConst::Bool(true), 164 | VmConst::Null(VmType::Primitive(PrimitiveType::Int64)), 165 | ], 166 | idents: vec![ 167 | "items".into(), 168 | "middle".into(), 169 | "left".into(), 170 | "value".into(), 171 | ], 172 | types: vec![ 173 | VmType::Schema, 174 | VmType::Table(VmTableType { 175 | name: "Item>".into(), 176 | }), 177 | VmType::Bool, 178 | VmType::Unknown, 179 | ], 180 | }; 181 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 182 | GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 183 | } 184 | 185 | #[test] 186 | fn basic_typeck_fail_unknown_name() { 187 | let _ = pretty_env_logger::try_init(); 188 | let alloc = Bump::new(); 189 | let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap(); 190 | let schema = compile(&ast).unwrap(); 191 | drop(ast); 192 | drop(alloc); 193 | let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap(); 194 | let script = TwScript { 195 | graphs: vec![TwGraph { 196 | name: "".into(), 197 | exported: false, 198 | nodes: vec![ 199 | (TwGraphNode::LoadParam(0), vec![], None), // 0 200 | (TwGraphNode::GetField(0), vec![0], None), // 1 201 | (TwGraphNode::GetField(1), vec![1], None), // 2 202 | (TwGraphNode::GetField(2), vec![2], None), // 3 203 | (TwGraphNode::GetField(3), vec![3], None), // 4 204 | ], 205 | output: Some(4), 206 | output_type: Some(1), 207 | param_types: vec![0], 208 | }], 209 | entry: 0, 210 | consts: vec![], 211 | idents: vec![ 212 | "a_trinary_tree".into(), 213 | "middle".into(), 214 | "left_".into(), 215 | "value".into(), 216 | ], 217 | types: vec![VmType::Schema, VmType::Primitive(PrimitiveType::Int64)], 218 | }; 219 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 220 | assert!( 221 | GlobalTyckContext::new(&vm) 222 | .unwrap() 223 | .typeck() 224 | .unwrap_err() 225 | .to_string() 226 | == "field `left_` is not present in table `TrinaryTree`" 227 | ); 228 | } 229 | 230 | #[test] 231 | fn basic_typeck_output_type_mismatch() { 232 | let _ = pretty_env_logger::try_init(); 233 | 
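// Note: the graph below reads `a_trinary_tree.middle.left.value`, which is an
// `int64`, while `output_type` points at `Primitive(String)`; typeck must
// reject this with the covariance error asserted at the end of the test.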
let alloc = Bump::new(); 234 | let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap(); 235 | let schema = compile(&ast).unwrap(); 236 | drop(ast); 237 | drop(alloc); 238 | let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap(); 239 | let script = TwScript { 240 | graphs: vec![TwGraph { 241 | name: "".into(), 242 | exported: false, 243 | nodes: vec![ 244 | (TwGraphNode::LoadParam(0), vec![], None), // 0 245 | (TwGraphNode::GetField(0), vec![0], None), // 1 246 | (TwGraphNode::GetField(1), vec![1], None), // 2 247 | (TwGraphNode::GetField(2), vec![2], None), // 3 248 | (TwGraphNode::GetField(3), vec![3], None), // 4 249 | ], 250 | output: Some(4), 251 | output_type: Some(1), 252 | param_types: vec![0], 253 | }], 254 | entry: 0, 255 | consts: vec![], 256 | idents: vec![ 257 | "a_trinary_tree".into(), 258 | "middle".into(), 259 | "left".into(), 260 | "value".into(), 261 | ], 262 | types: vec![VmType::Schema, VmType::Primitive(PrimitiveType::String)], 263 | }; 264 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 265 | assert!( 266 | GlobalTyckContext::new(&vm) 267 | .unwrap() 268 | .typeck() 269 | .unwrap_err() 270 | .to_string() 271 | == "type `Primitive(String)` is not covariant from `Primitive(Int64)`" 272 | ); 273 | } 274 | 275 | #[test] 276 | fn typeck_set_point_get() { 277 | let _ = pretty_env_logger::try_init(); 278 | let alloc = Bump::new(); 279 | let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap(); 280 | let schema = compile(&ast).unwrap(); 281 | drop(ast); 282 | drop(alloc); 283 | let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap(); 284 | let mut expected_result_type = RedBlackTreeMapSync::new_sync(); 285 | expected_result_type.insert_mut( 286 | "start".to_string(), 287 | VmType::::Primitive(PrimitiveType::Int64), 288 | ); 289 | expected_result_type.insert_mut( 290 | "the_item".to_string(), 291 | VmType::::Table(VmTableType { 292 | name: "Item>".into(), 293 | }), 294 | ); 295 | let script = TwScript { 296 | graphs: vec![TwGraph { 297 | name: "".into(), 298 | exported: false, 299 | nodes: vec![ 300 | (TwGraphNode::LoadParam(0), vec![], None), // 0 301 | (TwGraphNode::LoadConst(0), vec![], None), // 1 302 | (TwGraphNode::GetField(0), vec![0], None), // 2 303 | (TwGraphNode::GetSetElement, vec![1, 2], None), // 3 304 | (TwGraphNode::GetField(2), vec![3], None), // 4 305 | (TwGraphNode::GetField(3), vec![4], None), // 5 306 | (TwGraphNode::CreateMap, vec![], None), // 6 307 | (TwGraphNode::InsertIntoMap(4), vec![3, 6], None), // 7 308 | (TwGraphNode::InsertIntoMap(3), vec![5, 7], None), // 8 309 | ], 310 | output: Some(8), 311 | output_type: Some(1), 312 | param_types: vec![0], 313 | }], 314 | entry: 0, 315 | consts: vec![VmConst::Primitive(PrimitiveValue::String("test".into()))], 316 | idents: vec![ 317 | "items".into(), 318 | "something_else".into(), 319 | "inner2".into(), 320 | "start".into(), 321 | "the_item".into(), 322 | ], 323 | types: vec![VmType::Schema, VmType::Map(expected_result_type)], 324 | }; 325 | let vm = TwVm::new(&schema, &plan, &script).unwrap(); 326 | GlobalTyckContext::new(&vm).unwrap().typeck().unwrap(); 327 | } 328 | -------------------------------------------------------------------------------- /rdb-analyzer/src/data/treewalker/vm.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, sync::Arc}; 2 | 3 | use anyhow::Result; 4 | 5 | use crate::{schema::compile::CompiledSchema, storage_plan::StoragePlan}; 6 | 7 | use 
super::{ 8 | bytecode::TwScript, 9 | vm_value::{VmType, VmValue}, 10 | }; 11 | use thiserror::Error; 12 | 13 | #[derive(Error, Debug)] 14 | pub enum VmError { 15 | #[error("exported graph not found: `{0}`")] 16 | ExportedGraphNotFound(String), 17 | } 18 | 19 | pub struct TwVm<'a> { 20 | pub schema: &'a CompiledSchema, 21 | pub storage_plan: &'a StoragePlan, 22 | pub script: &'a TwScript, 23 | pub consts: Vec<Arc<VmValue<'a>>>, 24 | pub types: Vec<VmType<&'a str>>, 25 | pub exported_graph_name_index: HashMap<&'a str, usize>, 26 | } 27 | 28 | impl<'a> TwVm<'a> { 29 | pub fn new( 30 | schema: &'a CompiledSchema, 31 | storage_plan: &'a StoragePlan, 32 | script: &'a TwScript, 33 | ) -> Result<Self> { 34 | let consts = script 35 | .consts 36 | .iter() 37 | .map(|x| VmValue::from_const(schema, x).map(Arc::new)) 38 | .collect::<Result<Vec<_>>>()?; 39 | let types = script 40 | .types 41 | .iter() 42 | .map(|x| VmType::<&'a str>::from(x)) 43 | .collect::<Vec<_>>(); 44 | 45 | let mut exported_graph_name_index = HashMap::new(); 46 | for (i, g) in script.graphs.iter().enumerate() { 47 | if g.exported { 48 | exported_graph_name_index.insert(g.name.as_str(), i); 49 | } 50 | } 51 | 52 | Ok(Self { 53 | schema, 54 | storage_plan, 55 | script, 56 | consts, 57 | types, 58 | exported_graph_name_index, 59 | }) 60 | } 61 | 62 | pub fn lookup_exported_graph_by_name(&self, name: &str) -> Result<usize> { 63 | Ok( 64 | self 65 | .exported_graph_name_index 66 | .get(name) 67 | .copied() 68 | .ok_or_else(|| VmError::ExportedGraphNotFound(name.into()))?, 69 | ) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /rdb-analyzer/src/data/value.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::BTreeMap, fmt::Display, iter::FromIterator}; 2 | 3 | use byteorder::{BigEndian, ByteOrder}; 4 | use serde::{Deserialize, Serialize}; 5 | use smallvec::{smallvec, SmallVec}; 6 | 7 | use crate::schema::compile::PrimitiveType; 8 | 9 | #[derive(Serialize, Deserialize)] 10 | pub enum PackedValue { 11 | /// Primitive value. 12 | P(PrimitiveValue), 13 | 14 | /// Key-value map. 15 | M(BTreeMap<String, PackedValue>), 16 | 17 | /// Set. 18 | S(Vec<PackedValue>), 19 | } 20 | 21 | #[derive(Clone, Serialize, Eq, PartialEq, Hash, Deserialize, Debug)] 22 | #[serde(untagged)] 23 | pub enum PrimitiveValue { 24 | String(String), 25 | Bytes(Vec<u8>), 26 | Int64(i64), 27 | Double(u64), 28 | } 29 | 30 | const TOP_BIT: u64 = 1u64 << 63; 31 | 32 | impl Display for PrimitiveValue { 33 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 34 | match self { 35 | Self::String(x) => write!(f, "{}", serde_json::to_string(x).unwrap()), 36 | Self::Bytes(x) => write!(f, "h\"{}\"", hex::encode(x)), 37 | Self::Int64(x) => write!(f, "{}", x), 38 | Self::Double(x) => write!(f, "{}", f64::from_bits(*x)), 39 | } 40 | } 41 | } 42 | 43 | impl PrimitiveValue { 44 | pub fn get_type(&self) -> PrimitiveType { 45 | match self { 46 | PrimitiveValue::Bytes(_) => PrimitiveType::Bytes, 47 | PrimitiveValue::String(_) => PrimitiveType::String, 48 | PrimitiveValue::Int64(_) => PrimitiveType::Int64, 49 | PrimitiveValue::Double(_) => PrimitiveType::Double, 50 | } 51 | } 52 | 53 | pub fn unwrap_string(&self) -> &String { 54 | match self { 55 | PrimitiveValue::String(x) => x, 56 | _ => panic!("PrimitiveValue::unwrap_string: not a string: {:?}", self), 57 | } 58 | } 59 | 60 | /// https://activesphere.com/blog/2018/08/17/order-preserving-serialization 61 | pub fn serialize_for_key_component(&self) -> SmallVec<[u8; 9]> { 62 | match self { 63 | PrimitiveValue::Bytes(x) => SmallVec::from_iter( 64 | std::iter::once(0x01u8) 65 | .chain( 66 | x.iter() 67 | .map(|&x| -> SmallVec<[u8; 2]> { 68 | if x == 0 { 69 | smallvec![0x00, 0xff] 70 | } else { 71 | smallvec![x] 72 | } 73 | }) 74 | .flatten(), 75 | ) 76 | .chain([0x00u8].iter().copied()), 77 | ), 78 | PrimitiveValue::String(x) => { 79 | SmallVec::from_iter(std::iter::once(0x02u8).chain(x.as_bytes().iter().copied())) 80 | } 81 | PrimitiveValue::Int64(x) => { 82 | // Flip the top bit for order preservation.
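// In two's complement the sign bit is the top bit, so XOR-ing with TOP_BIT
// makes unsigned byte order agree with signed integer order, e.g.:
// -1 -> 0x7fff_ffff_ffff_ffff < 0 -> 0x8000_0000_0000_0000 < 1 -> 0x8000_0000_0000_0001.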
83 | let x = (*x as u64) ^ TOP_BIT; 84 | 85 | let mut buf = smallvec![0u8; 9]; 86 | buf[0] = 0x03; 87 | BigEndian::write_u64(&mut buf[1..], x); 88 | buf 89 | } 90 | PrimitiveValue::Double(x) => { 91 | let x = *x; 92 | 93 | let x = if x & TOP_BIT != 0 { !x } else { x ^ TOP_BIT }; 94 | 95 | let mut buf = smallvec![0u8; 9]; 96 | buf[0] = 0x04; 97 | BigEndian::write_u64(&mut buf[1..], x); 98 | buf 99 | } 100 | } 101 | } 102 | 103 | #[cfg(test)] 104 | pub fn example_value_for_type(ty: PrimitiveType) -> Self { 105 | match ty { 106 | PrimitiveType::Bytes => Self::Bytes(vec![0xbe, 0xef]), 107 | PrimitiveType::String => Self::String("hello".into()), 108 | PrimitiveType::Int64 => Self::Int64(42), 109 | PrimitiveType::Double => Self::Double(3.14f64.to_bits()), 110 | } 111 | } 112 | 113 | pub fn default_value_for_type(ty: PrimitiveType) -> Self { 114 | match ty { 115 | PrimitiveType::Bytes => Self::Bytes(vec![]), 116 | PrimitiveType::String => Self::String("".into()), 117 | PrimitiveType::Int64 => Self::Int64(0), 118 | PrimitiveType::Double => Self::Double(0), 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /rdb-analyzer/src/kv_backend/foundationdb.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::data::kv::{KeyValueStore, KvError, KvKeyIterator, KvTransaction}; 4 | use anyhow::Result; 5 | use async_trait::async_trait; 6 | use foundationdb::{ 7 | future::FdbValues, options::TransactionOption, Database, KeySelector, RangeOption, Transaction, 8 | }; 9 | 10 | pub struct FdbKvStore { 11 | db: Arc, 12 | prefix: Arc<[u8]>, 13 | } 14 | 15 | pub struct FdbTxn { 16 | inner: Arc, 17 | prefix: Arc<[u8]>, 18 | } 19 | 20 | impl FdbKvStore { 21 | pub fn new(db: Arc, prefix: &[u8]) -> Self { 22 | Self { 23 | db, 24 | prefix: Arc::from(prefix), 25 | } 26 | } 27 | } 28 | 29 | #[async_trait] 30 | impl KeyValueStore for FdbKvStore { 31 | async fn begin_transaction(&self) -> Result> { 32 | let txn = self.db.create_trx()?; 33 | 34 | // Required for RefineDB execution semantics 35 | txn.set_option(TransactionOption::ReadYourWritesDisable)?; 36 | 37 | Ok(Box::new(FdbTxn { 38 | inner: Arc::new(txn), 39 | prefix: self.prefix.clone(), 40 | })) 41 | } 42 | } 43 | 44 | #[async_trait] 45 | impl KvTransaction for FdbTxn { 46 | async fn get(&self, k: &[u8]) -> Result>> { 47 | let k = self 48 | .prefix 49 | .iter() 50 | .chain(k.iter()) 51 | .copied() 52 | .collect::>(); 53 | log::trace!("get {}", base64::encode(&k)); 54 | let res = self.inner.get(&k, false).await?; 55 | Ok(res.map(|x| x.to_vec())) 56 | } 57 | 58 | async fn put(&self, k: &[u8], v: &[u8]) -> Result<()> { 59 | let k = self 60 | .prefix 61 | .iter() 62 | .chain(k.iter()) 63 | .copied() 64 | .collect::>(); 65 | log::trace!("put {} {}", base64::encode(&k), base64::encode(&v)); 66 | self.inner.set(&k, &v); 67 | Ok(()) 68 | } 69 | 70 | async fn delete(&self, k: &[u8]) -> Result<()> { 71 | let k = self 72 | .prefix 73 | .iter() 74 | .chain(k.iter()) 75 | .copied() 76 | .collect::>(); 77 | log::trace!("clear {}", base64::encode(&k)); 78 | self.inner.clear(&k); 79 | Ok(()) 80 | } 81 | 82 | async fn scan_keys(&self, start: &[u8], end: &[u8]) -> Result> { 83 | let start = self 84 | .prefix 85 | .iter() 86 | .chain(start.iter()) 87 | .copied() 88 | .collect::>(); 89 | let end = self 90 | .prefix 91 | .iter() 92 | .chain(end.iter()) 93 | .copied() 94 | .collect::>(); 95 | 96 | let range: RangeOption = (start..end).into(); 97 | 
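// The iterator returned below pages lazily: each `next` call that exhausts
// the current batch issues another `get_range`, bumping `iteration` (a hint
// the FoundationDB client uses for batch sizing) and restarting the range
// just past the last key it handed out.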
Ok(Box::new(FdbIterator { 98 | txn: self.inner.clone(), 99 | prefix: self.prefix.clone(), 100 | values: None, 101 | range, 102 | iteration: 1, 103 | })) 104 | } 105 | 106 | async fn delete_range(&self, start: &[u8], end: &[u8]) -> Result<()> { 107 | let start = self 108 | .prefix 109 | .iter() 110 | .chain(start.iter()) 111 | .copied() 112 | .collect::>(); 113 | let end = self 114 | .prefix 115 | .iter() 116 | .chain(end.iter()) 117 | .copied() 118 | .collect::>(); 119 | log::trace!( 120 | "clear_range {} {}", 121 | base64::encode(&start), 122 | base64::encode(&end) 123 | ); 124 | self.inner.clear_range(&start, &end); 125 | Ok(()) 126 | } 127 | 128 | async fn commit(self: Box) -> Result<(), KvError> { 129 | Arc::try_unwrap(self.inner) 130 | .map_err(|_| { 131 | log::error!("some iterators are not dropped at commit time"); 132 | KvError::CommitStateUnknown 133 | })? 134 | .commit() 135 | .await 136 | .map_err(|e| { 137 | // XXX: Is this correct? 138 | if e.is_retryable_not_committed() { 139 | KvError::Conflict 140 | } else { 141 | KvError::CommitStateUnknown 142 | } 143 | }) 144 | .map(|_| ()) 145 | } 146 | } 147 | 148 | pub struct FdbIterator { 149 | txn: Arc, 150 | prefix: Arc<[u8]>, 151 | values: Option<(FdbValues, usize)>, 152 | range: RangeOption<'static>, 153 | iteration: usize, 154 | } 155 | 156 | #[async_trait] 157 | impl KvKeyIterator for FdbIterator { 158 | async fn next(&mut self) -> Result>> { 159 | if self.values.is_none() { 160 | log::trace!("get_range iteration {}", self.iteration); 161 | let values = self 162 | .txn 163 | .get_range(&self.range, self.iteration, false) 164 | .await?; 165 | if values.len() == 0 { 166 | return Ok(None); 167 | } 168 | self.iteration += 1; 169 | self.values = Some((values, 0)); 170 | } 171 | 172 | let (values, value_index) = self.values.as_mut().unwrap(); 173 | let raw_key = values[*value_index].key(); 174 | let key = raw_key.strip_prefix(&*self.prefix).unwrap().to_vec(); 175 | if *value_index + 1 == values.len() { 176 | self.range.begin = KeySelector::first_greater_than(raw_key.to_vec()); 177 | self.values = None; 178 | } else { 179 | *value_index += 1; 180 | } 181 | 182 | log::trace!("got key: {}", base64::encode(&key)); 183 | 184 | Ok(Some(key)) 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /rdb-analyzer/src/kv_backend/mock_kv.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashMap, 3 | sync::{ 4 | atomic::{AtomicU64, Ordering}, 5 | Arc, 6 | }, 7 | }; 8 | 9 | use async_trait::async_trait; 10 | use rpds::RedBlackTreeMapSync; 11 | use tokio::sync::Mutex; 12 | 13 | use crate::data::kv::{KeyValueStore, KvError, KvKeyIterator, KvTransaction}; 14 | use anyhow::Result; 15 | 16 | /// A mocked KV store that simulates MVCC with snapshot isolation. 
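/// Each transaction snapshots the whole map at `begin_transaction` and records
/// the starting version of every key it writes; `commit` re-checks those
/// versions against the shared store and returns `KvError::Conflict` if
/// another transaction has committed a newer version in the meantime
/// (optimistic, first-committer-wins concurrency).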
17 | pub struct MockKv { 18 | store: MockStore, 19 | } 20 | 21 | pub struct MockTransaction { 22 | id: u64, 23 | store: MockStore, 24 | read_buffer: RedBlackTreeMapSync, (Option>, u64)>, 25 | buffer: Mutex, (Option>, u64)>>, 26 | modified: Mutex, u64>>, 27 | } 28 | 29 | #[derive(Clone)] 30 | struct MockStore { 31 | data: Arc, (Option>, u64)>>>, 32 | txn_count: Arc, 33 | } 34 | 35 | struct MockIterator { 36 | map: RedBlackTreeMapSync, (Option>, u64)>, 37 | current: Vec, 38 | end: Vec, 39 | } 40 | 41 | impl MockKv { 42 | pub fn new() -> Self { 43 | MockKv { 44 | store: MockStore { 45 | data: Arc::new(Mutex::new(RedBlackTreeMapSync::new_sync())), 46 | txn_count: Arc::new(AtomicU64::new(0)), 47 | }, 48 | } 49 | } 50 | } 51 | 52 | #[async_trait] 53 | impl KeyValueStore for MockKv { 54 | async fn begin_transaction(&self) -> Result> { 55 | let buffer = self.store.data.lock().await.clone(); 56 | Ok(Box::new(MockTransaction { 57 | id: self.store.txn_count.fetch_add(1, Ordering::SeqCst) + 1, 58 | store: self.store.clone(), 59 | read_buffer: buffer.clone(), 60 | buffer: Mutex::new(buffer), 61 | modified: Mutex::new(HashMap::new()), 62 | })) 63 | } 64 | } 65 | 66 | #[async_trait] 67 | impl KvTransaction for MockTransaction { 68 | async fn get(&self, key: &[u8]) -> Result>> { 69 | log::trace!("[txn {}] get {}", self.id, base64::encode(key)); 70 | Ok( 71 | self 72 | .read_buffer 73 | .get(key) 74 | .and_then(|x| x.0.as_ref()) 75 | .cloned(), 76 | ) 77 | } 78 | 79 | async fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { 80 | log::trace!( 81 | "[txn {}] put {} {}", 82 | self.id, 83 | base64::encode(key), 84 | base64::encode(value) 85 | ); 86 | let mut buffer = self.buffer.lock().await; 87 | let mut modified = self.modified.lock().await; 88 | let version = buffer.get(key).map(|x| x.1).unwrap_or_default(); 89 | buffer.insert_mut(key.to_vec(), (Some(value.to_vec()), version + 1)); 90 | if !modified.contains_key(key) { 91 | modified.insert(key.to_vec(), version); 92 | } 93 | Ok(()) 94 | } 95 | 96 | async fn delete(&self, key: &[u8]) -> Result<()> { 97 | log::trace!("[txn {}] delete {}", self.id, base64::encode(key)); 98 | let mut buffer = self.buffer.lock().await; 99 | let mut modified = self.modified.lock().await; 100 | let version = buffer.get(key).map(|x| x.1).unwrap_or_default(); 101 | buffer.insert_mut(key.to_vec(), (None, version + 1)); 102 | if !modified.contains_key(key) { 103 | modified.insert(key.to_vec(), version); 104 | } 105 | Ok(()) 106 | } 107 | 108 | async fn scan_keys(&self, start: &[u8], end: &[u8]) -> Result> { 109 | Ok(Box::new(MockIterator { 110 | map: self.buffer.lock().await.clone(), 111 | current: start.to_vec(), 112 | end: end.to_vec(), 113 | })) 114 | } 115 | 116 | async fn commit(self: Box) -> Result<(), KvError> { 117 | let buffer = self.buffer.into_inner(); 118 | let modified = self.modified.into_inner(); 119 | 120 | let mut data = self.store.data.lock().await; 121 | for (k, initial_version) in &modified { 122 | if data.get(k).map(|x| x.1).unwrap_or_default() != *initial_version { 123 | log::trace!("[txn {}] commit CONFLICT", self.id); 124 | return Err(KvError::Conflict); 125 | } 126 | } 127 | 128 | for (k, _) in modified { 129 | let value = buffer.get(&k).unwrap().clone(); 130 | data.insert_mut(k, value); 131 | } 132 | log::trace!("[txn {}] commit OK", self.id); 133 | Ok(()) 134 | } 135 | 136 | async fn delete_range(&self, start: &[u8], end: &[u8]) -> Result<()> { 137 | log::trace!( 138 | "[txn {}] delete_range {} {}", 139 | self.id, 140 | base64::encode(start), 141 | 
base64::encode(end) 142 | ); 143 | let mut buffer = self.buffer.lock().await; 144 | let mut modified = self.modified.lock().await; 145 | 146 | let mut to_delete = vec![]; 147 | for (k, _) in buffer.range(start.to_vec()..end.to_vec()) { 148 | to_delete.push(k.clone()); 149 | } 150 | 151 | log::trace!( 152 | "[txn {}] deleted {} keys in range", 153 | self.id, 154 | to_delete.len() 155 | ); 156 | 157 | for key in to_delete { 158 | let version = buffer.get(&key).map(|x| x.1).unwrap_or_default(); 159 | buffer.insert_mut(key.clone(), (None, version + 1)); 160 | if !modified.contains_key(&key) { 161 | modified.insert(key, version); 162 | } 163 | } 164 | Ok(()) 165 | } 166 | } 167 | 168 | #[async_trait] 169 | impl KvKeyIterator for MockIterator { 170 | async fn next(&mut self) -> Result>> { 171 | let mut range = self.map.range(self.current.clone()..self.end.clone()); 172 | loop { 173 | if let Some((k, v)) = range.next() { 174 | // Move to next 175 | self.current = k.iter().copied().chain(std::iter::once(0x00u8)).collect(); 176 | match &v.0 { 177 | Some(_) => break Ok(Some(k.clone())), 178 | None => {} 179 | } 180 | } else { 181 | break Ok(None); 182 | } 183 | } 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /rdb-analyzer/src/kv_backend/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "fdb-backend")] 2 | pub mod foundationdb; 3 | 4 | #[cfg(feature = "sqlite-backend")] 5 | pub mod sqlite; 6 | 7 | #[cfg(test)] 8 | pub mod mock_kv; 9 | -------------------------------------------------------------------------------- /rdb-analyzer/src/kv_backend/sqlite.rs: -------------------------------------------------------------------------------- 1 | use std::{pin::Pin, sync::Arc}; 2 | 3 | use crate::data::kv::{KeyValueStore, KvError, KvKeyIterator, KvTransaction}; 4 | use anyhow::Result; 5 | use async_trait::async_trait; 6 | use r2d2::{Pool, PooledConnection}; 7 | use r2d2_sqlite::SqliteConnectionManager; 8 | use rusqlite::{named_params, OptionalExtension, Transaction}; 9 | use std::future::Future; 10 | use thiserror::Error; 11 | use tokio::{ 12 | runtime::Builder, 13 | sync::{ 14 | mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}, 15 | oneshot, Mutex, 16 | }, 17 | task::{spawn_blocking, spawn_local, LocalSet}, 18 | }; 19 | 20 | pub struct SqliteKvStore { 21 | global: Arc, 22 | table: Arc, 23 | prefix: Arc<[u8]>, 24 | } 25 | 26 | #[derive(Error, Debug)] 27 | pub enum SqliteKvError { 28 | #[error("interrupted")] 29 | Interrupted, 30 | } 31 | 32 | pub struct GlobalSqliteStore { 33 | conn_pool: Pool, 34 | task_tx: UnboundedSender, 35 | } 36 | 37 | type Task = Box Pin>> + Send>; 38 | 39 | impl GlobalSqliteStore { 40 | pub fn open_leaky(path: &str) -> Result> { 41 | let manager = SqliteConnectionManager::file(path).with_init(|c| { 42 | c.execute_batch( 43 | r#" 44 | PRAGMA journal_mode=WAL; 45 | create table if not exists system (k blob primary key, v blob); 46 | create table if not exists system_meta (k blob primary key, v blob); 47 | create table if not exists user_data (k blob primary key, v blob); 48 | "#, 49 | ) 50 | }); 51 | 52 | let (task_tx, task_rx) = unbounded_channel(); 53 | let me = Arc::new(Self { 54 | conn_pool: Pool::new(manager)?, 55 | task_tx, 56 | }); 57 | 58 | let me2 = me.clone(); 59 | 60 | // Isolate SQLite work onto its own thread 61 | std::thread::spawn(move || { 62 | let rt = Builder::new_current_thread().enable_all().build().unwrap(); 63 | 
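// The transaction futures executed by `run_worker` borrow a pooled SQLite
// connection and are not `Send`, so they run via `spawn_local` on a
// `LocalSet` driven by this dedicated thread's single-threaded runtime.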
LocalSet::new().block_on(&rt, me2.run_worker(task_rx)) 64 | }); 65 | Ok(me) 66 | } 67 | 68 | async fn run_worker(self: Arc, mut task_rx: UnboundedReceiver) -> ! { 69 | loop { 70 | let task = task_rx.recv().await.unwrap(); 71 | spawn_local(task()); 72 | } 73 | } 74 | } 75 | 76 | impl SqliteKvStore { 77 | pub fn new(global: Arc, table: &str, prefix: &[u8]) -> Self { 78 | Self { 79 | global, 80 | table: Arc::from(table), 81 | prefix: Arc::from(prefix), 82 | } 83 | } 84 | } 85 | 86 | #[async_trait] 87 | impl KeyValueStore for SqliteKvStore { 88 | async fn begin_transaction(&self) -> Result> { 89 | let conn = { 90 | let g = self.global.clone(); 91 | spawn_blocking(move || g.conn_pool.get()).await?? 92 | }; 93 | 94 | let (work_tx, work_rx) = unbounded_channel(); 95 | self 96 | .global 97 | .task_tx 98 | .send(Box::new(|| { 99 | Box::pin(async move { txn_worker(conn, work_rx).await }) 100 | })) 101 | .unwrap_or_else(|_| unreachable!()); 102 | Ok(Box::new(SqliteKvTxn { 103 | work_tx, 104 | log: Mutex::new(vec![]), 105 | table: self.table.clone(), 106 | prefix: self.prefix.clone(), 107 | })) 108 | } 109 | } 110 | 111 | type Work = Box< 112 | dyn for<'a> FnOnce(&'a mut Option) -> Pin + 'a>> + Send, 113 | >; 114 | 115 | pub struct SqliteKvTxn { 116 | work_tx: UnboundedSender, 117 | log: Mutex>, 118 | table: Arc, 119 | prefix: Arc<[u8]>, 120 | } 121 | 122 | enum ModOp { 123 | Put(Vec, Vec), 124 | Delete(Vec), 125 | DeleteRange(Vec, Vec), 126 | } 127 | 128 | async fn txn_worker( 129 | mut conn: PooledConnection, 130 | mut work_rx: UnboundedReceiver, 131 | ) { 132 | let mut txn = Some(match conn.transaction() { 133 | Ok(x) => x, 134 | Err(e) => { 135 | log::error!("txn_worker: transaction creation error: {:?}", e); 136 | return; 137 | } 138 | }); 139 | 140 | loop { 141 | let work = match work_rx.recv().await { 142 | Some(x) => x, 143 | None => { 144 | log::debug!("txn_worker: ending transaction"); 145 | return; 146 | } 147 | }; 148 | work(&mut txn).await; 149 | } 150 | } 151 | 152 | impl SqliteKvTxn { 153 | async fn run< 154 | G: FnOnce(&mut Option) -> Result + Send + 'static, 155 | R: Send + 'static, 156 | >( 157 | &self, 158 | f: G, 159 | ) -> Result { 160 | let (tx, rx) = oneshot::channel(); 161 | let res = self.work_tx.send(Box::new(move |txn| { 162 | Box::pin(async move { 163 | // Don't check the error here in case of asynchronous cancellation on `rx`. 
164 | let _ = tx.send(f(txn)); 165 | }) 166 | })); 167 | let res = match res { 168 | Ok(_) => rx.await.unwrap_or_else(|e| Err(anyhow::Error::from(e))), 169 | Err(_) => Err(anyhow::Error::from(SqliteKvError::Interrupted)), 170 | }; 171 | res 172 | } 173 | } 174 | 175 | #[async_trait] 176 | impl KvTransaction for SqliteKvTxn { 177 | async fn get(&self, key: &[u8]) -> Result>> { 178 | let key = self 179 | .prefix 180 | .iter() 181 | .copied() 182 | .chain(key.iter().copied()) 183 | .collect::>(); 184 | let table = self.table.clone(); 185 | self 186 | .run(move |txn| { 187 | let mut stmt = txn 188 | .as_mut() 189 | .unwrap() 190 | .prepare_cached(&format!("select v from {} where k = ?", table))?; 191 | let value: Option> = stmt.query_row(&[&key], |x| x.get(0)).optional()?; 192 | Ok(value) 193 | }) 194 | .await 195 | } 196 | 197 | async fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { 198 | let key = self 199 | .prefix 200 | .iter() 201 | .copied() 202 | .chain(key.iter().copied()) 203 | .collect::>(); 204 | let value = value.to_vec(); 205 | self.log.lock().await.push(ModOp::Put(key, value)); 206 | Ok(()) 207 | } 208 | 209 | async fn delete(&self, key: &[u8]) -> Result<()> { 210 | let key = self 211 | .prefix 212 | .iter() 213 | .copied() 214 | .chain(key.iter().copied()) 215 | .collect::>(); 216 | self.log.lock().await.push(ModOp::Delete(key)); 217 | Ok(()) 218 | } 219 | 220 | async fn delete_range(&self, start: &[u8], end: &[u8]) -> Result<()> { 221 | let start = self 222 | .prefix 223 | .iter() 224 | .copied() 225 | .chain(start.iter().copied()) 226 | .collect::>(); 227 | let end = self 228 | .prefix 229 | .iter() 230 | .copied() 231 | .chain(end.iter().copied()) 232 | .collect::>(); 233 | self.log.lock().await.push(ModOp::DeleteRange(start, end)); 234 | Ok(()) 235 | } 236 | 237 | async fn scan_keys(&self, start: &[u8], end: &[u8]) -> Result> { 238 | let start = self 239 | .prefix 240 | .iter() 241 | .copied() 242 | .chain(start.iter().copied()) 243 | .collect::>(); 244 | let end = self 245 | .prefix 246 | .iter() 247 | .copied() 248 | .chain(end.iter().copied()) 249 | .collect::>(); 250 | let table = self.table.clone(); 251 | let prefix_len = self.prefix.len(); 252 | self 253 | .run(move |txn| { 254 | let mut stmt = txn.as_mut().unwrap().prepare_cached(&format!( 255 | "select k from {} where k >= ? and k < ? order by k desc", 256 | table 257 | ))?; 258 | let keys: Vec> = stmt 259 | .query_map(&[&start, &end], |x| x.get(0))? 260 | .map(|x| x.map_err(anyhow::Error::from)) 261 | .collect::>()?; 262 | Ok(Box::new(SqliteKvIterator { 263 | keys: keys.into_iter().map(|x| x[prefix_len..].to_vec()).collect(), 264 | }) as Box) 265 | }) 266 | .await 267 | } 268 | 269 | async fn commit(self: Box) -> Result<(), KvError> { 270 | let log = std::mem::replace(&mut *self.log.try_lock().unwrap(), vec![]); 271 | let table = self.table.clone(); 272 | self 273 | .run(move |txn| { 274 | let txn = txn.take().unwrap(); 275 | for op in log { 276 | match op { 277 | ModOp::Put(key, value) => { 278 | let mut stmt = txn.prepare_cached(&format!( 279 | "insert into {} (k, v) values(:k, :v) on conflict(k) do update set v = :v", 280 | table 281 | ))?; 282 | stmt.execute(named_params! { ":k": &key, ":v": &value })?; 283 | } 284 | ModOp::Delete(key) => { 285 | let mut stmt = txn.prepare_cached(&format!("delete from {} where k = ?", table))?; 286 | stmt.execute(&[&key])?; 287 | } 288 | ModOp::DeleteRange(start, end) => { 289 | let mut stmt = 290 | txn.prepare_cached(&format!("delete from {} where k >= ? 
and k < ?", table))?; 291 | stmt.execute(&[&start, &end])?; 292 | } 293 | } 294 | } 295 | txn.commit()?; 296 | Ok(()) 297 | }) 298 | .await 299 | .map_err(|e| { 300 | if let Some(x) = e.downcast_ref::() { 301 | match x { 302 | rusqlite::Error::SqliteFailure(_, reason) => { 303 | if let Some(reason) = reason { 304 | if reason == "database is locked" { 305 | return KvError::Conflict; 306 | } 307 | } 308 | } 309 | _ => {} 310 | } 311 | } 312 | log::error!("sqlite commit error: {:?}", e); 313 | KvError::CommitStateUnknown 314 | }) 315 | } 316 | } 317 | 318 | pub struct SqliteKvIterator { 319 | keys: Vec>, 320 | } 321 | 322 | #[async_trait] 323 | impl KvKeyIterator for SqliteKvIterator { 324 | async fn next(&mut self) -> Result>> { 325 | Ok(self.keys.pop()) 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /rdb-analyzer/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | mod util; 3 | pub mod data; 4 | pub mod kv_backend; 5 | pub mod schema; 6 | pub mod storage_plan; 7 | 8 | #[cfg(test)] 9 | mod test_util; 10 | -------------------------------------------------------------------------------- /rdb-analyzer/src/schema/compile_test.rs: -------------------------------------------------------------------------------- 1 | use bumpalo::Bump; 2 | 3 | use super::{compile::compile, grammar::parse}; 4 | 5 | #[test] 6 | fn test_compile_simple() { 7 | let _ = pretty_env_logger::try_init(); 8 | let alloc = Bump::new(); 9 | let ast = parse( 10 | &alloc, 11 | r#" 12 | type Item { 13 | inner: T, 14 | something_else: string, 15 | } 16 | type Duration { 17 | start: T, 18 | end: T, 19 | } 20 | type Recursive { 21 | inner: Recursive, 22 | } 23 | export set>> items; 24 | export Recursive item; 25 | "#, 26 | ) 27 | .unwrap(); 28 | let output = compile(&ast).unwrap(); 29 | println!("{}", output); 30 | } 31 | 32 | #[test] 33 | fn upper_case_start_letter_in_type_names() { 34 | let _ = pretty_env_logger::try_init(); 35 | let alloc = Bump::new(); 36 | let ast = parse( 37 | &alloc, 38 | r#" 39 | type item { 40 | } 41 | export item x; 42 | "#, 43 | ) 44 | .unwrap(); 45 | assert!(compile(&ast) 46 | .unwrap_err() 47 | .to_string() 48 | .starts_with("type name must start with an upper-case letter")); 49 | } 50 | 51 | #[test] 52 | fn index_constraints_case_1a() { 53 | let _ = pretty_env_logger::try_init(); 54 | let alloc = Bump::new(); 55 | let ast = parse( 56 | &alloc, 57 | r#" 58 | type Item { 59 | @unique key1: T, 60 | @unique key2: T, 61 | } 62 | export Item item; 63 | "#, 64 | ) 65 | .unwrap(); 66 | let output = compile(&ast).unwrap(); 67 | println!("{}", output); 68 | } 69 | 70 | #[test] 71 | fn index_constraints_case_2() { 72 | let _ = pretty_env_logger::try_init(); 73 | let alloc = Bump::new(); 74 | let ast = parse( 75 | &alloc, 76 | r#" 77 | type Item { 78 | @unique key1: T, 79 | @unique key2: Wrapped, 80 | } 81 | type Wrapped { 82 | inner: T, 83 | } 84 | export Item item; 85 | "#, 86 | ) 87 | .unwrap(); 88 | assert!(compile(&ast).is_err()); 89 | } 90 | 91 | #[test] 92 | fn no_primitive_types_in_set() { 93 | let _ = pretty_env_logger::try_init(); 94 | let alloc = Bump::new(); 95 | let ast = parse( 96 | &alloc, 97 | r#" 98 | export set something; 99 | "#, 100 | ) 101 | .unwrap(); 102 | assert!(compile(&ast).is_err()); 103 | } 104 | 105 | #[test] 106 | fn primary_keys() { 107 | let _ = pretty_env_logger::try_init(); 108 | let alloc = Bump::new(); 109 | let ast = parse( 110 | &alloc, 111 | r#" 112 | type Item 
{ 113 | @primary key: T, 114 | } 115 | export Item something; 116 | "#, 117 | ) 118 | .unwrap(); 119 | compile(&ast).unwrap(); 120 | } 121 | 122 | #[test] 123 | fn at_most_one_primary_key() { 124 | let _ = pretty_env_logger::try_init(); 125 | let alloc = Bump::new(); 126 | let ast = parse( 127 | &alloc, 128 | r#" 129 | type Item { 130 | @primary key1: T, 131 | @primary key2: T, 132 | } 133 | export Item something; 134 | "#, 135 | ) 136 | .unwrap(); 137 | assert!(compile(&ast) 138 | .unwrap_err() 139 | .to_string() 140 | .contains("has multiple primary keys")); 141 | } 142 | -------------------------------------------------------------------------------- /rdb-analyzer/src/schema/grammar/ast.rs: -------------------------------------------------------------------------------- 1 | use bumpalo::collections::vec::Vec; 2 | 3 | pub struct Schema<'a> { 4 | pub items: Vec<'a, SchemaItem<'a>>, 5 | } 6 | 7 | pub enum SchemaItem<'a> { 8 | Type(&'a TypeItem<'a>), 9 | Export(&'a ExportItem<'a>), 10 | } 11 | 12 | pub struct TypeItem<'a> { 13 | pub annotations: Vec<'a, Annotation<'a>>, 14 | pub location: usize, 15 | pub name: Identifier<'a>, 16 | pub generics: Vec<'a, Identifier<'a>>, 17 | pub fields: Vec<'a, TypeField<'a>>, 18 | } 19 | 20 | pub struct ExportItem<'a> { 21 | pub location: usize, 22 | pub ty: TypeExpr<'a>, 23 | pub table_name: Identifier<'a>, 24 | } 25 | 26 | pub struct TypeField<'a> { 27 | pub annotations: Vec<'a, Annotation<'a>>, 28 | pub location: usize, 29 | pub name: Identifier<'a>, 30 | pub value: TypeExpr<'a>, 31 | } 32 | 33 | pub enum TypeExpr<'a> { 34 | Unit(Identifier<'a>), 35 | Specialize(Identifier<'a>, Vec<'a, TypeExpr<'a>>), 36 | } 37 | 38 | pub struct Annotation<'a> { 39 | pub name: Identifier<'a>, 40 | pub args: Vec<'a, Literal<'a>>, 41 | } 42 | 43 | pub struct Identifier<'a>(pub &'a str); 44 | 45 | pub enum Literal<'a> { 46 | Integer(i64), 47 | String(&'a str), 48 | Bytes(&'a [u8]), 49 | } 50 | -------------------------------------------------------------------------------- /rdb-analyzer/src/schema/grammar/error.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Error, Debug)] 4 | pub enum SchemaError { 5 | #[error("invalid literal")] 6 | InvalidLiteral, 7 | } 8 | -------------------------------------------------------------------------------- /rdb-analyzer/src/schema/grammar/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod ast; 2 | pub mod error; 3 | 4 | use std::collections::HashSet; 5 | 6 | use anyhow::Result; 7 | use bumpalo::Bump; 8 | use lalrpop_util::lalrpop_mod; 9 | 10 | lalrpop_mod!(pub schema, "/schema/grammar/parser.rs"); 11 | 12 | use schema::SchemaSourceParser; 13 | 14 | pub struct State<'a> { 15 | alloc: &'a Bump, 16 | string_table: HashSet<&'a str>, 17 | } 18 | 19 | impl<'a> State<'a> { 20 | pub fn resolve_str(&mut self, s: &str) -> &'a str { 21 | match self.string_table.get(s) { 22 | Some(x) => x, 23 | None => { 24 | let s = self.alloc.alloc_str(s); 25 | self.string_table.insert(s); 26 | s 27 | } 28 | } 29 | } 30 | } 31 | 32 | pub fn parse<'a>(alloc: &'a Bump, input: &str) -> Result> { 33 | // Clone this to satisfy lifetimes 34 | let input = alloc.alloc_str(input); 35 | let mut st: State<'a> = State { 36 | alloc, 37 | string_table: HashSet::new(), 38 | }; 39 | let parser = SchemaSourceParser::new(); 40 | let schema = parser 41 | .parse(&mut st, input) 42 | .map_err(|x| x.map_token(|x| x.to_string()))?; 43 | Ok(schema) 44 | } 45 | 
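// A minimal usage sketch of `parse` (hypothetical test, not part of the
// crate): it allocates the AST in the caller's `Bump` arena and interns
// identifier strings there. The schema source mirrors the ones used in
// `schema::compile_test`, so it is assumed to be accepted by the grammar;
// the names `Foo`/`foo` are illustrative only.
#[cfg(test)]
mod parse_example {
  use bumpalo::Bump;

  #[test]
  fn parse_minimal_schema() {
    let alloc = Bump::new();
    let schema = super::parse(
      &alloc,
      r#"
        type Foo {
          x: int64,
        }
        export Foo foo;
      "#,
    )
    .unwrap();
    // One `type` item plus one `export` item.
    assert_eq!(schema.items.len(), 2);
  }
}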
-------------------------------------------------------------------------------- /rdb-analyzer/src/schema/grammar/parser.lalrpop: -------------------------------------------------------------------------------- 1 | use super::ast::*; 2 | use super::error::SchemaError; 3 | use lalrpop_util::ParseError; 4 | use super::State; 5 | use bumpalo::collections::vec::Vec as Bvec; 6 | 7 | grammar(state: &mut State<'input>); 8 | 9 | extern { 10 | type Error = SchemaError; 11 | } 12 | 13 | pub SchemaSource: Schema<'input> = { 14 | Comment* => Schema { 15 | items: Bvec::from_iter_in(items.into_iter(), &state.alloc), 16 | } 17 | } 18 | 19 | SchemaItem: SchemaItem<'input> = { 20 | => SchemaItem::Type(state.alloc.alloc(x)), 21 | => SchemaItem::Export(state.alloc.alloc(x)), 22 | } 23 | 24 | TypeItem: TypeItem<'input> = { 25 | Token<"type"> Token<"{"> >> Token<"}"> Token<";">? => TypeItem { 26 | location, 27 | annotations: Bvec::from_iter_in(annotations.into_iter(), &state.alloc), 28 | name, 29 | generics: Bvec::from_iter_in(generics.unwrap_or_default().into_iter(), &state.alloc), 30 | fields: Bvec::from_iter_in(fields.into_iter(), &state.alloc), 31 | } 32 | } 33 | 34 | ExportItem: ExportItem<'input> = { 35 | Token<"export"> Token<";"> => ExportItem { 36 | location, 37 | ty, 38 | table_name, 39 | } 40 | } 41 | 42 | TypeGenericList: Vec> = { 43 | Token<"<"> >> Token<">"> => x 44 | } 45 | 46 | TypeField: TypeField<'input> = { 47 | Token<":"> => TypeField { 48 | annotations: Bvec::from_iter_in(annotations.into_iter(), &state.alloc), 49 | location, name, value, 50 | }, 51 | } 52 | 53 | Annotation: Annotation<'input> = { 54 | "@" > ")")?> => Annotation { 55 | name, 56 | args: Bvec::from_iter_in(args.unwrap_or_default().into_iter(), &state.alloc), 57 | } 58 | } 59 | 60 | TypeExpr: TypeExpr<'input> = { 61 | Token<"<"> >> Token<">"> => TypeExpr::Specialize( 62 | x, 63 | Bvec::from_iter_in(args.into_iter(), &state.alloc), 64 | ), 65 | => TypeExpr::Unit(x), 66 | } 67 | 68 | Identifier: Identifier<'input> = { 69 | > => Identifier(s), 70 | } 71 | 72 | Literal: Literal<'input> = { 73 | > =>? s.parse().map(Literal::Integer).map_err(|_| ParseError::User { 74 | error: SchemaError::InvalidLiteral, 75 | }), 76 | > =>? i64::from_str_radix(s.strip_prefix("0x").unwrap(), 16).map(Literal::Integer).map_err(|_| ParseError::User { 77 | error: SchemaError::InvalidLiteral, 78 | }), 79 | > =>? i64::from_str_radix(s.strip_prefix("0o").unwrap(), 8).map(Literal::Integer).map_err(|_| ParseError::User { 80 | error: SchemaError::InvalidLiteral, 81 | }), 82 | > =>? i64::from_str_radix(s.strip_prefix("0b").unwrap(), 2).map(Literal::Integer).map_err(|_| ParseError::User { 83 | error: SchemaError::InvalidLiteral, 84 | }), 85 | => Literal::String(state.resolve_str(&s)), 86 | => Literal::Bytes(s), 87 | } 88 | 89 | StringLit: String = { 90 | > =>? serde_json::from_str::(s) 91 | .map_err(|_| ParseError::User { 92 | error: SchemaError::InvalidLiteral, 93 | }), 94 | } 95 | 96 | HexBytesLit: &'input [u8] = { 97 | > =>? serde_json::from_str::(s.strip_prefix("h\"").unwrap().strip_suffix("\"").unwrap()) 98 | .map_err(|_| ParseError::User { 99 | error: SchemaError::InvalidLiteral, 100 | }) 101 | .and_then(|x| hex::decode(&x) 102 | .map_err(|_| ParseError::User { 103 | error: SchemaError::InvalidLiteral, 104 | }) 105 | .map(|x| state.alloc.alloc_slice_copy(&x) as &[u8]) 106 | ), 107 | } 108 | 109 | 110 | ZeroOrMore: Vec = { 111 | ?> => x.unwrap_or_default() 112 | } 113 | 114 | OneOrMore: Vec = { 115 | Delim? 
=> { 116 | let mut items = vec![i1]; 117 | items.extend(i2.into_iter().map(|e| e.1)); 118 | items 119 | } 120 | } 121 | 122 | Token: I = { 123 | => s, 124 | } 125 | 126 | Comment: () = { 127 | r"//[^\n\r]*[\n\r]*" => { }, 128 | r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { }, 129 | } 130 | -------------------------------------------------------------------------------- /rdb-analyzer/src/schema/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod compile; 2 | pub mod grammar; 3 | 4 | #[cfg(test)] 5 | mod compile_test; 6 | -------------------------------------------------------------------------------- /rdb-analyzer/src/storage_plan/conversion.rs: -------------------------------------------------------------------------------- 1 | use std::convert::{TryFrom, TryInto}; 2 | 3 | use thiserror::Error; 4 | 5 | use super::{StorageKey, StorageNode, StoragePlan}; 6 | 7 | #[derive(Error, Debug)] 8 | pub enum StorageKeyConversionError { 9 | #[error("cannot decode base64-encoded storage key")] 10 | Base64Decode, 11 | } 12 | 13 | impl From<&StoragePlan> for StoragePlan { 14 | fn from(that: &StoragePlan) -> Self { 15 | Self { 16 | nodes: that 17 | .nodes 18 | .iter() 19 | .map(|(k, v)| (k.clone(), StorageNode::::from(v))) 20 | .collect(), 21 | } 22 | } 23 | } 24 | 25 | impl From<&StorageNode> for StorageNode { 26 | fn from(that: &StorageNode) -> Self { 27 | Self { 28 | key: base64::encode(&that.key), 29 | flattened: that.flattened, 30 | subspace_reference: that.subspace_reference.map(|x| base64::encode(&x)), 31 | set: that.set.as_ref().map(|x| Box::new(Self::from(&**x))), 32 | children: that 33 | .children 34 | .iter() 35 | .map(|(k, v)| (k.clone(), Self::from(v))) 36 | .collect(), 37 | } 38 | } 39 | } 40 | 41 | impl TryFrom<&StoragePlan> for StoragePlan { 42 | type Error = StorageKeyConversionError; 43 | 44 | fn try_from(that: &StoragePlan) -> Result { 45 | Ok(Self { 46 | nodes: that 47 | .nodes 48 | .iter() 49 | .map(|(k, v)| StorageNode::::try_from(v).map(|v| (k.clone(), v))) 50 | .collect::>()?, 51 | }) 52 | } 53 | } 54 | 55 | impl TryFrom<&StorageNode> for StorageNode { 56 | type Error = StorageKeyConversionError; 57 | 58 | fn try_from(that: &StorageNode) -> Result { 59 | Ok(Self { 60 | key: base64::decode(&that.key) 61 | .map_err(|_| StorageKeyConversionError::Base64Decode) 62 | .and_then(|x| { 63 | x.try_into() 64 | .map_err(|_| StorageKeyConversionError::Base64Decode) 65 | })?, 66 | flattened: that.flattened, 67 | subspace_reference: that 68 | .subspace_reference 69 | .as_ref() 70 | .map(|x| base64::decode(&x)) 71 | .transpose() 72 | .map_err(|_| StorageKeyConversionError::Base64Decode)? 
73 |         .map(|x| {
74 |           x.try_into()
75 |             .map_err(|_| StorageKeyConversionError::Base64Decode)
76 |         })
77 |         .transpose()?,
78 |       set: that
79 |         .set
80 |         .as_ref()
81 |         .map(|x| Self::try_from(&**x).map(Box::new))
82 |         .transpose()?,
83 |       children: that
84 |         .children
85 |         .iter()
86 |         .map(|(k, v)| Self::try_from(v).map(|v| (k.clone(), v)))
87 |         .collect::<Result<_, _>>()?,
88 |     })
89 |   }
90 | }
91 | 
--------------------------------------------------------------------------------
/rdb-analyzer/src/storage_plan/mod.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 | use serde::{Deserialize, Serialize};
3 | use std::{collections::BTreeMap, fmt::Display, io::Write, sync::Arc};
4 | 
5 | pub mod conversion;
6 | pub mod planner;
7 | 
8 | #[cfg(test)]
9 | mod planner_test;
10 | 
11 | pub type StorageKey = [u8; 12];
12 | 
13 | #[derive(Default, Clone, Serialize, Deserialize)]
14 | pub struct StoragePlan<SK = StorageKey> {
15 |   pub nodes: BTreeMap<Arc<str>, StorageNode<SK>>,
16 | }
17 | 
18 | #[derive(Clone, Debug, Serialize, Deserialize)]
19 | pub struct StorageNode<SK = StorageKey> {
20 |   pub key: SK,
21 |   pub flattened: bool,
22 |   pub subspace_reference: Option<SK>,
23 |   pub set: Option<Box<StorageNode<SK>>>,
24 |   pub children: BTreeMap<Arc<str>, StorageNode<SK>>,
25 | }
26 | 
27 | impl StoragePlan<StorageKey> {
28 |   pub fn serialize_compressed(&self) -> Result<Vec<u8>> {
29 |     let serialized = rmp_serde::to_vec_named(self)?;
30 |     let mut buf = Vec::new();
31 |     snap::write::FrameEncoder::new(&mut buf).write_all(&serialized)?;
32 |     Ok(buf)
33 |   }
34 |   pub fn deserialize_compressed(data: &[u8]) -> Result<Self> {
35 |     Ok(rmp_serde::from_read(snap::read::FrameDecoder::new(data))?)
36 |   }
37 | }
38 | 
39 | impl Display for StoragePlan<StorageKey> {
40 |   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41 |     for (node_name, node) in &self.nodes {
42 |       write!(f, "top-level node: {}{}", node_name, node)?;
43 |     }
44 |     Ok(())
45 |   }
46 | }
47 | 
48 | impl StorageNode<StorageKey> {
49 |   fn display_fmt(&self, indent: usize, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 |     write!(
51 |       f,
52 |       " {}{}{}",
53 |       hex::encode(&self.key.as_ref()),
54 |       if let Some(x) = self.subspace_reference {
55 |         format!(" subspace_reference({})", base64::encode(&x))
56 |       } else {
57 |         "".into()
58 |       },
59 |       if self.flattened { " flattened" } else { "" },
60 |     )?;
61 |     write!(f, "\n")?;
62 | 
63 |     match &self.set {
64 |       Some(x) => {
65 |         for _ in 0..indent + 1 {
66 |           write!(f, ".")?;
67 |         }
68 |         write!(f, "")?;
69 |         x.display_fmt(indent + 1, f)?;
70 |       }
71 |       _ => {
72 |         for (child_name, child_node) in &self.children {
73 |           for _ in 0..indent + 1 {
74 |             write!(f, ".")?;
75 |           }
76 |           write!(f, "{}", child_name)?;
77 |           child_node.display_fmt(indent + 1, f)?;
78 |         }
79 |       }
80 |     }
81 |     Ok(())
82 |   }
83 | }
84 | 
85 | impl Display for StorageNode<StorageKey> {
86 |   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87 |     self.display_fmt(0, f)
88 |   }
89 | }
90 | 
--------------------------------------------------------------------------------
/rdb-analyzer/src/storage_plan/planner_test.rs:
--------------------------------------------------------------------------------
1 | use std::convert::TryFrom;
2 | 
3 | use bumpalo::Bump;
4 | use console::Style;
5 | use similar::{ChangeTag, TextDiff};
6 | 
7 | use crate::{
8 |   schema::{compile::compile, grammar::parse},
9 |   storage_plan::StoragePlan,
10 | };
11 | 
12 | use super::planner::generate_plan_for_schema;
13 | 
14 | const SIMPLE_SCHEMA: &str = r#"
15 | type Item<T> {
16 |   inner: T,
17 |   inner2: T,
18 |   @primary
19 |   something_else: string,
20 | }
21 | type
Duration<T> {
22 |   start: T,
23 |   end: T,
24 | }
25 | type Recursive<T> {
26 |   inner: Recursive<T>,
27 | }
28 | type BinaryTree<T> {
29 |   left: BinaryTree<T>,
30 |   right: BinaryTree<T>,
31 |   value: T,
32 | }
33 | 
34 | type TrinaryTree<T> {
35 |   left: TrinaryTree<T>,
36 |   middle: TrinaryTree<T>,
37 |   right: TrinaryTree<T>,
38 |   value: T,
39 | }
40 | 
41 | type InternalSet {
42 |   @primary
43 |   key: bytes,
44 |   s: set<Wrapper<int64>>,
45 | }
46 | 
47 | type Wrapper<T> {
48 |   @primary
49 |   value: T,
50 | }
51 | 
52 | export set<Item<Duration<int64>>> items;
53 | export Recursive<int64> item;
54 | export BinaryTree<int64> a_binary_tree;
55 | export InternalSet an_internal_set;
56 | export set<InternalSet> nested_internal_sets;
57 | export TrinaryTree<int64> a_trinary_tree;
58 | "#;
59 | 
60 | #[test]
61 | fn test_planner_simple() {
62 |   let _ = pretty_env_logger::try_init();
63 |   let alloc = Bump::new();
64 |   let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap();
65 |   let output = compile(&ast).unwrap();
66 |   drop(ast);
67 |   drop(alloc);
68 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
69 |   println!(
70 |     "{}",
71 |     serde_yaml::to_string(&StoragePlan::<String>::from(&plan)).unwrap()
72 |   );
73 | }
74 | 
75 | #[test]
76 | fn planner_example_1() {
77 |   let _ = pretty_env_logger::try_init();
78 |   let alloc = Bump::new();
79 |   let ast = parse(
80 |     &alloc,
81 |     r#"
82 | type Item<T> {
83 |   @primary
84 |   id: string,
85 |   value: T,
86 | }
87 | type RecursiveItem<T> {
88 |   @primary
89 |   id: string,
90 |   value: T,
91 |   recursive: RecursiveItem<T>,
92 | }
93 | type Duration<T> {
94 |   start: T,
95 |   end: T,
96 | }
97 | export set<Item<Duration<int64>>> items;
98 | export set<RecursiveItem<Duration<int64>>> recursive_items;
99 | "#,
100 |   )
101 |   .unwrap();
102 |   let output = compile(&ast).unwrap();
103 |   drop(ast);
104 |   drop(alloc);
105 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
106 |   println!(
107 |     "{}",
108 |     serde_yaml::to_string(&StoragePlan::<String>::from(&plan)).unwrap()
109 |   );
110 | }
111 | 
112 | #[test]
113 | fn recursion_cycles() {
114 |   let _ = pretty_env_logger::try_init();
115 |   let alloc = Bump::new();
116 |   let ast = parse(
117 |     &alloc,
118 |     r#"
119 | type A<T> {
120 |   @primary
121 |   id: string,
122 |   value: B<T>,
123 | }
124 | type B<T> {
125 |   value1: A<T>,
126 |   value2: C<T>,
127 | }
128 | type C<T> {
129 |   value: T,
130 |   that1: A<T>,
131 |   that2: B<T>,
132 | }
133 | export set<A<int64>> items;
134 | "#,
135 |   )
136 |   .unwrap();
137 |   let output = compile(&ast).unwrap();
138 |   drop(ast);
139 |   drop(alloc);
140 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
141 |   println!(
142 |     "{}",
143 |     serde_yaml::to_string(&StoragePlan::<String>::from(&plan)).unwrap()
144 |   );
145 | }
146 | 
147 | #[test]
148 | fn test_yaml_serialization() {
149 |   let _ = pretty_env_logger::try_init();
150 |   let alloc = Bump::new();
151 |   let ast = parse(&alloc, SIMPLE_SCHEMA).unwrap();
152 |   let output = compile(&ast).unwrap();
153 |   drop(ast);
154 |   drop(alloc);
155 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
156 |   let plan2 = serde_yaml::to_string(&StoragePlan::<String>::from(&plan)).unwrap();
157 |   let plan2: StoragePlan<String> = serde_yaml::from_str(&plan2).unwrap();
158 |   let plan2 = StoragePlan::try_from(&plan2).unwrap();
159 |   assert_eq!(
160 |     plan.serialize_compressed().unwrap(),
161 |     plan2.serialize_compressed().unwrap()
162 |   );
163 | }
164 | 
165 | #[test]
166 | fn test_many_binary_trees() {
167 |   let _ = pretty_env_logger::try_init();
168 |   let alloc = Bump::new();
169 |   let ast = parse(
170 |     &alloc,
171 |     r#"
172 | type
BinaryTree<T> {
173 |   left: BinaryTree<T>,
174 |   right: BinaryTree<T>,
175 |   value: T,
176 | }
177 | type Tuple<A, B> {
178 |   @primary
179 |   first: A,
180 |   second: B,
181 | }
182 | export BinaryTree<int64> binary_tree;
183 | export set<Tuple<string, BinaryTree<int64>>> set_of_binary_trees;
184 | export BinaryTree<set<Tuple<string, int64>>> binary_tree_of_sets;
185 | export BinaryTree<BinaryTree<int64>> binary_tree_of_binary_trees;
186 | export BinaryTree<Tuple<string, BinaryTree<int64>>> complex_structure;
187 | "#,
188 |   )
189 |   .unwrap();
190 |   let output = compile(&ast).unwrap();
191 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
192 |   println!(
193 |     "test_many_binary_trees: serialized size of plan: {}",
194 |     plan.serialize_compressed().unwrap().len()
195 |   );
196 | }
197 | 
198 | #[test]
199 | fn test_tuple_set() {
200 |   let _ = pretty_env_logger::try_init();
201 |   let alloc = Bump::new();
202 |   let ast = parse(
203 |     &alloc,
204 |     r#"
205 | type Tuple<A, B> {
206 |   first: A,
207 |   second: B,
208 | }
209 | type SetBox {
210 |   inner: set<Box<int64>>,
211 | }
212 | type Box<T> {
213 |   @primary
214 |   inner: T,
215 | }
216 | export Tuple<Box<SetBox>, set<Box<int64>>> something;
217 | "#,
218 |   )
219 |   .unwrap();
220 |   let output = compile(&ast).unwrap();
221 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
222 |   println!("{}", plan);
223 | }
224 | 
225 | #[test]
226 | fn test_set_member_with_primary_key() {
227 |   let _ = pretty_env_logger::try_init();
228 |   let alloc = Bump::new();
229 |   let ast = parse(
230 |     &alloc,
231 |     r#"
232 | type A {
233 |   @primary
234 |   a: int64,
235 | }
236 | export set<A> some_set;
237 | "#,
238 |   )
239 |   .unwrap();
240 |   let output = compile(&ast).unwrap();
241 |   generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
242 | }
243 | 
244 | #[test]
245 | fn test_set_member_without_primary_key() {
246 |   let _ = pretty_env_logger::try_init();
247 |   let alloc = Bump::new();
248 |   let ast = parse(
249 |     &alloc,
250 |     r#"
251 | type A {
252 |   a: int64,
253 | }
254 | export set<A> some_set;
255 | "#,
256 |   )
257 |   .unwrap();
258 |   let output = compile(&ast).unwrap();
259 |   match generate_plan_for_schema(&Default::default(), &Default::default(), &output) {
260 |     Ok(_) => panic!("test_set_member_without_primary_key: did not get expected error"),
261 |     Err(e) => assert!(e.to_string().contains("has no primary key")),
262 |   }
263 | }
264 | 
265 | fn run_planner_migration_stats(old: &str, new: &str) -> (usize, usize) {
266 |   struct Line(Option<usize>);
267 | 
268 |   impl std::fmt::Display for Line {
269 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
270 |       match self.0 {
271 |         None => write!(f, "    "),
272 |         Some(idx) => write!(f, "{:<4}", idx + 1),
273 |       }
274 |     }
275 |   }
276 | 
277 |   let alloc = Bump::new();
278 |   let ast = parse(&alloc, old).unwrap();
279 |   let schema1 = compile(&ast).unwrap();
280 |   drop(ast);
281 |   drop(alloc);
282 |   let plan1 = generate_plan_for_schema(&Default::default(), &Default::default(), &schema1).unwrap();
283 | 
284 |   let alloc = Bump::new();
285 |   let ast = parse(&alloc, new).unwrap();
286 |   let schema2 = compile(&ast).unwrap();
287 |   drop(ast);
288 |   drop(alloc);
289 | 
290 |   let plan2 = generate_plan_for_schema(&plan1, &schema1, &schema2).unwrap();
291 | 
292 |   let plan1 = serde_yaml::to_string(&StoragePlan::<String>::from(&plan1)).unwrap();
293 |   let plan2 = serde_yaml::to_string(&StoragePlan::<String>::from(&plan2)).unwrap();
294 |   let diff = TextDiff::from_lines(&plan1, &plan2);
295 |   let mut insert_count = 0usize;
296 |   let mut delete_count = 0usize;
297 |   for (idx, group) in
diff.grouped_ops(3).iter().enumerate() {
298 |     if idx > 0 {
299 |       println!("{:-^1$}", "-", 80);
300 |     }
301 |     for op in group {
302 |       for change in diff.iter_inline_changes(op) {
303 |         let (sign, s) = match change.tag() {
304 |           ChangeTag::Delete => {
305 |             delete_count += 1;
306 |             ("-", Style::new().red())
307 |           }
308 |           ChangeTag::Insert => {
309 |             insert_count += 1;
310 |             ("+", Style::new().green())
311 |           }
312 |           ChangeTag::Equal => (" ", Style::new().dim()),
313 |         };
314 |         print!(
315 |           "{}{} |{}",
316 |           console::style(Line(change.old_index())).dim(),
317 |           console::style(Line(change.new_index())).dim(),
318 |           s.apply_to(sign).bold(),
319 |         );
320 |         for (emphasized, value) in change.iter_strings_lossy() {
321 |           if emphasized {
322 |             print!("{}", s.apply_to(value).underlined().on_black());
323 |           } else {
324 |             print!("{}", s.apply_to(value));
325 |           }
326 |         }
327 |         if change.missing_newline() {
328 |           println!();
329 |         }
330 |       }
331 |     }
332 |   }
333 |   (insert_count, delete_count)
334 | }
335 | 
336 | #[test]
337 | fn test_planner_migration_identity() {
338 |   let _ = pretty_env_logger::try_init();
339 |   let (insert_count, delete_count) = run_planner_migration_stats(SIMPLE_SCHEMA, SIMPLE_SCHEMA);
340 |   assert!(insert_count == 0);
341 |   assert!(delete_count == 0);
342 | }
343 | 
344 | #[test]
345 | fn test_planner_migration_add_and_remove_field_simple() {
346 |   let _ = pretty_env_logger::try_init();
347 |   let old = r#"
348 | type Item {
349 |   a: int64,
350 |   b: string,
351 |   c: bytes,
352 | }
353 | export Item data;
354 | "#;
355 |   let new = r#"
356 | type Item {
357 |   a: int64,
358 |   b: string,
359 |   c: bytes,
360 |   d: string,
361 | }
362 | export Item data;
363 | "#;
364 |   let (insert_count_1, delete_count_1) = run_planner_migration_stats(old, new);
365 |   assert!(insert_count_1 > 0);
366 |   assert!(delete_count_1 == 0);
367 |   println!(
368 |     "test_planner_migration_add_and_remove_field_simple: insert {}, delete {}",
369 |     insert_count_1, delete_count_1
370 |   );
371 |   let (insert_count_2, delete_count_2) = run_planner_migration_stats(new, old);
372 |   assert!(insert_count_2 == 0);
373 |   assert!(delete_count_2 > 0);
374 |   assert_eq!(insert_count_2, delete_count_1);
375 |   assert_eq!(delete_count_2, insert_count_1);
376 | }
377 | 
378 | #[test]
379 | fn test_planner_migration_add_and_remove_field_complex() {
380 |   let _ = pretty_env_logger::try_init();
381 |   let old = r#"
382 | type BinaryTree<T> {
383 |   left: BinaryTree<T>,
384 |   right: BinaryTree<T>,
385 |   value: T,
386 | }
387 | export BinaryTree<int64> data;
388 | "#;
389 |   let new = r#"
390 | type BinaryTree<T> {
391 |   left: BinaryTree<T>,
392 |   right: BinaryTree<T>,
393 |   value: T,
394 |   value2: T,
395 | }
396 | export BinaryTree<int64> data;
397 | "#;
398 |   let (insert_count_1, delete_count_1) = run_planner_migration_stats(old, new);
399 |   assert!(insert_count_1 > 0);
400 |   assert!(delete_count_1 == 0);
401 |   println!(
402 |     "test_planner_migration_add_and_remove_field_complex: insert {}, delete {}",
403 |     insert_count_1, delete_count_1
404 |   );
405 |   let (insert_count_2, delete_count_2) = run_planner_migration_stats(new, old);
406 |   assert!(insert_count_2 == 0);
407 |   assert!(delete_count_2 > 0);
408 |   assert_eq!(insert_count_2, delete_count_1);
409 |   assert_eq!(delete_count_2, insert_count_1);
410 | }
411 | 
412 | #[test]
413 | fn test_planner_migration_field_rename() {
414 |   let _ = pretty_env_logger::try_init();
415 |   let old = r#"
416 | type Item {
417 |   a: int64,
418 |   c: int64,
419 | }
420 | export Item data;
421 | "#;
422 |   let new = r#"
423 | type Item {
424 | 
  @rename_from("a")
425 |   b: int64,
426 |   c: int64
427 | }
428 | export Item data;
429 | "#;
430 |   let (insert_count_1, delete_count_1) = run_planner_migration_stats(old, new);
431 |   assert_eq!(insert_count_1, 1);
432 |   assert_eq!(delete_count_1, 1);
433 |   println!(
434 |     "test_planner_migration_field_rename: insert {}, delete {}",
435 |     insert_count_1, delete_count_1
436 |   );
437 | }
438 | 
439 | #[test]
440 | fn primitive_exports() {
441 |   let _ = pretty_env_logger::try_init();
442 |   let alloc = Bump::new();
443 |   let ast = parse(
444 |     &alloc,
445 |     r#"
446 | export int64 a;
447 | export string b;
448 | "#,
449 |   )
450 |   .unwrap();
451 |   let output = compile(&ast).unwrap();
452 |   let plan = generate_plan_for_schema(&Default::default(), &Default::default(), &output).unwrap();
453 |   println!("{}", plan);
454 | }
455 | 
--------------------------------------------------------------------------------
/rdb-analyzer/src/test_util.rs:
--------------------------------------------------------------------------------
1 | use crate::data::kv::KeyValueStore;
2 | 
3 | #[cfg(feature = "test-with-fdb")]
4 | fn ensure_fdb_ready() {
5 |   use foundationdb::{tuple::Subspace, Database};
6 |   use std::sync::Once;
7 |   static FDB_BOOT: Once = Once::new();
8 |   FDB_BOOT.call_once(|| {
9 |     let network = unsafe { foundationdb::boot() };
10 |     std::mem::forget(network);
11 | 
12 |     std::thread::spawn(|| {
13 |       tokio::runtime::Builder::new_current_thread()
14 |         .enable_all()
15 |         .build()
16 |         .unwrap()
17 |         .block_on(async move {
18 |           let db = Database::default().unwrap();
19 |           let txn = db.create_trx().unwrap();
20 |           txn.clear_subspace_range(&Subspace::all().subspace(&"rdbtest"));
21 |           txn.commit().await.unwrap();
22 |         });
23 |     })
24 |     .join()
25 |     .unwrap_or_else(|_| panic!("db init failed"))
26 |   });
27 | }
28 | 
29 | #[cfg(not(any(feature = "test-with-fdb", feature = "test-with-sqlite")))]
30 | pub fn create_kv() -> Box<dyn KeyValueStore> {
31 |   use crate::kv_backend::mock_kv::MockKv;
32 |   Box::new(MockKv::new())
33 | }
34 | 
35 | #[cfg(feature = "test-with-fdb")]
36 | pub fn create_kv() -> Box<dyn KeyValueStore> {
37 |   use crate::kv_backend::foundationdb::FdbKvStore;
38 |   use foundationdb::{tuple::Subspace, Database};
39 |   use rand::RngCore;
40 |   use std::sync::Arc;
41 |   ensure_fdb_ready();
42 | 
43 |   let mut isolation_id = [0u8; 16];
44 |   rand::thread_rng().fill_bytes(&mut isolation_id[..]);
45 |   let isolation_id = hex::encode(&isolation_id);
46 | 
47 |   Box::new(FdbKvStore::new(
48 |     Arc::new(Database::default().unwrap()),
49 |     Subspace::all()
50 |       .subspace(&format!("rdbtest"))
51 |       .subspace(&isolation_id)
52 |       .bytes(),
53 |   ))
54 | }
55 | 
56 | #[cfg(feature = "test-with-sqlite")]
57 | pub fn create_kv() -> Box<dyn KeyValueStore> {
58 |   use crate::kv_backend::sqlite::{GlobalSqliteStore, SqliteKvStore};
59 |   use rand::RngCore;
60 |   use std::{sync::Arc, time::SystemTime};
61 | 
62 |   lazy_static::lazy_static!
{
63 |     static ref TEMP_FILE: String = format!(
64 |       "{}/rdb-test-sqlite-{}-{}.db",
65 |       std::env::temp_dir().to_string_lossy(),
66 |       SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_millis(),
67 |       std::process::id(),
68 |     );
69 |     static ref GLOBAL: Arc<GlobalSqliteStore> = GlobalSqliteStore::open_leaky(&*TEMP_FILE).unwrap();
70 |   }
71 | 
72 |   let mut isolation_id = [0u8; 16];
73 |   rand::thread_rng().fill_bytes(&mut isolation_id[..]);
74 | 
75 |   Box::new(SqliteKvStore::new(
76 |     GLOBAL.clone(),
77 |     "user_data",
78 |     &isolation_id,
79 |   ))
80 | }
81 | 
--------------------------------------------------------------------------------
/rdb-analyzer/src/util.rs:
--------------------------------------------------------------------------------
1 | use std::{collections::HashSet, hash::Hash};
2 | 
3 | pub fn first_duplicate<T>(iter: T) -> Option<T::Item>
4 | where
5 |   T: IntoIterator,
6 |   T::Item: Eq + Hash,
7 | {
8 |   let mut uniq = HashSet::new();
9 |   for x in iter {
10 |     if uniq.contains(&x) {
11 |       return Some(x);
12 |     }
13 |     uniq.insert(x);
14 |   }
15 |   None
16 | }
17 | 
18 | macro_rules! unwrap_enum {
19 |   ($value:expr, $pattern:pat => $extracted_value:expr) => {
20 |     match $value {
21 |       $pattern => $extracted_value,
22 |       _ => panic!("enum variant mismatch"),
23 |     }
24 |   };
25 | }
26 | 
--------------------------------------------------------------------------------
/rdb-pgsvc/.cargo/config:
--------------------------------------------------------------------------------
1 | [build]
2 | target = "wasm32-unknown-emscripten"
3 | 
4 | [target.wasm32-unknown-emscripten]
5 | rustflags = [
6 |   "-C", "link-arg=-s",
7 |   "-C", "link-arg=EXTRA_EXPORTED_RUNTIME_METHODS=['allocateUTF8', 'UTF8ToString']",
8 | ]
--------------------------------------------------------------------------------
/rdb-pgsvc/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "rdb-pgsvc"
3 | version = "0.1.0"
4 | edition = "2018"
5 | description = "RefineDB playground service."
6 | 
7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8 | 
9 | [dependencies]
10 | rdb-analyzer = { path = "../rdb-analyzer", default-features = false }
11 | anyhow = "1"
12 | thiserror = "1"
13 | serde = { version = "1", features = ["derive", "rc"] }
14 | serde_json = "1"
15 | log = "0.4"
16 | pretty_env_logger = "0.4"
17 | bumpalo = { version = "3.7", features = ["collections", "boxed"] }
18 | similar = { version = "1", features = ["inline"] }
19 | serde_yaml = "0.8"
20 | rmp-serde = "0.15"
21 | hex = "0.4"
22 | sha2 = "0.9"
23 | libc = "0.2"
24 | petgraph = "0.5"
25 | rpds = { version = "0.9", features = ["serde"] }
26 | async-trait = "0.1"
27 | base64 = "0.13"
28 | futures = "0.3"
--------------------------------------------------------------------------------
/rdb-pgsvc/src/dfvis.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 | use rdb_analyzer::data::treewalker::vm::TwVm;
3 | use serde::Serialize;
4 | 
5 | #[derive(Serialize)]
6 | pub struct VisNode {
7 |   id: usize,
8 |   label: String,
9 |   shape: Option<String>,
10 |   group: usize,
11 |   x: Option<f64>,
12 |   y: Option<f64>,
13 |   fixed: Option<VisNodeFixed>,
14 | }
15 | 
16 | #[derive(Serialize)]
17 | struct VisNodeFixed {
18 |   x: bool,
19 |   y: bool,
20 | }
21 | 
22 | #[derive(Serialize)]
23 | pub struct VisEdge {
24 |   from: usize,
25 |   to: usize,
26 |   dashes: bool,
27 |   color: Option<String>,
28 |   arrows: &'static str,
29 |   label: Option<String>,
30 | }
31 | 
32 | #[derive(Serialize, Default)]
33 | pub struct VisualizedDataflow {
34 |   nodes: Vec<VisNode>,
35 |   edges: Vec<VisEdge>,
36 | }
37 | 
38 | struct Visualizer<'a, 'b> {
39 |   vm: &'b TwVm<'a>,
40 |   output: VisualizedDataflow,
41 | }
42 | 
43 | impl<'a, 'b> Visualizer<'a, 'b> {
44 |   fn visualize_df(&mut self) -> Result<()> {
45 |     for (i, g) in self.vm.script.graphs.iter().enumerate() {
46 |       let id = self.output.nodes.len();
47 |       assert_eq!(id, i);
48 |       self.output.nodes.push(VisNode {
49 |         id,
50 |         label: format!("graph:{}", g.name),
51 |         shape: Some("diamond".into()),
52 |         group: i,
53 |         x: None,
54 |         y: None,
55 |         fixed: None,
56 |       });
57 |     }
58 |     let n_graphs = self.vm.script.graphs.len();
59 |     for i in 0..n_graphs {
60 |       self.visualize_graph(i)?;
61 |     }
62 |     Ok(())
63 |   }
64 | 
65 |   fn visualize_graph(&mut self, graph_index: usize) -> Result<()> {
66 |     let g = &self.vm.script.graphs[graph_index];
67 |     let mut node_id_in_output: Vec<usize> = Vec::with_capacity(g.nodes.len());
68 |     for (n, in_edges, condition) in &g.nodes {
69 |       let id = self.output.nodes.len();
70 |       self.output.nodes.push(VisNode {
71 |         id,
72 |         label: format!("{:?}", n),
73 |         shape: None,
74 |         group: graph_index,
75 |         x: None,
76 |         y: None,
77 |         fixed: None,
78 |       });
79 |       let mut has_deps = false;
80 |       for (i, in_edge) in in_edges.iter().enumerate() {
81 |         self.output.edges.push(VisEdge {
82 |           from: node_id_in_output[*in_edge as usize],
83 |           to: id,
84 |           dashes: false,
85 |           color: None,
86 |           arrows: "to",
87 |           label: Some(format!("{}", i)),
88 |         });
89 |         has_deps = true;
90 |       }
91 |       if let Some(x) = condition {
92 |         self.output.edges.push(VisEdge {
93 |           from: node_id_in_output[*x as usize],
94 |           to: id,
95 |           dashes: true,
96 |           color: None,
97 |           arrows: "to",
98 |           label: None,
99 |         });
100 |         has_deps = true;
101 |       }
102 |       if !has_deps {
103 |         self.output.edges.push(VisEdge {
104 |           from: graph_index,
105 |           to: id,
106 |           dashes: true,
107 |           color: None,
108 |           arrows: "to",
109 |           label: None,
110 |         });
111 |       }
112 |       let subgraph_references = n.subgraph_references();
113 |       for
subgraph_id in subgraph_references {
114 |         self.output.edges.push(VisEdge {
115 |           from: id,
116 |           to: subgraph_id as usize,
117 |           dashes: true,
118 |           color: Some("red".into()),
119 |           arrows: "to",
120 |           label: None,
121 |         });
122 |       }
123 |       node_id_in_output.push(id);
124 |     }
125 | 
126 |     Ok(())
127 |   }
128 | }
129 | 
130 | pub fn visualize_df(vm: &TwVm) -> Result<String> {
131 |   let mut vis = Visualizer {
132 |     vm,
133 |     output: VisualizedDataflow::default(),
134 |   };
135 |   vis.visualize_df()?;
136 |   Ok(serde_json::to_string(&vis.output)?)
137 | }
--------------------------------------------------------------------------------
/rdb-pgsvc/src/main.rs:
--------------------------------------------------------------------------------
1 | mod dfvis;
2 | mod memkv;
3 | mod query;
4 | 
5 | use std::{ffi::CStr, os::raw::c_char, panic::AssertUnwindSafe, ptr::NonNull, sync::Arc};
6 | 
7 | use anyhow::Result;
8 | use bumpalo::Bump;
9 | use dfvis::visualize_df;
10 | use memkv::MemKv;
11 | use query::{get_vm_graphs, run_vm_query, VmGraphQuery};
12 | use rdb_analyzer::{
13 |   data::treewalker::{
14 |     asm::codegen::compile_twscript,
15 |     bytecode::TwScript,
16 |     typeck::{GlobalTyckContext, GlobalTypeInfo},
17 |     vm::TwVm,
18 |   },
19 |   schema::{
20 |     compile::{compile, CompiledSchema},
21 |     grammar::parse,
22 |   },
23 |   storage_plan::{planner::generate_plan_for_schema, StoragePlan},
24 | };
25 | 
26 | fn main() {}
27 | 
28 | #[no_mangle]
29 | pub extern "C" fn rdb_pgsvc_init() {
30 |   wrap("rdb_pgsvc_init", || {
31 |     std::env::set_var("RUST_LOG", "debug");
32 |     pretty_env_logger::init_timed();
33 |     log::info!("rdb_pgsvc initialized");
34 |     Ok(())
35 |   });
36 | }
37 | 
38 | #[no_mangle]
39 | pub extern "C" fn rdb_drop_schema(_: Option<Box<CompiledSchema>>) {}
40 | 
41 | #[no_mangle]
42 | pub extern "C" fn rdb_drop_plan(_: Option<Box<StoragePlan>>) {}
43 | 
44 | #[no_mangle]
45 | pub extern "C" fn rdb_drop_twscript<'a>(_: Option<Box<TwScript>>) {}
46 | 
47 | #[no_mangle]
48 | pub extern "C" fn rdb_drop_vm<'a>(_: Option<Box<TwVm<'a>>>) {}
49 | 
50 | #[no_mangle]
51 | pub extern "C" fn rdb_drop_global_type_info<'a>(_: Option<Box<GlobalTypeInfo<'a>>>) {}
52 | 
53 | #[no_mangle]
54 | pub extern "C" fn rdb_acquire_memkv(x: Option<&Arc<MemKv>>) -> Option<Box<Arc<MemKv>>> {
55 |   x.map(|x| Box::new(x.clone()))
56 | }
57 | 
58 | #[no_mangle]
59 | pub extern "C" fn rdb_release_memkv(_: Option<Box<Arc<MemKv>>>) {}
60 | 
61 | #[no_mangle]
62 | pub unsafe extern "C" fn rdb_compile_schema(schema: *const c_char) -> Option<Box<CompiledSchema>> {
63 |   wrap("rdb_compile_schema", || {
64 |     let schema = CStr::from_ptr(schema);
65 |     let schema = schema.to_str()?;
66 |     let schema = compile(&parse(&Bump::new(), schema)?)?;
67 |     Ok(Box::new(schema))
68 |   })
69 | }
70 | 
71 | #[no_mangle]
72 | pub unsafe extern "C" fn rdb_dfasm(source: *const c_char) -> Option<Box<TwScript>> {
73 |   wrap("rdb_dfasm", || {
74 |     let source = CStr::from_ptr(source);
75 |     let source = source.to_str()?;
76 |     let twscript = compile_twscript(source)?;
77 |     Ok(Box::new(twscript))
78 |   })
79 | }
80 | 
81 | #[no_mangle]
82 | pub extern "C" fn rdb_vm_create<'a>(
83 |   schema: &'a CompiledSchema,
84 |   plan: &'a StoragePlan,
85 |   script: &'a TwScript,
86 | ) -> Option<Box<TwVm<'a>>> {
87 |   wrap("rdb_vm_create", || {
88 |     Ok(Box::new(TwVm::new(schema, plan, script)?))
89 |   })
90 | }
91 | 
92 | #[no_mangle]
93 | pub extern "C" fn rdb_vm_tyck<'a>(vm: &TwVm<'a>) -> Option<Box<GlobalTypeInfo<'a>>> {
94 |   wrap("rdb_vm_tyck", || {
95 |     Ok(Box::new(GlobalTyckContext::new(vm)?.typeck()?))
96 |   })
97 | }
98 | 
99 | #[no_mangle]
100 | pub extern "C" fn rdb_vm_visualize_df<'a>(vm: &TwVm<'a>) -> Option<NonNull<c_char>> {
101 |   wrap("rdb_vm_visualize_df", || Ok(mkcstr(&visualize_df(vm)?)))
102 | }
103 | 
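// A sketch of the intended call sequence from the embedding side (hypothetical
// host pseudocode, not part of this crate; the pairing is implied by the
// signatures above rather than documented anywhere):
//
//   let schema = rdb_compile_schema(src).unwrap();
//   let plan = rdb_generate_storage_plan(&schema, None, None).unwrap();
//   let script = rdb_dfasm(asm_src).unwrap();
//   let vm = rdb_vm_create(&schema, &plan, &script).unwrap(); // borrows all three
//   ...
//   rdb_drop_vm(Some(vm));          // drop the VM before what it borrows
//   rdb_drop_twscript(Some(script));
//   rdb_drop_plan(Some(plan));
//   rdb_drop_schema(Some(schema));
//
// Strings returned as NonNull<c_char> are allocated with libc::malloc (see
// mkcstr below) and are expected to be freed by the JS/Emscripten caller,
// which reads them via the exported UTF8ToString runtime method.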
104 | #[no_mangle]
105 | pub extern "C" fn rdb_vm_get_graphs<'a>(vm: &TwVm<'a>) -> Option<NonNull<c_char>> {
106 |   wrap("rdb_vm_get_graphs", || {
107 |     Ok(mkcstr(&serde_json::to_string(&get_vm_graphs(vm))?))
108 |   })
109 | }
110 | 
111 | #[no_mangle]
112 | pub extern "C" fn rdb_vm_run_query<'a>(
113 |   vm: &TwVm<'a>,
114 |   kv: &Arc<MemKv>,
115 |   type_info: &GlobalTypeInfo<'a>,
116 |   query: *const c_char,
117 | ) -> Option<NonNull<c_char>> {
118 |   wrap("rdb_vm_run_query", || {
119 |     let query = unsafe { CStr::from_ptr(query) };
120 |     let query: VmGraphQuery = serde_json::from_str(query.to_str()?)?;
121 |     Ok(mkcstr(&serde_json::to_string(&run_vm_query(
122 |       vm, &**kv, type_info, &query,
123 |     )?)?))
124 |   })
125 | }
126 | 
127 | #[no_mangle]
128 | pub extern "C" fn rdb_memkv_create() -> Option<Box<Arc<MemKv>>> {
129 |   wrap("rdb_memkv_create", || Ok(Box::new(Arc::new(MemKv::new()))))
130 | }
131 | 
132 | #[no_mangle]
133 | pub extern "C" fn rdb_generate_storage_plan(
134 |   schema: &CompiledSchema,
135 |   old_schema: Option<&CompiledSchema>,
136 |   old_plan: Option<&StoragePlan>,
137 | ) -> Option<Box<StoragePlan>> {
138 |   wrap("rdb_generate_storage_plan", || {
139 |     let mut reference_schema = &CompiledSchema::default();
140 |     let mut reference_plan = &StoragePlan::default();
141 | 
142 |     if let Some(old_schema) = old_schema {
143 |       let old_plan = old_plan.unwrap();
144 |       reference_schema = old_schema;
145 |       reference_plan = old_plan;
146 |     }
147 | 
148 |     let new_plan = generate_plan_for_schema(reference_plan, reference_schema, schema)?;
149 |     Ok(Box::new(new_plan))
150 |   })
151 | }
152 | 
153 | #[no_mangle]
154 | pub extern "C" fn rdb_pretty_print_storage_plan(plan: &StoragePlan) -> Option<NonNull<c_char>> {
155 |   wrap("rdb_pretty_print_storage_plan", || {
156 |     let s = format!(
157 |       "{}",
158 |       serde_yaml::to_string(&StoragePlan::<String>::from(plan)).unwrap()
159 |     );
160 |     Ok(mkcstr(&s))
161 |   })
162 | }
163 | 
164 | fn wrap<T>(name: &str, x: impl FnOnce() -> Result<T>) -> Option<T> {
165 |   match std::panic::catch_unwind(AssertUnwindSafe(x)) {
166 |     Ok(Ok(x)) => Some(x),
167 |     Ok(Err(e)) => {
168 |       log::error!("{}: error: {:?}", name, e);
169 |       None
170 |     }
171 |     Err(_) => {
172 |       log::error!("{}: panic", name);
173 |       None
174 |     }
175 |   }
176 | }
177 | 
178 | fn mkcstr(s: &str) -> NonNull<c_char> {
179 |   let s = s.as_bytes();
180 |   unsafe {
181 |     let p = libc::malloc(s.len() + 1);
182 |     if p.is_null() {
183 |       panic!("mkcstr: malloc failed");
184 |     }
185 |     {
186 |       let slice = std::slice::from_raw_parts_mut(p as *mut u8, s.len() + 1);
187 |       slice[..s.len()].copy_from_slice(s);
188 |       slice[s.len()] = 0;
189 |     }
190 |     NonNull::new_unchecked(p as *mut c_char)
191 |   }
192 | }
--------------------------------------------------------------------------------
/rdb-pgsvc/src/memkv.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 |   collections::HashMap,
3 |   sync::{
4 |     atomic::{AtomicU64, Ordering},
5 |     Arc,
6 |   },
7 | };
8 | 
9 | use async_trait::async_trait;
10 | use rdb_analyzer::data::kv::{KeyValueStore, KvError, KvKeyIterator, KvTransaction};
11 | use rpds::RedBlackTreeMapSync;
12 | use std::sync::Mutex;
13 | 
14 | use anyhow::Result;
15 | 
16 | /// An in-memory KV store that simulates MVCC with snapshot isolation.
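///
/// How the scheme below works: every key maps to `(Option<value>, version)`;
/// `begin_transaction` clones the current map as an immutable snapshot, writes
/// and deletes bump the per-key version in a private buffer while recording the
/// version each key had at first touch, and `commit` re-checks that every
/// modified key still carries its recorded version in the shared store,
/// returning `KvError::Conflict` otherwise.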
17 | pub struct MemKv {
18 |   store: MemStore,
19 | }
20 | 
21 | pub struct MemTransaction {
22 |   id: u64,
23 |   store: MemStore,
24 |   read_buffer: RedBlackTreeMapSync<Vec<u8>, (Option<Vec<u8>>, u64)>,
25 |   buffer: Mutex<RedBlackTreeMapSync<Vec<u8>, (Option<Vec<u8>>, u64)>>,
26 |   modified: Mutex<HashMap<Vec<u8>, u64>>,
27 | }
28 | 
29 | #[derive(Clone)]
30 | struct MemStore {
31 |   data: Arc<Mutex<RedBlackTreeMapSync<Vec<u8>, (Option<Vec<u8>>, u64)>>>,
32 |   txn_count: Arc<AtomicU64>,
33 | }
34 | 
35 | struct MemIterator {
36 |   map: RedBlackTreeMapSync<Vec<u8>, (Option<Vec<u8>>, u64)>,
37 |   current: Vec<u8>,
38 |   end: Vec<u8>,
39 | }
40 | 
41 | impl MemKv {
42 |   pub fn new() -> Self {
43 |     MemKv {
44 |       store: MemStore {
45 |         data: Arc::new(Mutex::new(RedBlackTreeMapSync::new_sync())),
46 |         txn_count: Arc::new(AtomicU64::new(0)),
47 |       },
48 |     }
49 |   }
50 | }
51 | 
52 | impl MemKv {
53 |   pub async fn dump(&self) -> RedBlackTreeMapSync<Vec<u8>, (Option<Vec<u8>>, u64)> {
54 |     self.store.data.lock().unwrap().clone()
55 |   }
56 | }
57 | 
58 | #[async_trait]
59 | impl KeyValueStore for MemKv {
60 |   async fn begin_transaction(&self) -> Result<Box<dyn KvTransaction>> {
61 |     let buffer = self.store.data.lock().unwrap().clone();
62 |     Ok(Box::new(MemTransaction {
63 |       id: self.store.txn_count.fetch_add(1, Ordering::SeqCst) + 1,
64 |       store: self.store.clone(),
65 |       read_buffer: buffer.clone(),
66 |       buffer: Mutex::new(buffer),
67 |       modified: Mutex::new(HashMap::new()),
68 |     }))
69 |   }
70 | }
71 | 
72 | #[async_trait]
73 | impl KvTransaction for MemTransaction {
74 |   async fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
75 |     log::trace!("[txn {}] get {}", self.id, base64::encode(key));
76 |     Ok(
77 |       self
78 |         .read_buffer
79 |         .get(key)
80 |         .and_then(|x| x.0.as_ref())
81 |         .cloned(),
82 |     )
83 |   }
84 | 
85 |   async fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
86 |     log::trace!(
87 |       "[txn {}] put {} {}",
88 |       self.id,
89 |       base64::encode(key),
90 |       base64::encode(value)
91 |     );
92 |     let mut buffer = self.buffer.lock().unwrap();
93 |     let mut modified = self.modified.lock().unwrap();
94 |     let version = buffer.get(key).map(|x| x.1).unwrap_or_default();
95 |     buffer.insert_mut(key.to_vec(), (Some(value.to_vec()), version + 1));
96 |     if !modified.contains_key(key) {
97 |       modified.insert(key.to_vec(), version);
98 |     }
99 |     Ok(())
100 |   }
101 | 
102 |   async fn delete(&self, key: &[u8]) -> Result<()> {
103 |     log::trace!("[txn {}] delete {}", self.id, base64::encode(key));
104 |     let mut buffer = self.buffer.lock().unwrap();
105 |     let mut modified = self.modified.lock().unwrap();
106 |     let version = buffer.get(key).map(|x| x.1).unwrap_or_default();
107 |     buffer.insert_mut(key.to_vec(), (None, version + 1));
108 |     if !modified.contains_key(key) {
109 |       modified.insert(key.to_vec(), version);
110 |     }
111 |     Ok(())
112 |   }
113 | 
114 |   async fn scan_keys(&self, start: &[u8], end: &[u8]) -> Result<Box<dyn KvKeyIterator>> {
115 |     Ok(Box::new(MemIterator {
116 |       map: self.buffer.lock().unwrap().clone(),
117 |       current: start.to_vec(),
118 |       end: end.to_vec(),
119 |     }))
120 |   }
121 | 
122 |   async fn commit(self: Box<Self>) -> Result<(), KvError> {
123 |     let buffer = self.buffer.into_inner().unwrap();
124 |     let modified = self.modified.into_inner().unwrap();
125 | 
126 |     let mut data = self.store.data.lock().unwrap();
127 |     for (k, initial_version) in &modified {
128 |       if data.get(k).map(|x| x.1).unwrap_or_default() != *initial_version {
129 |         log::trace!("[txn {}] commit CONFLICT", self.id);
130 |         return Err(KvError::Conflict);
131 |       }
132 |     }
133 | 
134 |     for (k, _) in modified {
135 |       let value = buffer.get(&k).unwrap().clone();
136 |       data.insert_mut(k, value);
137 |     }
138 |     log::trace!("[txn {}] commit OK", self.id);
139 |     Ok(())
140 |   }
141 | 
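  // Illustrative conflict scenario for the check in `commit` above (not a test
  // in this file): two transactions both observe key K at version v and write
  // it. The first commit stores (new value, v + 1) in the shared map; the
  // second still carries modified[K] == v, sees the version mismatch, and
  // fails with KvError::Conflict, so its caller can retry on a fresh snapshot.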
142 |   async fn delete_range(&self, start: &[u8], end: &[u8]) -> Result<()> {
143 |     log::trace!(
144 |       "[txn {}] delete_range {} {}",
145 |       self.id,
146 |       base64::encode(start),
147 |       base64::encode(end)
148 |     );
149 |     let mut buffer = self.buffer.lock().unwrap();
150 |     let mut modified = self.modified.lock().unwrap();
151 | 
152 |     let mut to_delete = vec![];
153 |     for (k, _) in buffer.range(start.to_vec()..end.to_vec()) {
154 |       to_delete.push(k.clone());
155 |     }
156 | 
157 |     log::trace!(
158 |       "[txn {}] deleted {} keys in range",
159 |       self.id,
160 |       to_delete.len()
161 |     );
162 | 
163 |     for key in to_delete {
164 |       let version = buffer.get(&key).map(|x| x.1).unwrap_or_default();
165 |       buffer.insert_mut(key.clone(), (None, version + 1));
166 |       if !modified.contains_key(&key) {
167 |         modified.insert(key, version);
168 |       }
169 |     }
170 |     Ok(())
171 |   }
172 | }
173 | 
174 | #[async_trait]
175 | impl KvKeyIterator for MemIterator {
176 |   async fn next(&mut self) -> Result<Option<Vec<u8>>> {
177 |     let mut range = self.map.range(self.current.clone()..self.end.clone());
178 |     loop {
179 |       if let Some((k, v)) = range.next() {
180 |         // Move to next
181 |         self.current = k.iter().copied().chain(std::iter::once(0x00u8)).collect();
182 |         match &v.0 {
183 |           Some(_) => break Ok(Some(k.clone())),
184 |           None => {}
185 |         }
186 |       } else {
187 |         break Ok(None);
188 |       }
189 |     }
190 |   }
191 | }
--------------------------------------------------------------------------------
/rdb-pgsvc/src/query.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use anyhow::Result;
4 | use rdb_analyzer::{
5 |   data::{
6 |     kv::KeyValueStore,
7 |     treewalker::{
8 |       bytecode::TwGraph,
9 |       exec::{generate_root_map, Executor},
10 |       serialize::{SerializedVmValue, TaggedVmValue},
11 |       typeck::GlobalTypeInfo,
12 |       vm::TwVm,
13 |       vm_value::VmType,
14 |     },
15 |   },
16 |   schema::compile::PrimitiveType,
17 | };
18 | use serde::{Deserialize, Serialize};
19 | use thiserror::Error;
20 | 
21 | #[derive(Serialize, Default)]
22 | pub struct VmGlobalGraphInfo {
23 |   pub graphs: Vec<VmGraphInfo>,
24 | }
25 | 
26 | #[derive(Serialize)]
27 | pub struct VmGraphInfo {
28 |   pub name: String,
29 |   pub query_template: String,
30 |   pub params: Vec<String>,
31 | }
32 | 
33 | #[derive(Serialize, Deserialize)]
34 | pub struct VmGraphQuery {
35 |   pub graph: String,
36 |   pub params: Vec<SerializedVmValue>,
37 | }
38 | 
39 | #[derive(Error, Debug)]
40 | pub enum QueryError {
41 |   #[error("graph not found")]
42 |   GraphNotFound,
43 | 
44 |   #[error("param count mismatch")]
45 |   ParamCountMismatch,
46 | }
47 | 
48 | pub fn get_vm_graphs(vm: &TwVm) -> VmGlobalGraphInfo {
49 |   let mut res = VmGlobalGraphInfo::default();
50 |   for g in &vm.script.graphs {
51 |     if !g.exported {
52 |       continue;
53 |     }
54 | 
55 |     let query_template = match generate_example_query(vm, g) {
56 |       Ok(x) => serde_json::to_string_pretty(&x).unwrap(),
57 |       Err(e) => {
58 |         log::error!("generate_example_query error: {:?}", e);
59 |         "// example query generation failed".into()
60 |       }
61 |     };
62 | 
63 |     res.graphs.push(VmGraphInfo {
64 |       name: g.name.clone(),
65 |       query_template,
66 |       params: g
67 |         .param_types
68 |         .iter()
69 |         .map(|x| format!("{}", vm.types[*x as usize]))
70 |         .collect(),
71 |     });
72 |   }
73 |   res
74 | }
75 | 
76 | pub fn run_vm_query<'a>(
77 |   vm: &TwVm<'a>,
78 |   kv: &dyn KeyValueStore,
79 |   type_info: &GlobalTypeInfo<'a>,
80 |   query: &VmGraphQuery,
81 | ) -> Result<Option<SerializedVmValue>> {
82 |   let mut executor = Executor::new(vm, kv, type_info);
83 |   let (i, g) = vm
84 |     .script
85 |     .graphs
86 | 
    .iter()
87 |     .enumerate()
88 |     .find(|(_, x)| x.name == query.graph)
89 |     .ok_or_else(|| QueryError::GraphNotFound)?;
90 |   if query.params.len() != g.param_types.len() {
91 |     return Err(QueryError::ParamCountMismatch.into());
92 |   }
93 |   let param_types = g
94 |     .param_types
95 |     .iter()
96 |     .map(|x| &vm.types[*x as usize])
97 |     .collect::<Vec<_>>();
98 |   let params = query
99 |     .params
100 |     .iter()
101 |     .zip(param_types.iter())
102 |     .map(|(x, ty)| match ty {
103 |       VmType::Schema => generate_root_map(vm.schema, vm.storage_plan).map(Arc::new),
104 |       _ => x.decode(ty).map(Arc::new),
105 |     })
106 |     .collect::<Result<Vec<_>>>()?;
107 |   let res = futures::executor::block_on(executor.run_graph(i, &params))?;
108 |   Ok(
109 |     res
110 |       .map(|x| SerializedVmValue::encode(&*x, &Default::default()))
111 |       .transpose()?,
112 |   )
113 | }
114 | 
115 | fn generate_example_query(vm: &TwVm, g: &TwGraph) -> Result<VmGraphQuery> {
116 |   let params = g
117 |     .param_types
118 |     .iter()
119 |     .map(|x| &vm.types[*x as usize])
120 |     .map(|x| generate_example_param(x))
121 |     .collect::<Result<Vec<_>>>()?;
122 |   Ok(VmGraphQuery {
123 |     graph: g.name.clone(),
124 |     params,
125 |   })
126 | }
127 | 
128 | fn generate_example_param(ty: &VmType<&str>) -> Result<SerializedVmValue> {
129 |   Ok(match ty {
130 |     VmType::Bool => SerializedVmValue::Bool(false),
131 |     VmType::Map(x) => SerializedVmValue::Tagged(TaggedVmValue::M(
132 |       x.iter()
133 |         .map(|(k, v)| generate_example_param(v).map(|x| (k.to_string(), x)))
134 |         .collect::<Result<_, _>>()?,
135 |     )),
136 |     VmType::Primitive(x) => match x {
137 |       PrimitiveType::Bytes => SerializedVmValue::String("".into()),
138 |       PrimitiveType::String => SerializedVmValue::String("".into()),
139 |       PrimitiveType::Int64 => SerializedVmValue::String("0".into()),
140 |       PrimitiveType::Double => SerializedVmValue::String("0.0".into()),
141 |     },
142 |     _ => SerializedVmValue::Null(None),
143 |   })
144 | }
--------------------------------------------------------------------------------
/rdb-proto/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "rdb-proto"
3 | version = "0.1.0"
4 | edition = "2018"
5 | 
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7 | 
8 | [dependencies]
9 | tonic = "0.4"
10 | prost = "0.7"
11 | 
12 | [build-dependencies]
13 | tonic-build = "0.4"
--------------------------------------------------------------------------------
/rdb-proto/build.rs:
--------------------------------------------------------------------------------
1 | fn main() {
2 |   tonic_build::compile_protos("src/proto/rdbrpc.proto").unwrap();
3 | }
--------------------------------------------------------------------------------
/rdb-proto/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod proto;
2 | pub use prost;
3 | pub use tonic;
--------------------------------------------------------------------------------
/rdb-proto/src/proto/mod.rs:
--------------------------------------------------------------------------------
1 | tonic::include_proto!("rdbrpc");
--------------------------------------------------------------------------------
/rdb-proto/src/proto/rdbrpc.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 | package rdbrpc;
3 | 
4 | service RdbControl {
5 |   rpc createNamespace(CreateNamespaceRequest) returns (CreateNamespaceReply) {}
6 |   rpc listNamespace(ListNamespaceRequest) returns (ListNamespaceReply) {}
7 |   rpc
deleteNamespace(DeleteNamespaceRequest) returns (DeleteNamespaceReply) {} 8 | rpc createDeployment(CreateDeploymentRequest) returns (CreateDeploymentReply) {} 9 | rpc getDeployment(GetDeploymentRequest) returns (GetDeploymentReply) {} 10 | rpc listDeployment(ListDeploymentRequest) returns (ListDeploymentReply) {} 11 | rpc deleteDeployment(DeleteDeploymentRequest) returns (DeleteDeploymentReply) {} 12 | rpc createQueryScript(CreateQueryScriptRequest) returns (CreateQueryScriptReply) {} 13 | rpc getQueryScript(GetQueryScriptRequest) returns (GetQueryScriptReply) {} 14 | rpc listQueryScript(ListQueryScriptRequest) returns (ListQueryScriptReply) {} 15 | rpc deleteQueryScript(DeleteQueryScriptRequest) returns (DeleteQueryScriptReply) {} 16 | } 17 | 18 | message CreateNamespaceRequest { 19 | string id = 1; 20 | } 21 | 22 | message DeleteNamespaceRequest { 23 | string id = 1; 24 | } 25 | 26 | message CreateNamespaceReply { 27 | bool created = 1; 28 | } 29 | 30 | message DeleteNamespaceReply { 31 | bool deleted = 1; 32 | } 33 | 34 | message ListNamespaceRequest { 35 | } 36 | 37 | message ListNamespaceReply { 38 | repeated NamespaceBasicInfo namespaces = 1; 39 | } 40 | 41 | message NamespaceBasicInfo { 42 | string id = 1; 43 | int64 create_time = 2; 44 | } 45 | 46 | message CreateDeploymentRequest { 47 | string namespace_id = 1; 48 | string schema = 2; 49 | string plan = 3; 50 | string description = 4; 51 | } 52 | 53 | message CreateDeploymentReply { 54 | DeploymentId deployment_id = 1; 55 | } 56 | 57 | message DeploymentId { 58 | string id = 1; 59 | } 60 | 61 | message GetDeploymentRequest { 62 | string namespace_id = 1; 63 | string deployment_id = 2; 64 | } 65 | 66 | message GetDeploymentReply { 67 | DeploymentFullInfo info = 1; 68 | } 69 | 70 | message ListDeploymentRequest { 71 | string namespace_id = 1; 72 | } 73 | 74 | message ListDeploymentReply { 75 | repeated DeploymentBasicInfo deployments = 1; 76 | } 77 | 78 | message DeploymentBasicInfo { 79 | string id = 1; 80 | int64 create_time = 2; 81 | string description = 3; 82 | } 83 | 84 | message DeploymentFullInfo { 85 | string id = 1; 86 | int64 create_time = 2; 87 | string description = 3; 88 | string schema = 4; 89 | string plan = 5; 90 | } 91 | 92 | message CreateQueryScriptRequest { 93 | string namespace_id = 1; 94 | string id = 2; 95 | string associated_deployment = 3; 96 | string script = 4; 97 | } 98 | 99 | message CreateQueryScriptReply { 100 | bool created = 1; 101 | } 102 | 103 | message DeleteQueryScriptRequest { 104 | string namespace_id = 1; 105 | string id = 2; 106 | } 107 | 108 | message DeleteQueryScriptReply { 109 | bool deleted = 1; 110 | } 111 | 112 | message DeleteDeploymentRequest { 113 | string namespace_id = 1; 114 | string id = 2; 115 | } 116 | 117 | message DeleteDeploymentReply { 118 | bool deleted = 1; 119 | } 120 | 121 | message ListQueryScriptRequest { 122 | string namespace_id = 1; 123 | } 124 | 125 | message ListQueryScriptReply { 126 | repeated QueryScriptBasicInfo query_scripts = 1; 127 | } 128 | 129 | message GetQueryScriptRequest { 130 | string namespace_id = 1; 131 | string query_script_id = 2; 132 | } 133 | 134 | message GetQueryScriptReply { 135 | QueryScriptFullInfo info = 1; 136 | } 137 | 138 | message QueryScriptBasicInfo { 139 | string id = 1; 140 | string associated_deployment = 2; 141 | int64 create_time = 3; 142 | } 143 | 144 | message QueryScriptFullInfo { 145 | string id = 1; 146 | string associated_deployment = 2; 147 | string script = 3; 148 | int64 create_time = 4; 149 | } 150 | 
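// A typical control-plane flow, for orientation (a sketch inferred from the
// messages above, not normative documentation): createNamespace, then
// createDeployment with a schema and its serialized storage plan, then
// createQueryScript bound to that deployment via associated_deployment.
// Query execution itself is not part of this RPC service; queries are served
// by rdb-server's HTTP API (POST /query/{namespace}/{query_script}/{graph}).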
--------------------------------------------------------------------------------
/rdb-server.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | 
3 | ARG http_proxy
4 | ENV http_proxy=$http_proxy
5 | 
6 | ARG https_proxy
7 | ENV https_proxy=$https_proxy
8 | 
9 | RUN apt update
10 | RUN apt install -y libssl1.1 ca-certificates wget
11 | RUN wget -O /tmp/foundationdb-clients.deb https://www.foundationdb.org/downloads/6.3.15/ubuntu/installers/foundationdb-clients_6.3.15-1_amd64.deb
12 | RUN dpkg -i /tmp/foundationdb-clients.deb
13 | COPY rdb-server /
14 | 
15 | EXPOSE 8080/tcp
16 | EXPOSE 8081/tcp
17 | ENTRYPOINT /rdb-server --http-listen 0.0.0.0:8080 --grpc-listen 0.0.0.0:8081
--------------------------------------------------------------------------------
/rdb-server/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "rdb-server"
3 | version = "0.1.0"
4 | edition = "2018"
5 | 
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7 | 
8 | [dependencies]
9 | rdb-analyzer = { path = "../rdb-analyzer" }
10 | rdb-proto = { path = "../rdb-proto" }
11 | async-trait = "0.1"
12 | tokio = { version = "1", features = ["full"] }
13 | futures = "0.3"
14 | rand = "0.8"
15 | log = "0.4"
16 | pretty_env_logger = "0.4"
17 | anyhow = "1"
18 | thiserror = "1"
19 | serde = { version = "1", features = ["derive", "rc"] }
20 | serde_json = "1"
21 | foundationdb = "0.5"
22 | structopt = "0.3"
23 | once_cell = "1"
24 | bumpalo = { version = "3.7", features = ["collections"] }
25 | similar = { version = "1", features = ["inline"] }
26 | serde_yaml = "0.8"
27 | console = "0.14.0"
28 | rmp-serde = "0.15"
29 | hex = "0.4"
30 | sha2 = "0.9"
31 | base64 = "0.13"
32 | maplit = "1"
33 | uuid = { version = "0.8", features = ["v4"] }
34 | warp = "0.3"
35 | lru = "0.6"
36 | sysinfo = "0.18"
37 | rusqlite = "0.25"
38 | r2d2 = "0.8"
39 | r2d2_sqlite = "0.18"
40 | bytes = "1"
--------------------------------------------------------------------------------
/rdb-server/src/exec.rs:
--------------------------------------------------------------------------------
1 | use std::{panic::AssertUnwindSafe, sync::Arc, time::Duration};
2 | 
3 | use anyhow::Result;
4 | use futures::FutureExt;
5 | use rdb_analyzer::data::{
6 |   kv::KeyValueStore,
7 |   treewalker::{
8 |     exec::Executor,
9 |     serialize::{SerializedVmValue, VmValueEncodeConfig},
10 |     vm_value::VmType,
11 |   },
12 | };
13 | use tokio::{task::yield_now, time::sleep};
14 | 
15 | use crate::exec_core::ExecContext;
16 | use thiserror::Error;
17 | 
18 | const QUERY_TIMEOUT: Duration = Duration::from_secs(5);
19 | 
20 | #[derive(Error, Debug)]
21 | pub enum ExecError {
22 |   #[error("graph executor panicked")]
23 |   GraphExecutorPanic,
24 | 
25 |   #[error("param count mismatch: expected {0}, got {1}")]
26 |   ParamCountMismatch(usize, usize),
27 | 
28 |   #[error("query timeout")]
29 |   Timeout,
30 | }
31 | 
32 | impl ExecContext {
33 |   pub async fn run_exported_graph(
34 |     &self,
35 |     kv: &dyn KeyValueStore,
36 |     name: &str,
37 |     params: &[SerializedVmValue],
38 |     serialization_config: &VmValueEncodeConfig,
39 |   ) -> Result<SerializedVmValue> {
40 |     let run_fut =
41 |       AssertUnwindSafe(self.run_exported_graph_inner(kv, name, params, serialization_config))
42 |         .catch_unwind();
43 |     let timeout_fut = sleep(QUERY_TIMEOUT);
44 |     tokio::select!
{
45 |       res = run_fut => {
46 |         res.unwrap_or_else(|_| Err(ExecError::GraphExecutorPanic.into()))
47 |       }
48 |       _ = timeout_fut => Err(ExecError::Timeout.into()),
49 |     }
50 |   }
51 | 
52 |   async fn run_exported_graph_inner(
53 |     &self,
54 |     kv: &dyn KeyValueStore,
55 |     name: &str,
56 |     params: &[SerializedVmValue],
57 |     serialization_config: &VmValueEncodeConfig,
58 |   ) -> Result<SerializedVmValue> {
59 |     let graph_index = self.vm().lookup_exported_graph_by_name(name)?;
60 |     let param_types = &self.type_info().graphs[graph_index].params;
61 | 
62 |     // We also need raw types because we need a way to detect the `Schema` pseudo-type.
63 |     let raw_param_types = self.vm().script.graphs[graph_index]
64 |       .param_types
65 |       .iter()
66 |       .map(|x| &self.vm().types[*x as usize])
67 |       .collect::<Vec<_>>();
68 |     assert_eq!(param_types.len(), raw_param_types.len());
69 |     if param_types.len() != params.len() {
70 |       return Err(ExecError::ParamCountMismatch(param_types.len(), params.len()).into());
71 |     }
72 |     let mut executor = Executor::new(self.vm(), kv, self.type_info());
73 |     executor.set_yield_fn(|| Box::pin(yield_now()));
74 |     executor.set_sleep_fn(|x| Box::pin(sleep(x)));
75 |     let params = params
76 |       .iter()
77 |       .zip(param_types)
78 |       .zip(raw_param_types)
79 |       .map(|((v, ty), raw_ty)| match raw_ty {
80 |         VmType::Schema => Ok(self.root_map().clone()),
81 |         _ => v.decode(ty).map(Arc::new),
82 |       })
83 |       .collect::<Result<Vec<_>>>()?;
84 |     let output = executor
85 |       .run_graph(graph_index, &params)
86 |       .await?
87 |       .map(|x| SerializedVmValue::encode(&*x, serialization_config))
88 |       .transpose()?;
89 |     Ok(output.unwrap_or_else(|| SerializedVmValue::Null(None)))
90 |   }
91 | }
--------------------------------------------------------------------------------
/rdb-server/src/exec_core.rs:
--------------------------------------------------------------------------------
1 | use std::{mem::ManuallyDrop, sync::Arc};
2 | 
3 | use anyhow::Result;
4 | use rdb_analyzer::{
5 |   data::treewalker::{
6 |     asm::codegen::compile_twscript,
7 |     bytecode::TwScript,
8 |     exec::generate_root_map,
9 |     typeck::{GlobalTyckContext, GlobalTypeInfo},
10 |     vm::TwVm,
11 |     vm_value::VmValue,
12 |   },
13 |   schema::compile::CompiledSchema,
14 |   storage_plan::StoragePlan,
15 | };
16 | 
17 | pub struct SchemaContext {
18 |   pub schema: CompiledSchema,
19 |   pub plan: StoragePlan,
20 | }
21 | 
22 | pub struct ExecContext {
23 |   _schema_ctx: Arc<SchemaContext>,
24 |   _script: Box<TwScript>,
25 |   dangerous: ManuallyDrop<DangerousExecContext<'static>>,
26 | }
27 | 
28 | struct DangerousExecContext<'a> {
29 |   vm: TwVm<'a>,
30 |   type_info: GlobalTypeInfo<'a>,
31 |   root_map: Arc<VmValue<'a>>,
32 | }
33 | 
34 | impl ExecContext {
35 |   pub fn load(schema_ctx: Arc<SchemaContext>, script: &str) -> Result<Self> {
36 |     let script = Box::new(compile_twscript(script)?);
37 |     let vm = TwVm::new(&schema_ctx.schema, &schema_ctx.plan, &*script)?;
38 |     let type_info = GlobalTyckContext::new(&vm)?.typeck()?;
39 |     let root_map = Arc::new(generate_root_map(&schema_ctx.schema, &schema_ctx.plan)?);
40 |     let dangerous_ctx = DangerousExecContext {
41 |       vm,
42 |       type_info,
43 |       root_map,
44 |     };
45 |     let dangerous_ctx = ManuallyDrop::new(unsafe {
46 |       std::mem::transmute::<DangerousExecContext<'_>, DangerousExecContext<'static>>(dangerous_ctx)
47 |     });
48 |     Ok(Self {
49 |       _schema_ctx: schema_ctx,
50 |       _script: script,
51 |       dangerous: dangerous_ctx,
52 |     })
53 |   }
54 | 
55 |   pub fn vm<'a>(&'a self) -> &'a TwVm<'a> {
56 |     &self.dangerous.vm
57 |   }
58 | 
59 |   pub fn type_info<'a>(&'a self) -> &'a GlobalTypeInfo<'a> {
60 |     &self.dangerous.type_info
61 |   }
62 | 
63 |   pub fn root_map<'a>(&'a self) -> &Arc<VmValue<'a>> {
64 |     &self.dangerous.root_map
65 |   }
66 | }
67 | 
68 | impl Drop for ExecContext {
69 |   fn drop(&mut self) {
70 |     // Ensure that `dangerous` is dropped before other fields
71 |     unsafe {
72 |       ManuallyDrop::drop(&mut self.dangerous);
73 |     }
74 |   }
75 | }
--------------------------------------------------------------------------------
/rdb-server/src/httpapi.rs:
--------------------------------------------------------------------------------
1 | use std::{fmt::Debug, net::ToSocketAddrs, sync::Arc};
2 | 
3 | use anyhow::Result;
4 | use bumpalo::Bump;
5 | use bytes::Bytes;
6 | use rdb_analyzer::{
7 |   data::treewalker::serialize::{SerializedVmValue, VmValueEncodeConfig},
8 |   schema::{compile::compile, grammar::parse},
9 |   storage_plan::StoragePlan,
10 | };
11 | use warp::{
12 |   hyper::{Body, Response},
13 |   reject::Reject,
14 |   reply::Json,
15 |   Filter, Rejection,
16 | };
17 | 
18 | use crate::{
19 |   exec_core::{ExecContext, SchemaContext},
20 |   query_cache::QueryCacheKey,
21 |   state::get_state,
22 |   sysquery::{lookup_deployment, lookup_query_script, ns_to_kv_prefix_with_appended_zero},
23 | };
24 | 
25 | struct ApiReject(anyhow::Error);
26 | 
27 | impl ApiReject {
28 |   fn new(x: anyhow::Error) -> Self {
29 |     log::error!("api reject: {:?}", x);
30 |     Self(x)
31 |   }
32 | }
33 | 
34 | impl Debug for ApiReject {
35 |   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36 |     write!(f, "{}", self.0)
37 |   }
38 | }
39 | 
40 | impl Reject for ApiReject {}
41 | 
42 | pub async fn run_http_server(addr: impl ToSocketAddrs) -> ! {
43 |   let query_route_json = warp::path("query")
44 |     .and(warp::path::param()) // namespace
45 |     .and(warp::path::param()) // query script id
46 |     .and(warp::path::param()) // name of the graph
47 |     .and(warp::filters::header::exact(
48 |       "Content-Type",
49 |       "application/json",
50 |     ))
51 |     .and(warp::body::content_length_limit(1024 * 256))
52 |     .and(warp::body::json())
53 |     .and_then(invoke_query);
54 |   let query_route_msgpack = warp::path("query")
55 |     .and(warp::path::param()) // namespace
56 |     .and(warp::path::param()) // query script id
57 |     .and(warp::path::param()) // name of the graph
58 |     .and(warp::filters::header::exact(
59 |       "Content-Type",
60 |       "application/x-msgpack",
61 |     ))
62 |     .and(warp::body::content_length_limit(1024 * 256))
63 |     .and(warp::body::bytes())
64 |     .and_then(invoke_query_msgpack);
65 |   let routes = warp::post().and(query_route_json.or(query_route_msgpack));
66 |   let addr = addr
67 |     .to_socket_addrs()
68 |     .unwrap()
69 |     .next()
70 |     .expect("no socket addrs");
71 |   warp::serve(routes).run(addr).await;
72 |   unreachable!()
73 | }
74 | 
75 | async fn invoke_query(
76 |   namespace_id: String,
77 |   query_script_id: String,
78 |   graph_name: String,
79 |   graph_params: Vec<SerializedVmValue>,
80 | ) -> Result<Json, Rejection> {
81 |   do_invoke_query(
82 |     namespace_id,
83 |     query_script_id,
84 |     graph_name,
85 |     graph_params,
86 |     &Default::default(),
87 |   )
88 |   .await
89 |   .map(|x| warp::reply::json(&x))
90 |   .map_err(|e| warp::reject::custom(ApiReject::new(e)))
91 | }
92 | 
93 | async fn invoke_query_msgpack(
94 |   namespace_id: String,
95 |   query_script_id: String,
96 |   graph_name: String,
97 |   graph_params: Bytes,
98 | ) -> Result<Response<Body>, Rejection> {
99 |   let graph_params: Vec<SerializedVmValue> = rmp_serde::from_slice(&graph_params)
100 |     .map_err(|e| warp::reject::custom(ApiReject::new(anyhow::Error::from(e))))?;
101 |   do_invoke_query(
102 |     namespace_id,
103 |     query_script_id,
104 |     graph_name,
105 |     graph_params,
106 |     &VmValueEncodeConfig {
107 |       enable_bytes: true,
108 |       enable_double: true,
109 |       enable_int64: true,
110 |     },
111 |   )
112 |   .await
113 |   .and_then(|x| rmp_serde::to_vec_named(&x).map_err(anyhow::Error::from))
114 |   .and_then(|x| {
115 |     Response::builder()
116 |       .header("Content-Type", "application/x-msgpack")
117 |       .body(Body::from(x))
118 |       .map_err(anyhow::Error::from)
119 |   })
120 |   .map_err(|e| warp::reject::custom(ApiReject::new(e)))
121 | }
122 | 
123 | async fn do_invoke_query(
124 |   namespace_id: String,
125 |   query_script_id: String,
126 |   graph_name: String,
127 |   graph_params: Vec<SerializedVmValue>,
128 |   serialization_config: &VmValueEncodeConfig,
129 | ) -> Result<SerializedVmValue> {
130 |   let st = get_state();
131 |   let kv_prefix = ns_to_kv_prefix_with_appended_zero(&namespace_id).await?;
132 |   let kv = (st.data_store_generator)(&kv_prefix);
133 | 
134 |   let exec_ctx;
135 |   if let Some(x) = st
136 |     .query_cache
137 |     .get_hot(&namespace_id, &query_script_id)
138 |     .await
139 |   {
140 |     exec_ctx = x;
141 |   } else {
142 |     let query_script = lookup_query_script(&namespace_id, &query_script_id).await?;
143 | 
144 |     let qc_key = QueryCacheKey {
145 |       namespace_id: namespace_id.clone(),
146 |       query_script_id: query_script_id.clone(),
147 |       deployment_id: query_script.associated_deployment.clone(),
148 |       query_script_create_time: query_script.create_time,
149 |     };
150 |     if let Some(x) = st.query_cache.get(&qc_key).await {
151 |       exec_ctx = x;
152 |     } else {
153 |       let deployment =
154 |         lookup_deployment(&namespace_id, &query_script.associated_deployment).await?;
155 |       let schema = compile(&parse(&Bump::new(), &deployment.schema)?)?;
156 |       let plan = StoragePlan::deserialize_compressed(&deployment.plan)?;
157 |       let schema_ctx = Arc::new(SchemaContext { schema, plan });
158 |       exec_ctx = Arc::new(ExecContext::load(schema_ctx, &query_script.script)?);
159 |       log::info!("Loaded query script {:?}.", qc_key);
160 |       st.query_cache.put(qc_key, exec_ctx.clone()).await;
161 |     }
162 |   }
163 | 
164 |   let output = exec_ctx
165 |     .run_exported_graph(&*kv, &graph_name, &graph_params, serialization_config)
166 |     .await?;
167 |   Ok(output)
168 | }
--------------------------------------------------------------------------------
/rdb-server/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use anyhow::Result;
4 | use foundationdb::{tuple::Subspace, Database};
5 | use rdb_analyzer::{
6 |   data::kv::KeyValueStore,
7 |   kv_backend::{
8 |     foundationdb::FdbKvStore,
9 |     sqlite::{GlobalSqliteStore, SqliteKvStore},
10 |   },
11 | };
12 | use rdb_proto::{proto::rdb_control_server::RdbControlServer, tonic::transport::Server};
13 | use structopt::StructOpt;
14 | use tokio::runtime::Runtime;
15 | 
16 | use crate::{
17 |   httpapi::run_http_server,
18 |   opt::Opt,
19 |   query_cache::{QueryCache, QueryCacheParams},
20 |   server::ControlServer,
21 |   state::{set_state, DataStoreGenerator, ServerState},
22 |   system::SystemSchema,
23 | };
24 | mod exec;
25 | mod exec_core;
26 | mod httpapi;
27 | mod opt;
28 | mod query_cache;
29 | mod server;
30 | mod state;
31 | mod sysquery;
32 | mod system;
33 | mod util;
34 | 
35 | fn main() {
36 |   pretty_env_logger::init_timed();
37 |   let network = unsafe { foundationdb::boot() };
38 | 
39 |   Runtime::new()
40 |     .unwrap()
41 |     .block_on(async move { run().await })
42 |     .unwrap();
43 | 
44 |   // Required for safety
45 |   drop(network);
46 | }
47 | 
48 | async fn run() -> Result<()> {
49 |   let opt = Opt::from_args();
50 | 
51 |   let data_store_generator: DataStoreGenerator;
52 |   let system_store: Box<dyn KeyValueStore>;
53 |   let system_metadata_store: Box<dyn KeyValueStore>;
54 |   if let Some(x) =
54 |   if let Some(x) = &opt.fdb_cluster {
55 |     if opt.sqlite_db.is_some() {
56 |       panic!("cannot select multiple kv backends");
57 |     }
58 |     let db = Arc::new(Database::new(Some(x))?);
59 |     let keyspace =
60 |       Subspace::all().subspace(opt.fdb_keyspace.as_ref().expect("missing fdb-keyspace"));
61 | 
62 |     system_store = Box::new(FdbKvStore::new(
63 |       db.clone(),
64 |       keyspace.subspace(&"System").bytes(),
65 |     ));
66 |     system_metadata_store = Box::new(FdbKvStore::new(
67 |       db.clone(),
68 |       keyspace.subspace(&"SystemMeta").bytes(),
69 |     ));
70 |     data_store_generator = Box::new(move |namespace| {
71 |       Box::new(FdbKvStore::new(
72 |         db.clone(),
73 |         &keyspace
74 |           .subspace(&"D")
75 |           .bytes()
76 |           .iter()
77 |           .copied()
78 |           .chain(namespace.iter().copied())
79 |           .collect::<Vec<u8>>(),
80 |       ))
81 |     });
82 |   } else if let Some(x) = &opt.sqlite_db {
83 |     if opt.fdb_cluster.is_some() || opt.fdb_keyspace.is_some() {
84 |       panic!("cannot select multiple kv backends");
85 |     }
86 |     let backend = GlobalSqliteStore::open_leaky(x)?;
87 |     system_store = Box::new(SqliteKvStore::new(backend.clone(), "system", b""));
88 |     system_metadata_store = Box::new(SqliteKvStore::new(backend.clone(), "system_meta", b""));
89 |     data_store_generator = Box::new(move |namespace| {
90 |       Box::new(SqliteKvStore::new(backend.clone(), "user_data", namespace))
91 |     });
92 |   } else {
93 |     panic!("no kv backend selected");
94 |   }
95 | 
96 |   let system_schema = SystemSchema::new(
97 |     opt.migration_hash.clone(),
98 |     &*system_store,
99 |     &*system_metadata_store,
100 |   )
101 |   .await;
102 |   let query_cache = QueryCache::new(QueryCacheParams {
103 |     process_memory_threshold_kb: opt.process_memory_threshold_kb,
104 |   });
105 | 
106 |   set_state(ServerState {
107 |     data_store_generator,
108 |     system_store,
109 |     system_schema,
110 |     query_cache,
111 |   });
112 | 
113 |   log::info!("RefineDB started.");
114 | 
115 |   let http_listen = opt.http_listen.clone();
116 |   tokio::spawn(async move { run_http_server(http_listen).await });
117 | 
118 |   Server::builder()
119 |     .add_service(RdbControlServer::new(ControlServer))
120 |     .serve(opt.grpc_listen.parse()?)
121 |     .await?;
122 | 
123 |   Ok(())
124 | }

--------------------------------------------------------------------------------
/rdb-server/src/opt.rs:
--------------------------------------------------------------------------------
1 | use structopt::StructOpt;
2 | 
3 | #[derive(Debug, StructOpt)]
4 | #[structopt(name = "rdb-server", about = "RefineDB server.")]
5 | pub struct Opt {
6 |   /// FoundationDB cluster file.
7 |   #[structopt(long, env = "RDB_FDB_CLUSTER")]
8 |   pub fdb_cluster: Option<String>,
9 | 
10 |   /// FoundationDB root keyspace.
11 |   #[structopt(long, env = "RDB_FDB_KEYSPACE")]
12 |   pub fdb_keyspace: Option<String>,
13 | 
14 |   /// Path to the SQLite database.
15 |   #[structopt(long, env = "RDB_SQLITE_DB")]
16 |   pub sqlite_db: Option<String>,
17 | 
18 |   /// GRPC listen address.
19 |   #[structopt(long, env = "RDB_GRPC_LISTEN")]
20 |   pub grpc_listen: String,
21 | 
22 |   /// HTTP API listen address.
23 |   #[structopt(long, env = "RDB_HTTP_LISTEN")]
24 |   pub http_listen: String,
25 | 
26 |   /// Migration hash.
27 |   #[structopt(long, env = "RDB_MIGRATION_HASH")]
28 |   pub migration_hash: Option<String>,
29 | 
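  // The default below, 524288 KiB, is 512 MiB: once process memory passes
  // this, the query cache GC loop (see query_cache.rs) starts evicting
  // cached query scripts.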
30 |   /// Process memory threshold (in KiB) for query cache.
31 |   #[structopt(
32 |     long,
33 |     default_value = "524288",
34 |     env = "RDB_PROCESS_MEMORY_THRESHOLD_KB"
35 |   )]
36 |   pub process_memory_threshold_kb: u64,
37 | }

--------------------------------------------------------------------------------
/rdb-server/src/query_cache.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 |   sync::{Arc, Weak},
3 |   time::{Duration, Instant},
4 | };
5 | 
6 | use lru::LruCache;
7 | use sysinfo::{get_current_pid, ProcessExt, System, SystemExt};
8 | use tokio::{sync::Mutex, time::sleep};
9 | 
10 | use crate::exec_core::ExecContext;
11 | 
12 | /// The minimum threshold to shrink query cache to.
13 | const MIN_QUERY_CACHE_SIZE: usize = 64;
14 | 
15 | /// Items per step when shrinking the query cache due to memory threshold metrics.
16 | const QUERY_CACHE_SHRINK_STEP_SIZE: usize = 16;
17 | 
18 | const HOT_ITEM_TTL: Duration = Duration::from_secs(3);
19 | 
20 | pub struct QueryCache {
21 |   items: Mutex<LruCache<QueryCacheKey, Arc<ExecContext>>>,
22 |   hot_items: Mutex<LruCache<(String, String), HotItem>>,
23 |   params: QueryCacheParams,
24 | }
25 | 
26 | struct HotItem {
27 |   exec_ctx: Arc<ExecContext>,
28 |   create_time: Instant,
29 | }
30 | 
31 | #[derive(Clone, Debug)]
32 | pub struct QueryCacheParams {
33 |   pub process_memory_threshold_kb: u64,
34 | }
35 | 
36 | #[derive(Clone, Debug, Eq, PartialEq, Hash)]
37 | pub struct QueryCacheKey {
38 |   /// Namespace id.
39 |   pub namespace_id: String,
40 | 
41 |   /// Generated deployment id.
42 |   pub deployment_id: String,
43 | 
44 |   /// User-provided query script id.
45 |   pub query_script_id: String,
46 | 
47 |   /// In case the query script is updated.
48 |   pub query_script_create_time: i64,
49 | }
50 | 
51 | impl QueryCache {
52 |   pub fn new(params: QueryCacheParams) -> Arc<Self> {
53 |     let me = Arc::new(Self {
54 |       items: Mutex::new(LruCache::unbounded()),
55 |       hot_items: Mutex::new(LruCache::unbounded()),
56 |       params,
57 |     });
58 |     let me_weak = Arc::downgrade(&me);
59 |     tokio::spawn(async move {
60 |       Self::gc(me_weak).await;
61 |     });
62 |     me
63 |   }
64 | 
65 |   pub async fn get_hot(
66 |     &self,
67 |     namespace_id: &str,
68 |     query_script_id: &str,
69 |   ) -> Option<Arc<ExecContext>> {
70 |     let hot_items = self.hot_items.lock().await;
71 | 
72 |     // Peek. Don't update LRU state.
73 |     if let Some(x) = hot_items.peek(&(namespace_id.to_string(), query_script_id.to_string())) {
74 |       Some(x.exec_ctx.clone())
75 |     } else {
76 |       None
77 |     }
78 |   }
79 | 
80 |   pub async fn get(&self, key: &QueryCacheKey) -> Option<Arc<ExecContext>> {
81 |     let items = self.items.lock().await;
82 |     let item = items.peek(key).cloned();
83 |     drop(items);
84 | 
85 |     // Insert into hot cache.
86 |     if let Some(item) = &item {
87 |       self.hot_items.lock().await.put(
88 |         (key.namespace_id.clone(), key.query_script_id.clone()),
89 |         HotItem {
90 |           exec_ctx: item.clone(),
91 |           create_time: Instant::now(),
92 |         },
93 |       );
94 |     }
95 | 
96 |     item
97 |   }
98 | 
99 |   pub async fn put(&self, key: QueryCacheKey, value: Arc<ExecContext>) {
100 |     self.items.lock().await.put(key, value);
101 |   }
102 | 
103 |   async fn gc(me: Weak<Self>) {
104 |     let system = System::new_all();
105 |     loop {
106 |       sleep(Duration::from_secs(1)).await;
107 |       let me = match me.upgrade() {
108 |         Some(x) => x,
109 |         None => {
110 |           log::warn!("gc: exiting");
111 |           break;
112 |         }
113 |       };
114 | 
115 |       // Step 1: Hot item expiration.
116 |       {
117 |         let mut hot_items = me.hot_items.lock().await;
118 | 
119 |         // Get current time after locking hot_items to ensure: forall x in hot_items, now >= x.create_time.
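        // (Taking `now` after the lock guarantees `now >= create_time` for
        // every entry, so `duration_since` below is well-defined; if `now`
        // could be earlier, `duration_since` would panic on older standard
        // library versions.)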
120 |         let now = Instant::now();
121 | 
122 |         let mut pop_count = 0usize;
123 | 
124 |         while let Some((_, v)) = hot_items.peek_lru() {
125 |           let dur = now.duration_since(v.create_time);
126 |           if dur > HOT_ITEM_TTL {
127 |             hot_items.pop_lru().unwrap();
128 |             pop_count += 1;
129 |           } else {
130 |             break;
131 |           }
132 |         }
133 | 
134 |         if pop_count > 0 {
135 |           log::debug!("gc: Removed {} hot item(s) from cache.", pop_count);
136 |         }
137 |       }
138 | 
139 |       // Step 2: Process memory threshold.
140 |       let process = system
141 |         .get_process(get_current_pid().unwrap())
142 |         .expect("cannot get current process");
143 |       let memory_usage_kb = process.memory();
144 |       if memory_usage_kb > me.params.process_memory_threshold_kb {
145 |         let mut items = me.items.lock().await;
146 |         if items.len() > MIN_QUERY_CACHE_SIZE {
147 |           log::warn!(
148 |             "Memory usage ({} KiB) exceeds threshold ({} KiB) and the query cache contains {} items. Shrinking query cache by {} items.",
149 |             memory_usage_kb,
150 |             me.params.process_memory_threshold_kb,
151 |             items.len(),
152 |             QUERY_CACHE_SHRINK_STEP_SIZE,
153 |           );
154 |           for _ in 0..QUERY_CACHE_SHRINK_STEP_SIZE {
155 |             items.pop_lru();
156 |           }
157 |         }
158 |       }
159 |     }
160 |   }
161 | }

--------------------------------------------------------------------------------
/rdb-server/src/state.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use once_cell::sync::OnceCell;
4 | use rdb_analyzer::data::kv::KeyValueStore;
5 | 
6 | use crate::{query_cache::QueryCache, system::SystemSchema};
7 | 
8 | pub type DataStoreGenerator = Box<dyn Fn(&[u8]) -> Box<dyn KeyValueStore> + Send + Sync>;
9 | 
10 | pub struct ServerState {
11 |   pub data_store_generator: DataStoreGenerator,
12 |   pub system_store: Box<dyn KeyValueStore>,
13 |   pub system_schema: SystemSchema,
14 |   pub query_cache: Arc<QueryCache>,
15 | }
16 | 
17 | static STATE: OnceCell<ServerState> = OnceCell::new();
18 | 
19 | pub fn set_state(st: ServerState) {
20 |   STATE
21 |     .set(st)
22 |     .unwrap_or_else(|_| panic!("set_state: attempting to set state twice"));
23 | }
24 | 
25 | pub fn get_state() -> &'static ServerState {
26 |   STATE.get().expect("get_state: not initialized")
27 | }

--------------------------------------------------------------------------------
/rdb-server/src/sys.rasm:
--------------------------------------------------------------------------------
1 | type DeploymentBasicInfoMap = map {
2 |   id: string,
3 |   create_time: int64,
4 |   description: string,
5 | };
6 | 
7 | type DeploymentFullMap = map {
8 |   id: string,
9 |   description: string,
10 |   `schema`: string,
11 |   plan: bytes,
12 |   create_time: int64,
13 | };
14 | 
15 | type NamespaceMap = map {
16 |   id: string,
17 |   kv_prefix: bytes,
18 |   create_time: int64,
19 | };
20 | 
21 | type QueryScriptFullMap = map {
22 |   id: string,
23 |   associated_deployment: string,
24 |   script: string,
25 |   create_time: int64,
26 | };
27 | 
28 | type QueryScriptBasicInfoMap = map {
29 |   id: string,
30 |   associated_deployment: string,
31 |   create_time: int64,
32 | };
33 | 
34 | export graph ns_to_kv_prefix(root: schema, namespace_id: string): bytes {
35 |   return (point_get root.system.namespaces namespace_id).kv_prefix;
36 | }
37 | 
38 | export graph add_namespace(root: schema, namespace_id: string, kv_prefix: bytes, create_time: int64): bool {
39 |   ns = root.system.namespaces;
40 |   if is_present $ point_get ns namespace_id {
41 |     r1 = false;
42 |   } else {
43 |     s_insert root.system.namespaces $
44 |       build_table(Namespace) $
45 |       m_insert(id) namespace_id $
46 |       m_insert(kv_prefix) kv_prefix $
47 |       m_insert(deployments) empty_set $
48 |       m_insert(query_scripts) empty_set $
49 |       m_insert(create_time) create_time $
50 |       create_map;
51 |     r2 = true;
52 |   }
53 |   return select r1 r2;
54 | }
55 | 
56 | export graph add_deployment(root: schema, namespace_id: string, deployment: DeploymentFullMap): bool {
57 |   ns = point_get root.system.namespaces namespace_id;
58 |   if !is_present ns {
59 |     r1 = false;
60 |   } else {
61 |     if is_present $ point_get ns.deployments deployment.id {
62 |       r2 = false;
63 |     } else {
64 |       s_insert ns.deployments $ build_table(Deployment) deployment;
65 |       r3 = true;
66 |     }
67 |   }
68 |   return select r1 $ select r2 r3;
69 | }
70 | 
71 | export graph get_deployment(root: schema, namespace_id: string, deployment_id: string): DeploymentFullMap {
72 |   ns = point_get root.system.namespaces namespace_id;
73 |   if !is_present ns {
74 |     r1 = null;
75 |   } else {
76 |     depl = point_get ns.deployments deployment_id;
77 |     if !is_present depl {
78 |       r2 = null;
79 |     } else {
80 |       r3 = m_insert(id) depl.id $
81 |         m_insert(create_time) depl.create_time $
82 |         m_insert(description) depl.description $
83 |         m_insert(`schema`) depl.`schema` $
84 |         m_insert(plan) depl.plan $
85 |         create_map;
86 |     }
87 |   }
88 |   return select r1 $ select r2 r3;
89 | }
90 | 
91 | export graph list_namespaces(root: schema): list<NamespaceMap> {
92 |   return reduce(fold_namespaces) create_map create_list(NamespaceMap) root.system.namespaces;
93 | }
94 | 
95 | graph fold_namespaces(_unused: map{}, current: list<NamespaceMap>, item: Namespace): list<NamespaceMap> {
96 |   return (
97 |     m_insert(id) item.id $
98 |     m_insert(create_time) item.create_time $
99 |     m_insert(kv_prefix) item.kv_prefix $
100 |     create_map
101 |   ) : current;
102 | }
103 | 
104 | export graph list_deployment(root: schema, namespace_id: string): list<DeploymentBasicInfoMap> {
105 |   ns = point_get root.system.namespaces namespace_id;
106 |   if !is_present ns {
107 |     r1 = null<list<DeploymentBasicInfoMap>>;
108 |   } else {
109 |     r2 = reduce(fold_deployments) create_map create_list(DeploymentBasicInfoMap) ns.deployments;
110 |   }
111 |   return select r1 r2;
112 | }
113 | 
114 | export graph delete_deployment(root: schema, namespace_id: string, deployment_id: string): bool {
115 |   ns = point_get root.system.namespaces namespace_id;
116 |   if !is_present ns {
117 |     r1 = false;
118 |   } else {
119 |     if is_present $ point_get ns.deployments deployment_id {
120 |       s_delete ns.deployments deployment_id;
121 |       r2 = true;
122 |     } else {
123 |       r3 = false;
124 |     }
125 |   }
126 |   return select r1 $ select r2 r3;
127 | }
128 | 
129 | graph fold_deployments(_unused: map{}, current: list<DeploymentBasicInfoMap>, item: Deployment): list<DeploymentBasicInfoMap> {
130 |   return (
131 |     m_insert(id) item.id $
132 |     m_insert(create_time) item.create_time $
133 |     m_insert(description) item.description $
134 |     create_map
135 |   ) : current;
136 | }
137 | 
138 | export graph delete_namespace(root: schema, namespace_id: string): bool {
139 |   ns = root.system.namespaces;
140 |   if is_present $ point_get ns namespace_id {
141 |     s_delete ns namespace_id;
142 |     r1 = true;
143 |   } else {
144 |     r2 = false;
145 |   }
146 |   return select r1 r2;
147 | }
148 | 
149 | export graph add_or_update_query_script(root: schema, namespace_id: string, qs: QueryScriptFullMap): bool {
150 |   ns = point_get root.system.namespaces namespace_id;
151 |   if !is_present ns {
152 |     r1 = false;
153 |   } else {
154 |     s_insert ns.query_scripts $ build_table(QueryScript) qs;
155 |     r2 = true;
156 |   }
157 |   return select r1 r2;
158 | }
159 | 
160 | export graph get_query_script(root: schema, namespace_id: string, qs_id: string): QueryScriptFullMap {
161 |   ns = point_get root.system.namespaces namespace_id;
162 |   if !is_present ns {
163 |     r1 = null;
164 |   } else {
165 |     qs = point_get ns.query_scripts qs_id;
166 |     if !is_present qs {
167 |       r2 = null;
168 |     } else {
169 |       r3 = m_insert(id) qs.id $
170 |         m_insert(create_time) qs.create_time $
171 |         m_insert(associated_deployment) qs.associated_deployment $
172 |         m_insert(script) qs.script $
173 |         create_map;
174 |     }
175 |   }
176 |   return select r1 $ select r2 r3;
177 | }
178 | 
179 | export graph delete_query_script(root: schema, namespace_id: string, qs_id: string): bool {
180 |   ns = point_get root.system.namespaces namespace_id;
181 |   if !is_present ns {
182 |     r1 = false;
183 |   } else {
184 |     if is_present $ point_get ns.query_scripts qs_id {
185 |       s_delete ns.query_scripts qs_id;
186 |       r2 = true;
187 |     } else {
188 |       r3 = false;
189 |     }
190 |   }
191 |   return select r1 $ select r2 r3;
192 | }
193 | 
194 | export graph list_query_script(root: schema, namespace_id: string): list<QueryScriptBasicInfoMap> {
195 |   ns = point_get root.system.namespaces namespace_id;
196 |   if !is_present ns {
197 |     r1 = null<list<QueryScriptBasicInfoMap>>;
198 |   } else {
199 |     r2 = reduce(fold_query_scripts) create_map create_list(QueryScriptBasicInfoMap) ns.query_scripts;
200 |   }
201 |   return select r1 r2;
202 | }
203 | 
204 | graph fold_query_scripts(_unused: map{}, current: list<QueryScriptBasicInfoMap>, item: QueryScript): list<QueryScriptBasicInfoMap> {
205 |   return (
206 |     m_insert(id) item.id $
207 |     m_insert(associated_deployment) item.associated_deployment $
208 |     m_insert(create_time) item.create_time $
209 |     create_map
210 |   ) : current;
211 | }

--------------------------------------------------------------------------------
/rdb-server/src/sysquery.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 | use rdb_analyzer::data::treewalker::serialize::{SerializedVmValue, VmValueEncodeConfig};
3 | 
4 | use crate::state::get_state;
5 | use thiserror::Error;
6 | 
7 | #[derive(Error, Debug)]
8 | pub enum SysQueryError {
9 |   #[error("namespace not found")]
10 |   NamespaceNotFound,
11 | 
12 |   #[error("query script not found")]
13 |   QueryScriptNotFound,
14 | }
15 | 
16 | pub struct QueryScript {
17 |   pub id: String,
18 |   pub create_time: i64,
19 |   pub associated_deployment: String,
20 |   pub script: String,
21 | }
22 | 
23 | pub struct Deployment {
24 |   pub id: String,
25 |   pub description: String,
26 |   pub schema: String,
27 |   pub plan: Vec<u8>,
28 |   pub create_time: i64,
29 | }
30 | 
31 | pub async fn ns_to_kv_prefix_with_appended_zero(ns_id: &str) -> Result<Vec<u8>> {
32 |   let st = get_state();
33 |   let res = st
34 |     .system_schema
35 |     .exec_ctx
36 |     .run_exported_graph(
37 |       &*st.system_store,
38 |       "ns_to_kv_prefix",
39 |       &[
40 |         SerializedVmValue::Null(None),
41 |         SerializedVmValue::String(ns_id.into()),
42 |       ],
43 |       &VmValueEncodeConfig {
44 |         enable_bytes: true,
45 |         enable_double: true,
46 |         enable_int64: true,
47 |       },
48 |     )
49 |     .await?;
50 |   match res {
51 |     SerializedVmValue::Null(_) => Err(SysQueryError::NamespaceNotFound.into()),
52 |     _ => Ok({
53 |       let mut x = res.try_unwrap_bytes()?.clone();
54 |       x.push(0);
55 |       x
56 |     }),
57 |   }
58 | }
59 | 
60 | pub async fn lookup_query_script(ns_id: &str, qs_id: &str) -> Result<QueryScript> {
61 |   let st = get_state();
62 |   let res = st
63 |     .system_schema
64 |     .exec_ctx
65 |     .run_exported_graph(
66 |       &*st.system_store,
67 |       "get_query_script",
68 |       &[
69 |         SerializedVmValue::Null(None),
70 |         SerializedVmValue::String(ns_id.into()),
71 |         SerializedVmValue::String(qs_id.into()),
72 |       ],
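      // All native encodings are enabled for these internal system queries:
      // the results are consumed in-process (e.g. `create_time` as int64 and
      // `plan` as raw bytes), so there is no JSON-compatibility concern here.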
73 |       &VmValueEncodeConfig {
74 |         enable_bytes: true,
75 |         enable_double: true,
76 |         enable_int64: true,
77 |       },
78 |     )
79 |     .await?;
80 |   match res {
81 |     SerializedVmValue::Null(_) => Err(SysQueryError::QueryScriptNotFound.into()),
82 |     _ => {
83 |       let m = res.try_unwrap_map(&["id", "create_time", "associated_deployment", "script"])?;
84 |       Ok(QueryScript {
85 |         id: m.get("id").unwrap().try_unwrap_string()?.clone(),
86 |         create_time: m.get("create_time").unwrap().try_unwrap_int64()?,
87 |         associated_deployment: m
88 |           .get("associated_deployment")
89 |           .unwrap()
90 |           .try_unwrap_string()?
91 |           .clone(),
92 |         script: m.get("script").unwrap().try_unwrap_string()?.clone(),
93 |       })
94 |     }
95 |   }
96 | }
97 | 
98 | pub async fn lookup_deployment(namespace_id: &str, deployment_id: &str) -> Result<Deployment> {
99 |   let st = get_state();
100 |   let res = st
101 |     .system_schema
102 |     .exec_ctx
103 |     .run_exported_graph(
104 |       &*st.system_store,
105 |       "get_deployment",
106 |       &[
107 |         SerializedVmValue::Null(None),
108 |         SerializedVmValue::String(namespace_id.into()),
109 |         SerializedVmValue::String(deployment_id.into()),
110 |       ],
111 |       &VmValueEncodeConfig {
112 |         enable_bytes: true,
113 |         enable_double: true,
114 |         enable_int64: true,
115 |       },
116 |     )
117 |     .await?;
118 |   let res = res.try_unwrap_map(&["id", "create_time", "description", "schema", "plan"])?;
119 |   let depl = Deployment {
120 |     id: res.get("id").unwrap().try_unwrap_string()?.clone(),
121 |     description: res.get("description").unwrap().try_unwrap_string()?.clone(),
122 |     schema: res.get("schema").unwrap().try_unwrap_string()?.clone(),
123 |     plan: res.get("plan").unwrap().try_unwrap_bytes()?.clone(),
124 |     create_time: res.get("create_time").unwrap().try_unwrap_int64()?,
125 |   };
126 |   Ok(depl)
127 | }

--------------------------------------------------------------------------------
/rdb-server/src/system.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use bumpalo::Bump;
4 | use console::Style;
5 | use rdb_analyzer::{
6 |   data::kv::KeyValueStore,
7 |   schema::{compile::compile, grammar::parse},
8 |   storage_plan::{planner::generate_plan_for_schema, StoragePlan},
9 | };
10 | use sha2::{Digest, Sha256};
11 | use similar::{ChangeTag, TextDiff};
12 | 
13 | use crate::exec_core::{ExecContext, SchemaContext};
14 | 
15 | pub struct SystemSchema {
16 |   pub exec_ctx: ExecContext,
17 | }
18 | 
19 | pub const SCHEMA: &str = include_str!("./system_schema.rschema");
20 | pub const SYS_RASM: &str = include_str!("./sys.rasm");
21 | 
22 | impl SystemSchema {
23 |   pub async fn new(
24 |     migration_hash: Option<String>,
25 |     _store: &dyn KeyValueStore,
26 |     meta_store: &dyn KeyValueStore,
27 |   ) -> Self {
28 |     let schema = compile(&parse(&Bump::new(), SCHEMA).unwrap()).unwrap();
29 |     let txn = meta_store.begin_transaction().await.unwrap();
30 |     let old_schema_text = txn
31 |       .get(b"schema")
32 |       .await
33 |       .unwrap()
34 |       .map(|x| String::from_utf8(x))
35 |       .transpose()
36 |       .unwrap();
37 |     let old_plan = txn
38 |       .get(b"plan")
39 |       .await
40 |       .unwrap()
41 |       .map(|x| StoragePlan::deserialize_compressed(&x))
42 |       .transpose()
43 |       .unwrap();
44 | 
45 |     let plan = if let Some(old_schema_text) = old_schema_text {
46 |       let old_schema = compile(&parse(&Bump::new(), &old_schema_text).unwrap()).unwrap();
47 |       let old_plan = old_plan.expect("old plan not found");
48 |       let new_plan = generate_plan_for_schema(&old_plan, &old_schema, &schema).unwrap();
49 | 
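      // Migration detection: the old and new storage plans are compared via
      // their MessagePack serializations; if either the schema text or the
      // regenerated plan differs, the operator must confirm the migration
      // out-of-band by restarting with `--migration-hash`.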
50 |       let old_plan_serialized = rmp_serde::to_vec_named(&old_plan).unwrap();
51 |       let new_plan_serialized = rmp_serde::to_vec_named(&new_plan).unwrap();
52 | 
53 |       if old_schema_text.as_str() != SCHEMA || old_plan_serialized != new_plan_serialized {
54 |         // Migration required
55 |         let mut hasher = Sha256::new();
56 | 
57 |         // XXX: Plan may contain randomly generated data and we only know that the schema doesn't change across restarts
58 |         hasher.update(SCHEMA.as_bytes());
59 |         let hash = hex::encode(&hasher.finalize()[..]);
60 |         if migration_hash != Some(hash.clone()) {
61 |           print_diff(&old_plan, &new_plan);
62 |           log::error!("Schema change detected. Please check the storage plan diff and rerun the server with `--migration-hash={}`.", hash);
63 |           std::process::abort();
64 |         }
65 |         log::warn!("Applying schema migration.");
66 |         txn.put(b"schema", SCHEMA.as_bytes()).await.unwrap();
67 |         txn
68 |           .put(b"plan", &new_plan.serialize_compressed().unwrap())
69 |           .await
70 |           .unwrap();
71 |         txn.commit().await.unwrap();
72 |       } else {
73 |         log::info!("Schema unchanged.");
74 |         drop(txn);
75 |       }
76 |       new_plan
77 |     } else {
78 |       let new_plan =
79 |         generate_plan_for_schema(&Default::default(), &Default::default(), &schema).unwrap();
80 |       log::warn!("Creating system schema.");
81 |       txn.put(b"schema", SCHEMA.as_bytes()).await.unwrap();
82 |       txn
83 |         .put(b"plan", &new_plan.serialize_compressed().unwrap())
84 |         .await
85 |         .unwrap();
86 |       txn.commit().await.unwrap();
87 |       new_plan
88 |     };
89 | 
90 |     let exec_ctx = ExecContext::load(Arc::new(SchemaContext { schema, plan }), SYS_RASM).unwrap();
91 | 
92 |     Self { exec_ctx }
93 |   }
94 | }
95 | 
96 | fn print_diff(plan1: &StoragePlan, plan2: &StoragePlan) {
97 |   struct Line(Option<usize>);
98 | 
99 |   impl std::fmt::Display for Line {
100 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
101 |       match self.0 {
102 |         None => write!(f, "    "),
103 |         Some(idx) => write!(f, "{:<4}", idx + 1),
104 |       }
105 |     }
106 |   }
107 | 
108 |   let plan1 = serde_yaml::to_string(&StoragePlan::<String>::from(plan1)).unwrap();
109 |   let plan2 = serde_yaml::to_string(&StoragePlan::<String>::from(plan2)).unwrap();
110 |   let diff = TextDiff::from_lines(&plan1, &plan2);
111 |   for (idx, group) in diff.grouped_ops(3).iter().enumerate() {
112 |     if idx > 0 {
113 |       println!("{:-^1$}", "-", 80);
114 |     }
115 |     for op in group {
116 |       for change in diff.iter_inline_changes(op) {
117 |         let (sign, s) = match change.tag() {
118 |           ChangeTag::Delete => ("-", Style::new().red()),
119 |           ChangeTag::Insert => ("+", Style::new().green()),
120 |           ChangeTag::Equal => (" ", Style::new().dim()),
121 |         };
122 |         print!(
123 |           "{}{} |{}",
124 |           console::style(Line(change.old_index())).dim(),
125 |           console::style(Line(change.new_index())).dim(),
126 |           s.apply_to(sign).bold(),
127 |         );
128 |         for (emphasized, value) in change.iter_strings_lossy() {
129 |           if emphasized {
130 |             print!("{}", s.apply_to(value).underlined().on_black());
131 |           } else {
132 |             print!("{}", s.apply_to(value));
133 |           }
134 |         }
135 |         if change.missing_newline() {
136 |           println!();
137 |         }
138 |       }
139 |     }
140 |   }
141 | }

--------------------------------------------------------------------------------
/rdb-server/src/system_schema.rschema:
--------------------------------------------------------------------------------
1 | type System {
2 |   namespaces: set<Namespace>,
3 | }
4 | 
5 | type Namespace {
6 |   @primary
7 |   id: string,
8 |   kv_prefix: bytes,
9 |   deployments: set<Deployment>,
10 |   query_scripts: set<QueryScript>,
11 |   create_time: int64,
12 | }
13 | 
14 | type Deployment {
15 |   @primary
16 |   id: string,
17 |   description: string,
18 |   schema: string,
19 |   plan: bytes,
20 |   create_time: int64,
21 | }
22 | 
23 | type QueryScript {
24 |   @primary
25 |   id: string,
26 |   associated_deployment: string,
27 |   script: string,
28 |   create_time: int64,
29 | }
30 | 
31 | export System system;

--------------------------------------------------------------------------------
/rdb-server/src/util.rs:
--------------------------------------------------------------------------------
1 | use std::time::{SystemTime, UNIX_EPOCH};
2 | 
3 | pub fn current_millis() -> u64 {
4 |   SystemTime::now()
5 |     .duration_since(UNIX_EPOCH)
6 |     .unwrap()
7 |     .as_millis() as u64
8 | }

--------------------------------------------------------------------------------
/rdbctl/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "rdbctl"
3 | version = "0.1.0"
4 | edition = "2018"
5 | 
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7 | 
8 | [dependencies]
9 | rdb-analyzer = { path = "../rdb-analyzer" }
10 | rdb-proto = { path = "../rdb-proto" }
11 | clap = "3.0.0-beta.2"
12 | async-trait = "0.1"
13 | tokio = { version = "1", features = ["full"] }
14 | futures = "0.3"
15 | rand = "0.8"
16 | log = "0.4"
17 | pretty_env_logger = "0.4"
18 | anyhow = "1"
19 | thiserror = "1"
20 | serde = { version = "1", features = ["derive", "rc"] }
21 | serde_json = "1"
22 | once_cell = "1"
23 | bumpalo = { version = "3.7", features = ["collections"] }
24 | similar = { version = "1", features = ["inline"] }
25 | serde_yaml = "0.8"
26 | console = "0.14.0"
27 | rmp-serde = "0.15"
28 | hex = "0.4"
29 | sha2 = "0.9"
30 | dialoguer = "0.8"
31 | ctrlc = "3"

--------------------------------------------------------------------------------
/rdbctl/src/diff.rs:
--------------------------------------------------------------------------------
1 | use console::Style;
2 | use rdb_analyzer::storage_plan::StoragePlan;
3 | use similar::{ChangeTag, TextDiff};
4 | 
5 | pub fn print_diff(plan1: &StoragePlan, plan2: &StoragePlan) -> (usize, usize) {
6 |   struct Line(Option<usize>);
7 | 
8 |   impl std::fmt::Display for Line {
9 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
10 |       match self.0 {
11 |         None => write!(f, "    "),
12 |         Some(idx) => write!(f, "{:<4}", idx + 1),
13 |       }
14 |     }
15 |   }
16 | 
17 |   let mut num_insert = 0usize;
18 |   let mut num_delete = 0usize;
19 | 
20 |   let plan1 = serde_yaml::to_string(&StoragePlan::<String>::from(plan1)).unwrap();
21 |   let plan2 = serde_yaml::to_string(&StoragePlan::<String>::from(plan2)).unwrap();
22 |   let diff = TextDiff::from_lines(&plan1, &plan2);
23 |   for (idx, group) in diff.grouped_ops(3).iter().enumerate() {
24 |     if idx > 0 {
25 |       eprintln!("{:-^1$}", "-", 80);
26 |     }
27 |     for op in group {
28 |       for change in diff.iter_inline_changes(op) {
29 |         let (sign, s) = match change.tag() {
30 |           ChangeTag::Delete => {
31 |             num_delete += 1;
32 |             ("-", Style::new().for_stderr().red())
33 |           }
34 |           ChangeTag::Insert => {
35 |             num_insert += 1;
36 |             ("+", Style::new().for_stderr().green())
37 |           }
38 |           ChangeTag::Equal => (" ", Style::new().for_stderr().dim()),
39 |         };
40 |         eprint!(
41 |           "{}{} |{}",
42 |           console::style(Line(change.old_index())).for_stderr().dim(),
43 |           console::style(Line(change.new_index())).for_stderr().dim(),
44 |           s.apply_to(sign).bold(),
45 |         );
46 |         for (emphasized, value) in change.iter_strings_lossy() {
47 |           if emphasized {
48 |             eprint!("{}", s.apply_to(value).underlined().on_black());
49 |           } else {
50 |             eprint!("{}", s.apply_to(value));
51 |           }
52 |         }
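        // The last line of a hunk may lack a trailing newline; emit one so
        // the next hunk starts on a fresh line.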
53 |         if change.missing_newline() {
54 |           eprintln!();
55 |         }
56 |       }
57 |     }
58 |   }
59 | 
60 |   (num_insert, num_delete)
61 | }

--------------------------------------------------------------------------------
/rdbctl/src/main.rs:
--------------------------------------------------------------------------------
1 | mod diff;
2 | 
3 | use std::convert::TryFrom;
4 | 
5 | use anyhow::Result;
6 | 
7 | use bumpalo::Bump;
8 | use clap::{AppSettings, Clap};
9 | use dialoguer::{theme::ColorfulTheme, Confirm};
10 | use rdb_analyzer::{
11 |   schema::{compile::compile, grammar::parse},
12 |   storage_plan::{planner::generate_plan_for_schema, StorageKey, StoragePlan},
13 | };
14 | use rdb_proto::{
15 |   proto::{
16 |     rdb_control_client::RdbControlClient, CreateDeploymentRequest, CreateNamespaceRequest,
17 |     CreateQueryScriptRequest, DeleteNamespaceRequest, DeleteQueryScriptRequest,
18 |     GetDeploymentRequest, GetQueryScriptRequest, ListDeploymentRequest, ListNamespaceRequest,
19 |     ListQueryScriptRequest,
20 |   },
21 |   tonic::Request,
22 | };
23 | use thiserror::Error;
24 | use tokio::task::block_in_place;
25 | 
26 | use crate::diff::print_diff;
27 | 
28 | /// RefineDB CLI.
29 | #[derive(Clap)]
30 | #[clap(version = "0.1", author = "Heyang Zhou ")]
31 | #[clap(setting = AppSettings::ColoredHelp)]
32 | struct Opts {
33 |   /// Server URL.
34 |   #[clap(short, long)]
35 |   server: String,
36 |   #[clap(subcommand)]
37 |   subcmd: SubCommand,
38 | }
39 | 
40 | #[derive(Clap)]
41 | enum SubCommand {
42 |   /// Create a namespace.
43 |   CreateNamespace(CreateNamespace),
44 | 
45 |   /// List namespaces.
46 |   ListNamespace(ListNamespace),
47 | 
48 |   /// Delete a namespace.
49 |   DeleteNamespace(DeleteNamespace),
50 | 
51 |   /// Create a deployment.
52 |   CreateDeployment(CreateDeployment),
53 | 
54 |   /// List deployments.
55 |   ListDeployment(ListDeployment),
56 | 
57 |   /// Create query script.
58 |   CreateQueryScript(CreateQueryScript),
59 | 
60 |   /// Get query script.
61 |   GetQueryScript(GetQueryScript),
62 | 
63 |   /// Delete query script.
64 |   DeleteQueryScript(DeleteQueryScript),
65 | 
66 |   /// List query scripts.
67 |   ListQueryScript(ListQueryScript),
68 | }
69 | 
70 | #[derive(Clap)]
71 | struct CreateNamespace {
72 |   namespace_id: String,
73 | }
74 | 
75 | #[derive(Clap)]
76 | struct ListNamespace {}
77 | 
78 | #[derive(Clap)]
79 | struct DeleteNamespace {
80 |   namespace_id: String,
81 | }
82 | 
83 | #[derive(Clap)]
84 | struct CreateDeployment {
85 |   /// The source deployment to migrate from.
86 |   #[clap(long)]
87 |   migrate_from: Option<String>,
88 | 
89 |   /// Path to the new schema.
90 |   #[clap(long)]
91 |   schema: String,
92 | 
93 |   /// Deployment description.
94 |   #[clap(long)]
95 |   description: Option<String>,
96 | 
97 |   /// Namespace id.
98 |   #[clap(long)]
99 |   namespace: String,
100 | }
101 | 
102 | #[derive(Clap)]
103 | struct ListDeployment {
104 |   namespace_id: String,
105 | }
106 | 
107 | #[derive(Clap)]
108 | struct CreateQueryScript {
109 |   /// Namespace id.
110 |   #[clap(long)]
111 |   namespace: String,
112 | 
113 |   /// Query script id.
114 |   #[clap(long)]
115 |   id: String,
116 | 
117 |   /// The associated deployment id.
118 |   #[clap(long)]
119 |   deployment: String,
120 | 
121 |   /// Path to the script.
122 |   #[clap(short, long)]
123 |   script: String,
124 | }
125 | 
126 | #[derive(Clap)]
127 | struct GetQueryScript {
128 |   /// Namespace id.
129 |   #[clap(long)]
130 |   namespace: String,
131 | 
132 |   /// Query script id.
133 |   #[clap(long)]
134 |   id: String,
135 | }
136 | 
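// Like the other subcommand structs here, DeleteQueryScript below maps 1:1
// onto a request message from rdb-proto (`DeleteQueryScriptRequest`).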
137 | #[derive(Clap)]
138 | struct DeleteQueryScript {
139 |   /// Namespace id.
140 |   #[clap(long)]
141 |   namespace: String,
142 | 
143 |   /// Query script id.
144 |   #[clap(long)]
145 |   id: String,
146 | }
147 | 
148 | #[derive(Clap)]
149 | struct ListQueryScript {
150 |   namespace: String,
151 | }
152 | 
153 | #[derive(Error, Debug)]
154 | enum CliError {
155 |   #[error("reference deployment not found")]
156 |   ReferenceDeploymentNotFound,
157 | 
158 |   #[error("deployment not created")]
159 |   DeploymentNotCreated,
160 | 
161 |   #[error("aborted by user")]
162 |   AbortedByUser,
163 | 
164 |   #[error("query script not found")]
165 |   QueryScriptNotFound,
166 | }
167 | 
168 | #[tokio::main]
169 | async fn main() -> Result<()> {
170 |   if std::env::var("RUST_LOG").is_err() {
171 |     std::env::set_var("RUST_LOG", "info");
172 |   }
173 |   pretty_env_logger::init_timed();
174 |   let opts: Opts = Opts::parse();
175 | 
176 |   // Reset the terminal on ctrl-c (in case we are in a prompt)
177 |   ctrlc::set_handler(move || {
178 |     let term = console::Term::stdout();
179 |     let _ = term.show_cursor();
180 |     std::process::exit(1);
181 |   })?;
182 | 
183 |   let mut client = RdbControlClient::connect(opts.server.clone()).await?;
184 | 
185 |   match &opts.subcmd {
186 |     SubCommand::CreateNamespace(x) => {
187 |       let req = Request::new(CreateNamespaceRequest {
188 |         id: x.namespace_id.clone(),
189 |       });
190 |       let res = client.create_namespace(req).await?;
191 |       println!(
192 |         "{}",
193 |         serde_json::to_string(&serde_json::json!({
194 |           "created": res.get_ref().created,
195 |         }))?
196 |       );
197 |     }
198 |     SubCommand::ListNamespace(_) => {
199 |       let req = Request::new(ListNamespaceRequest {});
200 |       let res = client.list_namespace(req).await?;
201 |       println!(
202 |         "{}",
203 |         serde_json::to_string(
204 |           &res
205 |             .get_ref()
206 |             .namespaces
207 |             .iter()
208 |             .map(|x| serde_json::json!({
209 |               "id": x.id,
210 |               "create_time": x.create_time,
211 |             }))
212 |             .collect::<Vec<_>>()
213 |         )?
214 |       );
215 |     }
216 |     SubCommand::DeleteNamespace(x) => {
217 |       let req = Request::new(DeleteNamespaceRequest {
218 |         id: x.namespace_id.clone(),
219 |       });
220 |       let res = client.delete_namespace(req).await?;
221 |       println!(
222 |         "{}",
223 |         serde_json::to_string(&serde_json::json!({
224 |           "deleted": res.get_ref().deleted,
225 |         }))?
226 |       );
227 |     }
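    // Every arm prints exactly one JSON document on stdout, so the CLI is
    // easy to script against; human-readable logging goes to stderr via
    // pretty_env_logger.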
228 |     SubCommand::CreateDeployment(subopts) => {
229 |       let schema_text = std::fs::read_to_string(&subopts.schema)?;
230 | 
231 |       let new_schema = compile(&parse(&Bump::new(), &schema_text)?)?;
232 |       let new_plan = if let Some(reference) = &subopts.migrate_from {
233 |         let reference_deployment = client
234 |           .get_deployment(Request::new(GetDeploymentRequest {
235 |             namespace_id: subopts.namespace.clone(),
236 |             deployment_id: reference.clone(),
237 |           }))
238 |           .await?;
239 |         let info = reference_deployment
240 |           .get_ref()
241 |           .info
242 |           .as_ref()
243 |           .ok_or_else(|| CliError::ReferenceDeploymentNotFound)?;
244 |         let reference_schema = compile(&parse(&Bump::new(), &info.schema)?)?;
245 |         let reference_plan: StoragePlan<String> = serde_yaml::from_str(&info.plan)?;
246 |         let reference_plan = StoragePlan::<StorageKey>::try_from(&reference_plan)?;
247 |         let new_plan = generate_plan_for_schema(&reference_plan, &reference_schema, &new_schema)?;
248 | 
249 |         let (n_insert, n_delete) = print_diff(&reference_plan, &new_plan);
250 |         if n_insert != 0 || n_delete != 0 {
251 |           let proceed = block_in_place(|| {
252 |             Confirm::with_theme(&ColorfulTheme::default())
253 |               .with_prompt("Do you wish to apply the new storage plan?")
254 |               .interact()
255 |           })?;
256 |           if !proceed {
257 |             return Err(CliError::AbortedByUser.into());
258 |           }
259 |           log::info!("Storage plan migrated from reference deployment.");
260 |         } else {
261 |           log::info!("Storage plan unchanged.");
262 |         }
263 |         new_plan
264 |       } else {
265 |         generate_plan_for_schema(&Default::default(), &Default::default(), &new_schema)?
266 |       };
267 | 
268 |       let res = client
269 |         .create_deployment(Request::new(CreateDeploymentRequest {
270 |           namespace_id: subopts.namespace.clone(),
271 |           schema: schema_text,
272 |           plan: serde_yaml::to_string(&StoragePlan::<String>::from(&new_plan))?,
273 |           description: subopts.description.clone().unwrap_or_default(),
274 |         }))
275 |         .await?;
276 |       let deployment_id = res
277 |         .get_ref()
278 |         .deployment_id
279 |         .as_ref()
280 |         .ok_or_else(|| CliError::DeploymentNotCreated)?;
281 |       println!(
282 |         "{}",
283 |         serde_json::to_string(&serde_json::json!({
284 |           "id": deployment_id.id,
285 |         }))?
286 |       );
287 |     }
288 |     SubCommand::ListDeployment(subopts) => {
289 |       let req = Request::new(ListDeploymentRequest {
290 |         namespace_id: subopts.namespace_id.clone(),
291 |       });
292 |       let res = client.list_deployment(req).await?;
293 |       println!(
294 |         "{}",
295 |         serde_json::to_string(
296 |           &res
297 |             .get_ref()
298 |             .deployments
299 |             .iter()
300 |             .map(|x| serde_json::json!({
301 |               "id": x.id,
302 |               "create_time": x.create_time,
303 |               "description": x.description,
304 |             }))
305 |             .collect::<Vec<_>>()
306 |         )?
307 |       );
308 |     }
309 |     SubCommand::CreateQueryScript(subopts) => {
310 |       let script = std::fs::read_to_string(&subopts.script)?;
311 |       let req = Request::new(CreateQueryScriptRequest {
312 |         namespace_id: subopts.namespace.clone(),
313 |         id: subopts.id.clone(),
314 |         associated_deployment: subopts.deployment.clone(),
315 |         script,
316 |       });
317 |       let res = client.create_query_script(req).await?;
318 |       println!(
319 |         "{}",
320 |         serde_json::to_string(&serde_json::json!({
321 |           "created": res.get_ref().created,
322 |         }))?
323 |       );
324 |     }
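    // Hypothetical example (server URL and ids are placeholders; clap derives
    // kebab-case subcommand names from the variant names above):
    //
    //   rdbctl --server http://127.0.0.1:9000 create-query-script \
    //     --namespace myns --id myscript --deployment <deployment-id> \
    //     --script ./queries.rasm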
325 |     SubCommand::ListQueryScript(subopts) => {
326 |       let req = Request::new(ListQueryScriptRequest {
327 |         namespace_id: subopts.namespace.clone(),
328 |       });
329 |       let res = client.list_query_script(req).await?;
330 |       println!(
331 |         "{}",
332 |         serde_json::to_string(
333 |           &res
334 |             .get_ref()
335 |             .query_scripts
336 |             .iter()
337 |             .map(|x| serde_json::json!({
338 |               "id": x.id,
339 |               "associated_deployment": x.associated_deployment,
340 |               "create_time": x.create_time,
341 |             }))
342 |             .collect::<Vec<_>>()
343 |         )?
344 |       );
345 |     }
346 |     SubCommand::DeleteQueryScript(subopts) => {
347 |       let req = Request::new(DeleteQueryScriptRequest {
348 |         namespace_id: subopts.namespace.clone(),
349 |         id: subopts.id.clone(),
350 |       });
351 |       let res = client.delete_query_script(req).await?;
352 |       println!(
353 |         "{}",
354 |         serde_json::to_string(&serde_json::json!({
355 |           "deleted": res.get_ref().deleted,
356 |         }))?
357 |       );
358 |     }
359 |     SubCommand::GetQueryScript(subopts) => {
360 |       let req = Request::new(GetQueryScriptRequest {
361 |         namespace_id: subopts.namespace.clone(),
362 |         query_script_id: subopts.id.clone(),
363 |       });
364 |       let res = client.get_query_script(req).await?;
365 |       let info = res
366 |         .get_ref()
367 |         .info
368 |         .as_ref()
369 |         .ok_or_else(|| CliError::QueryScriptNotFound)?;
370 |       println!(
371 |         "{}",
372 |         serde_json::to_string(&serde_json::json!({
373 |           "id": info.id,
374 |           "script": info.script,
375 |           "associated_deployment": info.associated_deployment,
376 |           "create_time": info.create_time,
377 |         }))?
378 |       );
379 |     }
380 |   }
381 | 
382 |   Ok(())
383 | }

--------------------------------------------------------------------------------
/rust-toolchain:
--------------------------------------------------------------------------------
1 | stable-2021-07-29

--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | tab_spaces = 2
--------------------------------------------------------------------------------