├── Cargo.toml ├── README.md ├── benchheap.bat └── src ├── atomfile.rs ├── axumtest.rs ├── basicatomfile.rs ├── bench.rs ├── block.rs ├── blockpagestg.rs ├── buf.rs ├── builtin.rs ├── bytes.rs ├── cexp.rs ├── compact.rs ├── compile.rs ├── dividedstg.rs ├── exec.rs ├── expr.rs ├── gentrans.rs ├── heap.rs ├── lib.rs ├── page.rs ├── parse.rs ├── pstore.rs ├── run.rs ├── sortedfile.rs ├── stg.rs ├── stgwin.rs ├── sys.rs ├── table.rs ├── test.rs ├── util.rs ├── value.rs └── wmap.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustdb" 3 | version = "5.2.112" 4 | edition = "2021" 5 | authors = ["George Barwood"] 6 | description = "SQL database" 7 | license = "MIT OR Apache-2.0" 8 | repository = "https://github.com/georgebarwood/RustDB/" 9 | categories = ["database-implementations"] 10 | include = ["/src"] 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | rustc-hash = "1.1.0" 16 | serde = { version = "1.0.131", features = ["derive","rc"], optional=true } 17 | pstd = { version = "0.1.0", optional=true } 18 | 19 | [features] 20 | default = ["builtin","pack","verify","table","max","renumber","gentrans"] 21 | gentrans = [] 22 | serde = ["dep:serde","pstd/serde"] 23 | builtin = [] 24 | table = [] 25 | max = ["builtin","table"] 26 | pack = [] 27 | renumber = [] 28 | verify = [] 29 | unsafe-optim = ["pstd/unsafe-optim"] 30 | log = [] 31 | log-execute= [] 32 | compact = [] 33 | pstd = ["dep:pstd"] 34 | 35 | [dev-dependencies] 36 | rand = "0.8.4" 37 | sqlite = "0.32.0" 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rustdb 2 | 3 | Database with SQL-like language implemented in Rust. 
4 | 5 | The SQL-like language is relatively minimal, and does not (currently) include features such as joins or views. Instead it has high performance SET .. FROM … and FOR .. FROM statements to access database tables, generally using an INDEX. 6 | 7 | Read-only transactions run immediately and concurrently on a virtual read-only copy of the database, and cannot be blocked. 8 | 9 | Write transactions run sequentially (and should typically execute in around 100 microseconds). 10 | 11 | The Storage trait allows a variety of underlying storage, including SimpleFileStorage, MemFile and AtomicFile. 12 | 13 | Data is accessed either by a Transaction interface or directly ( as an offset into a page of byte data ). 14 | 15 | See https://github.com/georgebarwood/rustweb2 for an example program: a webserver based on the rustdb database, with database browsing, password hashing, database replication, email transmission and timed jobs. 16 | 17 | Transactions can be logged, allowing database replication. 18 | 19 | crates.io : https://crates.io/crates/rustdb 20 | 21 | documentation: https://docs.rs/rustdb/latest/rustdb/ 22 | 23 | blog: https://rustdb.wordpress.com/ 24 | -------------------------------------------------------------------------------- /benchheap.bat: -------------------------------------------------------------------------------- 1 | cargo test heap --release -- --nocapture 2 | cargo test heap --release --features unsafe-optim -- --nocapture -------------------------------------------------------------------------------- /src/atomfile.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | buf::ReadBufStg, wmap::WMap, Arc, BasicAtomicFile, Data, Limits, Mutex, RwLock, Storage, 3 | }; 4 | 5 | /// Based on [BasicAtomicFile] which makes sure that database updates are all-or-nothing. 6 | /// Provides read buffering for small reads, and a thread to perform commit asynchronously. 
7 | pub struct AtomicFile { 8 | map: WMap, 9 | cf: Arc>, 10 | size: u64, 11 | tx: std::sync::mpsc::Sender<(u64, WMap)>, 12 | busy: Arc>, 13 | map_lim: usize, 14 | } 15 | 16 | impl AtomicFile { 17 | /// Construct AtomicFile with default limits. stg is the main underlying storage, upd is temporary storage for updates during commit. 18 | pub fn new(stg: Box, upd: Box) -> Box { 19 | Self::new_with_limits(stg, upd, &Limits::default()) 20 | } 21 | 22 | /// Construct Atomic file with specified limits. 23 | pub fn new_with_limits( 24 | stg: Box, 25 | upd: Box, 26 | lim: &Limits, 27 | ) -> Box { 28 | let size = stg.size(); 29 | let mut baf = BasicAtomicFile::new(stg.clone(), upd, lim); 30 | let (tx, rx) = std::sync::mpsc::channel::<(u64, WMap)>(); 31 | let cf = Arc::new(RwLock::new(CommitFile::new(stg, lim.rbuf_mem))); 32 | let busy = Arc::new(Mutex::new(())); // Lock held while async save thread is active. 33 | 34 | // Start the thread which does save asyncronously. 35 | let (cf1, busy1) = (cf.clone(), busy.clone()); 36 | std::thread::spawn(move || { 37 | while let Ok((size, map)) = rx.recv() { 38 | let _lock = busy1.lock(); 39 | baf.map = map; 40 | baf.commit(size); 41 | cf1.write().unwrap().done_one(); 42 | } 43 | }); 44 | Box::new(Self { 45 | map: WMap::default(), 46 | cf, 47 | size, 48 | tx, 49 | busy, 50 | map_lim: lim.map_lim, 51 | }) 52 | } 53 | } 54 | 55 | impl Storage for AtomicFile { 56 | fn commit(&mut self, size: u64) { 57 | self.size = size; 58 | if self.map.is_empty() { 59 | return; 60 | } 61 | if self.cf.read().unwrap().map.len() > self.map_lim { 62 | self.wait_complete(); 63 | } 64 | let map = std::mem::take(&mut self.map); 65 | let cf = &mut *self.cf.write().unwrap(); 66 | cf.todo += 1; 67 | map.to_storage(cf); 68 | self.tx.send((size, map)).unwrap(); 69 | } 70 | 71 | fn size(&self) -> u64 { 72 | self.size 73 | } 74 | 75 | fn read(&self, start: u64, data: &mut [u8]) { 76 | self.map.read(start, data, &*self.cf.read().unwrap()); 77 | } 78 | 79 | fn 
write_data(&mut self, start: u64, data: Data, off: usize, len: usize) { 80 | self.map.write(start, data, off, len); 81 | } 82 | 83 | fn write(&mut self, start: u64, data: &[u8]) { 84 | let len = data.len(); 85 | let d = Arc::new(data.to_vec()); 86 | self.write_data(start, d, 0, len); 87 | } 88 | 89 | fn wait_complete(&self) { 90 | while self.cf.read().unwrap().todo != 0 { 91 | #[cfg(feature = "log")] 92 | println!("AtomicFile::wait_complete - waiting for writer process"); 93 | let _x = self.busy.lock(); 94 | } 95 | } 96 | } 97 | 98 | struct CommitFile { 99 | stg: Box, 100 | map: WMap, 101 | todo: usize, 102 | } 103 | 104 | impl CommitFile { 105 | fn new(stg: Box, buf_mem: usize) -> Self { 106 | Self { 107 | stg: ReadBufStg::<256>::new(stg, 50, buf_mem / 256), 108 | map: WMap::default(), 109 | todo: 0, 110 | } 111 | } 112 | 113 | fn done_one(&mut self) { 114 | self.todo -= 1; 115 | if self.todo == 0 { 116 | self.map = WMap::default(); 117 | self.stg.reset(); 118 | } 119 | } 120 | } 121 | 122 | impl Storage for CommitFile { 123 | fn commit(&mut self, _size: u64) { 124 | panic!() 125 | } 126 | 127 | fn size(&self) -> u64 { 128 | panic!() 129 | } 130 | 131 | fn read(&self, start: u64, data: &mut [u8]) { 132 | self.map.read(start, data, &*self.stg); 133 | } 134 | 135 | fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) { 136 | self.map.write(start, data, off, len); 137 | } 138 | 139 | fn write(&mut self, _start: u64, _data: &[u8]) { 140 | panic!() 141 | } 142 | } 143 | 144 | #[test] 145 | pub fn test() { 146 | use crate::stg::MemFile; 147 | use rand::Rng; 148 | /* Idea of test is to check AtomicFile and MemFile behave the same */ 149 | 150 | let ta = crate::test::test_amount(); 151 | 152 | let mut rng = rand::thread_rng(); 153 | 154 | for _ in 0..100 { 155 | let mut s1 = AtomicFile::new(MemFile::new(), MemFile::new()); 156 | let mut s2 = MemFile::new(); 157 | 158 | for _ in 0..1000 * ta { 159 | let off: usize = rng.gen::() % 100; 160 | let mut len 
= 1 + rng.gen::() % 20; 161 | let w: bool = rng.gen(); 162 | if w { 163 | let mut bytes = Vec::new(); 164 | while len > 0 { 165 | len -= 1; 166 | let b: u8 = rng.gen::(); 167 | bytes.push(b); 168 | } 169 | s1.write(off as u64, &bytes); 170 | s2.write(off as u64, &bytes); 171 | } else { 172 | let mut b2 = vec![0; len]; 173 | let mut b3 = vec![0; len]; 174 | s1.read(off as u64, &mut b2); 175 | s2.read(off as u64, &mut b3); 176 | assert!(b2 == b3); 177 | } 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/axumtest.rs: -------------------------------------------------------------------------------- 1 | use mimalloc::MiMalloc; 2 | 3 | /// Memory allocator ( MiMalloc ). 4 | #[global_allocator] 5 | static MEMALLOC: MiMalloc = MiMalloc; 6 | 7 | use axum::{ 8 | extract::{Extension, Form, Multipart, Path, Query}, 9 | routing::get, 10 | AddExtensionLayer, Router, 11 | }; 12 | 13 | use tower::ServiceBuilder; 14 | use tower_cookies::{CookieManagerLayer, Cookies}; 15 | 16 | use tokio::sync::{mpsc, oneshot}; 17 | 18 | use rustdb::{ 19 | c_value, check_types, AccessPagedData, Block, CExp, CExpPtr, CompileFunc, DataKind, Database, 20 | EvalEnv, Expr, GenQuery, Part, SharedPagedData, SimpleFileStorage, Value, DB, INITSQL, 21 | }; 22 | 23 | use std::{collections::BTreeMap, rc::Rc, sync::Arc, thread}; 24 | 25 | /// Query to be sent to server thread, implements IntoResponse. 26 | struct ServerQuery { 27 | pub x: Box, 28 | } 29 | 30 | impl ServerQuery { 31 | pub fn new() -> Self { 32 | Self { 33 | x: Box::new(GenQuery::new()), 34 | } 35 | } 36 | } 37 | 38 | /// Message to server thread, includes oneshot Sender for reply. 39 | struct ServerMessage { 40 | pub sq: ServerQuery, 41 | pub tx: oneshot::Sender, 42 | } 43 | 44 | /// State shared with handlers. 45 | #[derive(Clone)] 46 | struct SharedState { 47 | /// Sender channel for sending queries to server thread. 
48 | tx: mpsc::Sender, 49 | /// Shared storage used for read-only queries. 50 | spd: Arc, 51 | } 52 | 53 | /// Get database with extra registered builtin functions. 54 | fn get_db(apd: AccessPagedData, sql: &str) -> DB { 55 | let db = Database::new(apd, sql); 56 | let list = [("ARGON", DataKind::Binary, CompileFunc::Value(c_argon))]; 57 | for (name, typ, cf) in list { 58 | db.register(name, typ, cf); 59 | } 60 | db 61 | } 62 | 63 | #[tokio::main] 64 | /// Execution starts here. 65 | async fn main() { 66 | // console_subscriber::init(); 67 | let sfs = Box::new(SimpleFileStorage::new("C:/Users/pc/rust/sftest01.rustdb")); 68 | let spd = Arc::new(SharedPagedData::new(sfs)); 69 | 70 | let (tx, mut rx) = mpsc::channel::(1); 71 | 72 | let state = Arc::new(SharedState { tx, spd }); 73 | let wapd = state.spd.open_write(); 74 | 75 | // This is the server thread (synchronous). 76 | thread::spawn(move || { 77 | let db = get_db(wapd, INITSQL); 78 | loop { 79 | let mut sm = rx.blocking_recv().unwrap(); 80 | db.run_timed("EXEC web.Main()", &mut *sm.sq.x); 81 | let updates = db.save(); 82 | if updates > 0 { 83 | println!("Pages updated={}", updates); 84 | let ser = serde_json::to_string(&sm.sq.x).unwrap(); 85 | println!("Serialised query={}", ser); 86 | } 87 | let _x = sm.tx.send(sm.sq); 88 | } 89 | }); 90 | 91 | // build our application with a single route 92 | let app = Router::new().route("/*key", get(h_get).post(h_post)).layer( 93 | ServiceBuilder::new() 94 | .layer(CookieManagerLayer::new()) 95 | .layer(AddExtensionLayer::new(state)), 96 | ); 97 | 98 | // run it with hyper on localhost:3000 99 | axum::Server::bind(&"0.0.0.0:3000".parse().unwrap()) 100 | .serve(app.into_make_service()) 101 | .await 102 | .unwrap(); 103 | } 104 | 105 | /// Get BTreeMap of cookies from Cookies. 
106 | fn map_cookies(cookies: Cookies) -> BTreeMap { 107 | let mut result = BTreeMap::new(); 108 | for cookie in cookies.list() { 109 | let (name, value) = cookie.name_value(); 110 | result.insert(name.to_string(), value.to_string()); 111 | } 112 | result 113 | } 114 | 115 | /// Get Vec of Parts from MultiPart. 116 | async fn map_parts(mp: Option) -> Vec { 117 | let mut result = Vec::new(); 118 | if let Some(mut mp) = mp { 119 | while let Some(field) = mp.next_field().await.unwrap() { 120 | let name = field.name().unwrap().to_string(); 121 | let file_name = match field.file_name() { 122 | Some(s) => s.to_string(), 123 | None => "".to_string(), 124 | }; 125 | let content_type = match field.content_type() { 126 | Some(s) => s.to_string(), 127 | None => "".to_string(), 128 | }; 129 | let mut data = Vec::new(); 130 | let mut text = "".to_string(); 131 | if content_type.is_empty() { 132 | if let Ok(s) = field.text().await { 133 | text = s; 134 | } 135 | } else if let Ok(bytes) = field.bytes().await { 136 | data = bytes.to_vec() 137 | } 138 | result.push(Part { 139 | name, 140 | file_name, 141 | content_type, 142 | data: Arc::new(data), 143 | text, 144 | }); 145 | } 146 | } 147 | result 148 | } 149 | 150 | /// Handler for http GET requests. 151 | async fn h_get( 152 | state: Extension>, 153 | path: Path, 154 | params: Query>, 155 | cookies: Cookies, 156 | ) -> ServerQuery { 157 | // Build the ServerQuery. 158 | let mut sq = ServerQuery::new(); 159 | sq.x.path = path.0; 160 | sq.x.params = params.0; 161 | sq.x.cookies = map_cookies(cookies); 162 | 163 | let blocking_task = tokio::task::spawn_blocking(move || { 164 | // GET requests should be read-only. 165 | let apd = state.spd.open_read(); 166 | let db = get_db(apd, ""); 167 | db.run_timed("EXEC web.Main()", &mut *sq.x); 168 | sq 169 | }); 170 | blocking_task.await.unwrap() 171 | } 172 | 173 | /// Handler for http POST requests. 
174 | async fn h_post( 175 | state: Extension>, 176 | path: Path, 177 | params: Query>, 178 | cookies: Cookies, 179 | form: Option>>, 180 | multipart: Option, 181 | ) -> ServerQuery { 182 | // Build the ServerQuery. 183 | let mut sq = ServerQuery::new(); 184 | sq.x.path = path.0; 185 | sq.x.params = params.0; 186 | sq.x.cookies = map_cookies(cookies); 187 | if let Some(Form(form)) = form { 188 | sq.x.form = form; 189 | } else { 190 | sq.x.parts = map_parts(multipart).await; 191 | } 192 | 193 | // Send query to database thread ( and get it back ). 194 | let (tx, rx) = oneshot::channel::(); 195 | let _err = state.tx.send(ServerMessage { sq, tx }).await; 196 | let result = rx.await.unwrap(); 197 | 198 | result 199 | } 200 | 201 | use axum::{ 202 | body::{Bytes, Full}, 203 | http::{header::HeaderName, status::StatusCode, HeaderValue, Response}, 204 | response::IntoResponse, 205 | }; 206 | 207 | impl IntoResponse for ServerQuery { 208 | type Body = Full; 209 | type BodyError = std::convert::Infallible; 210 | 211 | fn into_response(self) -> Response { 212 | let mut res = Response::new(Full::from(self.x.output)); 213 | 214 | *res.status_mut() = StatusCode::from_u16(self.x.status_code).unwrap(); 215 | 216 | for (name, value) in &self.x.headers { 217 | res.headers_mut().insert( 218 | HeaderName::from_lowercase(name.as_bytes()).unwrap(), 219 | HeaderValue::from_str(value).unwrap(), 220 | ); 221 | } 222 | res 223 | } 224 | } 225 | 226 | ///////////////////////////// 227 | 228 | use argon2rs::argon2i_simple; 229 | 230 | /// Compile call to ARGON. 231 | fn c_argon(b: &Block, args: &mut [Expr]) -> CExpPtr { 232 | check_types(b, args, &[DataKind::String, DataKind::String]); 233 | let password = c_value(b, &mut args[0]); 234 | let salt = c_value(b, &mut args[1]); 235 | Box::new(Argon { password, salt }) 236 | } 237 | 238 | /// Compiled call to ARGON. 
239 | struct Argon { 240 | password: CExpPtr, 241 | salt: CExpPtr, 242 | } 243 | impl CExp for Argon { 244 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 245 | let pw = self.password.eval(ee, d).str(); 246 | let salt = self.salt.eval(ee, d).str(); 247 | 248 | let result = argon2i_simple(&pw, &salt).to_vec(); 249 | Value::RcBinary(Rc::new(result)) 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /src/basicatomfile.rs: -------------------------------------------------------------------------------- 1 | use crate::{buf::WriteBuffer, wmap::DataSlice, wmap::WMap, Arc, Data, Limits, Storage}; 2 | 3 | /// Basis for [crate::AtomicFile] ( unbuffered alternative ). 4 | pub struct BasicAtomicFile { 5 | /// The main underlying storage. 6 | stg: WriteBuffer, 7 | /// Temporary storage for updates during commit. 8 | upd: WriteBuffer, 9 | /// Map of writes. 10 | pub map: WMap, 11 | /// List of writes. 12 | list: Vec<(u64, DataSlice)>, 13 | size: u64, 14 | } 15 | 16 | impl BasicAtomicFile { 17 | /// stg is the main underlying storage, upd is temporary storage for updates during commit. 18 | pub fn new(stg: Box, upd: Box, lim: &Limits) -> Box { 19 | let size = stg.size(); 20 | let mut result = Box::new(Self { 21 | stg: WriteBuffer::new(stg, lim.swbuf), 22 | upd: WriteBuffer::new(upd, lim.uwbuf), 23 | map: WMap::default(), 24 | list: Vec::new(), 25 | size, 26 | }); 27 | result.init(); 28 | result 29 | } 30 | 31 | /// Apply outstanding updates. 
32 | fn init(&mut self) { 33 | let end = self.upd.stg.read_u64(0); 34 | let size = self.upd.stg.read_u64(8); 35 | if end == 0 { 36 | return; 37 | } 38 | assert!(end == self.upd.stg.size()); 39 | let mut pos = 16; 40 | while pos < end { 41 | let start = self.upd.stg.read_u64(pos); 42 | pos += 8; 43 | let len = self.upd.stg.read_u64(pos); 44 | pos += 8; 45 | let mut buf = vec![0; len as usize]; 46 | self.upd.stg.read(pos, &mut buf); 47 | pos += len; 48 | self.stg.write(start, &buf); 49 | } 50 | self.stg.commit(size); 51 | self.upd.commit(0); 52 | } 53 | 54 | /// Perform the specified phase ( 1 or 2 ) of a two-phase commit. 55 | pub fn commit_phase(&mut self, size: u64, phase: u8) { 56 | if self.map.is_empty() && self.list.is_empty() { 57 | return; 58 | } 59 | if phase == 1 { 60 | self.list = self.map.to_vec(); 61 | 62 | // Write the updates to upd. 63 | // First set the end position to zero. 64 | self.upd.write_u64(0, 0); 65 | self.upd.write_u64(8, size); 66 | self.upd.commit(16); // Not clear if this is necessary. 67 | 68 | // Write the update records. 69 | let mut stg_written = false; 70 | let mut pos: u64 = 16; 71 | for (start, v) in self.list.iter() { 72 | let (start, len, data) = (*start, v.len as u64, v.all()); 73 | if start >= self.size { 74 | // Writes beyond current stg size can be written directly. 75 | stg_written = true; 76 | self.stg.write(start, data); 77 | } else { 78 | self.upd.write_u64(pos, start); 79 | pos += 8; 80 | self.upd.write_u64(pos, len); 81 | pos += 8; 82 | self.upd.write(pos, data); 83 | pos += len; 84 | } 85 | } 86 | if stg_written { 87 | self.stg.commit(size); 88 | } 89 | self.upd.commit(pos); // Not clear if this is necessary. 90 | 91 | // Set the end position. 92 | self.upd.write_u64(0, pos); 93 | self.upd.write_u64(8, size); 94 | self.upd.commit(pos); 95 | } else { 96 | for (start, v) in self.list.iter() { 97 | if *start < self.size { 98 | // Writes beyond current stg size have already been written. 
99 | self.stg.write(*start, v.all()); 100 | } 101 | } 102 | self.list.clear(); 103 | self.stg.commit(size); 104 | self.upd.commit(0); 105 | } 106 | } 107 | } 108 | 109 | impl Storage for BasicAtomicFile { 110 | fn commit(&mut self, size: u64) { 111 | self.commit_phase(size, 1); 112 | self.commit_phase(size, 2); 113 | self.size = size; 114 | } 115 | 116 | fn size(&self) -> u64 { 117 | self.size 118 | } 119 | 120 | fn read(&self, start: u64, data: &mut [u8]) { 121 | self.map.read(start, data, &*self.stg.stg); 122 | } 123 | 124 | fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) { 125 | self.map.write(start, data, off, len); 126 | } 127 | 128 | fn write(&mut self, start: u64, data: &[u8]) { 129 | let len = data.len(); 130 | let d = Arc::new(data.to_vec()); 131 | self.write_data(start, d, 0, len); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/bench.rs: -------------------------------------------------------------------------------- 1 | /* Each test should first create a table with two columns, insert 8,192 identical rows 'Alice', 1000. 2 | Then (the timed part) should total the second column ( result 8,192,000 ) and do this 1,000 times. 
3 | */ 4 | 5 | #[test] 6 | fn sqlite_test() { 7 | let connection = sqlite::open(":memory:").unwrap(); 8 | 9 | let sql = " 10 | CREATE TABLE users (Id INTEGER PRIMARY KEY, name TEXT, age INTEGER); 11 | INSERT INTO users(name,age) VALUES ('Alice', 1000);"; 12 | connection.execute(sql).unwrap(); 13 | 14 | let sql = "INSERT INTO users(name,age) SELECT name, age FROM users"; 15 | 16 | // Create 8192 records (each iteration should double number of records) 17 | for _i in 0..13 { 18 | connection.execute(sql).unwrap(); 19 | } 20 | 21 | let mut results = Vec::new(); 22 | for _outer in 0..100 { 23 | let start = std::time::Instant::now(); 24 | for _i in 0..10 { 25 | let sql = "SELECT SUM(age) FROM users"; 26 | connection.execute(sql).unwrap(); 27 | } 28 | results.push(start.elapsed().as_micros() as u64); 29 | } 30 | print_results("sqlite_test", results); 31 | } 32 | 33 | #[test] 34 | fn rustdb_test() { 35 | use crate::*; 36 | 37 | // let stg = AtomicFile::new(MemFile::new(), DummyFile::new()); 38 | let stg = MemFile::new(); 39 | 40 | let mut bmap = BuiltinMap::default(); 41 | standard_builtins(&mut bmap); 42 | let bmap = Arc::new(bmap); 43 | 44 | let spd = SharedPagedData::new(stg); 45 | let wapd = AccessPagedData::new_writer(spd.clone()); 46 | let db = Database::new(wapd, "", bmap.clone()); 47 | 48 | let mut tr = GenTransaction::default(); 49 | 50 | let sql = " 51 | CREATE SCHEMA test GO 52 | CREATE TABLE test.users (name string, age int) GO"; 53 | 54 | db.run(&sql, &mut tr); 55 | 56 | let sql = "DECLARE @i int SET @i = 8192 57 | WHILE @i > 0 58 | BEGIN 59 | INSERT INTO test.users(name,age) VALUES ('Alice', 1000) 60 | SET @i -= 1 61 | END"; 62 | 63 | db.run(&sql, &mut tr); 64 | 65 | let mut results = Vec::new(); 66 | for _outer in 0..100 { 67 | let start = std::time::Instant::now(); 68 | 69 | for _i in 0..10 { 70 | let sql = 71 | "DECLARE @total int FOR @total += age FROM test.users BEGIN END SELECT ''|@total"; 72 | let mut tr = GenTransaction::default(); 73 | db.run(&sql, 
&mut tr); 74 | assert_eq!(tr.rp.output, b"8192000"); 75 | } 76 | 77 | results.push(start.elapsed().as_micros() as u64); 78 | } 79 | print_results("rustdb_test", results); 80 | } 81 | 82 | #[test] 83 | fn rustdb_direct_test() { 84 | use crate::*; 85 | 86 | // let stg = AtomicFile::new(MemFile::new(), MemFile::new()); 87 | let stg = MemFile::new(); 88 | 89 | let mut bmap = BuiltinMap::default(); 90 | standard_builtins(&mut bmap); 91 | let bmap = Arc::new(bmap); 92 | 93 | let spd = SharedPagedData::new(stg); 94 | let wapd = AccessPagedData::new_writer(spd.clone()); 95 | 96 | let db = Database::new(wapd, "", bmap.clone()); 97 | 98 | let mut tr = GenTransaction::default(); 99 | 100 | let sql = " 101 | CREATE SCHEMA test GO 102 | CREATE TABLE test.users (name string, age int) GO"; 103 | 104 | db.run(&sql, &mut tr); 105 | 106 | let sql = "DECLARE @i int SET @i = 8192 107 | WHILE @i > 0 108 | BEGIN 109 | INSERT INTO test.users(name,age) VALUES ('Alice', 1000) 110 | SET @i -= 1 111 | END"; 112 | 113 | db.run(&sql, &mut tr); 114 | 115 | let mut results = Vec::new(); 116 | for _outer in 0..100 { 117 | let start = std::time::Instant::now(); 118 | for _i in 0..10 { 119 | let ut = db.table("test", "users"); 120 | assert!(data_kind(ut.info.typ[1]) == DataKind::Int); 121 | assert!(data_size(ut.info.typ[1]) == 8); 122 | let col_off = ut.info.off[1]; 123 | let mut total = 0; 124 | for (pp, off) in ut.scan(&db) { 125 | let p = &pp.borrow(); 126 | // let a = ut.access(p, off); total += a.int(1); 127 | total += util::iget(&p.data, off + col_off, 8); 128 | } 129 | assert_eq!(total, 8192000); 130 | } 131 | results.push(start.elapsed().as_micros() as u64); 132 | } 133 | print_results("rustdb_direct_test", results); 134 | } 135 | 136 | #[cfg(test)] 137 | pub fn print_results(name: &str, mut results: Vec) { 138 | results.sort(); 139 | let n = results.len() / 10; 140 | let results = &results[0..n]; 141 | let mut total = 0; 142 | for result in results { 143 | total += result; 144 | } 145 | 
println!( 146 | "{} average time={} sorted results={:?}", 147 | name, 148 | total / (n as u64), 149 | results 150 | ); 151 | } 152 | -------------------------------------------------------------------------------- /src/block.rs: -------------------------------------------------------------------------------- 1 | use crate::{util, Arc, BTreeSet, Data, Storage}; 2 | use std::cmp::min; 3 | 4 | /// Magic Value ( first word of file for version check). 5 | const MAGIC: u64 = u64::from_le_bytes(*b"RDBV1.07"); 6 | 7 | /// Reserved area for client. 8 | pub const RSVD_SIZE: usize = 16; 9 | 10 | /// Size of file header. 11 | const HSIZE: u64 = 48 + RSVD_SIZE as u64; 12 | 13 | /// Manages allocation and deallocation of numbered relocatable fixed size blocks from underlying Storage. 14 | /// 15 | /// Blocks are numbered. A map of the location of each block is kept at the start of the storage (after the header). 16 | /// 17 | /// Blocks can be relocated by adjusting the map entry to point to the new location. 18 | /// 19 | /// On save, the set of free block numbers is processed and any associated blocks are freed. 20 | /// 21 | /// When a block is freed, the last block is relocated to fill it. 22 | 23 | pub struct BlockStg { 24 | stg: Box, 25 | bn_count: u64, // Number of block numbers. 26 | blk_count: u64, // Number of blocks. 27 | first_blk: u64, // First block. 28 | first_free: u64, // First free block number. 29 | rsvd: [u8; RSVD_SIZE], 30 | free: BTreeSet, // Temporary set of free block numbers. 31 | header_dirty: bool, 32 | rsvd_dirty: bool, 33 | is_new: bool, 34 | nsz: usize, // Number of bytes for block number. 35 | blk_size: u64, // Block Size including block number for relocation. 36 | alloc_bit: u64, // Bit that indicates block info represents allocated page. 37 | } 38 | 39 | impl BlockStg { 40 | /// Block number mask. 41 | fn num_mask(&self) -> u64 { 42 | self.alloc_bit - 1 43 | } 44 | 45 | /// Construct BlockStg with specified underlying Storage and block capacity. 
46 | /// For existing file, block capacity will be read from file header. 47 | pub fn new(stg: Box, blk_cap: u64) -> Self { 48 | let is_new = stg.size() == 0; 49 | let blk_cap = if is_new { blk_cap } else { stg.read_u64(40) }; 50 | let bits = 64 - blk_cap.ilog(2) as usize; 51 | let nsz = (bits + 8) / 8; // Number of bytes for block number, plus an extra bit (for self.alloc_bit). 52 | let blk_size = blk_cap + nsz as u64; 53 | let alloc_bit = 1 << (nsz * 8 - 1); 54 | let hblks = (HSIZE + blk_size - 1) / blk_size; // Blocks required for file header. 55 | 56 | let mut x = Self { 57 | stg, 58 | bn_count: 0, 59 | blk_count: hblks, 60 | first_blk: hblks, 61 | first_free: alloc_bit - 1, 62 | rsvd: [0; RSVD_SIZE], 63 | free: BTreeSet::default(), 64 | header_dirty: false, 65 | rsvd_dirty: false, 66 | is_new, 67 | nsz, 68 | blk_size, 69 | alloc_bit, 70 | }; 71 | if is_new { 72 | x.stg.write_u64(0, MAGIC); 73 | x.write_header(); 74 | } else { 75 | assert!( 76 | x.stg.read_u64(0) == MAGIC, 77 | "Database File Invalid (maybe wrong version)" 78 | ); 79 | x.read_header(); 80 | } 81 | #[cfg(feature = "log")] 82 | println!( 83 | "BlockStg::new block size={} allocated={} first={}", 84 | x.blk_size, 85 | x.blk_count - x.first_blk, 86 | x.first_blk 87 | ); 88 | x 89 | } 90 | 91 | /// Get the block capacity. 92 | pub fn blk_cap(&self) -> u64 { 93 | self.blk_size - self.nsz as u64 94 | } 95 | 96 | /// Get size of a block number in bytes. 97 | pub fn nsz(&self) -> usize { 98 | self.nsz 99 | } 100 | 101 | /// Is this new storage. 102 | pub fn is_new(&self) -> bool { 103 | self.is_new 104 | } 105 | 106 | /// Allocate a new block number. 
107 | pub fn new_block(&mut self) -> u64 { 108 | if let Some(bn) = self.free.pop_first() { 109 | bn 110 | } else { 111 | let mut bn = self.first_free; 112 | if bn != self.num_mask() { 113 | self.first_free = self.get_binfo(bn); 114 | } else { 115 | bn = self.bn_count; 116 | self.bn_count += 1; 117 | } 118 | self.header_dirty = true; 119 | bn 120 | } 121 | } 122 | 123 | /// Release a block number. 124 | pub fn drop_block(&mut self, bn: u64) { 125 | debug_assert!(!self.free.contains(&bn)); // Not a comprehensive check as bn could be in free chain. 126 | self.free.insert(bn); 127 | } 128 | 129 | /// Set numbered block/offset to specified data. 130 | pub fn set(&mut self, bn: u64, offset: u64, data: &[u8]) { 131 | let n = data.len(); 132 | let data = Arc::new(data.to_vec()); 133 | self.set_data(bn, offset, data, 0, n); 134 | } 135 | 136 | /// Set numbered block/offset to specified slice of Data. 137 | pub fn set_data(&mut self, bn: u64, offset: u64, data: Data, s: usize, n: usize) { 138 | debug_assert!(!self.free.contains(&bn)); 139 | 140 | self.expand_binfo(bn); 141 | let mut pb = self.get_binfo(bn); 142 | if pb & self.alloc_bit == 0 { 143 | pb = self.blk_count; 144 | self.blk_count += 1; 145 | 146 | self.header_dirty = true; 147 | self.set_binfo(bn, self.alloc_bit | pb); 148 | // Write block number at start of block, to allow relocation. 149 | self.set_num(pb * self.blk_size, bn); 150 | } 151 | pb &= self.num_mask(); 152 | debug_assert!(self.nsz as u64 + offset + n as u64 <= self.blk_size); 153 | let offset = pb * self.blk_size + self.nsz as u64 + offset; 154 | self.stg.write_data(offset, data, s, n); 155 | } 156 | 157 | /// Get data from specified numbered block and offset. 
    /// Read data from the specified numbered block, starting at `offset` within the block.
    /// If the block is not currently allocated, `data` is left unchanged.
    pub fn get(&self, bn: u64, offset: u64, data: &mut [u8]) {
        debug_assert!(!self.free.contains(&bn), "bn={}", bn);

        let pb = self.get_binfo(bn);
        if pb & self.alloc_bit != 0 {
            // Strip the allocation bit, leaving the physical block number.
            let pb = pb & self.num_mask();
            // Bytes available after the block-number prefix (nsz bytes) and offset.
            let avail = self.blk_size - (self.nsz as u64 + offset);
            let n = min(data.len(), avail as usize);
            self.stg.read(
                pb * self.blk_size + self.nsz as u64 + offset,
                &mut data[0..n],
            );
        }
    }

    /// Set the reserved area in the storage header.
    pub fn set_rsvd(&mut self, rsvd: [u8; RSVD_SIZE]) {
        self.rsvd = rsvd;
        self.rsvd_dirty = true;
        self.header_dirty = true;
    }

    /// Get the reserved area from the storage header.
    pub fn get_rsvd(&self) -> [u8; RSVD_SIZE] {
        self.rsvd
    }

    /// Save changes to underlying storage.
    ///
    /// First the freed block numbers are pushed onto the free chain and their
    /// physical blocks collected; then the file is compacted by moving blocks
    /// down from the end into the freed slots; finally the header (if dirty)
    /// is written and the result committed.
    pub fn save(&mut self) {
        // Process the set of freed page numbers, adding any associated blocks to a map of free blocks.
        let flist = std::mem::take(&mut self.free);
        let mut free_blocks = BTreeSet::default();
        for bn in flist.iter().rev() {
            let bn = *bn;
            let info = self.get_binfo(bn);
            if info & self.alloc_bit != 0 {
                let pb = info & self.num_mask();
                free_blocks.insert(pb);
            }
            // Link bn into the chain of free block numbers.
            self.set_binfo(bn, self.first_free);
            self.first_free = bn;
            self.header_dirty = true;
        }

        // Relocate blocks from end of file to fill free blocks.
        while !free_blocks.is_empty() {
            self.blk_count -= 1;
            self.header_dirty = true;
            let last = self.blk_count;
            // If the last block is not a free block, relocate it using a free block.
            if !free_blocks.remove(&last) {
                let to = free_blocks.pop_first().unwrap();
                self.relocate(last, to);
            }
        }

        if self.header_dirty {
            self.write_header();
            self.header_dirty = false;
        }

        #[cfg(feature = "log")]
        println!(
            "BlockStg::save allocated blocks={}",
            self.blk_count - self.first_blk
        );

        self.stg.commit(self.blk_count * self.blk_size);
    }

    /// Wait for save to complete.
    pub fn wait_complete(&self) {
        self.stg.wait_complete();
    }

    /// Write header fields to underlying storage.
    /// Layout: blk_count @8, bn_count @16, first_free @24, first_blk @32,
    /// blk_cap @40, reserved area @48.
    fn write_header(&mut self) {
        self.stg.write_u64(8, self.blk_count);
        self.stg.write_u64(16, self.bn_count);
        self.stg.write_u64(24, self.first_free);
        self.stg.write_u64(32, self.first_blk);
        self.stg.write_u64(40, self.blk_cap());
        if self.rsvd_dirty {
            self.stg.write(48, &self.rsvd);
            self.rsvd_dirty = false;
        }
    }

    /// Read the header fields from underlying storage.
    /// NOTE(review): blk_cap (offset 40) is written by write_header but not
    /// read here — presumably it is read during construction; confirm.
    fn read_header(&mut self) {
        self.blk_count = self.stg.read_u64(8);
        self.bn_count = self.stg.read_u64(16);
        self.first_free = self.stg.read_u64(24);
        self.first_blk = self.stg.read_u64(32);
        self.stg.read(48, &mut self.rsvd);
    }

    /// Relocate block, from and to are block numbers.
    /// The first nsz bytes of an allocated block hold its logical block
    /// number, which is used here to update the block map after the move.
    fn relocate(&mut self, from: u64, to: u64) {
        if from == to {
            return;
        }

        let mut buf = vec![0; self.blk_size as usize];
        self.stg.read(from * self.blk_size, &mut buf);

        let bn = util::get(&buf, 0, self.nsz);

        debug_assert_eq!(self.get_binfo(bn), self.alloc_bit | from);

        self.set_binfo(bn, self.alloc_bit | to);
        self.stg.write_vec(to * self.blk_size, buf);
    }

    /// Expand the map to accomodate the specified block number.
    /// Any data block occupying space needed by the map is first relocated
    /// to a fresh block at the end of the file.
    fn expand_binfo(&mut self, bn: u64) {
        let target = HSIZE + (bn + 1) * self.nsz as u64;
        while target > self.first_blk * self.blk_size {
            self.relocate(self.first_blk, self.blk_count);
            self.clear_block(self.first_blk);
            self.first_blk += 1;
            self.blk_count += 1;
            self.header_dirty = true;
        }
    }

    /// Fill the specified block with zeroes.
    fn clear_block(&mut self, pb: u64) {
        let buf = vec![0; self.blk_size as usize];
        self.stg.write_vec(pb * self.blk_size, buf);
    }

    /// Set the value associated with the specified block number,
    /// expanding the map first if necessary.
    fn set_binfo(&mut self, bn: u64, value: u64) {
        self.expand_binfo(bn);
        let off = HSIZE + bn * self.nsz as u64;
        self.set_num(off, value);
    }

    /// Get the value associated with the specified block number.
    /// Entries beyond the current extent of the map read as zero.
    fn get_binfo(&self, bn: u64) -> u64 {
        let off = HSIZE + bn * self.nsz as u64;
        if off + self.nsz as u64 > self.first_blk * self.blk_size {
            return 0;
        }
        self.get_num(off)
    }

    /// Write number to specified offset in underlying storage ( nsz bytes, little-endian ).
    fn set_num(&mut self, offset: u64, num: u64) {
        self.stg.write(offset, &num.to_le_bytes()[0..self.nsz]);
        debug_assert_eq!(self.get_num(offset), num);
    }

    /// Read number from specified offset in underlying storage.
313 | fn get_num(&self, offset: u64) -> u64 { 314 | let mut bytes = [0; 8]; 315 | self.stg.read(offset, &mut bytes[0..self.nsz]); 316 | u64::from_le_bytes(bytes) 317 | } 318 | } 319 | 320 | #[test] 321 | fn block_test() { 322 | let blk_cap = 10000; 323 | let data = b"hello there"; 324 | let stg = crate::MemFile::new(); 325 | let mut bf = BlockStg::new(stg.clone(), blk_cap); 326 | let bnx = bf.new_block(); 327 | let bny = bf.new_block(); 328 | let bn = bf.new_block(); 329 | 330 | bf.set(bnx, 2, data); 331 | bf.set(bny, 1, data); 332 | bf.set(bn, 0, data); 333 | let mut buf = vec![0; data.len()]; 334 | bf.get(bn, 0, &mut buf); 335 | assert_eq!(&buf, &data); 336 | 337 | bf.drop_block(bnx); 338 | bf.drop_block(bny); 339 | 340 | bf.save(); 341 | 342 | let bf = BlockStg::new(stg.clone(), blk_cap); 343 | let mut buf = vec![0; data.len()]; 344 | bf.get(bn, 0, &mut buf); 345 | assert_eq!(&buf, &data); 346 | } 347 | -------------------------------------------------------------------------------- /src/blockpagestg.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | dividedstg::{DividedStg, FD, FD_SIZE}, 3 | nd, util, Arc, BTreeSet, Data, Limits, PageStorage, PageStorageInfo, Storage, 4 | }; 5 | 6 | /// Implementation of [PageStorage] using [DividedStg]. 7 | 8 | /* 9 | 10 | File 0 (PN_FILE) has a header ( allocation info and FDs ) then info for each numbered page, a 16-bit size and index into sub-file. 11 | 12 | First word of allocated page is 64-bit page number ( to allow relocation ). 13 | 14 | */ 15 | 16 | pub struct BlockPageStg { 17 | /// Underlying Divided Storage. 18 | pub ds: DividedStg, 19 | alloc_pn: u64, 20 | first_free_pn: u64, 21 | fd: Vec, 22 | free_pn: BTreeSet, // Temporary set of free page numbers. 
23 | header_dirty: bool, 24 | is_new: bool, 25 | psi: SizeInfo, 26 | header_size: u64, 27 | zbytes: Data, 28 | } 29 | 30 | const PN_FILE: usize = 0; // Page number sub-file, has header and info (size,index) for each numbered page. 31 | const NOT_PN: u64 = u64::MAX >> 16; // Special value to denote end of list of free page numbers. 32 | const PAGE_HSIZE: usize = 8; // Space for 64-bit page number to allow page to be relocated. 33 | const HEADER_SIZE: usize = 24; // Space in PN_FILE for storing alloc_pn, first_free_pn, max_div, sizes. 34 | 35 | impl BlockPageStg { 36 | /// Construct from specified Storage and limits. 37 | pub fn new(stg: Box, lim: &Limits) -> Box { 38 | let is_new = stg.size() == 0; 39 | 40 | let sizes = lim.page_sizes; 41 | let max_div = lim.max_div; 42 | let ds = DividedStg::new(stg, lim.blk_cap); 43 | let blk_cap = ds.blk_cap as usize; 44 | 45 | let mut s = Self { 46 | ds, 47 | alloc_pn: 0, 48 | first_free_pn: NOT_PN, 49 | fd: Vec::new(), 50 | free_pn: BTreeSet::default(), 51 | header_dirty: true, 52 | is_new, 53 | psi: SizeInfo { 54 | blk_cap, 55 | max_div, 56 | sizes, 57 | }, 58 | header_size: 0, 59 | zbytes: nd(), 60 | }; 61 | 62 | if is_new { 63 | for _i in 0..sizes + 1 { 64 | s.fd.push(s.ds.new_file()); 65 | } 66 | s.ds.set_root(&s.fd[0]); 67 | } else { 68 | s.read_header(); 69 | } 70 | 71 | // Page sizes are assumed to fit in u16. 72 | assert!( 73 | s.psi.max_size_page() <= u16::MAX as usize, 74 | "Max page size is 65535" 75 | ); 76 | 77 | s.header_size = (HEADER_SIZE + s.psi.sizes * FD_SIZE) as u64; 78 | s.zbytes = Arc::new(vec![0; s.psi.max_size_page()]); 79 | 80 | #[cfg(feature = "log")] 81 | println!("bps new alloc={:?}", &s.allocs()); 82 | 83 | Box::new(s) 84 | } 85 | 86 | /// Read page number file header. 
87 | fn read_header(&mut self) { 88 | self.fd.clear(); 89 | self.fd.push(self.ds.get_root()); 90 | 91 | let mut buf = [0; HEADER_SIZE]; 92 | self.read(PN_FILE, 0, &mut buf); 93 | self.alloc_pn = util::getu64(&buf, 0); 94 | self.first_free_pn = util::getu64(&buf, 8); 95 | 96 | self.psi.max_div = util::get(&buf, 16, 4) as usize; 97 | self.psi.sizes = util::get(&buf, 20, 4) as usize; 98 | 99 | let sizes = self.psi.sizes; 100 | let mut buf = vec![0; FD_SIZE * sizes]; 101 | self.read(PN_FILE, HEADER_SIZE as u64, &mut buf); 102 | 103 | for fx in 0..sizes { 104 | let off = fx * FD_SIZE; 105 | self.fd.push(self.ds.load_fd(&buf[off..])); 106 | } 107 | self.header_dirty = false; 108 | } 109 | 110 | /// Write page number file header. 111 | fn write_header(&mut self) { 112 | let mut buf = vec![0; self.header_size as usize]; 113 | util::setu64(&mut buf, self.alloc_pn); 114 | util::setu64(&mut buf[8..], self.first_free_pn); 115 | util::set(&mut buf, 16, self.psi.max_div as u64, 4); 116 | util::set(&mut buf, 20, self.psi.sizes as u64, 4); 117 | 118 | for fx in 0..self.psi.sizes { 119 | let off = HEADER_SIZE + fx * FD_SIZE; 120 | self.ds.save_fd(&self.fd[fx + 1], &mut buf[off..]); 121 | } 122 | self.write(PN_FILE, 0, &buf); 123 | self.header_dirty = false; 124 | 125 | #[cfg(feature = "log")] 126 | println!("bps write_header allocs={:?}", &self.allocs()); 127 | } 128 | 129 | #[cfg(feature = "log")] 130 | fn allocs(&self) -> Vec { 131 | (0..self.psi.sizes() + 1).map(|x| self.alloc(x)).collect() 132 | } 133 | 134 | /// Get page size of sub-file ( fx > 0 ). 135 | fn page_size(&self, fx: usize) -> u64 { 136 | (self.psi.size(fx) + PAGE_HSIZE) as u64 137 | } 138 | 139 | /// Get sub-file size. 140 | fn fsize(&self, fx: usize) -> u64 { 141 | let size = self.fd[fx].size(); 142 | if fx == 0 && size < self.header_size { 143 | self.header_size 144 | } else { 145 | size 146 | } 147 | } 148 | 149 | /// Use sub-file size to calculate allocation index. 
150 | fn alloc(&self, fx: usize) -> u64 { 151 | let size = self.fsize(fx); 152 | if fx == 0 { 153 | (size - self.header_size) / 8 154 | } else { 155 | let ps = self.page_size(fx); 156 | (size + ps - 1) / ps 157 | } 158 | } 159 | 160 | /// Free page by relocating last page in sub-file to fill gap and truncating. 161 | fn free_page(&mut self, fx: usize, ix: u64) { 162 | if fx != 0 { 163 | let last = self.alloc(fx) - 1; 164 | let ps = self.page_size(fx); 165 | if last != ix { 166 | let mut buf = vec![0; ps as usize]; 167 | self.read(fx, last * ps, &mut buf); 168 | let pn = util::getu64(&buf, 0); 169 | let (fx1, _size, ix1) = self.get_pn_info(pn); 170 | assert!(fx1 == fx && ix1 == last); 171 | self.update_ix(pn, ix); 172 | self.write_data(fx, ix * ps, Arc::new(buf)); 173 | } 174 | self.truncate(fx, last * ps); 175 | } 176 | } 177 | 178 | /// Set numbered page info. 179 | fn set_pn_info(&mut self, pn: u64, size: usize, ix: u64) { 180 | let off = self.header_size + pn * 8; 181 | let eof = self.fsize(PN_FILE); 182 | if off > eof { 183 | self.clear(PN_FILE, eof, off - eof); 184 | } 185 | let mut buf = [0; 8]; 186 | util::set(&mut buf, 0, ix, 6); 187 | util::set(&mut buf, 6, size as u64, 2); 188 | self.write(PN_FILE, off, &buf); 189 | } 190 | 191 | /// Get info about numbered page ( file index, size, index ). 192 | fn get_pn_info(&self, pn: u64) -> (usize, usize, u64) { 193 | let off = self.header_size + pn * 8; 194 | if off >= self.fsize(0) { 195 | return (0, 0, 0); 196 | } 197 | let mut buf = [0; 8]; 198 | self.read(PN_FILE, off, &mut buf); 199 | let ix = util::get(&buf, 0, 6); 200 | let size = util::get(&buf, 6, 2) as usize; 201 | let fx = if size == 0 { 0 } else { self.psi.index(size) }; 202 | (fx, size, ix) 203 | } 204 | 205 | /// Update ix for numbered page ( for relocation ). 
206 | fn update_ix(&mut self, pn: u64, ix: u64) { 207 | let off = self.header_size + pn * 8; 208 | self.write(PN_FILE, off, &ix.to_le_bytes()[0..6]); 209 | } 210 | 211 | /// Clear sub-file region. 212 | fn clear(&mut self, fx: usize, off: u64, n: u64) { 213 | let z = Arc::new(vec![0; n as usize]); 214 | self.write_data(fx, off, z); 215 | } 216 | 217 | /// Write sub-file. 218 | fn write(&mut self, fx: usize, off: u64, data: &[u8]) { 219 | let data = Arc::new(data.to_vec()); 220 | self.write_data(fx, off, data); 221 | } 222 | 223 | /// Write sub-file Data. 224 | fn write_data(&mut self, fx: usize, off: u64, data: Data) { 225 | let n = data.len(); 226 | self.write_data_n(fx, off, data, n); 227 | } 228 | 229 | /// Write sub-file Data up to n bytes. 230 | fn write_data_n(&mut self, fx: usize, off: u64, data: Data, n: usize) { 231 | self.ds.write_data(&mut self.fd[fx], off, data, n); 232 | self.save_fd(fx); 233 | } 234 | 235 | /// Truncate sub-file. 236 | fn truncate(&mut self, fx: usize, off: u64) { 237 | self.ds.truncate(&mut self.fd[fx], off); 238 | self.save_fd(fx); 239 | } 240 | 241 | /// Save sub-file descriptor after write or truncate operation. 242 | fn save_fd(&mut self, fx: usize) { 243 | let fd = &mut self.fd[fx]; 244 | if fd.changed { 245 | fd.changed = false; 246 | self.header_dirty = true; 247 | if fx == 0 { 248 | self.ds.set_root(fd); 249 | } 250 | } 251 | } 252 | 253 | /// Read sub-file. 
254 | fn read(&self, fx: usize, off: u64, data: &mut [u8]) { 255 | self.ds.read(&self.fd[fx], off, data); 256 | } 257 | } 258 | 259 | impl PageStorage for BlockPageStg { 260 | fn is_new(&self) -> bool { 261 | self.is_new 262 | } 263 | 264 | fn new_page(&mut self) -> u64 { 265 | if let Some(pn) = self.free_pn.pop_first() { 266 | pn 267 | } else { 268 | self.header_dirty = true; 269 | let pn = self.first_free_pn; 270 | if pn != NOT_PN { 271 | let (_fx, _size, next) = self.get_pn_info(pn); 272 | self.first_free_pn = next; 273 | pn 274 | } else { 275 | let pn = self.alloc_pn; 276 | self.alloc_pn += 1; 277 | pn 278 | } 279 | } 280 | } 281 | 282 | fn drop_page(&mut self, pn: u64) { 283 | self.free_pn.insert(pn); 284 | } 285 | 286 | fn info(&self) -> Box { 287 | Box::new(self.psi.clone()) 288 | } 289 | 290 | fn set_page(&mut self, pn: u64, data: Data) { 291 | let size = data.len(); 292 | let fx = self.psi.index(size); 293 | let ps = self.page_size(fx); 294 | let (old_fx, mut old_size, mut ix) = self.get_pn_info(pn); 295 | if size != old_size { 296 | if fx != old_fx { 297 | self.free_page(old_fx, ix); 298 | ix = self.alloc(fx); 299 | old_size = ps as usize - PAGE_HSIZE; 300 | self.write(fx, ix * ps, &pn.to_le_bytes()); 301 | self.set_pn_info(pn, size, ix); 302 | } 303 | self.set_pn_info(pn, size, ix); 304 | } 305 | 306 | let off = PAGE_HSIZE as u64 + ix * ps; 307 | self.write_data_n(fx, off, data, size); 308 | 309 | // Clear unused space in page. 
310 | if old_size > size { 311 | self.write_data_n(fx, off + size as u64, self.zbytes.clone(), old_size - size); 312 | } 313 | } 314 | 315 | fn get_page(&self, pn: u64) -> Data { 316 | let (fx, size, ix) = self.get_pn_info(pn); 317 | if fx == 0 { 318 | return nd(); 319 | } 320 | let mut data = vec![0; size]; 321 | let off = PAGE_HSIZE as u64 + ix * self.page_size(fx); 322 | self.read(fx, off, &mut data); 323 | Arc::new(data) 324 | } 325 | 326 | fn size(&self, pn: u64) -> usize { 327 | self.get_pn_info(pn).1 328 | } 329 | 330 | fn save(&mut self) { 331 | // Free the temporary set of free logical pages. 332 | let flist = std::mem::take(&mut self.free_pn); 333 | for pn in flist.iter().rev() { 334 | let pn = *pn; 335 | let (fx, _size, ix) = self.get_pn_info(pn); 336 | self.free_page(fx, ix); 337 | self.set_pn_info(pn, 0, self.first_free_pn); 338 | self.first_free_pn = pn; 339 | self.header_dirty = true; 340 | } 341 | if self.header_dirty { 342 | self.write_header(); 343 | } 344 | self.ds.save(); 345 | } 346 | 347 | fn rollback(&mut self) { 348 | self.free_pn.clear(); 349 | self.read_header(); 350 | } 351 | 352 | fn wait_complete(&self) { 353 | self.ds.wait_complete(); 354 | } 355 | 356 | #[cfg(feature = "verify")] 357 | fn get_free(&mut self) -> (crate::HashSet, u64) { 358 | let mut free = crate::HashSet::default(); 359 | let mut pn = self.first_free_pn; 360 | while pn != NOT_PN { 361 | assert!(free.insert(pn)); 362 | let (_fx, _size, next) = self.get_pn_info(pn); 363 | pn = next; 364 | } 365 | (free, self.alloc_pn) 366 | } 367 | 368 | #[cfg(feature = "renumber")] 369 | fn load_free_pages(&mut self) -> Option { 370 | let mut pn = self.first_free_pn; 371 | if pn == NOT_PN { 372 | return None; 373 | } 374 | while pn != NOT_PN { 375 | let (_sx, _size, next) = self.get_pn_info(pn); 376 | self.drop_page(pn); 377 | pn = next; 378 | } 379 | self.first_free_pn = NOT_PN; 380 | self.header_dirty = true; 381 | Some(self.alloc_pn - self.free_pn.len() as u64) 382 | } 383 | 384 | 
#[cfg(feature = "renumber")] 385 | fn renumber(&mut self, pn: u64) -> u64 { 386 | let new_pn = self.new_page(); 387 | let (fx, size, ix) = self.get_pn_info(pn); 388 | if fx != 0 { 389 | let off = ix * self.page_size(fx); 390 | self.write(fx, off, &new_pn.to_le_bytes()); 391 | } 392 | self.set_pn_info(new_pn, size, ix); 393 | self.set_pn_info(pn, 0, 0); 394 | self.drop_page(pn); 395 | new_pn 396 | } 397 | 398 | #[cfg(feature = "renumber")] 399 | fn set_alloc_pn(&mut self, target: u64) { 400 | assert!(self.first_free_pn == NOT_PN); 401 | self.alloc_pn = target; 402 | self.header_dirty = true; 403 | self.free_pn.clear(); 404 | self.truncate(PN_FILE, self.header_size + target * 8); 405 | } 406 | } 407 | 408 | #[derive(Clone)] 409 | struct SizeInfo { 410 | blk_cap: usize, 411 | max_div: usize, 412 | sizes: usize, 413 | } 414 | 415 | impl PageStorageInfo for SizeInfo { 416 | /// The number of different page sizes. 417 | fn sizes(&self) -> usize { 418 | self.sizes 419 | } 420 | 421 | /// Size index for given page size. 422 | fn index(&self, size: usize) -> usize { 423 | let r = self.blk_cap / (size + PAGE_HSIZE); 424 | if r >= self.max_div { 425 | 1 426 | } else { 427 | 1 + self.max_div - r 428 | } 429 | } 430 | 431 | /// Page size for given index. 
432 | fn size(&self, ix: usize) -> usize { 433 | debug_assert!(ix > 0 && ix <= self.sizes); 434 | let size = self.blk_cap / (1 + self.max_div - ix); 435 | size - PAGE_HSIZE 436 | } 437 | } 438 | 439 | #[test] 440 | fn test_block_page_stg() { 441 | let stg = crate::MemFile::new(); 442 | let limits = Limits::default(); 443 | let mut bps = BlockPageStg::new(stg.clone(), &limits); 444 | 445 | let pn = bps.new_page(); 446 | let data = Arc::new(b"hello george".to_vec()); 447 | 448 | bps.set_page(pn, data.clone()); 449 | 450 | bps.save(); 451 | let mut bps = BlockPageStg::new(stg, &limits); 452 | 453 | let data1 = bps.get_page(pn); 454 | assert!(data == data1); 455 | 456 | bps.save(); 457 | } 458 | -------------------------------------------------------------------------------- /src/buf.rs: -------------------------------------------------------------------------------- 1 | use crate::{stg::Storage, HashMap, Mutex}; 2 | use std::cmp::min; 3 | 4 | /// Write Buffer. 5 | pub struct WriteBuffer { 6 | ix: usize, 7 | pos: u64, 8 | /// Underlying storage. 9 | pub stg: Box, 10 | buf: Vec, 11 | #[cfg(feature = "log")] 12 | log: Log, 13 | } 14 | 15 | #[cfg(feature = "log")] 16 | struct Log { 17 | write: u64, 18 | flush: u64, 19 | total: u64, 20 | first_flush_time: std::time::Instant, 21 | } 22 | 23 | impl WriteBuffer { 24 | /// Construct. 
25 | pub fn new(stg: Box, buf_size: usize) -> Self { 26 | Self { 27 | ix: 0, 28 | pos: u64::MAX, 29 | stg, 30 | buf: vec![0; buf_size], 31 | #[cfg(feature = "log")] 32 | log: Log { 33 | write: 0, 34 | flush: 0, 35 | total: 0, 36 | first_flush_time: std::time::Instant::now(), 37 | }, 38 | } 39 | } 40 | 41 | /// Write data to specified offset, 42 | pub fn write(&mut self, off: u64, data: &[u8]) { 43 | if self.pos + self.ix as u64 != off { 44 | self.flush(off); 45 | } 46 | let mut done: usize = 0; 47 | let mut todo: usize = data.len(); 48 | #[cfg(feature = "log")] 49 | { 50 | self.log.write += 1; 51 | self.log.total += todo as u64; 52 | } 53 | while todo > 0 { 54 | let mut n: usize = self.buf.len() - self.ix; 55 | if n == 0 { 56 | self.flush(off + done as u64); 57 | n = self.buf.len(); 58 | } 59 | if n > todo { 60 | n = todo; 61 | } 62 | self.buf[self.ix..self.ix + n].copy_from_slice(&data[done..done + n]); 63 | todo -= n; 64 | done += n; 65 | self.ix += n; 66 | } 67 | } 68 | 69 | fn flush(&mut self, new_pos: u64) { 70 | if self.ix > 0 { 71 | self.stg.write(self.pos, &self.buf[0..self.ix]); 72 | #[cfg(feature = "log")] 73 | { 74 | if self.log.flush == 0 { 75 | self.log.first_flush_time = std::time::Instant::now(); 76 | } 77 | self.log.flush += 1; 78 | } 79 | } 80 | self.ix = 0; 81 | self.pos = new_pos; 82 | } 83 | 84 | /// Commit. 85 | pub fn commit(&mut self, size: u64) { 86 | self.flush(u64::MAX); 87 | self.stg.commit(size); 88 | #[cfg(feature = "log")] 89 | { 90 | if size > 0 { 91 | println!( 92 | "WriteBuffer commit size={size} write={} flush={} total={} time(micros)={}", 93 | self.log.write, 94 | self.log.flush, 95 | self.log.total, 96 | self.log.first_flush_time.elapsed().as_micros() 97 | ); 98 | } 99 | self.log.write = 0; 100 | self.log.flush = 0; 101 | self.log.total = 0; 102 | } 103 | } 104 | 105 | /// Write u64. 
106 | pub fn write_u64(&mut self, start: u64, value: u64) { 107 | self.write(start, &value.to_le_bytes()); 108 | } 109 | } 110 | 111 | /// ReadBufStg buffers small (up to limit) reads to the underlying storage using multiple buffers. Only supported functions are read and reset. 112 | /// 113 | /// See implementation of AtomicFile for how this is used in conjunction with WMap. 114 | /// 115 | /// N is buffer size. 116 | 117 | pub struct ReadBufStg { 118 | stg: Box, 119 | buf: Mutex>, 120 | limit: usize, 121 | } 122 | 123 | impl Drop for ReadBufStg { 124 | fn drop(&mut self) { 125 | self.reset(); 126 | } 127 | } 128 | 129 | impl ReadBufStg { 130 | /// limit is the size of a read that is considered "small", max_buf is the maximum number of buffers used. 131 | pub fn new(stg: Box, limit: usize, max_buf: usize) -> Box { 132 | Box::new(Self { 133 | stg, 134 | buf: Mutex::new(ReadBuffer::::new(max_buf)), 135 | limit, 136 | }) 137 | } 138 | } 139 | 140 | impl Storage for ReadBufStg { 141 | /// Read data from storage. 142 | fn read(&self, start: u64, data: &mut [u8]) { 143 | if data.len() <= self.limit { 144 | self.buf.lock().unwrap().read(&*self.stg, start, data); 145 | } else { 146 | self.stg.read(start, data); 147 | } 148 | } 149 | 150 | /// Clears the buffers. 151 | fn reset(&mut self) { 152 | self.buf.lock().unwrap().reset(); 153 | } 154 | 155 | /// Panics. 156 | fn size(&self) -> u64 { 157 | panic!() 158 | } 159 | 160 | /// Panics. 161 | fn write(&mut self, _start: u64, _data: &[u8]) { 162 | panic!(); 163 | } 164 | 165 | /// Panics. 
166 | fn commit(&mut self, _size: u64) { 167 | panic!(); 168 | } 169 | } 170 | 171 | struct ReadBuffer { 172 | map: HashMap>, 173 | max_buf: usize, 174 | reads: u64, 175 | } 176 | 177 | impl ReadBuffer { 178 | fn new(max_buf: usize) -> Self { 179 | Self { 180 | map: HashMap::default(), 181 | max_buf, 182 | reads: 0, 183 | } 184 | } 185 | 186 | fn reset(&mut self) { 187 | #[cfg(feature = "log")] 188 | println!( 189 | "ReadBuffer reset entries={} reads={}", 190 | self.map.len(), 191 | self.reads 192 | ); 193 | self.reads = 0; 194 | self.map.clear(); 195 | } 196 | 197 | fn read(&mut self, stg: &dyn Storage, off: u64, data: &mut [u8]) { 198 | let mut done = 0; 199 | while done < data.len() { 200 | let off = off + done as u64; 201 | let sector = off / N as u64; 202 | let disp = (off % N as u64) as usize; 203 | let amount = min(data.len() - done, N - disp); 204 | 205 | self.reads += 1; 206 | 207 | let p = self.map.entry(sector).or_insert_with(|| { 208 | let mut p: Box<[u8; N]> = vec![0; N].try_into().unwrap(); 209 | stg.read(sector * N as u64, &mut *p); 210 | p 211 | }); 212 | data[done..done + amount].copy_from_slice(&p[disp..disp + amount]); 213 | done += amount; 214 | } 215 | if self.map.len() >= self.max_buf { 216 | self.reset(); 217 | } 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/builtin.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | c_int, c_value, Block, BuiltinMap, CExp, CExpPtr, CompileFunc, DataKind, EvalEnv, Expr, Rc, 3 | Value, 4 | }; 5 | 6 | /// Add builtin functions to specified [BuiltinMap]. 
7 | pub fn standard_builtins(map: &mut BuiltinMap) { 8 | let list = [ 9 | ("ARG", DataKind::String, CompileFunc::Value(c_arg)), 10 | ("HEADER", DataKind::Int, CompileFunc::Int(c_header)), 11 | ("STATUSCODE", DataKind::Int, CompileFunc::Int(c_status_code)), 12 | ("FILEATTR", DataKind::String, CompileFunc::Value(c_fileattr)), 13 | ( 14 | "FILECONTENT", 15 | DataKind::Binary, 16 | CompileFunc::Value(c_filecontent), 17 | ), 18 | ("GLOBAL", DataKind::Int, CompileFunc::Int(c_global)), 19 | ("CONTAINS", DataKind::Int, CompileFunc::Int(c_contains)), 20 | ("REPLACE", DataKind::String, CompileFunc::Value(c_replace)), 21 | ( 22 | "SUBSTRING", 23 | DataKind::String, 24 | CompileFunc::Value(c_substring), 25 | ), 26 | ( 27 | "BINSUBSTRING", 28 | DataKind::Binary, 29 | CompileFunc::Value(c_binsubstring), 30 | ), 31 | ("LEN", DataKind::Int, CompileFunc::Int(c_len)), 32 | ("BINLEN", DataKind::Int, CompileFunc::Int(c_bin_len)), 33 | ("PARSEINT", DataKind::Int, CompileFunc::Int(c_parse_int)), 34 | ( 35 | "PARSEFLOAT", 36 | DataKind::Float, 37 | CompileFunc::Float(c_parse_float), 38 | ), 39 | ( 40 | "EXCEPTION", 41 | DataKind::String, 42 | CompileFunc::Value(c_exception), 43 | ), 44 | ("LASTID", DataKind::Int, CompileFunc::Int(c_lastid)), 45 | ("ALLOCPAGE", DataKind::Int, CompileFunc::Int(c_allocpage)), 46 | #[cfg(feature = "pack")] 47 | ("REPACKFILE", DataKind::Int, CompileFunc::Int(c_repackfile)), 48 | #[cfg(feature = "verify")] 49 | ("VERIFYDB", DataKind::String, CompileFunc::Value(c_verifydb)), 50 | #[cfg(feature = "renumber")] 51 | ("RENUMBER", DataKind::Int, CompileFunc::Int(c_renumber)), 52 | ("BINTOSTR", DataKind::String, CompileFunc::Value(c_bintostr)), 53 | ]; 54 | for (name, typ, cf) in list { 55 | map.insert(name.to_string(), (typ, cf)); 56 | } 57 | } 58 | /// Check number and kinds of arguments. 
59 | pub fn check_types(b: &Block, args: &mut [Expr], dk: &[DataKind]) { 60 | if args.len() != dk.len() { 61 | panic!("wrong number of args"); 62 | } 63 | for (i, e) in args.iter_mut().enumerate() { 64 | let k = b.kind(e); 65 | if k != dk[i] { 66 | panic!( 67 | "Builtin function arg {} type mismatch expected {:?} got {:?}", 68 | i + 1, 69 | dk[i], 70 | k 71 | ); 72 | } 73 | } 74 | } 75 | ///////////////////////////// 76 | /// Compile call to EXCEPTION(). 77 | fn c_exception(b: &Block, args: &mut [Expr]) -> CExpPtr { 78 | check_types(b, args, &[]); 79 | Box::new(Exception {}) 80 | } 81 | struct Exception {} 82 | impl CExp for Exception { 83 | fn eval(&self, e: &mut EvalEnv, _d: &[u8]) -> Value { 84 | let err = e.tr.get_error(); 85 | Value::String(Rc::new(err)) 86 | } 87 | } 88 | ///////////////////////////// 89 | /// Compile call to LEN. 90 | fn c_len(b: &Block, args: &mut [Expr]) -> CExpPtr { 91 | check_types(b, args, &[DataKind::String]); 92 | let s = c_value(b, &mut args[0]); 93 | Box::new(Len { s }) 94 | } 95 | struct Len { 96 | s: CExpPtr, 97 | } 98 | impl CExp for Len { 99 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> i64 { 100 | let s = self.s.eval(e, d).str(); 101 | s.len() as i64 102 | } 103 | } 104 | ///////////////////////////// 105 | /// Compile call to BINLEN. 106 | fn c_bin_len(b: &Block, args: &mut [Expr]) -> CExpPtr { 107 | check_types(b, args, &[DataKind::Binary]); 108 | let bv = c_value(b, &mut args[0]); 109 | Box::new(BinLen { bv }) 110 | } 111 | struct BinLen { 112 | bv: CExpPtr, 113 | } 114 | impl CExp for BinLen { 115 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> i64 { 116 | let x = self.bv.eval(e, d); 117 | x.bina().len() as i64 118 | } 119 | } 120 | ///////////////////////////// 121 | /// Compile call to LASTID. 
122 | fn c_lastid(b: &Block, args: &mut [Expr]) -> CExpPtr { 123 | check_types(b, args, &[]); 124 | Box::new(LastId {}) 125 | } 126 | struct LastId {} 127 | impl CExp for LastId { 128 | fn eval(&self, ee: &mut EvalEnv, _d: &[u8]) -> i64 { 129 | ee.db.lastid.get() 130 | } 131 | } 132 | ///////////////////////////// 133 | /// Compile call to ALLOCPAGE. 134 | fn c_allocpage(b: &Block, args: &mut [Expr]) -> CExpPtr { 135 | check_types(b, args, &[]); 136 | Box::new(AllocPage {}) 137 | } 138 | struct AllocPage {} 139 | impl CExp for AllocPage { 140 | fn eval(&self, ee: &mut EvalEnv, _d: &[u8]) -> i64 { 141 | ee.db.alloc_page() as i64 142 | } 143 | } 144 | ///////////////////////////// 145 | /// Compile call to GLOBAL. 146 | fn c_global(b: &Block, args: &mut [Expr]) -> CExpPtr { 147 | check_types(b, args, &[DataKind::Int]); 148 | let x = c_int(b, &mut args[0]); 149 | Box::new(Global { x }) 150 | } 151 | struct Global { 152 | x: CExpPtr, 153 | } 154 | impl CExp for Global { 155 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> i64 { 156 | let x = self.x.eval(ee, d); 157 | ee.tr.global(x) 158 | } 159 | } 160 | ///////////////////////////// 161 | /// Compile call to PARSEINT. 162 | fn c_parse_int(b: &Block, args: &mut [Expr]) -> CExpPtr { 163 | check_types(b, args, &[DataKind::String]); 164 | let s = c_value(b, &mut args[0]); 165 | Box::new(ParseInt { s }) 166 | } 167 | struct ParseInt { 168 | s: CExpPtr, 169 | } 170 | impl CExp for ParseInt { 171 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> i64 { 172 | let s = self.s.eval(e, d).str(); 173 | s.parse().unwrap_or(0) 174 | } 175 | } 176 | ///////////////////////////// 177 | /// Compile call to PARSEFLOAT. 
178 | fn c_parse_float(b: &Block, args: &mut [Expr]) -> CExpPtr { 179 | check_types(b, args, &[DataKind::String]); 180 | let s = c_value(b, &mut args[0]); 181 | Box::new(ParseFloat { s }) 182 | } 183 | struct ParseFloat { 184 | s: CExpPtr, 185 | } 186 | impl CExp for ParseFloat { 187 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> f64 { 188 | let s = self.s.eval(e, d).str(); 189 | s.parse().unwrap() 190 | } 191 | } 192 | ///////////////////////////// 193 | /// Compile call to CONTAINS. 194 | fn c_contains(b: &Block, args: &mut [Expr]) -> CExpPtr { 195 | check_types(b, args, &[DataKind::String, DataKind::String]); 196 | let s = c_value(b, &mut args[0]); 197 | let pat = c_value(b, &mut args[1]); 198 | Box::new(Contains { s, pat }) 199 | } 200 | struct Contains { 201 | s: CExpPtr, 202 | pat: CExpPtr, 203 | } 204 | impl CExp for Contains { 205 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> i64 { 206 | let s = self.s.eval(e, d).str().to_string(); 207 | let pat = self.pat.eval(e, d).str().to_string(); 208 | match s.find(&pat) { 209 | Some(u) => u as i64, 210 | None => -1, 211 | } 212 | } 213 | } 214 | ///////////////////////////// 215 | /// Compile call to REPLACE. 
216 | fn c_replace(b: &Block, args: &mut [Expr]) -> CExpPtr { 217 | check_types( 218 | b, 219 | args, 220 | &[DataKind::String, DataKind::String, DataKind::String], 221 | ); 222 | let s = c_value(b, &mut args[0]); 223 | let pat = c_value(b, &mut args[1]); 224 | let sub = c_value(b, &mut args[2]); 225 | Box::new(Replace { s, pat, sub }) 226 | } 227 | struct Replace { 228 | s: CExpPtr, 229 | pat: CExpPtr, 230 | sub: CExpPtr, 231 | } 232 | impl CExp for Replace { 233 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value { 234 | let s = self.s.eval(e, d).str().to_string(); 235 | let pat = self.pat.eval(e, d).str().to_string(); 236 | let sub = self.sub.eval(e, d).str(); 237 | let result = s.replace(&pat, &sub); 238 | Value::String(Rc::new(result)) 239 | } 240 | } 241 | ///////////////////////////// 242 | /// Compile call to SUBSTRING. 243 | fn c_substring(b: &Block, args: &mut [Expr]) -> CExpPtr { 244 | check_types(b, args, &[DataKind::String, DataKind::Int, DataKind::Int]); 245 | let s = c_value(b, &mut args[0]); 246 | let f = c_int(b, &mut args[1]); 247 | let n = c_int(b, &mut args[2]); 248 | Box::new(Substring { s, f, n }) 249 | } 250 | struct Substring { 251 | s: CExpPtr, 252 | f: CExpPtr, 253 | n: CExpPtr, 254 | } 255 | impl CExp for Substring { 256 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 257 | let s = self.s.eval(ee, d).str(); 258 | let f = self.f.eval(ee, d) as usize - 1; 259 | let mut n = self.n.eval(ee, d) as usize; 260 | 261 | let s = &s[f..]; 262 | let mut chars = s.char_indices(); 263 | let end; 264 | loop { 265 | if let Some((x, _)) = chars.next() { 266 | if n == 0 { 267 | end = x; 268 | break; 269 | } 270 | n -= 1; 271 | } else { 272 | end = s.len(); 273 | break; 274 | } 275 | } 276 | let result = s[0..end].to_string(); 277 | Value::String(Rc::new(result)) 278 | } 279 | } 280 | 281 | ///////////////////////////// 282 | /// Compile call to BINSUBSTRING. 
283 | fn c_binsubstring(b: &Block, args: &mut [Expr]) -> CExpPtr { 284 | check_types(b, args, &[DataKind::Binary, DataKind::Int, DataKind::Int]); 285 | let s = c_value(b, &mut args[0]); 286 | let f = c_int(b, &mut args[1]); 287 | let n = c_int(b, &mut args[2]); 288 | Box::new(BinSubstring { s, f, n }) 289 | } 290 | struct BinSubstring { 291 | s: CExpPtr, 292 | f: CExpPtr, 293 | n: CExpPtr, 294 | } 295 | impl CExp for BinSubstring { 296 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 297 | let s = self.s.eval(ee, d).bin(); 298 | let f = self.f.eval(ee, d) as usize - 1; 299 | let n = self.n.eval(ee, d) as usize; 300 | let mut lim = s.len(); 301 | if lim > f + n { 302 | lim = f + n; 303 | } 304 | let result = s[f..lim].to_vec(); 305 | Value::RcBinary(Rc::new(result)) 306 | } 307 | } 308 | 309 | ///////////////////////////// 310 | /// Compile call to ARG. 311 | fn c_arg(b: &Block, args: &mut [Expr]) -> CExpPtr { 312 | check_types(b, args, &[DataKind::Int, DataKind::String]); 313 | let k = c_int(b, &mut args[0]); 314 | let s = c_value(b, &mut args[1]); 315 | Box::new(Arg { k, s }) 316 | } 317 | struct Arg { 318 | k: CExpPtr, 319 | s: CExpPtr, 320 | } 321 | impl CExp for Arg { 322 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 323 | let k = self.k.eval(ee, d); 324 | let s = self.s.eval(ee, d).str(); 325 | let result = ee.tr.arg(k, &s); 326 | Value::String(result) 327 | } 328 | } 329 | 330 | ///////////////////////////// 331 | /// Compile call to HEADER. 
332 | fn c_header(b: &Block, args: &mut [Expr]) -> CExpPtr { 333 | check_types(b, args, &[DataKind::String, DataKind::String]); 334 | let n = c_value(b, &mut args[0]); 335 | let v = c_value(b, &mut args[1]); 336 | Box::new(Header { n, v }) 337 | } 338 | struct Header { 339 | n: CExpPtr, 340 | v: CExpPtr, 341 | } 342 | impl CExp for Header { 343 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> i64 { 344 | let n = self.n.eval(ee, d).str(); 345 | let v = self.v.eval(ee, d).str(); 346 | ee.tr.header(&n, &v); 347 | 0 348 | } 349 | } 350 | 351 | ///////////////////////////// 352 | /// Compile call to STATUSCODE. 353 | fn c_status_code(b: &Block, args: &mut [Expr]) -> CExpPtr { 354 | check_types(b, args, &[DataKind::Int]); 355 | let code = c_int(b, &mut args[0]); 356 | Box::new(StatusCode { code }) 357 | } 358 | struct StatusCode { 359 | code: CExpPtr, 360 | } 361 | impl CExp for StatusCode { 362 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> i64 { 363 | let code = self.code.eval(ee, d); 364 | ee.tr.status_code(code); 365 | 0 366 | } 367 | } 368 | 369 | ///////////////////////////// 370 | /// Compile call to FILEATTR. 371 | fn c_fileattr(b: &Block, args: &mut [Expr]) -> CExpPtr { 372 | check_types(b, args, &[DataKind::Int, DataKind::Int]); 373 | let k = c_int(b, &mut args[0]); 374 | let x = c_int(b, &mut args[1]); 375 | Box::new(FileAttr { k, x }) 376 | } 377 | struct FileAttr { 378 | k: CExpPtr, 379 | x: CExpPtr, 380 | } 381 | impl CExp for FileAttr { 382 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 383 | let k = self.k.eval(ee, d); 384 | let x = self.x.eval(ee, d); 385 | let result = ee.tr.file_attr(k, x); 386 | Value::String(result) 387 | } 388 | } 389 | 390 | ///////////////////////////// 391 | /// Compile call to FILECONTENT. 
392 | fn c_filecontent(b: &Block, args: &mut [Expr]) -> CExpPtr { 393 | check_types(b, args, &[DataKind::Int]); 394 | let k = c_int(b, &mut args[0]); 395 | Box::new(FileContent { k }) 396 | } 397 | struct FileContent { 398 | k: CExpPtr, 399 | } 400 | impl CExp for FileContent { 401 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 402 | let k = self.k.eval(ee, d); 403 | let result = ee.tr.file_content(k); 404 | Value::ArcBinary(result) 405 | } 406 | } 407 | 408 | ///////////////////////////// 409 | /// Compile call to REPACKFILE. 410 | #[cfg(feature = "pack")] 411 | fn c_repackfile(b: &Block, args: &mut [Expr]) -> CExpPtr { 412 | check_types( 413 | b, 414 | args, 415 | &[DataKind::Int, DataKind::String, DataKind::String], 416 | ); 417 | let k = c_int(b, &mut args[0]); 418 | let s = c_value(b, &mut args[1]); 419 | let n = c_value(b, &mut args[2]); 420 | Box::new(RepackFile { k, s, n }) 421 | } 422 | #[cfg(feature = "pack")] 423 | struct RepackFile { 424 | k: CExpPtr, 425 | s: CExpPtr, 426 | n: CExpPtr, 427 | } 428 | #[cfg(feature = "pack")] 429 | impl CExp for RepackFile { 430 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> i64 { 431 | let k = self.k.eval(ee, d); 432 | let s = self.s.eval(ee, d).str(); 433 | let n = self.n.eval(ee, d).str(); 434 | ee.db.repack_file(k, &s, &n) 435 | } 436 | } 437 | 438 | #[cfg(feature = "verify")] 439 | /// SQL to load every table ( required for database::verify to work correctly ). 440 | const LOADALLTABLES: &str = " 441 | DECLARE sid int, sname string, tname string 442 | FOR sid = Id, sname = Name FROM sys.Schema 443 | BEGIN 444 | FOR tname = Name FROM sys.Table WHERE Schema = sid 445 | BEGIN 446 | EXECUTE( 'IF false SELECT Id FROM ' | sys.Dot( sname, tname ) ) 447 | END 448 | END"; 449 | 450 | #[cfg(feature = "verify")] 451 | ///////////////////////////// 452 | /// Compile call to VERIFYDB. 
453 | fn c_verifydb(b: &Block, args: &mut [Expr]) -> CExpPtr { 454 | check_types(b, args, &[]); 455 | Box::new(VerifyDb {}) 456 | } 457 | 458 | #[cfg(feature = "verify")] 459 | struct VerifyDb {} 460 | 461 | #[cfg(feature = "verify")] 462 | impl CExp for VerifyDb { 463 | fn eval(&self, ee: &mut EvalEnv, _d: &[u8]) -> Value { 464 | ee.db.run(LOADALLTABLES, ee.tr); 465 | let s = ee.db.verify(); 466 | Value::String(Rc::new(s)) 467 | } 468 | } 469 | 470 | #[cfg(feature = "renumber")] 471 | ///////////////////////////// 472 | /// Compile call to RENUMBER. 473 | fn c_renumber(b: &Block, args: &mut [Expr]) -> CExpPtr { 474 | check_types(b, args, &[]); 475 | Box::new(Renumber {}) 476 | } 477 | 478 | #[cfg(feature = "renumber")] 479 | struct Renumber {} 480 | 481 | #[cfg(feature = "renumber")] 482 | impl CExp for Renumber { 483 | fn eval(&self, ee: &mut EvalEnv, _d: &[u8]) -> i64 { 484 | ee.db.run(LOADALLTABLES, ee.tr); 485 | ee.db.renumber(); 486 | 0 487 | } 488 | } 489 | 490 | /// Compile call to BINTOSTR. 491 | fn c_bintostr(b: &Block, args: &mut [Expr]) -> CExpPtr { 492 | check_types(b, args, &[DataKind::Binary]); 493 | let bytes = c_value(b, &mut args[0]); 494 | Box::new(Bintostr { bytes }) 495 | } 496 | /// Compiled call to BINTOSTR. 497 | struct Bintostr { 498 | bytes: CExpPtr, 499 | } 500 | impl CExp for Bintostr { 501 | fn eval(&self, ee: &mut EvalEnv, d: &[u8]) -> Value { 502 | let bytes = self.bytes.eval(ee, d); 503 | Value::String(Rc::new(String::from_utf8(bytes.bina().to_vec()).unwrap())) 504 | } 505 | } 506 | -------------------------------------------------------------------------------- /src/bytes.rs: -------------------------------------------------------------------------------- 1 | use crate::{util, Cell, Ordering, Rc, Record, SaveOp, SortedFile, DB}; 2 | 3 | /// Number of fragment types. 4 | pub const NFT: usize = 4; 5 | 6 | /// Total bytes used taking into account all overhead ( 3 + 1 + 8 = 12 bytes, per fragment ). 
7 | fn tot(len: usize, bpf: usize) -> usize { 8 | let nf = (len + bpf - 1) / bpf; 9 | nf * (bpf + 12) 10 | } 11 | 12 | /// Calculate best fragment type from byte length. 13 | pub fn fragment_type(len: usize, bpf: &[usize]) -> usize { 14 | let mut best = usize::MAX; 15 | let mut result = 0; 16 | for (ft, bpf) in bpf.iter().enumerate() { 17 | let t = tot(len, *bpf); 18 | if t <= best { 19 | best = t; 20 | result = ft; 21 | } 22 | } 23 | result 24 | } 25 | 26 | /// Calculate fragment sizes. 27 | pub fn bpf(hp: usize) -> [usize; NFT] { 28 | let hp = hp - 8; // 8 is to account for page header. 29 | let pp = hp / 1000; 30 | let max_bpf = hp / pp - 12; 31 | [40, 127, 333, max_bpf] 32 | } 33 | 34 | /// Storage of variable size values. 35 | pub struct ByteStorage { 36 | /// File for storing fragments. 37 | pub file: Rc, 38 | id_gen: Cell, 39 | /// Bytes per fragment. 40 | bpf: usize, 41 | } 42 | 43 | impl ByteStorage { 44 | /// Construct new ByteStorage with specified root page and fragment type. 45 | pub fn new(root_page: u64, bpf: usize) -> Self { 46 | let file = Rc::new(SortedFile::new(9 + bpf, 8, root_page)); 47 | ByteStorage { 48 | file, 49 | id_gen: Cell::new(u64::MAX), 50 | bpf, 51 | } 52 | } 53 | 54 | /// Get fragment Id value. 55 | fn get_id(&self, db: &DB) -> u64 { 56 | let mut result = self.id_gen.get(); 57 | if result == u64::MAX { 58 | result = 0; 59 | // Initialise id_gen to id of last record. 60 | let start = Fragment::new(u64::MAX, self.bpf); 61 | if let Some((pp, off)) = self.file.clone().dsc(db, Box::new(start)).next() { 62 | let p = pp.borrow(); 63 | result = 1 + util::getu64(&p.data, off); 64 | } 65 | self.id_gen.set(result); 66 | } 67 | result 68 | } 69 | 70 | /// Check whether there are changes to underlying file. 71 | pub fn changed(&self) -> bool { 72 | self.file.changed() 73 | } 74 | 75 | /// Save to underlying file. 76 | pub fn save(&self, db: &DB, op: SaveOp) { 77 | self.file.save(db, op); 78 | } 79 | 80 | /// Encode bytes. 
81 | pub fn encode(&self, db: &DB, bytes: &[u8]) -> u64 { 82 | let result = self.get_id(db); 83 | let mut r = Fragment::new(0, self.bpf); 84 | let n = bytes.len(); 85 | let mut done = 0; 86 | loop { 87 | r.id = self.id_gen.get(); 88 | self.id_gen.set(r.id + 1); 89 | let mut len = n - done; 90 | if len > self.bpf { 91 | r.last = false; 92 | len = self.bpf; 93 | } else { 94 | r.last = true; 95 | } 96 | r.len = len; 97 | r.bytes[..len].copy_from_slice(&bytes[done..(len + done)]); 98 | done += len; 99 | self.file.insert(db, &r); 100 | if done == n { 101 | break; 102 | } 103 | } 104 | result 105 | } 106 | 107 | /// Decode bytes, inline bytes are reserved. 108 | pub fn decode(&self, db: &DB, mut id: u64, inline: usize) -> Vec { 109 | let mut result = vec![0_u8; inline]; 110 | let start = Fragment::new(id, self.bpf); 111 | for (pp, off) in self.file.asc(db, Box::new(start)) { 112 | let p = pp.borrow(); 113 | let data = &p.data; 114 | debug_assert!(util::getu64(data, off) == id); 115 | id += 1; 116 | let off = off + 8; 117 | let (len, last) = decode(&data[off..], self.bpf); 118 | result.extend_from_slice(&data[off..off + len]); 119 | if last { 120 | break; 121 | } 122 | } 123 | result 124 | } 125 | 126 | /// Delete a code. 127 | pub fn delcode(&self, db: &DB, id: u64) { 128 | let start = Fragment::new(id, self.bpf); 129 | let mut n = 0; 130 | for (pp, off) in self.file.asc(db, Box::new(start)) { 131 | let p = pp.borrow(); 132 | debug_assert!(util::getu64(&p.data, off) == id + n); 133 | n += 1; 134 | let off = off + 8; 135 | let (_len, last) = decode(&p.data[off..], self.bpf); 136 | if last { 137 | break; 138 | } 139 | } 140 | let mut r = Fragment::new(0, self.bpf); 141 | for xid in id..id + n { 142 | r.id = xid; 143 | self.file.remove(db, &r); 144 | } 145 | } 146 | 147 | /// Pack underlying file. 
148 | #[cfg(feature = "pack")] 149 | pub fn repack_file(&self, db: &DB) -> i64 { 150 | let r = Fragment::new(0, self.bpf); 151 | self.file.repack(db, &r) 152 | } 153 | } 154 | 155 | /// Values are split into fragments. 156 | struct Fragment { 157 | id: u64, 158 | len: usize, 159 | last: bool, 160 | bytes: Vec, 161 | } 162 | 163 | impl Fragment { 164 | pub fn new(id: u64, bpf: usize) -> Self { 165 | Fragment { 166 | id, 167 | len: 0, 168 | last: false, 169 | bytes: vec![0; bpf], 170 | } 171 | } 172 | } 173 | 174 | impl Record for Fragment { 175 | fn compare(&self, _db: &DB, data: &[u8]) -> Ordering { 176 | let val = util::getu64(data, 0); 177 | self.id.cmp(&val) 178 | } 179 | 180 | fn save(&self, data: &mut [u8]) { 181 | util::setu64(data, self.id); 182 | let bpf = self.bytes.len(); 183 | data[8..8 + self.len].copy_from_slice(&self.bytes[..self.len]); 184 | 185 | // Maybe should zero unused bytes. 186 | 187 | let unused = bpf - self.len; 188 | data[8 + bpf] = (unused % 64) as u8 189 | + if self.last { 64 } else { 0 } 190 | + if unused >= 64 { 128 } else { 0 }; 191 | if unused >= 64 { 192 | data[8 + bpf - 1] = (unused / 64) as u8; 193 | } 194 | } 195 | } 196 | 197 | /// Result is data length and last flag. 198 | fn decode(data: &[u8], bpf: usize) -> (usize, bool) { 199 | let b = data[bpf]; 200 | let unused = (b % 64) as usize 201 | + if b >= 128 { 202 | data[bpf - 1] as usize * 64 203 | } else { 204 | 0 205 | }; 206 | (bpf - unused, b & 64 != 0) 207 | } 208 | -------------------------------------------------------------------------------- /src/cexp.rs: -------------------------------------------------------------------------------- 1 | use crate::{get_bytes, util, CExp, CExpPtr, EvalEnv, Function, Rc, Value}; 2 | 3 | /// Function call. 
pub(crate) struct Call {
    pub fp: Rc<Function>,
    pub pv: Vec<CExpPtr<Value>>,
}

impl CExp<Value> for Call {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value {
        // Push evaluated arguments, run the function, pop its result.
        for exp in &self.pv {
            let v = exp.eval(e, d);
            e.stack.push(v);
        }
        e.call(&self.fp);
        e.stack.pop().unwrap()
    }
}

/// CASE expression: first WHEN whose condition holds wins, otherwise ELSE.
pub(crate) struct Case<T> {
    pub whens: Vec<(CExpPtr<bool>, CExpPtr<T>)>,
    pub els: CExpPtr<T>,
}

impl<T> CExp<T> for Case<T> {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T {
        for (cond, val) in &self.whens {
            if cond.eval(e, d) {
                return val.eval(e, d);
            }
        }
        self.els.eval(e, d)
    }
}

/// String concatenation.
pub(crate) struct Concat(pub CExpPtr<Value>, pub CExpPtr<Value>);

impl CExp<Value> for Concat {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value {
        let mut s1: Value = self.0.eval(e, d);
        let s2: Rc<String> = self.1.eval(e, d).str();
        // Append in place when the left-hand string is not shared.
        if let Value::String(s) = &mut s1 {
            if let Some(ms) = Rc::get_mut(s) {
                ms.push_str(&s2);
                return s1;
            }
        }
        // Otherwise build a fresh string of exactly the right size.
        let s1 = s1.str();
        let mut s = String::with_capacity(s1.len() + s2.len());
        s.push_str(&s1);
        s.push_str(&s2);
        Value::String(Rc::new(s))
    }
}

/// Binary concatenation.
pub(crate) struct BinConcat(pub CExpPtr<Value>, pub CExpPtr<Value>);

impl CExp<Value> for BinConcat {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value {
        let mut b1 = self.0.eval(e, d);
        let b2 = self.1.eval(e, d).bin();
        // Append to existing bytes if not shared.
64 | if let Value::RcBinary(b) = &mut b1 { 65 | if let Some(mb) = Rc::get_mut(b) { 66 | mb.extend_from_slice(&b2); 67 | return b1; 68 | } 69 | } 70 | let b1 = b1.bin(); 71 | let mut b = Vec::with_capacity(b1.len() + b2.len()); 72 | b.extend_from_slice(&b1); 73 | b.extend_from_slice(&b2); 74 | Value::RcBinary(Rc::new(b)) 75 | } 76 | } 77 | 78 | pub(crate) struct Or(pub CExpPtr, pub CExpPtr); 79 | 80 | impl CExp for Or { 81 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool { 82 | self.0.eval(e, d) || self.1.eval(e, d) 83 | } 84 | } 85 | 86 | pub(crate) struct And(pub CExpPtr, pub CExpPtr); 87 | 88 | impl CExp for And { 89 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool { 90 | self.0.eval(e, d) && self.1.eval(e, d) 91 | } 92 | } 93 | 94 | pub(crate) struct Minus(pub CExpPtr); 95 | 96 | impl CExp for Minus 97 | where 98 | T: std::ops::Neg, 99 | { 100 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T { 101 | -self.0.eval(e, d) 102 | } 103 | } 104 | 105 | pub(crate) struct Not(pub CExpPtr); 106 | 107 | impl CExp for Not { 108 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool { 109 | !self.0.eval(e, d) 110 | } 111 | } 112 | 113 | pub(crate) struct Add(pub CExpPtr, pub CExpPtr); 114 | 115 | impl CExp for Add 116 | where 117 | T: std::ops::Add, 118 | { 119 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T { 120 | self.0.eval(e, d) + self.1.eval(e, d) 121 | } 122 | } 123 | 124 | pub(crate) struct Sub(pub CExpPtr, pub CExpPtr); 125 | 126 | impl CExp for Sub 127 | where 128 | T: std::ops::Sub, 129 | { 130 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T { 131 | self.0.eval(e, d) - self.1.eval(e, d) 132 | } 133 | } 134 | 135 | pub(crate) struct Mul(pub CExpPtr, pub CExpPtr); 136 | 137 | impl CExp for Mul 138 | where 139 | T: std::ops::Mul, 140 | { 141 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T { 142 | self.0.eval(e, d) * self.1.eval(e, d) 143 | } 144 | } 145 | 146 | pub(crate) struct Div(pub CExpPtr, pub CExpPtr); 147 | 148 | impl CExp for Div 149 | where 150 | T: 
std::ops::Div<Output = T>,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T {
        self.0.eval(e, d) / self.1.eval(e, d)
    }
}

/// Remainder.
pub(crate) struct Rem<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<T> for Rem<T>
where
    T: std::ops::Rem<Output = T>,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> T {
        self.0.eval(e, d) % self.1.eval(e, d)
    }
}

/// Equality comparison.
pub(crate) struct Equal<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<bool> for Equal<T>
where
    T: std::cmp::PartialOrd,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        self.0.eval(e, d) == self.1.eval(e, d)
    }
}

/// Inequality comparison.
pub(crate) struct NotEqual<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<bool> for NotEqual<T>
where
    T: std::cmp::PartialOrd,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        self.0.eval(e, d) != self.1.eval(e, d)
    }
}

/// Less-than comparison.
pub(crate) struct Less<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<bool> for Less<T>
where
    T: std::cmp::PartialOrd,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        self.0.eval(e, d) < self.1.eval(e, d)
    }
}

/// Greater-than comparison.
pub(crate) struct Greater<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<bool> for Greater<T>
where
    T: std::cmp::PartialOrd,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        self.0.eval(e, d) > self.1.eval(e, d)
    }
}

/// Less-or-equal comparison.
pub(crate) struct LessEqual<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<bool> for LessEqual<T>
where
    T: std::cmp::PartialOrd,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        self.0.eval(e, d) <= self.1.eval(e, d)
    }
}

/// Greater-or-equal comparison.
pub(crate) struct GreaterEqual<T>(pub CExpPtr<T>, pub CExpPtr<T>);

impl<T> CExp<bool> for GreaterEqual<T>
where
    T: std::cmp::PartialOrd,
{
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        self.0.eval(e, d) >= self.1.eval(e, d)
    }
}

/// Column access: 8-byte signed integer.
pub(crate) struct ColumnI64 {
    pub off: usize,
}

impl CExp<i64> for ColumnI64 {
    fn eval(&self, _e: &mut EvalEnv, data: &[u8]) -> i64 {
        util::getu64(data, self.off) as i64
    }
}

/// Column access: integer of arbitrary size.
pub(crate) struct ColumnI {
    pub off: usize,
    pub size: usize,
}

impl CExp<i64> for ColumnI {
    fn eval(&self, _e: &mut EvalEnv, data: &[u8]) -> i64 {
        util::iget(data, self.off, self.size)
    }
}

/// Column access: single signed byte.
pub(crate) struct ColumnI8 {
    pub off: usize,
}

impl CExp<i64> for ColumnI8 {
    fn eval(&self, _e: &mut EvalEnv, data: &[u8]) -> i64 {
        data[self.off] as i8 as i64
    }
}

/// Column access: 8-byte float.
pub(crate) struct ColumnF64 {
    pub off: usize,
}

impl CExp<f64> for ColumnF64 {
    fn eval(&self, _e: &mut EvalEnv, data: &[u8]) -> f64 {
        util::getf64(data, self.off)
    }
}

/// Column access: 4-byte float, widened to f64.
pub(crate) struct ColumnF32 {
    pub off: usize,
}

impl CExp<f64> for ColumnF32 {
    fn eval(&self, _e: &mut EvalEnv, data: &[u8]) -> f64 {
        util::getf32(data, self.off) as f64
    }
}

/// Column access: bool ( low bit of the stored byte ).
pub(crate) struct ColumnBool {
    pub off: usize,
}

impl CExp<bool> for ColumnBool {
    fn eval(&self, _e: &mut EvalEnv, data: &[u8]) -> bool {
        data[self.off] & 1 != 0
    }
}

/// Column access: string.
pub(crate) struct ColumnString {
    pub off: usize,
    pub size: usize,
}

impl CExp<Value> for ColumnString {
    fn eval(&self, ee: &mut EvalEnv, data: &[u8]) -> Value {
        let bytes = get_bytes(&ee.db, &data[self.off..], self.size).0;
        let str = String::from_utf8(bytes).unwrap();
        Value::String(Rc::new(str))
    }
}

/// Column access: binary.
pub(crate) struct ColumnBinary {
    pub off: usize,
    pub size: usize,
}

impl CExp<Value> for ColumnBinary {
    fn eval(&self, ee: &mut EvalEnv, data: &[u8]) -> Value {
        let bytes = get_bytes(&ee.db,
&data[self.off..], self.size).0;
        Value::RcBinary(Rc::new(bytes))
    }
}

/// Local variable reference ( stack slot relative to base pointer ).
pub(crate) struct Local(pub usize);

impl CExp<f64> for Local {
    fn eval(&self, e: &mut EvalEnv, _d: &[u8]) -> f64 {
        if let Value::Float(v) = e.stack[e.bp + self.0] {
            v
        } else {
            unsafe_panic!()
        }
    }
}
impl CExp<i64> for Local {
    fn eval(&self, e: &mut EvalEnv, _d: &[u8]) -> i64 {
        if let Value::Int(v) = e.stack[e.bp + self.0] {
            v
        } else {
            unsafe_panic!()
        }
    }
}

impl CExp<bool> for Local {
    fn eval(&self, e: &mut EvalEnv, _d: &[u8]) -> bool {
        if let Value::Bool(v) = e.stack[e.bp + self.0] {
            v
        } else {
            unsafe_panic!()
        }
    }
}

impl CExp<Value> for Local {
    fn eval(&self, e: &mut EvalEnv, _d: &[u8]) -> Value {
        e.stack[e.bp + self.0].clone()
    }
}

/// Constant expression.
pub(crate) struct Const<T>(pub T);

impl<T> CExp<T> for Const<T>
where
    T: Clone,
{
    fn eval(&self, _e: &mut EvalEnv, _d: &[u8]) -> T {
        self.0.clone()
    }
}
/// Extract Int from Value.
pub(crate) struct ValToInt(pub CExpPtr<Value>);

impl CExp<i64> for ValToInt {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> i64 {
        if let Value::Int(x) = self.0.eval(e, d) {
            return x;
        }
        unsafe_panic!();
    }
}

/// Extract Float from Value.
pub(crate) struct ValToFloat(pub CExpPtr<Value>);

impl CExp<f64> for ValToFloat {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> f64 {
        if let Value::Float(x) = self.0.eval(e, d) {
            return x;
        }
        unsafe_panic!();
    }
}

/// Extract Bool from Value.
pub(crate) struct ValToBool(pub CExpPtr<Value>);

impl CExp<bool> for ValToBool {
    fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> bool {
        if let Value::Bool(x) = self.0.eval(e, d) {
            return x;
        }
        unsafe_panic!();
    }
}

/// Wrap Int as Value.
pub(crate) struct IntToVal(pub CExpPtr<i64>);

impl CExp<Value> for IntToVal
{ 403 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value { 404 | Value::Int(self.0.eval(e, d)) 405 | } 406 | } 407 | 408 | pub(crate) struct FloatToVal(pub CExpPtr); 409 | 410 | impl CExp for FloatToVal { 411 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value { 412 | Value::Float(self.0.eval(e, d)) 413 | } 414 | } 415 | 416 | pub(crate) struct BoolToVal(pub CExpPtr); 417 | 418 | impl CExp for BoolToVal { 419 | fn eval(&self, e: &mut EvalEnv, d: &[u8]) -> Value { 420 | Value::Bool(self.0.eval(e, d)) 421 | } 422 | } 423 | -------------------------------------------------------------------------------- /src/dividedstg.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | block::{BlockStg, RSVD_SIZE}, 3 | util, Arc, Data, Storage, 4 | }; 5 | use std::cmp::min; 6 | 7 | /// Divides Storage into sub-files of arbitrary size using [BlockStg]. 8 | pub struct DividedStg { 9 | /// Underlying block storage. 10 | pub bs: BlockStg, 11 | /// Block capacity. 12 | pub blk_cap: u64, 13 | /// Number of block numbers that will fit in a block. 14 | base: u64, 15 | } 16 | 17 | /// Bytes required to save FD ( root, size ). 18 | pub const FD_SIZE: usize = 8 + 8; 19 | 20 | /// [DividedStg] File Descriptor. 21 | pub struct FD { 22 | /// Root block. 23 | root: u64, 24 | /// File size in bytes. 25 | size: u64, 26 | /// Number of data blocks needed ( can be computed from file size ). 27 | blocks: u64, 28 | /// Number of levels needed ( can be computed from file size ). 29 | level: u8, 30 | /// Set true when the FD is updated. 31 | pub changed: bool, 32 | } 33 | 34 | impl FD { 35 | /// File size. 36 | pub fn size(&self) -> u64 { 37 | self.size 38 | } 39 | 40 | /// Sets the file size and number of blocks required. 
41 | fn set_size(&mut self, size: u64, blocks: u64) { 42 | self.changed = true; 43 | self.size = size; 44 | self.blocks = blocks; 45 | } 46 | } 47 | 48 | impl DividedStg { 49 | /// Construct DividedStg from specified Storage and block capacity. 50 | pub fn new(stg: Box, blk_cap: u64) -> Self { 51 | let bs = BlockStg::new(stg, blk_cap); 52 | let blk_cap = bs.blk_cap(); 53 | let base = blk_cap / bs.nsz() as u64; 54 | Self { bs, base, blk_cap } 55 | } 56 | 57 | /// Get file descriptor for a new file. 58 | pub fn new_file(&mut self) -> FD { 59 | FD { 60 | root: self.bs.new_block(), 61 | level: 0, 62 | size: 0, 63 | blocks: 1, 64 | changed: true, 65 | } 66 | } 67 | 68 | /// Drop specified file. 69 | pub fn drop_file(&mut self, f: &mut FD) { 70 | self.truncate(f, 0); 71 | self.bs.drop_block(f.root); 72 | } 73 | 74 | /// Free blocks not required for file of specified size. 75 | pub fn truncate(&mut self, f: &mut FD, size: u64) { 76 | if size < f.size { 77 | let reqd = self.blocks(size); 78 | if reqd < f.blocks { 79 | let levels = self.levels(reqd); 80 | 81 | // Calculate new root 82 | let mut new_root = f.root; 83 | for _ in levels..f.level { 84 | new_root = self.get_num(new_root, 0); 85 | } 86 | 87 | // For each level reduce the number of blocks. 88 | let mut level = f.level; 89 | let mut old = f.blocks; 90 | let mut new = reqd; 91 | while level > 0 && old != new { 92 | self.reduce_blocks(f, level, old, new); 93 | new = (new + self.base - 1) / self.base; 94 | old = (old + self.base - 1) / self.base; 95 | level -= 1; 96 | } 97 | if levels < f.level { 98 | self.bs.drop_block(f.root); 99 | f.root = new_root; 100 | f.level = levels; 101 | } 102 | } 103 | f.set_size(size, reqd); 104 | } 105 | } 106 | 107 | /// Write data to specified file at specified offset. 
108 | pub fn write(&mut self, f: &mut FD, offset: u64, data: &[u8]) { 109 | let n = data.len(); 110 | let data = Arc::new(data.to_vec()); 111 | self.write_data(f, offset, data, n); 112 | } 113 | 114 | /// Write Data to specified file at specified offset. 115 | pub fn write_data(&mut self, f: &mut FD, offset: u64, data: Data, n: usize) { 116 | self.allocate(f, offset + n as u64); 117 | 118 | if f.blocks == 1 { 119 | self.bs.set_data(f.root, offset, data, 0, n); 120 | } else { 121 | self.write_blocks(f, offset, data, n); 122 | } 123 | } 124 | 125 | /// Read data from file at specified offset. 126 | pub fn read(&self, f: &FD, offset: u64, data: &mut [u8]) { 127 | if f.blocks == 1 { 128 | self.bs.get(f.root, offset, data); 129 | } else { 130 | self.read_blocks(f, offset, data); 131 | } 132 | } 133 | 134 | /// Save fd to byte buffer. 135 | pub fn save_fd(&self, fd: &FD, buf: &mut [u8]) { 136 | debug_assert!(fd.level == self.levels(fd.blocks)); 137 | debug_assert!(fd.blocks == self.blocks(fd.size)); 138 | util::setu64(&mut buf[0..8], fd.root); 139 | util::setu64(&mut buf[8..16], fd.size); 140 | } 141 | 142 | /// Load fd from byte buffer. 143 | pub fn load_fd(&self, buf: &[u8]) -> FD { 144 | let root = util::getu64(buf, 0); 145 | let size = util::getu64(buf, 8); 146 | let blocks = self.blocks(size); 147 | let level = self.levels(blocks); 148 | FD { 149 | root, 150 | size, 151 | blocks, 152 | level, 153 | changed: false, 154 | } 155 | } 156 | 157 | /// Set root file descriptor. 158 | pub fn set_root(&mut self, fd: &FD) { 159 | let mut rsvd = [0; RSVD_SIZE]; 160 | self.save_fd(fd, &mut rsvd); 161 | self.bs.set_rsvd(rsvd); 162 | } 163 | 164 | /// Get root file descriptor. 165 | pub fn get_root(&self) -> FD { 166 | let rsvd = self.bs.get_rsvd(); 167 | self.load_fd(&rsvd) 168 | } 169 | 170 | /// Save files to backing storage. 171 | pub fn save(&mut self) { 172 | self.bs.save(); 173 | } 174 | 175 | /// Wait for save to complete. 
176 | pub fn wait_complete(&self) { 177 | self.bs.wait_complete(); 178 | } 179 | 180 | /// Allocate sufficient blocks for file of specified size. 181 | fn allocate(&mut self, f: &mut FD, size: u64) { 182 | if size > f.size { 183 | let reqd = self.blocks(size); 184 | if reqd > f.blocks { 185 | let new_level = self.levels(reqd); 186 | while f.level < new_level { 187 | let blk = self.bs.new_block(); 188 | self.set_num(blk, 0, f.root); 189 | f.root = blk; 190 | f.level += 1; 191 | } 192 | self.add_blocks(f, reqd); 193 | } 194 | f.set_size(size, reqd); 195 | } 196 | } 197 | 198 | /// Write data to file at specified offset. 199 | fn write_blocks(&mut self, f: &FD, offset: u64, data: Data, n: usize) { 200 | let mut done = 0; 201 | while done < n { 202 | let off = offset + done as u64; 203 | let (blk, off) = (off / self.blk_cap, off % self.blk_cap); 204 | let a = min(n - done, (self.blk_cap - off) as usize); 205 | let blk = self.get_block(f.root, f.level, blk); 206 | self.bs.set_data(blk, off, data.clone(), done, a); 207 | done += a; 208 | } 209 | } 210 | 211 | /// Read data from file at specified offset. 212 | fn read_blocks(&self, f: &FD, offset: u64, data: &mut [u8]) { 213 | let (mut done, len) = (0, data.len()); 214 | while done < len { 215 | let off = offset + done as u64; 216 | let (blk, off) = (off / self.blk_cap, off % self.blk_cap); 217 | let a = min(len - done, (self.blk_cap - off) as usize); 218 | if blk < f.blocks { 219 | let blk = self.get_block(f.root, f.level, blk); 220 | self.bs.get(blk, off, &mut data[done..done + a]); 221 | } 222 | done += a; 223 | } 224 | } 225 | 226 | /// Add data blocks up to specified number. 227 | fn add_blocks(&mut self, f: &mut FD, new: u64) { 228 | for ix in f.blocks..new { 229 | let nb = self.bs.new_block(); 230 | self.set_block(f.root, f.level, ix, nb); 231 | } 232 | } 233 | 234 | /// Reduce blocks at specified level from old to new. 
235 | fn reduce_blocks(&mut self, f: &mut FD, level: u8, old: u64, new: u64) { 236 | for ix in new..old { 237 | let blk = self.get_block(f.root, level, ix); 238 | self.bs.drop_block(blk); 239 | } 240 | } 241 | 242 | /// Calculate the number of data blocks required for a file of specified size. 243 | fn blocks(&self, size: u64) -> u64 { 244 | if size == 0 { 245 | return 1; 246 | } 247 | (size + self.blk_cap - 1) / self.blk_cap 248 | } 249 | 250 | /// Calculate the number of extra levels needed for specified number of data blocks. 251 | fn levels(&self, blocks: u64) -> u8 { 252 | if blocks <= 1 { 253 | 0 254 | } else { 255 | (blocks - 1).ilog(self.base) as u8 + 1 256 | } 257 | } 258 | 259 | /// Set the block at index ix at specified level. 260 | fn set_block(&mut self, mut blk: u64, level: u8, mut ix: u64, value: u64) { 261 | if level > 1 { 262 | let x = ix / self.base; 263 | ix %= self.base; 264 | blk = if ix == 0 { 265 | let nb = self.bs.new_block(); 266 | self.set_block(blk, level - 1, x, nb); 267 | nb 268 | } else { 269 | self.get_block(blk, level - 1, x) 270 | }; 271 | } 272 | self.set_num(blk, ix, value); 273 | } 274 | 275 | /// Get the block at index ix at specified level. 276 | fn get_block(&self, mut blk: u64, level: u8, mut ix: u64) -> u64 { 277 | if level > 1 { 278 | let x = ix / self.base; 279 | ix %= self.base; 280 | blk = self.get_block(blk, level - 1, x); 281 | } 282 | self.get_num(blk, ix) 283 | } 284 | 285 | /// Get block number from block at specified index. 286 | fn get_num(&self, blk: u64, ix: u64) -> u64 { 287 | let nsz = self.bs.nsz(); 288 | let mut bytes = [0; 8]; 289 | self.bs.get(blk, ix * nsz as u64, &mut bytes[0..nsz]); 290 | u64::from_le_bytes(bytes) 291 | } 292 | 293 | /// Set block number in block at specified index. 
294 | fn set_num(&mut self, blk: u64, ix: u64, v: u64) { 295 | let nsz = self.bs.nsz(); 296 | self.bs.set(blk, ix * nsz as u64, &v.to_le_bytes()[0..nsz]); 297 | } 298 | } 299 | 300 | #[test] 301 | fn divided_stg_test() { 302 | let blk_cap = 10000; 303 | let stg = crate::MemFile::new(); 304 | let mut ds = DividedStg::new(stg.clone(), blk_cap); 305 | 306 | let mut f = ds.new_file(); 307 | let data = b"hello george"; 308 | 309 | ds.write(&mut f, 0, data); 310 | 311 | let test_off = 200 * blk_cap; 312 | ds.write(&mut f, test_off, data); 313 | 314 | ds.save(); 315 | 316 | let mut ds = DividedStg::new(stg.clone(), blk_cap); 317 | 318 | let mut buf = vec![0; data.len()]; 319 | ds.read(&f, 0, &mut buf); 320 | assert!(&buf == data); 321 | 322 | let mut buf = vec![0; data.len()]; 323 | ds.read(&f, test_off, &mut buf); 324 | assert!(&buf == data); 325 | 326 | ds.truncate(&mut f, 10 * blk_cap); 327 | ds.drop_file(&mut f); 328 | ds.save(); 329 | } 330 | -------------------------------------------------------------------------------- /src/expr.rs: -------------------------------------------------------------------------------- 1 | use crate::*; 2 | use Instruction::{DataOp, ForNext, ForSortNext, Jump, JumpIfFalse}; 3 | 4 | /// Holds function name, line, column and message. 5 | #[derive(Clone)] 6 | pub(crate) struct SqlError { 7 | pub rname: String, 8 | pub line: usize, 9 | pub column: usize, 10 | pub msg: String, 11 | } 12 | /// Table Expression ( not yet type-checked or compiled against database ). 13 | pub enum TableExpression { 14 | /// Base table. 15 | Base(ObjRef), 16 | /// VALUEs. 17 | Values(Vec>), 18 | } 19 | /// Assign operation. 20 | #[derive(Clone, Copy)] 21 | #[non_exhaustive] 22 | pub enum AssignOp { 23 | /// Assign. 24 | Assign, 25 | /// append. 26 | Append, 27 | /// Increment. 28 | Inc, 29 | /// Decrement. 30 | Dec, 31 | } 32 | /// Vector of local variable numbers and AssignOp. 
33 | pub type Assigns = Vec<(usize, AssignOp)>; 34 | 35 | /// From Expression ( not yet compiled ). 36 | #[non_exhaustive] 37 | pub struct FromExpression { 38 | /// Column names. 39 | pub colnames: Vec, 40 | /// Assigns. 41 | pub assigns: Assigns, 42 | /// Expressions. 43 | pub exps: Vec, 44 | /// FROM clause. 45 | pub from: Option>, 46 | /// WHERE expression. 47 | pub wher: Option, 48 | /// ORDER BY clause. 49 | pub orderby: Vec<(Expr, bool)>, 50 | } 51 | 52 | /// Parsing token. 53 | #[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy)] 54 | pub enum Token { 55 | /* Note: order is significant */ 56 | /// Less. 57 | Less, 58 | /// Less or Equal. 59 | LessEqual, 60 | /// Greater or Equal. 61 | GreaterEqual, 62 | /// Greater. 63 | Greater, 64 | /// Equal. 65 | Equal, 66 | /// Not Equal. 67 | NotEqual, 68 | /// In. 69 | In, 70 | /// + 71 | Plus, 72 | /// - 73 | Minus, 74 | /// * 75 | Times, 76 | /// / 77 | Divide, 78 | /// % 79 | Percent, 80 | /// | 81 | VBar, 82 | /// AND 83 | And, 84 | /// OR 85 | Or, 86 | /// |= 87 | VBarEqual, 88 | /// += 89 | PlusEqual, 90 | /// -= 91 | MinusEqual, 92 | /// Identifier. 93 | Id, 94 | /// Number. 95 | Number, 96 | /// Hex number. 97 | Hex, 98 | /// String literal. 99 | String, 100 | /// ( 101 | LBra, 102 | /// ) 103 | RBra, 104 | /// , 105 | Comma, 106 | /// : 107 | Colon, 108 | /// . 109 | Dot, 110 | /// ! 111 | Exclamation, 112 | /// Unknown. 113 | Unknown, 114 | /// End of file. 115 | EndOfFile, 116 | } 117 | 118 | impl Token { 119 | /// Get precedence of operator. 120 | pub fn precedence(self) -> i8 { 121 | const PA: [i8; 15] = [10, 10, 10, 10, 10, 10, 10, 20, 20, 30, 30, 30, 15, 8, 5]; 122 | PA[self as usize] 123 | } 124 | } 125 | 126 | /// Scalar Expression (uncompiled). 127 | #[non_exhaustive] 128 | pub struct Expr { 129 | /// Expression kind. 130 | pub exp: ExprIs, 131 | /// Data type. 132 | pub data_type: DataType, 133 | /// Doesn't depend on FROM clause. 134 | pub is_constant: bool, 135 | /// Has been type-checked. 
136 | pub checked: bool, 137 | /// Column number. 138 | pub col: usize, 139 | } 140 | 141 | impl Expr { 142 | /// Construct new Expr. 143 | pub fn new(exp: ExprIs) -> Self { 144 | Expr { 145 | exp, 146 | data_type: NONE, 147 | is_constant: false, 148 | checked: false, 149 | col: 0, 150 | } 151 | } 152 | } 153 | 154 | /// Scalar Expression variants. 155 | #[non_exhaustive] 156 | pub enum ExprIs { 157 | /// Constant. 158 | Const(Value), 159 | /// Local variable. 160 | Local(usize), 161 | /// Column. 162 | ColName(String), 163 | /// Binary operator expression. 164 | Binary(Token, Box, Box), 165 | /// Not expression. 166 | Not(Box), 167 | /// Unary minus. 168 | Minus(Box), 169 | /// Case expression. 170 | Case(Vec<(Expr, Expr)>, Box), 171 | /// Function call. 172 | FuncCall(ObjRef, Vec), 173 | /// Builtin function call. 174 | BuiltinCall(String, Vec), 175 | /// Scalar select. 176 | ScalarSelect(Box), 177 | /// List of expressions. 178 | List(Vec), 179 | } 180 | 181 | /// Object reference ( Schema.Name ). 182 | #[derive(PartialEq, PartialOrd, Eq, Hash, Clone)] 183 | #[non_exhaustive] 184 | pub struct ObjRef { 185 | /// Schema. 186 | pub schema: String, 187 | /// Name within Schema. 188 | pub name: String, 189 | } 190 | 191 | impl ObjRef { 192 | /// Construct from string references. 193 | pub fn new(s: &str, n: &str) -> Self { 194 | Self { 195 | schema: s.to_string(), 196 | name: n.to_string(), 197 | } 198 | } 199 | /// Used for error messages. 200 | pub fn str(&self) -> String { 201 | format!("[{}].[{}]", &self.schema, &self.name) 202 | } 203 | } 204 | 205 | /// Binary=1, String=2, Int=3, Float=4, Bool=5. 206 | #[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy)] 207 | #[non_exhaustive] 208 | pub enum DataKind { 209 | /// None. 210 | None = 0, 211 | /// Binary. 212 | Binary = 1, 213 | /// String. 214 | String = 2, 215 | /// Integer. 216 | Int = 3, 217 | /// Float. 218 | Float = 4, 219 | /// Bool. 
220 | Bool = 5, 221 | } 222 | 223 | /// Low 3 (KBITS) bits are DataKind, rest is size in bytes. 224 | pub type DataType = usize; 225 | 226 | pub(crate) const KBITS: usize = 3; 227 | pub(crate) const NONE: DataType = DataKind::None as usize; 228 | pub(crate) const BINARY: DataType = DataKind::Binary as usize + (16 << KBITS); 229 | pub(crate) const STRING: DataType = DataKind::String as usize + (16 << KBITS); 230 | pub(crate) const NAMESTR: DataType = DataKind::String as usize + (32 << KBITS); 231 | pub(crate) const BIGSTR: DataType = DataKind::String as usize + (250 << KBITS); 232 | pub(crate) const INT: DataType = DataKind::Int as usize + (8 << KBITS); 233 | pub(crate) const FLOAT: DataType = DataKind::Float as usize + (4 << KBITS); 234 | pub(crate) const DOUBLE: DataType = DataKind::Float as usize + (8 << KBITS); 235 | pub(crate) const BOOL: DataType = DataKind::Bool as usize + (1 << KBITS); 236 | 237 | /// Compute the DataKind of a DataType ( extracts the low KBITS bits as a kind index ). 238 | pub fn data_kind(x: DataType) -> DataKind { 239 | const DKLOOK: [DataKind; 6] = [ 240 | DataKind::None, 241 | DataKind::Binary, 242 | DataKind::String, 243 | DataKind::Int, 244 | DataKind::Float, 245 | DataKind::Bool, 246 | ]; 247 | DKLOOK[x % (1 << KBITS)] 248 | } 249 | 250 | /// Compute the number of bytes required to store a value of the specified DataType. 251 | #[must_use] 252 | pub fn data_size(x: DataType) -> usize { 253 | x >> KBITS 254 | } 255 | 256 | /// Compilation block ( body of function or batch section ). 257 | pub struct Block<'a> { 258 | /// Number of function parameters. 259 | pub param_count: usize, 260 | /// Function return type. 261 | pub return_type: DataType, 262 | /// Datatypes of parameters and local variables. 263 | pub local_typ: Vec, 264 | /// List of instructions. 265 | pub ilist: Vec, 266 | /// Id of break. 267 | pub break_id: usize, 268 | /// Database. 269 | pub db: DB, 270 | /// Current table in scope by FROM clause( or UPDATE statement ). 
271 | pub from: Option, 272 | /// Only parse, no type checking or compilation. 273 | pub parse_only: bool, 274 | /// List of jumps. 275 | jumps: Vec, 276 | /// Lookup jump label by name. 277 | labels: HashMap<&'a [u8], usize>, 278 | /// Lookup local variable by name. 279 | local_map: HashMap<&'a [u8], usize>, 280 | /// Names of local variables. 281 | locals: Vec<&'a [u8]>, 282 | } 283 | 284 | impl<'a> Block<'a> { 285 | /// Construct a new block. 286 | pub fn new(db: DB) -> Self { 287 | Block { 288 | ilist: Vec::new(), 289 | jumps: Vec::new(), 290 | labels: HashMap::default(), 291 | local_map: HashMap::default(), 292 | locals: Vec::new(), 293 | local_typ: Vec::new(), 294 | break_id: 0, 295 | param_count: 0, 296 | return_type: NONE, 297 | from: None, 298 | db, 299 | parse_only: false, 300 | } 301 | } 302 | 303 | /// Check labels are all defined and patch jump instructions. 304 | pub fn resolve_jumps(&mut self) { 305 | for (k, v) in &self.labels { 306 | if self.jumps[*v] == usize::MAX { 307 | panic!("undefined label: {}", parse::tos(k)); 308 | } 309 | } 310 | for i in &mut self.ilist { 311 | match i { 312 | JumpIfFalse(x, _) | Jump(x) | ForNext(x, _) | ForSortNext(x, _) => { 313 | *x = self.jumps[*x] 314 | } 315 | _ => {} 316 | } 317 | } 318 | } 319 | 320 | /// Add an instruction to the instruction list. 321 | pub fn add(&mut self, s: Instruction) { 322 | if !self.parse_only { 323 | self.ilist.push(s); 324 | } 325 | } 326 | 327 | /// Add a Data Operation (DO) to the instruction list. 328 | pub fn dop(&mut self, dop: DO) { 329 | if !self.parse_only { 330 | self.add(DataOp(Box::new(dop))); 331 | } 332 | } 333 | 334 | /// Check the parameter kinds match the function. 
335 | pub fn check_types(&self, r: &Rc, pkinds: &[DataKind]) { 336 | if pkinds.len() != r.param_count { 337 | panic!("param count mismatch"); 338 | } 339 | for (i, pk) in pkinds.iter().enumerate() { 340 | let ft = data_kind(r.local_typ[i]); 341 | let et = *pk; 342 | if ft != et { 343 | panic!("param type mismatch expected {:?} got {:?}", ft, et); 344 | } 345 | } 346 | } 347 | 348 | // Helper functions for other statements. 349 | 350 | /// Define a local variable ( parameter or declared ). 351 | pub fn def_local(&mut self, name: &'a [u8], dt: DataType) { 352 | let local_id = self.local_typ.len(); 353 | self.local_typ.push(dt); 354 | self.locals.push(name); 355 | if self.local_map.contains_key(name) { 356 | panic!("duplicate variable name"); 357 | } 358 | self.local_map.insert(name, local_id); 359 | } 360 | 361 | /// Get the number of a local variable from a name. 362 | pub fn get_local(&self, name: &[u8]) -> Option<&usize> { 363 | self.local_map.get(name) 364 | } 365 | 366 | /// Get the name of a local variable from a number. 367 | pub fn local_name(&self, num: usize) -> &[u8] { 368 | self.locals[num] 369 | } 370 | 371 | /// Get a local jump id. 372 | pub fn get_jump_id(&mut self) -> usize { 373 | let result = self.jumps.len(); 374 | self.jumps.push(usize::MAX); 375 | result 376 | } 377 | 378 | /// Set instruction location of jump id. 379 | pub fn set_jump(&mut self, jump_id: usize) { 380 | self.jumps[jump_id] = self.ilist.len(); 381 | } 382 | 383 | /// Get a local jump id to current location. 384 | pub fn get_loop_id(&mut self) -> usize { 385 | let result = self.get_jump_id(); 386 | self.set_jump(result); 387 | result 388 | } 389 | 390 | /// Get a number for a local goto label. 
391 | pub fn get_goto_label(&mut self, s: &'a [u8]) -> usize { 392 | if let Some(jump_id) = self.labels.get(s) { 393 | *jump_id 394 | } else { 395 | let jump_id = self.get_jump_id(); 396 | self.labels.insert(s, jump_id); 397 | jump_id 398 | } 399 | } 400 | 401 | /// Set the location for a local goto label. 402 | pub fn set_goto_label(&mut self, s: &'a [u8]) { 403 | if let Some(jump_id) = self.labels.get(s) { 404 | let j = *jump_id; 405 | if self.jumps[j] != usize::MAX { 406 | panic!("label already set"); 407 | } 408 | self.set_jump(j); 409 | } else { 410 | let jump_id = self.get_loop_id(); 411 | self.labels.insert(s, jump_id); 412 | } 413 | } 414 | 415 | /// Get the DataKind of an expression. 416 | pub fn kind(&self, e: &mut Expr) -> DataKind { 417 | compile::c_check(self, e); 418 | data_kind(e.data_type) 419 | } 420 | } 421 | -------------------------------------------------------------------------------- /src/gentrans.rs: -------------------------------------------------------------------------------- 1 | use crate::{panic, Any, Arc, BTreeMap, Data, Rc, Transaction, Value}; 2 | 3 | #[cfg(feature = "serde")] 4 | use serde::{Deserialize, Serialize}; 5 | 6 | /// General Query. 7 | 8 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 9 | #[non_exhaustive] 10 | pub struct GenQuery { 11 | /// The SQL query string. 12 | pub sql: Arc, 13 | /// The path argument for the query. 14 | pub path: String, 15 | /// Query parameters. 16 | pub params: BTreeMap, 17 | /// Query form. 18 | pub form: BTreeMap, 19 | /// Query cookies. 20 | pub cookies: BTreeMap, 21 | /// Query parts ( files ). 22 | pub parts: Vec, 23 | /// Micro-seconds since January 1, 1970 0:00:00 UTC 24 | pub now: i64, 25 | } 26 | 27 | /// General Response. 28 | #[non_exhaustive] 29 | pub struct GenResponse { 30 | /// Error string. 31 | pub err: String, 32 | /// Response status code. 33 | pub status_code: u16, 34 | /// Response headers. 35 | pub headers: Vec<(String, String)>, 36 | /// Response body. 
37 | pub output: Vec, 38 | } 39 | 40 | /// Query + Response, implements Transaction. 41 | #[non_exhaustive] 42 | pub struct GenTransaction { 43 | /// Transaction Query. 44 | pub qy: GenQuery, 45 | /// Transaction Response. 46 | pub rp: GenResponse, 47 | /// Transaction extension data. 48 | pub ext: Box, 49 | } 50 | 51 | /// Part of multipart data ( uploaded files ). 52 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 53 | #[derive(Default)] 54 | #[non_exhaustive] 55 | pub struct Part { 56 | /// Part name. 57 | pub name: String, 58 | /// Part filename. 59 | pub file_name: String, 60 | /// Part contenttype. 61 | pub content_type: String, 62 | /// Text. 63 | pub text: String, 64 | /// Data. 65 | pub data: Data, 66 | } 67 | 68 | impl GenTransaction { 69 | /// Construct. 70 | pub fn new() -> Self { 71 | let now = std::time::SystemTime::now() 72 | .duration_since(std::time::SystemTime::UNIX_EPOCH) 73 | .unwrap(); 74 | let now = now.as_micros() as i64; 75 | let output = Vec::with_capacity(10000); 76 | let headers = Vec::new(); 77 | let status_code = 200; 78 | Self { 79 | qy: GenQuery { 80 | sql: Arc::new("EXEC web.Main()".to_string()), 81 | path: String::new(), 82 | params: BTreeMap::new(), 83 | form: BTreeMap::new(), 84 | cookies: BTreeMap::new(), 85 | parts: Vec::new(), 86 | now, 87 | }, 88 | rp: GenResponse { 89 | err: String::new(), 90 | output, 91 | status_code, 92 | headers, 93 | }, 94 | ext: Box::new(()), 95 | } 96 | } 97 | 98 | /// Append string to output. 
99 | fn push_str(&mut self, s: &str) { 100 | self.rp.output.extend_from_slice(s.as_bytes()); 101 | } 102 | } 103 | 104 | impl Transaction for GenTransaction { 105 | fn arg(&mut self, kind: i64, s: &str) -> Rc { 106 | let s = match kind { 107 | 0 => Some(&self.qy.path), 108 | 1 => self.qy.params.get(s), 109 | 2 => self.qy.form.get(s), 110 | 3 => self.qy.cookies.get(s), 111 | _ => None, 112 | }; 113 | let s = if let Some(s) = s { s } else { "" }; 114 | Rc::new(s.to_string()) 115 | } 116 | 117 | fn status_code(&mut self, code: i64) { 118 | self.rp.status_code = code as u16; 119 | } 120 | 121 | fn header(&mut self, name: &str, value: &str) { 122 | self.rp.headers.push((name.to_string(), value.to_string())); 123 | } 124 | 125 | fn global(&self, kind: i64) -> i64 { 126 | match kind { 127 | 0 => self.qy.now, 128 | _ => panic!(), 129 | } 130 | } 131 | 132 | fn selected(&mut self, values: &[Value]) { 133 | for v in values { 134 | match v { 135 | Value::RcBinary(x) => { 136 | self.rp.output.extend_from_slice(x); 137 | } 138 | Value::ArcBinary(x) => { 139 | self.rp.output.extend_from_slice(x); 140 | } 141 | _ => { 142 | self.push_str(&v.str()); 143 | } 144 | } 145 | } 146 | } 147 | 148 | fn set_error(&mut self, err: String) { 149 | self.rp.err = err; 150 | } 151 | 152 | fn get_error(&mut self) -> String { 153 | let result = self.rp.err.to_string(); 154 | self.rp.err = String::new(); 155 | result 156 | } 157 | 158 | fn file_attr(&mut self, k: i64, x: i64) -> Rc { 159 | let k = k as usize; 160 | let result: &str = { 161 | if k >= self.qy.parts.len() { 162 | "" 163 | } else { 164 | let p = &self.qy.parts[k]; 165 | match x { 166 | 0 => &p.name, 167 | 1 => &p.content_type, 168 | 2 => &p.file_name, 169 | 3 => &p.text, 170 | _ => panic!(), 171 | } 172 | } 173 | }; 174 | Rc::new(result.to_string()) 175 | } 176 | 177 | fn file_content(&mut self, k: i64) -> Data { 178 | self.qy.parts[k as usize].data.clone() 179 | } 180 | 181 | fn set_extension(&mut self, ext: Box) { 182 | self.ext = 
ext; 183 | } 184 | 185 | fn get_extension(&mut self) -> Box { 186 | std::mem::replace(&mut self.ext, Box::new(())) 187 | } 188 | } 189 | 190 | impl Default for GenTransaction { 191 | fn default() -> Self { 192 | Self::new() 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/heap.rs: -------------------------------------------------------------------------------- 1 | use crate::Ordering; 2 | 3 | /// Vector indexed by U. 4 | struct VecU(Vec); 5 | 6 | impl std::ops::Index for VecU 7 | where 8 | usize: TryFrom, 9 | { 10 | type Output = T; 11 | fn index(&self, x: U) -> &Self::Output { 12 | let x = usize::try_from(x).ok().unwrap(); 13 | unsafe_assert!(x < self.0.len()); 14 | &self.0[x] 15 | } 16 | } 17 | 18 | impl std::ops::IndexMut for VecU 19 | where 20 | usize: TryFrom, 21 | { 22 | fn index_mut(&mut self, x: U) -> &mut Self::Output { 23 | let x = usize::try_from(x).ok().unwrap(); 24 | unsafe_assert!(x < self.0.len()); 25 | &mut self.0[x] 26 | } 27 | } 28 | 29 | /// Heap Node. 30 | struct HeapNode { 31 | /// Index of node from heap position. 32 | pub x: U, 33 | /// Heap position of this node. 34 | pub pos: U, 35 | /// Node id. 36 | pub id: T, 37 | /// Node key. 38 | pub key: K, 39 | } 40 | 41 | /// Generic heap with keys that can be modified for tracking least used page. 42 | pub struct GHeap { 43 | /// Number of heap nodes, not including free nodes. 44 | n: U, 45 | /// 1 + Index of start of free list. 46 | free: U, 47 | /// Vector of heap nodes. 
48 | v: VecU>, 49 | } 50 | 51 | impl Default for GHeap 52 | where 53 | U: From, 54 | { 55 | fn default() -> Self { 56 | Self { 57 | n: 0.into(), 58 | free: 0.into(), 59 | v: VecU(Vec::default()), 60 | } 61 | } 62 | } 63 | 64 | impl GHeap 65 | where 66 | K: Ord, 67 | T: Default, 68 | U: Copy 69 | + From 70 | + std::cmp::PartialOrd 71 | + std::ops::AddAssign 72 | + std::ops::Add 73 | + std::ops::Sub 74 | + std::ops::SubAssign 75 | + std::ops::Mul 76 | + std::ops::Div, 77 | usize: TryFrom, 78 | { 79 | /// Current number of heap nodes. 80 | pub fn len(&self) -> U { 81 | self.n 82 | } 83 | 84 | /// Insert id into heap with specified key (usage). Result is index of heap node. 85 | pub fn insert(&mut self, id: T, key: K) -> U { 86 | let pos = self.n; 87 | if pos * 2.into() + 2.into() <= pos { 88 | panic!("GHeap overflow"); 89 | } 90 | self.n += 1.into(); 91 | let x = if self.free == 0.into() { 92 | let x = pos; 93 | self.v.0.push(HeapNode { x, pos, id, key }); 94 | x 95 | } else { 96 | let x = self.free - 1.into(); 97 | self.free = self.v[x].pos; 98 | self.v[pos].x = x; 99 | self.v[x].pos = pos; 100 | self.v[x].id = id; 101 | self.v[x].key = key; 102 | x 103 | }; 104 | self.move_up(pos, x); 105 | x 106 | } 107 | 108 | /// Modify key of specified heap node. 109 | pub fn modify(&mut self, x: U, newkey: K) { 110 | assert!(usize::try_from(x).ok().unwrap() < self.v.0.len()); 111 | let pos = self.v[x].pos; 112 | let cf = newkey.cmp(&self.v[x].key); 113 | self.v[x].key = newkey; 114 | 115 | match cf { 116 | Ordering::Greater => self.move_down(pos, x), 117 | Ordering::Less => self.move_up(pos, x), 118 | Ordering::Equal => (), 119 | } 120 | } 121 | 122 | /// Remove heap node with smallest key, returning the associated id. 123 | /// Note: index of heap node is no longer valid. 124 | pub fn pop(&mut self) -> T { 125 | let zero = 0.into(); 126 | let one = 1.into(); 127 | assert!(self.n > zero); 128 | self.n -= one; 129 | let xmin = self.v[zero].x; // Node with smallest key. 
130 | let xlast = self.v[self.n].x; // Last node in heap. 131 | self.v[xlast].pos = zero; // Make last node first. 132 | self.v[zero].x = xlast; 133 | self.move_down(zero, xlast); 134 | 135 | // De-allocate popped node 136 | self.v[xmin].pos = self.free; 137 | self.free = xmin + one; 138 | 139 | std::mem::take(&mut self.v[xmin].id) 140 | } 141 | 142 | fn move_up(&mut self, mut c: U, cx: U) { 143 | while c > 0.into() { 144 | let p = (c - 1.into()) / 2.into(); 145 | let px = self.v[p].x; 146 | if self.v[cx].key >= self.v[px].key { 147 | return; 148 | } 149 | // Swap parent(p) and child(c). 150 | self.v[p].x = cx; 151 | self.v[cx].pos = p; 152 | self.v[c].x = px; 153 | self.v[px].pos = c; 154 | c = p; 155 | } 156 | } 157 | 158 | fn move_down(&mut self, mut p: U, px: U) { 159 | loop { 160 | let mut c = p * 2.into() + 1.into(); 161 | if c >= self.n { 162 | return; 163 | } 164 | let mut cx = self.v[c].x; 165 | let mut ck = &self.v[cx].key; 166 | let c2 = c + 1.into(); 167 | if c2 < self.n { 168 | let cx2 = self.v[c2].x; 169 | let ck2 = &self.v[cx2].key; 170 | if ck2 < ck { 171 | c = c2; 172 | cx = cx2; 173 | ck = ck2; 174 | } 175 | } 176 | if ck >= &self.v[px].key { 177 | return; 178 | } 179 | // Swap parent(p) and child(c). 
180 | self.v[p].x = cx; 181 | self.v[cx].pos = p; 182 | self.v[c].x = px; 183 | self.v[px].pos = c; 184 | p = c; 185 | } 186 | } 187 | } 188 | 189 | #[test] 190 | pub fn test() { 191 | let mut h = GHeap::::default(); 192 | let _h5 = h.insert(5, 10); 193 | let _h8 = h.insert(8, 1); 194 | let _h13 = h.insert(13, 2); 195 | h.modify(_h8, 15); 196 | assert!(h.pop() == 13); 197 | let _h22 = h.insert(22, 9); 198 | assert!(h.pop() == 22); 199 | assert!(h.pop() == 5); 200 | assert!(h.pop() == 8); 201 | } 202 | 203 | #[test] 204 | pub fn test2() { 205 | use rand::Rng; 206 | let mut rng = rand::thread_rng(); 207 | 208 | let mut h = GHeap::::default(); 209 | let mut pages = crate::HashMap::default(); 210 | 211 | let mut results = Vec::new(); 212 | for _outer in 0..100 { 213 | let start = std::time::Instant::now(); 214 | for _i in 0..10000 { 215 | let r = rng.gen::(); 216 | let pnum = r % 1024; 217 | let action = (r / 1024) % 3; 218 | let usage = (r / 4096) % 1024; 219 | if action == 0 { 220 | let x = h.insert(pnum, usage); 221 | pages.insert(pnum, x); 222 | } else if action == 1 { 223 | if let Some(x) = pages.get(&pnum) { 224 | h.modify(*x, usage); 225 | } 226 | } else if action == 2 && h.n > 0 { 227 | let pnum = h.pop(); 228 | pages.remove(&pnum); 229 | } 230 | } 231 | results.push(start.elapsed().as_micros() as u64); 232 | } 233 | crate::bench::print_results("heap test", results); 234 | } 235 | -------------------------------------------------------------------------------- /src/pstore.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | heap::GHeap, nd, Arc, BTreeMap, Data, HashMap, HashSet, Mutex, PageStorage, PageStorageInfo, 3 | RwLock, SaveOp, Storage, 4 | }; 5 | 6 | type HX = u32; // Typical 8M cache will have 1K x 8KB pages, so 10 bits is typical, 32 should be plenty. 7 | type Heap = GHeap; 8 | 9 | /// ```Arc>``` 10 | pub type PageInfoPtr = Arc>; 11 | 12 | /// Information for a logical page, including historic data. 
13 | pub struct PageInfo { 14 | /// Current data for the page( None implies it is stored in underlying file ). 15 | pub current: Option, 16 | /// Historic data for the page. Has data for page at specified time. 17 | /// A copy is made prior to an update, so get looks forward from access time. 18 | pub history: BTreeMap, 19 | /// How many times has the page been used. 20 | pub usage: u64, 21 | /// Heap index. 22 | pub hx: HX, 23 | } 24 | 25 | impl PageInfo { 26 | fn new() -> PageInfoPtr { 27 | Arc::new(Mutex::new(PageInfo { 28 | current: None, 29 | history: BTreeMap::new(), 30 | usage: 0, 31 | hx: HX::MAX, 32 | })) 33 | } 34 | 35 | /// Increase usage. 36 | fn inc_usage(&mut self, lpnum: u64, ah: &mut Heap) { 37 | self.usage += 1; 38 | if self.hx == HX::MAX { 39 | self.hx = ah.insert(lpnum, self.usage); 40 | } else { 41 | ah.modify(self.hx, self.usage); 42 | } 43 | } 44 | 45 | /// Get the Data for the page, checking history if not a writer. 46 | /// Reads Data from file if necessary. 47 | /// Result is Data and size of loaded data ( cache delta ). 48 | fn get_data(&mut self, lpnum: u64, a: &AccessPagedData) -> (Data, usize) { 49 | if !a.writer { 50 | if let Some((_k, v)) = self.history.range(a.time..).next() { 51 | return (v.clone(), 0); 52 | } 53 | } 54 | 55 | if let Some(p) = &self.current { 56 | return (p.clone(), 0); 57 | } 58 | 59 | // Get data from page storage. 60 | let ps = a.spd.ps.read().unwrap(); 61 | let data = ps.get_page(lpnum); 62 | self.current = Some(data.clone()); 63 | let len = data.len(); 64 | (data, len) 65 | } 66 | 67 | /// Set the page data, updating the history using the specified time and old data. 
68 | /// Result is delta of length (old size, new size) 69 | fn set_data(&mut self, time: u64, old: Data, data: Data, do_history: bool) -> (usize, usize) { 70 | if do_history { 71 | self.history.insert(time, old); 72 | } 73 | let old = if let Some(x) = &self.current { 74 | x.len() 75 | } else { 76 | 0 77 | }; 78 | let new = data.len(); 79 | self.current = if new == 0 { None } else { Some(data) }; 80 | (old, new) 81 | } 82 | 83 | /// Trim the entry for time t if it no longer needs to be retained, returning whether the entry was retained. 84 | /// start is start of range for which no readers exist. 85 | fn trim(&mut self, t: u64, start: u64) -> bool { 86 | let first = self.history_start(t); 87 | if first >= start { 88 | // There is no reader that can read copy for time t, so copy can be removed. 89 | self.history.remove(&t); 90 | false 91 | } else { 92 | true 93 | } 94 | } 95 | 96 | /// Returns the earliest time that would return the page for the specified time. 97 | fn history_start(&self, t: u64) -> u64 { 98 | if let Some((k, _)) = self.history.range(..t).next_back() { 99 | *k + 1 100 | } else { 101 | 0 102 | } 103 | } 104 | } 105 | 106 | /// Central store of data. 107 | #[derive(Default)] 108 | pub struct Stash { 109 | /// Write time - number of writes. 110 | pub time: u64, 111 | /// Page number -> page info. 112 | pub pages: HashMap, 113 | /// Time -> reader count. Number of readers for given time. 114 | pub rdrs: BTreeMap, 115 | /// Time -> set of page numbers. Page copies held for given time. 116 | pub vers: BTreeMap>, 117 | /// Total size of current pages. 118 | pub total: i64, // Use i64 to avoid problems with overflow. 119 | /// trim_cache reduces total to mem_limit (or below). 120 | pub mem_limit: usize, 121 | /// Tracks loaded page with smallest usage. 122 | pub min: Heap, 123 | /// Total number of page accesses. 124 | pub read: u64, 125 | /// Total number of misses ( data was not already loaded ). 
126 | pub miss: u64, 127 | } 128 | 129 | impl Stash { 130 | /// Set the value of the specified page for the current time. 131 | fn set(&mut self, lpnum: u64, old: Data, data: Data) { 132 | let time = self.time; 133 | let u = self.vers.entry(time).or_default(); 134 | let do_history = u.insert(lpnum); 135 | let p = self.get_pinfo(lpnum); 136 | let diff = p.lock().unwrap().set_data(time, old, data, do_history); 137 | self.delta(diff, false, false); 138 | } 139 | 140 | /// Get the PageInfoPtr for the specified page and note the page as used. 141 | fn get_pinfo(&mut self, lpnum: u64) -> PageInfoPtr { 142 | let p = self 143 | .pages 144 | .entry(lpnum) 145 | .or_insert_with(PageInfo::new) 146 | .clone(); 147 | p.lock().unwrap().inc_usage(lpnum, &mut self.min); 148 | self.read += 1; 149 | p 150 | } 151 | 152 | /// Register that there is a client reading the database. The result is the current time. 153 | fn begin_read(&mut self) -> u64 { 154 | let time = self.time; 155 | let n = self.rdrs.entry(time).or_insert(0); 156 | *n += 1; 157 | time 158 | } 159 | 160 | /// Register that the read at the specified time has ended. Stashed pages may be freed. 161 | fn end_read(&mut self, time: u64) { 162 | let n = self.rdrs.get_mut(&time).unwrap(); 163 | *n -= 1; 164 | if *n == 0 { 165 | self.rdrs.remove(&time); 166 | self.trim(time); 167 | } 168 | } 169 | 170 | /// Register that an update operation has completed. Time is incremented. 171 | /// Stashed pages may be freed. Returns number of pages updated. 172 | fn end_write(&mut self) -> usize { 173 | let result = if let Some(u) = self.vers.get(&self.time) { 174 | u.len() 175 | } else { 176 | 0 177 | }; 178 | let t = self.time; 179 | self.time = t + 1; 180 | self.trim(t); 181 | result 182 | } 183 | 184 | /// Trim historic data that is no longer required. 
185 | fn trim(&mut self, time: u64) { 186 | let (s, r) = (self.start(time), self.retain(time)); 187 | if s != r { 188 | let mut empty = Vec::::new(); 189 | for (t, pl) in self.vers.range_mut(s..r) { 190 | pl.retain(|pnum| { 191 | let p = self.pages.get(pnum).unwrap(); 192 | p.lock().unwrap().trim(*t, s) 193 | }); 194 | if pl.is_empty() { 195 | empty.push(*t); 196 | } 197 | } 198 | for t in empty { 199 | self.vers.remove(&t); 200 | } 201 | } 202 | } 203 | 204 | /// Calculate the start of the range of times for which there are no readers. 205 | fn start(&self, time: u64) -> u64 { 206 | if let Some((t, _n)) = self.rdrs.range(..time).next_back() { 207 | 1 + *t 208 | } else { 209 | 0 210 | } 211 | } 212 | 213 | /// Calculate the end of the range of times for which there are no readers. 214 | fn retain(&self, time: u64) -> u64 { 215 | if let Some((t, _n)) = self.rdrs.range(time..).next() { 216 | *t 217 | } else { 218 | self.time 219 | } 220 | } 221 | 222 | /// Adjust total. 223 | fn delta(&mut self, d: (usize, usize), miss: bool, trim: bool) { 224 | if miss { 225 | self.miss += 1; 226 | } 227 | self.total += d.1 as i64 - d.0 as i64; 228 | if trim { 229 | self.trim_cache(); 230 | } 231 | } 232 | 233 | /// Trim cached data to configured limit. 234 | fn trim_cache(&mut self) { 235 | while self.total > self.mem_limit as i64 && self.min.len() > 0 { 236 | let lpnum = self.min.pop(); 237 | let mut p = self.pages.get(&lpnum).unwrap().lock().unwrap(); 238 | p.hx = HX::MAX; 239 | if let Some(data) = &p.current { 240 | self.total -= data.len() as i64; 241 | p.current = None; 242 | } 243 | } 244 | } 245 | 246 | /// Return the number of pages currently cached. 247 | pub fn cached(&self) -> usize { 248 | self.min.len() as usize 249 | } 250 | } 251 | 252 | /// Allows logical database pages to be shared to allow concurrent readers. 253 | pub struct SharedPagedData { 254 | /// Permanent storage of pages. 255 | pub ps: RwLock>, 256 | /// Stash of pages. 
257 | pub stash: Mutex, 258 | /// Info on page sizes. 259 | pub psi: Box, 260 | } 261 | 262 | impl SharedPagedData { 263 | #[cfg(feature = "compact")] 264 | /// Construct default SharedPageData ( default depends on compact feature ). 265 | pub fn new(stg: Box) -> Arc { 266 | const EP_SIZE: usize = 1024; // Size of an extension page. 267 | const EP_MAX: usize = 16; // Maximum number of extension pages. 268 | const SP_SIZE: usize = (EP_MAX + 1) * 8; // =136. Starter page size. 269 | 270 | Self::new_from_ps(Box::new(crate::compact::CompactFile::new( 271 | stg, SP_SIZE, EP_SIZE, 272 | ))) 273 | } 274 | 275 | #[cfg(not(feature = "compact"))] 276 | /// Construct default SharedPageData ( default depends on compact feature ). 277 | pub fn new(stg: Box) -> Arc { 278 | let limits = crate::Limits::default(); 279 | Self::new_from_ps(crate::blockpagestg::BlockPageStg::new(stg, &limits)) 280 | } 281 | 282 | /// Construct SharedPageData based on specified PageStorage ( e.g. BlockPageStg ) 283 | pub fn new_from_ps(ps: Box) -> Arc { 284 | // Set a default stash memory limit of 10 MB. 285 | let stash = Stash { 286 | mem_limit: 10 * 1024 * 1024, 287 | ..Default::default() 288 | }; 289 | let psi = ps.info(); 290 | Arc::new(Self { 291 | stash: Mutex::new(stash), 292 | ps: RwLock::new(ps), 293 | psi, 294 | }) 295 | } 296 | 297 | /// Wait until current commits have been written. 298 | pub fn wait_complete(&self) { 299 | self.ps.read().unwrap().wait_complete(); 300 | } 301 | } 302 | 303 | /// Access to shared paged data. 304 | pub struct AccessPagedData { 305 | writer: bool, 306 | time: u64, 307 | /// Shared Page Data. 308 | pub spd: Arc, 309 | } 310 | 311 | impl AccessPagedData { 312 | /// Construct access to a virtual read-only copy of the database logical pages. 
313 | pub fn new_reader(spd: Arc) -> Self { 314 | let time = spd.stash.lock().unwrap().begin_read(); 315 | AccessPagedData { 316 | writer: false, 317 | time, 318 | spd, 319 | } 320 | } 321 | 322 | /// Construct access to the database logical pages. 323 | pub fn new_writer(spd: Arc) -> Self { 324 | #[cfg(feature = "log")] 325 | { 326 | let psi = &spd.psi; 327 | println!( 328 | "max page size={} half={}", 329 | psi.max_size_page(), 330 | psi.half_size_page() 331 | ); 332 | } 333 | 334 | AccessPagedData { 335 | writer: true, 336 | time: 0, 337 | spd, 338 | } 339 | } 340 | 341 | /// Get locked guard of stash. 342 | pub fn stash(&self) -> std::sync::MutexGuard<'_, Stash> { 343 | self.spd.stash.lock().unwrap() 344 | } 345 | 346 | /// Get the Data for the specified page. 347 | pub fn get_data(&self, lpnum: u64) -> Data { 348 | // Get page info. 349 | let pinfo = self.stash().get_pinfo(lpnum); 350 | 351 | // Read the page data. 352 | let (data, loaded) = pinfo.lock().unwrap().get_data(lpnum, self); 353 | 354 | if loaded > 0 { 355 | self.stash().delta((0, loaded), true, true); 356 | } 357 | data 358 | } 359 | 360 | /// Set the data of the specified page. 361 | pub fn set_data(&self, lpnum: u64, data: Data) { 362 | debug_assert!(self.writer); 363 | 364 | // Get copy of current data. 365 | let pinfo = self.stash().get_pinfo(lpnum); 366 | 367 | // Read the page data. 368 | let (old, loaded) = pinfo.lock().unwrap().get_data(lpnum, self); 369 | 370 | // Update the stash ( ensures any readers will not attempt to read the file ). 371 | { 372 | let s = &mut *self.stash(); 373 | if loaded > 0 { 374 | s.delta((0, loaded), true, false); 375 | } 376 | s.set(lpnum, old, data.clone()); 377 | s.trim_cache(); 378 | } 379 | 380 | // Write data to underlying file. 381 | if data.len() > 0 { 382 | self.spd.ps.write().unwrap().set_page(lpnum, data); 383 | } else { 384 | self.spd.ps.write().unwrap().drop_page(lpnum); 385 | } 386 | } 387 | 388 | /// Allocate a logical page. 
389 | pub fn alloc_page(&self) -> u64 { 390 | debug_assert!(self.writer); 391 | self.spd.ps.write().unwrap().new_page() 392 | } 393 | 394 | /// Free a logical page. 395 | pub fn free_page(&self, lpnum: u64) { 396 | self.set_data(lpnum, nd()); 397 | } 398 | 399 | /// Is the underlying file new (so needs to be initialised ). 400 | pub fn is_new(&self) -> bool { 401 | self.writer && self.spd.ps.read().unwrap().is_new() 402 | } 403 | 404 | /// Check whether compressing a page is worthwhile. 405 | pub fn compress(&self, size: usize, saving: usize) -> bool { 406 | debug_assert!(self.writer); 407 | self.spd.psi.compress(size, saving) 408 | } 409 | 410 | /// Commit changes to underlying file ( or rollback logical page allocations ). 411 | pub fn save(&self, op: SaveOp) -> usize { 412 | debug_assert!(self.writer); 413 | match op { 414 | SaveOp::Save => { 415 | self.spd.ps.write().unwrap().save(); 416 | self.stash().end_write() 417 | } 418 | SaveOp::RollBack => { 419 | // Note: rollback happens before any pages are updated. 420 | // However logical page allocations need to be rolled back. 421 | self.spd.ps.write().unwrap().rollback(); 422 | 0 423 | } 424 | } 425 | } 426 | 427 | /// Renumber a page. 
428 | #[cfg(feature = "renumber")] 429 | pub fn renumber_page(&self, lpnum: u64) -> u64 { 430 | assert!(self.writer); 431 | let data = self.get_data(lpnum); 432 | self.stash().set(lpnum, data.clone(), nd()); 433 | let lpnum2 = self.spd.ps.write().unwrap().renumber(lpnum); 434 | debug_assert!(self 435 | .stash() 436 | .get_pinfo(lpnum2) 437 | .lock() 438 | .unwrap() 439 | .current 440 | .is_none()); 441 | let old2 = self.get_data(lpnum2); 442 | self.stash().set(lpnum2, old2, data); 443 | lpnum2 444 | } 445 | } 446 | 447 | impl Drop for AccessPagedData { 448 | fn drop(&mut self) { 449 | if !self.writer { 450 | self.stash().end_read(self.time); 451 | } 452 | } 453 | } 454 | -------------------------------------------------------------------------------- /src/run.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | panic, Assigns, Block, Cell, ColInfo, DataType, EvalEnv, Expr, IndexInfo, ObjRef, PagePtr, Rc, 3 | RefCell, Table, Value, 4 | }; 5 | 6 | /// Instruction. 7 | #[non_exhaustive] 8 | pub enum Instruction { 9 | /// Push constant. 10 | PushConst(Value), 11 | /// Push expression. 12 | PushValue(CExpPtr), 13 | /// Push local variable. 14 | PushLocal(usize), 15 | /// Assign local variable. 16 | PopToLocal(usize), 17 | /// Jump. 18 | Jump(usize), 19 | /// Jump if false. 20 | JumpIfFalse(usize, CExpPtr), 21 | /// Call 22 | Call(Rc), 23 | /// Return from function. 24 | Return, 25 | /// Throw error. 26 | Throw, 27 | /// Execute string. 28 | Execute, 29 | /// Initialise FOR statement. 30 | ForInit(usize, Box), 31 | /// Next iteration of FOR statement. 32 | ForNext(usize, Box), 33 | /// Initialise FOR statement ( sorted case ). 34 | ForSortInit(usize, Box), 35 | /// Next iteration of FOR statement ( sorted case ). 36 | ForSortNext(usize, Box<(usize, usize, Assigns)>), 37 | /// Data operation. 38 | DataOp(Box), 39 | /// SELECT expression. 40 | Select(Box), 41 | /// Set local variables from table. 
42 | Set(Box), 43 | // Special push instructions ( optimisations ) 44 | /// Push Integer expression. 45 | PushInt(CExpPtr), 46 | /// Push Float expression. 47 | PushFloat(CExpPtr), 48 | /// Push bool expression. 49 | PushBool(CExpPtr), 50 | // More optimisations. 51 | /// Assign a local variable. 52 | AssignLocal(usize, CExpPtr), 53 | /// Append to a local variable. 54 | AppendLocal(usize, CExpPtr), 55 | /// Increment (+=) a local variable. 56 | IncLocal(usize, CExpPtr), 57 | /// Decrement (-=) a local variable. 58 | DecLocal(usize, CExpPtr), 59 | } 60 | 61 | /// Compiled Function. 62 | #[non_exhaustive] 63 | pub struct Function { 64 | /// Number of parameters. 65 | pub param_count: usize, 66 | /// Function return type. 67 | pub return_type: DataType, 68 | /// Types of local parameters/variables. 69 | pub local_typ: Vec, 70 | /// Source SQL. 71 | pub source: Rc, 72 | /// List of instructions. 73 | pub ilist: RefCell>, // Valid when compiled is true. 74 | /// Has function been compiled. 75 | pub compiled: Cell, 76 | } 77 | 78 | /// Compiled expression which yields type T when evaluated. 79 | pub trait CExp { 80 | /// Evaluate the compiled expression. 81 | fn eval(&self, ee: &mut EvalEnv, data: &[u8]) -> T; 82 | } 83 | 84 | /// Pointer to [CExp]. 85 | pub type CExpPtr = Box>; 86 | 87 | /// Function that compiles a builtin function call. 88 | #[derive(Clone, Copy)] 89 | #[non_exhaustive] 90 | pub enum CompileFunc { 91 | /// Value result. 92 | Value(fn(&Block, &mut [Expr]) -> CExpPtr), 93 | /// Int result. 94 | Int(fn(&Block, &mut [Expr]) -> CExpPtr), 95 | /// Float result. 96 | Float(fn(&Block, &mut [Expr]) -> CExpPtr), 97 | } 98 | 99 | /// Iterator that yields references to page data. 100 | pub type DataSource = Box>; 101 | 102 | /// State for FOR loop (non-sorted case). 103 | #[non_exhaustive] 104 | pub struct ForState { 105 | /// Data source. 
106 | pub data_source: DataSource, 107 | } 108 | impl std::fmt::Debug for ForState { 109 | fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result { 110 | Ok(()) 111 | } 112 | } 113 | 114 | /// State for FOR loop (sorted case). 115 | #[non_exhaustive] 116 | pub struct ForSortState { 117 | /// Currrent index into rows. 118 | pub ix: usize, 119 | /// Rows. 120 | pub rows: Vec>, 121 | } 122 | impl std::fmt::Debug for ForSortState { 123 | fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result { 124 | Ok(()) 125 | } 126 | } 127 | 128 | /// Info for ForNext Inst. 129 | #[non_exhaustive] 130 | pub struct ForNextInfo { 131 | /// FOR id. 132 | pub for_id: usize, 133 | /// Assigns. 134 | pub assigns: Assigns, 135 | /// Expressions. 136 | pub exps: Vec>, 137 | /// WHERE expression. 138 | pub wher: Option>, 139 | } 140 | 141 | /// Compiled Table Expression. 142 | #[non_exhaustive] 143 | pub enum CTableExpression { 144 | /// Base table. 145 | Base(Rc), 146 | /// Row identified by Id. 147 | IdGet(Rc
, CExpPtr), 148 | /// Indexed rows. 149 | IxGet(Rc
, Vec>, usize), 150 | /// VALUE expressions. 151 | Values(Vec>>), 152 | } 153 | 154 | impl CTableExpression { 155 | /// Get underlying table. 156 | pub fn table(&self) -> Rc
{ 157 | match self { 158 | CTableExpression::Base(t) => t.clone(), 159 | CTableExpression::IdGet(t, _) => t.clone(), 160 | CTableExpression::IxGet(t, _, _) => t.clone(), 161 | _ => panic!(), 162 | } 163 | } 164 | } 165 | 166 | /// Compiled From Expression. 167 | #[non_exhaustive] 168 | pub struct CFromExpression { 169 | /// Column names. 170 | pub colnames: Vec, 171 | /// Assignments ( left hand side ). 172 | pub assigns: Assigns, 173 | /// Expressions. 174 | pub exps: Vec>, 175 | /// FROM expression. 176 | pub from: Option, 177 | /// WHERE expression. 178 | pub wher: Option>, 179 | /// ORDER BY expressions. 180 | pub orderby: Vec>, 181 | /// DESC bits. 182 | pub desc: Vec, 183 | } 184 | 185 | /// Database Operation 186 | #[non_exhaustive] 187 | pub enum DO { 188 | /// Create Schema. 189 | CreateSchema(String), 190 | /// Create Table. 191 | CreateTable(ColInfo), 192 | /// Create Index. 193 | CreateIndex(IndexInfo), 194 | /// Create Function. 195 | CreateFunction(ObjRef, Rc, bool), 196 | /// Alter Table. 197 | AlterTable(ObjRef, Vec), 198 | /// Drop Schema. 199 | DropSchema(String), 200 | /// Drop Table. 201 | DropTable(ObjRef), 202 | /// Drop Index. 203 | DropIndex(ObjRef, String), 204 | /// Drop Function. 205 | DropFunction(ObjRef), 206 | /// Insert into Table. 207 | Insert(Rc
, Vec, CTableExpression), 208 | /// Update Table rows. 209 | Update( 210 | Vec<(usize, CExpPtr)>, 211 | CTableExpression, 212 | Option>, 213 | ), 214 | /// Delete Table rows. 215 | Delete(CTableExpression, Option>), 216 | } 217 | 218 | /// Actions for altering columns of a table. 219 | #[non_exhaustive] 220 | pub enum AlterCol { 221 | /// Add column. 222 | Add(String, DataType), 223 | /// Drop column. 224 | Drop(String), 225 | /// Modify column. 226 | Modify(String, DataType), 227 | } 228 | -------------------------------------------------------------------------------- /src/stg.rs: -------------------------------------------------------------------------------- 1 | use crate::{Arc, Data, Mutex}; 2 | 3 | /// Interface for database storage. 4 | pub trait Storage: Send + Sync { 5 | /// Get the size of the underlying storage. 6 | /// Note : this is valid initially and after a commit but is not defined after write is called. 7 | fn size(&self) -> u64; 8 | 9 | /// Read data from storage. 10 | fn read(&self, start: u64, data: &mut [u8]); 11 | 12 | /// Write byte slice to storage. 13 | fn write(&mut self, start: u64, data: &[u8]); 14 | 15 | /// Write byte Vec to storage. 16 | fn write_vec(&mut self, start: u64, data: Vec) { 17 | let len = data.len(); 18 | let d = Arc::new(data); 19 | self.write_data(start, d, 0, len); 20 | } 21 | 22 | /// Write Data slice to storage. 23 | fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) { 24 | self.write(start, &data[off..off + len]); 25 | } 26 | 27 | /// Finish write transaction, size is new size of underlying storage. 28 | fn commit(&mut self, size: u64); 29 | 30 | /// Write u64 to storage. 31 | fn write_u64(&mut self, start: u64, value: u64) { 32 | self.write(start, &value.to_le_bytes()); 33 | } 34 | 35 | /// Read u64 from storage. 36 | fn read_u64(&self, start: u64) -> u64 { 37 | let mut bytes = [0; 8]; 38 | self.read(start, &mut bytes); 39 | u64::from_le_bytes(bytes) 40 | } 41 | 42 | /// Clone. 
43 | fn clone(&self) -> Box { 44 | panic!() 45 | } 46 | 47 | /// Wait until current writes are complete. 48 | fn wait_complete(&self) {} 49 | 50 | /// Reset buffer 51 | fn reset(&mut self) { 52 | panic!() 53 | } 54 | } 55 | 56 | /// Interface for page storage. 57 | pub trait PageStorage: Send + Sync { 58 | /// Is the underlying storage new? 59 | fn is_new(&self) -> bool; 60 | /// Information about page sizes. 61 | fn info(&self) -> Box; 62 | /// Make a new page, result is page number. 63 | fn new_page(&mut self) -> u64; 64 | /// Drop page number. 65 | fn drop_page(&mut self, pn: u64); 66 | /// Set contents of page. 67 | fn set_page(&mut self, pn: u64, data: Data); 68 | /// Get contents of page. 69 | fn get_page(&self, pn: u64) -> Data; 70 | /// Get page size (for repacking). 71 | fn size(&self, pn: u64) -> usize; 72 | /// Save pages to underlying storage. 73 | fn save(&mut self); 74 | /// Undo changes since last save ( but set_page/renumber cannot be undone, only new_page and drop_page can be undone ). 75 | fn rollback(&mut self); 76 | /// Wait until save is complete. 77 | fn wait_complete(&self); 78 | #[cfg(feature = "verify")] 79 | /// Get set of free pages and number of pages ever allocated ( for VERIFY builtin function ). 80 | fn get_free(&mut self) -> (crate::HashSet, u64); 81 | #[cfg(feature = "renumber")] 82 | /// Renumber page. 83 | fn renumber(&mut self, pn: u64) -> u64; 84 | #[cfg(feature = "renumber")] 85 | /// Load free pages in preparation for page renumbering. Returns number of used pages or None if there are no free pages. 86 | fn load_free_pages(&mut self) -> Option; 87 | #[cfg(feature = "renumber")] 88 | /// Final part of page renumber operation. 89 | fn set_alloc_pn(&mut self, target: u64); 90 | } 91 | 92 | /// Information about page sizes. 93 | pub trait PageStorageInfo: Send + Sync { 94 | /// Number of different page sizes. 95 | fn sizes(&self) -> usize; 96 | /// Size index for given page size. 
97 | fn index(&self, size: usize) -> usize; 98 | /// Page size for ix ( 1-based ix must be <= sizes() ). 99 | fn size(&self, ix: usize) -> usize; 100 | /// Maximum size page. 101 | fn max_size_page(&self) -> usize { 102 | self.size(self.sizes()) 103 | } 104 | /// Half size page. 105 | fn half_size_page(&self) -> usize { 106 | self.size(self.index(self.max_size_page() / 2 - 50)) 107 | } 108 | /// Is it worth compressing a page of given size by saving. 109 | fn compress(&self, size: usize, saving: usize) -> bool { 110 | self.index(size - saving) < self.index(size) 111 | } 112 | } 113 | 114 | /// Simple implementation of [Storage] using `Vec`. 115 | #[derive(Default)] 116 | pub struct MemFile { 117 | v: Arc>>, 118 | } 119 | 120 | impl MemFile { 121 | /// Get a new (boxed) MemFile. 122 | pub fn new() -> Box { 123 | Box::::default() 124 | } 125 | } 126 | 127 | impl Storage for MemFile { 128 | fn size(&self) -> u64 { 129 | let v = self.v.lock().unwrap(); 130 | v.len() as u64 131 | } 132 | 133 | fn read(&self, off: u64, bytes: &mut [u8]) { 134 | let off = off as usize; 135 | let len = bytes.len(); 136 | let mut v = self.v.lock().unwrap(); 137 | if off + len > v.len() { 138 | v.resize(off + len, 0); 139 | } 140 | bytes.copy_from_slice(&v[off..off + len]); 141 | } 142 | 143 | fn write(&mut self, off: u64, bytes: &[u8]) { 144 | let off = off as usize; 145 | let len = bytes.len(); 146 | let mut v = self.v.lock().unwrap(); 147 | if off + len > v.len() { 148 | v.resize(off + len, 0); 149 | } 150 | v[off..off + len].copy_from_slice(bytes); 151 | } 152 | 153 | fn commit(&mut self, size: u64) { 154 | let mut v = self.v.lock().unwrap(); 155 | v.resize(size as usize, 0); 156 | } 157 | 158 | fn clone(&self) -> Box { 159 | Box::new(Self { v: self.v.clone() }) 160 | } 161 | } 162 | 163 | use std::{fs, fs::OpenOptions, io::Read, io::Seek, io::SeekFrom, io::Write}; 164 | 165 | /// Simple implementation of [Storage] using `std::fs::File`. 
166 | pub struct SimpleFileStorage { 167 | file: Arc>, 168 | } 169 | 170 | impl SimpleFileStorage { 171 | /// Construct from filename. 172 | pub fn new(filename: &str) -> Box { 173 | Box::new(Self { 174 | file: Arc::new(Mutex::new( 175 | OpenOptions::new() 176 | .read(true) 177 | .write(true) 178 | .create(true) 179 | .truncate(false) 180 | .open(filename) 181 | .unwrap(), 182 | )), 183 | }) 184 | } 185 | } 186 | 187 | impl Storage for SimpleFileStorage { 188 | fn size(&self) -> u64 { 189 | let mut f = self.file.lock().unwrap(); 190 | f.seek(SeekFrom::End(0)).unwrap() 191 | } 192 | 193 | fn read(&self, off: u64, bytes: &mut [u8]) { 194 | let mut f = self.file.lock().unwrap(); 195 | f.seek(SeekFrom::Start(off)).unwrap(); 196 | let _ = f.read(bytes).unwrap(); 197 | } 198 | 199 | fn write(&mut self, off: u64, bytes: &[u8]) { 200 | let mut f = self.file.lock().unwrap(); 201 | // The list of operating systems which auto-zero is likely more than this...research is todo. 202 | #[cfg(not(any(target_os = "windows", target_os = "linux")))] 203 | { 204 | let size = f.seek(SeekFrom::End(0)).unwrap(); 205 | if off > size { 206 | f.set_len(off).unwrap(); 207 | /* 208 | let len = (off - size) as usize; 209 | let zb = vec![0; len]; 210 | f.seek(SeekFrom::Start(size)).unwrap(); 211 | let _ = f.write(&zb).unwrap(); 212 | */ 213 | } 214 | } 215 | f.seek(SeekFrom::Start(off)).unwrap(); 216 | let _ = f.write(bytes).unwrap(); 217 | } 218 | 219 | fn commit(&mut self, size: u64) { 220 | let f = self.file.lock().unwrap(); 221 | f.set_len(size).unwrap(); 222 | f.sync_all().unwrap(); 223 | } 224 | 225 | fn clone(&self) -> Box { 226 | Box::new(Self { 227 | file: self.file.clone(), 228 | }) 229 | } 230 | } 231 | 232 | /// Alternative to SimpleFileStorage that uses multiple [SimpleFileStorage]s to allow parallel reads/writes by different threads. 
233 | #[allow(clippy::vec_box)] 234 | pub struct MultiFileStorage { 235 | filename: String, 236 | files: Arc>>>, 237 | } 238 | 239 | impl MultiFileStorage { 240 | /// Create new MultiFileStorage. 241 | pub fn new(filename: &str) -> Box { 242 | Box::new(Self { 243 | filename: filename.to_string(), 244 | files: Arc::new(Mutex::new(Vec::new())), 245 | }) 246 | } 247 | 248 | fn get_file(&self) -> Box { 249 | if let Some(f) = self.files.lock().unwrap().pop() { 250 | f 251 | } else { 252 | SimpleFileStorage::new(&self.filename) 253 | } 254 | } 255 | 256 | fn put_file(&self, f: Box) { 257 | self.files.lock().unwrap().push(f); 258 | } 259 | } 260 | 261 | impl Storage for MultiFileStorage { 262 | fn size(&self) -> u64 { 263 | let f = self.get_file(); 264 | let result = f.size(); 265 | self.put_file(f); 266 | result 267 | } 268 | 269 | fn read(&self, off: u64, bytes: &mut [u8]) { 270 | let f = self.get_file(); 271 | f.read(off, bytes); 272 | self.put_file(f); 273 | } 274 | 275 | fn write(&mut self, off: u64, bytes: &[u8]) { 276 | let mut f = self.get_file(); 277 | f.write(off, bytes); 278 | self.put_file(f); 279 | } 280 | 281 | fn commit(&mut self, size: u64) { 282 | let mut f = self.get_file(); 283 | f.commit(size); 284 | self.put_file(f); 285 | } 286 | 287 | fn clone(&self) -> Box { 288 | Box::new(Self { 289 | filename: self.filename.clone(), 290 | files: self.files.clone(), 291 | }) 292 | } 293 | } 294 | 295 | /// Dummy Stg that can be used for Atomic upd file if "reliable" atomic commits are not required. 296 | pub struct DummyFile {} 297 | impl DummyFile { 298 | /// Construct. 
299 | pub fn new() -> Box { 300 | Box::new(Self {}) 301 | } 302 | } 303 | 304 | impl Storage for DummyFile { 305 | fn size(&self) -> u64 { 306 | 0 307 | } 308 | 309 | fn read(&self, _off: u64, _bytes: &mut [u8]) {} 310 | 311 | fn write(&mut self, _off: u64, _bytes: &[u8]) {} 312 | 313 | fn commit(&mut self, _size: u64) {} 314 | 315 | fn clone(&self) -> Box { 316 | Self::new() 317 | } 318 | } 319 | -------------------------------------------------------------------------------- /src/stgwin.rs: -------------------------------------------------------------------------------- 1 | use crate::stg::Storage; 2 | 3 | use windows::{ 4 | core::Handle, 5 | Win32::Foundation::{CloseHandle, HANDLE}, 6 | Win32::Storage::FileSystem::{ 7 | CreateFileA, // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea 8 | GetFileSizeEx, // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfilesizeex 9 | ReadFile, // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile 10 | SetEndOfFile, 11 | SetFilePointerEx, 12 | WriteFile, // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-writefile 13 | FILE_BEGIN, 14 | FILE_FLAG_OVERLAPPED, 15 | FILE_GENERIC_READ, 16 | FILE_GENERIC_WRITE, 17 | FILE_SHARE_READ, 18 | OPEN_ALWAYS, 19 | }, 20 | Win32::System::Threading::{ 21 | CreateEventA, // See https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-createeventa 22 | WaitForSingleObject, // https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitforsingleobject 23 | WAIT_OBJECT_0, 24 | }, 25 | Win32::System::IO::{/*GetOverlappedResult,*/ OVERLAPPED, OVERLAPPED_0, OVERLAPPED_0_0}, 26 | }; 27 | 28 | // See also https://docs.microsoft.com/en-us/windows/win32/fileio/synchronous-and-asynchronous-i-o 29 | 30 | pub struct WinEvent { 31 | event: HANDLE, 32 | } 33 | impl Drop for WinEvent { 34 | fn drop(&mut self) { 35 | unsafe { 36 | CloseHandle(self.event); 37 | } 38 | } 39 | } 
40 | 41 | pub struct WinFileStorage { 42 | pub file: HANDLE, 43 | } 44 | 45 | impl WinFileStorage { 46 | pub fn new(filename: &str) -> Self { 47 | unsafe { 48 | let file = CreateFileA( 49 | filename, 50 | FILE_GENERIC_READ | FILE_GENERIC_WRITE, 51 | FILE_SHARE_READ, 52 | std::ptr::null_mut(), 53 | OPEN_ALWAYS, 54 | FILE_FLAG_OVERLAPPED, 55 | None, 56 | ); 57 | 58 | if let Err(err) = file.ok() { 59 | panic!("{} Filename= {}", err, filename); 60 | } 61 | Self { file } 62 | } 63 | } 64 | 65 | pub fn start_read(&self, off: u64, buffer: &mut [u8]) -> WinEvent { 66 | unsafe { 67 | let event: HANDLE = CreateEventA(std::ptr::null_mut(), true, false, None); 68 | 69 | let mut overlapped = OVERLAPPED { 70 | Anonymous: OVERLAPPED_0 { 71 | Anonymous: OVERLAPPED_0_0 { 72 | Offset: off as u32, 73 | OffsetHigh: (off >> 32) as u32, 74 | }, 75 | }, 76 | hEvent: event, 77 | Internal: 0, 78 | InternalHigh: 0, 79 | }; 80 | 81 | let blen = buffer.len(); 82 | 83 | let _ok = ReadFile( 84 | self.file, 85 | buffer.as_mut_ptr() as _, 86 | blen as u32, 87 | std::ptr::null_mut(), 88 | &mut overlapped, 89 | ); 90 | 91 | WinEvent { event } 92 | } 93 | } 94 | 95 | pub fn start_write(&mut self, off: u64, buffer: &[u8]) -> WinEvent { 96 | unsafe { 97 | let event: HANDLE = CreateEventA(std::ptr::null_mut(), true, false, None); 98 | 99 | let mut overlapped = OVERLAPPED { 100 | Anonymous: OVERLAPPED_0 { 101 | Anonymous: OVERLAPPED_0_0 { 102 | Offset: off as u32, 103 | OffsetHigh: (off >> 32) as u32, 104 | }, 105 | }, 106 | hEvent: event, 107 | Internal: 0, 108 | InternalHigh: 0, 109 | }; 110 | 111 | let blen = buffer.len(); 112 | 113 | let _ok = WriteFile( 114 | self.file, 115 | buffer.as_ptr() as _, 116 | blen as u32, 117 | std::ptr::null_mut(), 118 | &mut overlapped, 119 | ); 120 | 121 | WinEvent { event } 122 | } 123 | } 124 | 125 | pub fn wait(&self, x: WinEvent) { 126 | unsafe { 127 | let wait_ok = WaitForSingleObject(x.event, u32::MAX); 128 | debug_assert!(wait_ok == WAIT_OBJECT_0); 129 | } 130 
| } 131 | 132 | pub fn truncate(&mut self, size: u64) { 133 | unsafe { 134 | let mut pos = 0; 135 | SetFilePointerEx(self.file, size as i64, &mut pos, FILE_BEGIN); 136 | SetEndOfFile(self.file); 137 | } 138 | } 139 | } 140 | 141 | impl Storage for WinFileStorage { 142 | fn size(&self) -> u64 { 143 | unsafe { 144 | let mut result: i64 = 0; 145 | GetFileSizeEx(self.file, &mut result); 146 | result as u64 147 | } 148 | } 149 | 150 | fn read(&self, off: u64, buffer: &mut [u8]) { 151 | let e = self.start_read(off, buffer); 152 | self.wait(e); 153 | } 154 | 155 | fn write(&mut self, off: u64, buffer: &[u8]) { 156 | let e = self.start_write(off, buffer); 157 | self.wait(e); 158 | } 159 | 160 | fn commit(&mut self, size: u64) { 161 | self.truncate(size); 162 | } 163 | 164 | /// Read multiple ranges. List is (file offset, data offset, data size). 165 | fn read_multiple(&self, list: &[(u64, usize, usize)], data: &mut [u8]) { 166 | let mut events = Vec::new(); 167 | for (addr, off, size) in list { 168 | let data = &mut data[*off..off + *size]; 169 | events.push(self.start_read(*addr, data)); 170 | } 171 | for e in events { 172 | self.wait(e); 173 | } 174 | } 175 | } 176 | 177 | impl Drop for WinFileStorage { 178 | fn drop(&mut self) { 179 | unsafe { 180 | let closed_ok = CloseHandle(self.file); 181 | assert!(closed_ok.as_bool()); 182 | } 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/sys.rs: -------------------------------------------------------------------------------- 1 | use crate::*; 2 | 3 | /// Create a schema in the database by writing to the system Schema table. 
4 | pub fn create_schema(db: &DB, name: &str) { 5 | if let Some(_id) = get_schema(db, name) { 6 | panic!("schema '{}' already exists", name); 7 | } 8 | let t = &db.sys_schema; 9 | let mut row = t.row(); 10 | row.id = t.alloc_id(db); 11 | row.values[0] = Value::String(Rc::new(name.to_string())); 12 | t.insert(db, &mut row); 13 | } 14 | 15 | /// Create a new table in the database by writing to the system Table and Column tables. 16 | pub fn create_table(db: &DB, info: &ColInfo) { 17 | if let Some(_t) = get_table(db, &info.name) { 18 | panic!("table {} already exists", info.name.str()); 19 | } 20 | let tid = { 21 | let schema = &info.name.schema; 22 | if let Some(schema_id) = get_schema(db, schema) { 23 | let root = db.alloc_page(); 24 | let t = &db.sys_table; 25 | let mut row = t.row(); 26 | // Columns are root, schema, name, id_gen 27 | row.id = t.alloc_id(db); 28 | row.values[0] = Value::Int(root as i64); 29 | row.values[1] = Value::Int(schema_id); 30 | row.values[2] = Value::String(Rc::new(info.name.name.clone())); 31 | row.values[3] = Value::Int(1); 32 | t.insert(db, &mut row); 33 | row.id 34 | } else { 35 | panic!("schema not found [{}]", &schema); 36 | } 37 | }; 38 | { 39 | let cnames = &info.colnames; 40 | let t = &db.sys_column; 41 | let mut row = t.row(); 42 | row.values[0] = Value::Int(tid); 43 | for (num, typ) in info.typ.iter().enumerate() { 44 | // Columns are Table, Name, Type 45 | row.id = t.alloc_id(db); 46 | row.values[1] = Value::String(Rc::new(cnames[num].to_string())); 47 | row.values[2] = Value::Int(*typ as i64); 48 | t.insert(db, &mut row); 49 | } 50 | } 51 | } 52 | 53 | /// Create a new table index by writing to the system Index and IndexColumn tables. 
54 | pub fn create_index(db: &DB, info: &IndexInfo) { 55 | if let Some(table) = db.get_table(&info.tname) { 56 | let root = db.alloc_page(); 57 | let index_id = { 58 | let t = &db.sys_index; 59 | let mut row = t.row(); 60 | // Columns are Root, Table, Name 61 | row.id = t.alloc_id(db); 62 | row.values[0] = Value::Int(root as i64); 63 | row.values[1] = Value::Int(table.id); 64 | row.values[2] = Value::String(Rc::new(info.iname.clone())); 65 | t.insert(db, &mut row); 66 | row.id 67 | }; 68 | { 69 | let t = &db.sys_index_col; 70 | let mut row = t.row(); 71 | for cnum in &info.cols { 72 | // Columns are Index, ColIndex 73 | row.id = t.alloc_id(db); 74 | row.values[0] = Value::Int(index_id); 75 | row.values[1] = Value::Int(*cnum as i64); 76 | t.insert(db, &mut row); 77 | } 78 | } 79 | if root > SYS_ROOT_LAST { 80 | table.add_index(root, info.cols.clone(), index_id); 81 | table.init_index(db); 82 | } 83 | } else { 84 | panic!("table not found: {}", &info.tname.str()); 85 | } 86 | } 87 | 88 | /// Create or alter a function in the database by saving the source into the Function system table. 89 | pub fn create_function(db: &DB, name: &ObjRef, source: Rc, alter: bool) { 90 | if let Some(schema_id) = get_schema(db, &name.schema) { 91 | let t = &db.sys_function; 92 | if alter { 93 | // Columns are Schema(0), Name(1), Definition(2). 
94 | let keys = vec![ 95 | Value::Int(schema_id), 96 | Value::String(Rc::new(name.name.to_string())), 97 | ]; 98 | if let Some((pp, off)) = t.ix_get(db, keys, 0) { 99 | let p = &mut *pp.borrow_mut(); 100 | let off = off + t.info.off[2]; 101 | let (val, oldcode) = Value::load(db, BIGSTR, &p.data, off); 102 | if val.str() != source { 103 | db.delcode(oldcode); 104 | let val = Value::String(source); 105 | let newcode = db.encode(&val, data_size(BIGSTR)); 106 | val.save(BIGSTR, &mut p.data, off, newcode); 107 | t.file.set_dirty(p, &pp); 108 | db.function_reset.set(true); 109 | } 110 | return; 111 | } 112 | panic!("function {} not found", &name.str()); 113 | } else { 114 | if get_function_id(db, name).is_some() { 115 | panic!("function already exists"); 116 | } 117 | // Create new function. 118 | let mut row = t.row(); 119 | // Columns are Schema, Name, Definition 120 | row.id = t.alloc_id(db); 121 | row.values[0] = Value::Int(schema_id); 122 | row.values[1] = Value::String(Rc::new(name.name.clone())); 123 | row.values[2] = Value::String(source); 124 | t.insert(db, &mut row); 125 | } 126 | } else { 127 | panic!("schema [{}] not found", &name.schema); 128 | } 129 | } 130 | 131 | /// Get the id of a schema from a name. 132 | pub fn get_schema(db: &DB, sname: &str) -> Option { 133 | if let Some(&id) = db.schemas.borrow().get(sname) { 134 | return Some(id); 135 | } 136 | let t = &db.sys_schema; 137 | let keys = vec![Value::String(Rc::new(sname.to_string()))]; 138 | if let Some((pp, off)) = t.ix_get(db, keys, 0) { 139 | let p = &pp.borrow(); 140 | let a = t.access(p, off); 141 | debug_assert!(a.str(db, 0) == sname); 142 | let id = a.id() as i64; 143 | db.schemas.borrow_mut().insert(sname.to_string(), id); 144 | return Some(id); 145 | } 146 | None 147 | } 148 | 149 | /// Get the id, root, id_gen for specified table. 
150 | fn get_table0(db: &DB, name: &ObjRef) -> Option<(i64, i64, i64)> { 151 | if let Some(schema_id) = get_schema(db, &name.schema) { 152 | let t = &db.sys_table; 153 | // Columns are root, schema, name, id_gen 154 | let keys = vec![ 155 | Value::Int(schema_id), 156 | Value::String(Rc::new(name.name.to_string())), 157 | ]; 158 | if let Some((pp, off)) = t.ix_get(db, keys, 0) { 159 | let p = &pp.borrow(); 160 | let a = t.access(p, off); 161 | return Some((a.id() as i64, a.int(0), a.int(3))); 162 | } 163 | } 164 | None 165 | } 166 | 167 | /// Get information about an index from name. 168 | pub fn get_index(db: &DB, tname: &ObjRef, iname: &str) -> (Rc
, usize, u64) { 169 | if let Some(t) = get_table(db, tname) { 170 | // Loop through indexes. Columns are Root, Table, Name. 171 | let ixt = &db.sys_index; 172 | let key = Value::Int(t.id); 173 | for (ix, (pp, off)) in ixt.scan_key(db, key, 0).enumerate() { 174 | let p = &pp.borrow(); 175 | let a = ixt.access(p, off); 176 | if a.str(db, 2) == iname { 177 | let id = a.id(); 178 | return (t, ix, id); 179 | } 180 | } 181 | panic!("index {} not found", iname); 182 | } else { 183 | panic!("table {} not found", tname.str()); 184 | } 185 | } 186 | 187 | /// Gets table from the database. 188 | pub fn get_table(db: &DB, name: &ObjRef) -> Option> { 189 | if let Some((table_id, root, id_gen)) = get_table0(db, name) { 190 | let mut info = ColInfo::empty(name.clone()); 191 | // Get columns. Columns are Table, Name, Type 192 | let t = &db.sys_column; 193 | let key = Value::Int(table_id); 194 | for (pp, off) in t.scan_key(db, key, 0) { 195 | let p = &pp.borrow(); 196 | let a = t.access(p, off); 197 | debug_assert!(a.int(0) == table_id); 198 | let cname = a.str(db, 1); 199 | let ctype = a.int(2) as DataType; 200 | info.add(cname, ctype); 201 | } 202 | let table = Table::new(table_id, root as u64, id_gen, Rc::new(info)); 203 | // Get indexes. Columns are Root, Table, Name. 
204 | let t = &db.sys_index; 205 | let key = Value::Int(table_id); 206 | for (pp, off) in t.scan_key(db, key, 0) { 207 | let p = &pp.borrow(); 208 | let a = t.access(p, off); 209 | debug_assert!(a.int(1) == table_id); 210 | let index_id = a.id() as i64; 211 | let root = a.int(0) as u64; 212 | let mut cols = Vec::new(); 213 | let t = &db.sys_index_col; 214 | // Columns are Index, ColIndex 215 | let key = Value::Int(index_id); 216 | for (pp, off) in t.scan_key(db, key, 0) { 217 | let p = &pp.borrow(); 218 | let a = t.access(p, off); 219 | debug_assert!(a.int(0) == index_id); 220 | let cnum = a.int(1) as usize; 221 | cols.push(cnum); 222 | } 223 | table.add_index(root, cols, index_id); 224 | } 225 | db.publish_table(table.clone()); 226 | Some(table) 227 | } else { 228 | None 229 | } 230 | } 231 | 232 | /// Get then parse a function from the database. 233 | pub fn get_function(db: &DB, name: &ObjRef) -> Option> { 234 | if let Some(schema_id) = get_schema(db, &name.schema) { 235 | let t = &db.sys_function; 236 | let keys = vec![ 237 | Value::Int(schema_id), 238 | Value::String(Rc::new(name.name.to_string())), 239 | ]; 240 | if let Some((pp, off)) = t.ix_get(db, keys, 0) { 241 | let p = &pp.borrow(); 242 | let a = t.access(p, off); 243 | let source = Rc::new(a.str(db, 2)); 244 | let function = parse_function(db, source); 245 | db.functions 246 | .borrow_mut() 247 | .insert(name.clone(), function.clone()); 248 | return Some(function); 249 | } 250 | } 251 | None 252 | } 253 | 254 | /// Get the id of a function. 
255 | pub fn get_function_id(db: &DB, name: &ObjRef) -> Option { 256 | if let Some(schema_id) = get_schema(db, &name.schema) { 257 | let t = &db.sys_function; 258 | let keys = vec![ 259 | Value::Int(schema_id), 260 | Value::String(Rc::new(name.name.to_string())), 261 | ]; 262 | if let Some((pp, off)) = t.ix_get(db, keys, 0) { 263 | let p = &pp.borrow(); 264 | let a = t.access(p, off); 265 | return Some(a.id() as i64); 266 | } 267 | } 268 | None 269 | } 270 | 271 | /// Parse a function definition. 272 | fn parse_function(db: &DB, source: Rc) -> Rc { 273 | let mut p = Parser::new(&source, db); 274 | p.b.parse_only = true; 275 | p.parse_function(); 276 | Rc::new(Function { 277 | compiled: Cell::new(false), 278 | ilist: RefCell::new(Vec::new()), 279 | local_typ: p.b.local_typ, 280 | return_type: p.b.return_type, 281 | param_count: p.b.param_count, 282 | source, 283 | }) 284 | } 285 | 286 | /// Get the IdGen field for a table. This is only needed to initialise system tables. 287 | pub fn get_id_gen(db: &DB, id: u64) -> i64 { 288 | let t = &db.sys_table; 289 | let (pp, off) = t.id_get(db, id).unwrap(); 290 | let p = &pp.borrow(); 291 | let a = t.access(p, off); 292 | debug_assert!(a.id() == id); 293 | a.int(3) 294 | } 295 | 296 | /// Update IdGen field for a table. 297 | pub fn save_id_gen(db: &DB, id: u64, val: i64) { 298 | let t = &db.sys_table; 299 | let (pp, off) = t.id_get(db, id).unwrap(); 300 | let p = &mut pp.borrow_mut(); 301 | let mut wa = t.write_access(p, off); 302 | debug_assert!(wa.id() == id); 303 | wa.set_int(3, val); 304 | t.file.set_dirty(p, &pp); 305 | } 306 | 307 | /// Update root page for table ( for ALTER TABLE ). 
308 | pub fn set_root(db: &DB, id: i64, new_root: u64) { 309 | let id = id as u64; 310 | let t = &db.sys_table; 311 | let (pp, off) = t.id_get(db, id).unwrap(); 312 | let p = &mut pp.borrow_mut(); 313 | let mut wa = t.write_access(p, off); 314 | debug_assert!(wa.id() == id); 315 | wa.set_int(0, new_root as i64); 316 | t.file.set_dirty(p, &pp); 317 | } 318 | 319 | /// Update root page for index. 320 | #[cfg(feature = "renumber")] 321 | pub fn set_ix_root(db: &DB, id: i64, new_root: u64) { 322 | let id = id as u64; 323 | let t = &db.sys_index; 324 | let (pp, off) = t.id_get(db, id).unwrap(); 325 | let p = &mut pp.borrow_mut(); 326 | let mut wa = t.write_access(p, off); 327 | debug_assert!(wa.id() == id); 328 | wa.set_int(0, new_root as i64); 329 | t.file.set_dirty(p, &pp); 330 | } 331 | -------------------------------------------------------------------------------- /src/test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | pub fn test_amount() -> usize { 3 | str::parse(&std::env::var("TA").unwrap_or("1".to_string())).unwrap() 4 | } 5 | 6 | #[test] 7 | /// Idea of this test is to check database saves and loads ok. 
8 | pub fn save_test() { 9 | use crate::*; 10 | 11 | let mf = MemFile::new(); 12 | 13 | for i in 0..2 { 14 | let mut bmap = BuiltinMap::default(); 15 | standard_builtins(&mut bmap); 16 | let bmap = Arc::new(bmap); 17 | 18 | let af = AtomicFile::new(mf.clone(), MemFile::new()); 19 | let spd = SharedPagedData::new(af); 20 | let wapd = AccessPagedData::new_writer(spd.clone()); 21 | 22 | let db = Database::new(wapd, "CREATE SCHEMA test", bmap.clone()); 23 | 24 | let mut tr = GenTransaction::default(); 25 | 26 | if i == 0 { 27 | let sql = " 28 | CREATE TABLE test.Cust(Name string) GO 29 | INSERT INTO test.Cust(Name) VALUES ('freddy') 30 | "; 31 | db.run(&sql, &mut tr); 32 | assert!(db.changed()); 33 | assert!(db.save() > 0); 34 | spd.wait_complete(); 35 | } else { 36 | let sql = "SELECT Name FROM test.Cust"; 37 | db.run(&sql, &mut tr); 38 | assert_eq!(tr.rp.output, b"freddy"); 39 | } 40 | } 41 | } 42 | 43 | #[test] 44 | pub fn concurrency() { 45 | use crate::*; 46 | 47 | let stg = AtomicFile::new(MemFile::new(), MemFile::new()); 48 | 49 | let mut bmap = BuiltinMap::default(); 50 | standard_builtins(&mut bmap); 51 | let bmap = Arc::new(bmap); 52 | 53 | let spd = SharedPagedData::new(stg); 54 | let wapd = AccessPagedData::new_writer(spd.clone()); 55 | let db = Database::new(wapd, "CREATE SCHEMA test", bmap.clone()); 56 | 57 | let nt = 100; 58 | 59 | // Create nt tables. 60 | for i in 0..nt { 61 | let mut tr = GenTransaction::default(); 62 | let sql = format!( 63 | "CREATE TABLE test.[T{}](N int) GO INSERT INTO test.[T{}](N) VALUES (0)", 64 | i, i 65 | ); 66 | db.run(&sql, &mut tr); 67 | assert!(db.save() > 0); 68 | } 69 | 70 | // Create readers at different update times. 
71 | let mut rapd = Vec::new(); 72 | for i in 0..1000 * test_amount() { 73 | rapd.push((i, AccessPagedData::new_reader(spd.clone()))); 74 | let mut tr = GenTransaction::default(); 75 | let table = i % nt; 76 | let sql = format!("UPDATE test.[T{}] SET N = N + 1 WHERE 1=1", table); 77 | db.run(&sql, &mut tr); 78 | assert!(db.save() > 0); 79 | } 80 | 81 | // Run the readers in random order, checking content of random table. 82 | use rand::Rng; 83 | let mut rng = rand::thread_rng(); 84 | while !rapd.is_empty() { 85 | let r = rng.gen::() % rapd.len(); 86 | let (i, rapd) = rapd.remove(r); 87 | let db = Database::new(rapd, "", bmap.clone()); 88 | let mut tr = GenTransaction::default(); 89 | let table = rng.gen::() % nt; 90 | let sql = format!("SELECT N FROM test.[T{}]", table); 91 | db.run(&sql, &mut tr); 92 | let expect = i / nt + if i % nt > table { 1 } else { 0 }; 93 | assert!(tr.rp.output == format!("{}", expect).as_bytes()); 94 | } 95 | } 96 | 97 | #[test] 98 | pub fn rtest() { 99 | use crate::*; 100 | 101 | const INITSQL : &str = " 102 | 103 | CREATE FN sys.QuoteName( s string ) RETURNS string AS 104 | BEGIN 105 | RETURN '[' | REPLACE( s, ']', ']]' ) | ']' 106 | END 107 | 108 | CREATE FN sys.Dot( schema string, name string ) RETURNS string AS 109 | BEGIN 110 | RETURN sys.QuoteName( schema ) | '.' 
| sys.QuoteName( name ) 111 | END 112 | 113 | CREATE FN sys.TableName( table int ) RETURNS string AS 114 | BEGIN 115 | DECLARE schema int, name string 116 | SET schema = Schema, name = Name FROM sys.Table WHERE Id = table 117 | IF name = '' RETURN '' 118 | SET result = sys.Dot( Name, name ) FROM sys.Schema WHERE Id = schema 119 | END 120 | 121 | CREATE FN sys.DropTable 122 | ( t int ) AS 123 | /* Note: this should not be called directly, instead use DROP TABLE statement */ 124 | BEGIN 125 | /* Delete the rows */ 126 | EXECUTE( 'DELETE FROM ' | sys.TableName(t) | ' WHERE true' ) 127 | 128 | DECLARE id int 129 | /* Delete the Index data */ 130 | FOR id = Id FROM sys.Index WHERE Table = t 131 | BEGIN 132 | DELETE FROM sys.IndexColumn WHERE Index = id 133 | END 134 | DELETE FROM sys.Index WHERE Table = t 135 | /* Delete the column data */ 136 | FOR id = Id FROM sys.Column WHERE Table = t 137 | BEGIN 138 | -- DELETE FROM browse.Column WHERE Id = id 139 | END 140 | /* Delete other data */ 141 | -- DELETE FROM browse.Table WHERE Id = t 142 | DELETE FROM sys.Column WHERE Table = t 143 | DELETE FROM sys.Table WHERE Id = t 144 | END 145 | 146 | CREATE FN sys.ClearTable 147 | (t int) AS 148 | BEGIN 149 | EXECUTE( 'DELETE FROM ' | sys.TableName(t) | ' WHERE true' ) 150 | END 151 | 152 | CREATE SCHEMA rtest 153 | GO 154 | CREATE TABLE rtest.Gen(x int) 155 | GO 156 | INSERT INTO rtest.Gen(x) VALUES(1) 157 | GO 158 | CREATE SCHEMA rtestdata 159 | GO 160 | 161 | CREATE FN rtest.repeat( s string, n int ) RETURNS string AS 162 | BEGIN 163 | WHILE n > 0 164 | BEGIN 165 | SET result |= s 166 | SET n -= 1 167 | END 168 | END 169 | 170 | CREATE FN rtest.OneTest() AS 171 | BEGIN 172 | DECLARE rtestdata int 173 | SET rtestdata = Id FROM sys.Schema WHERE Name = 'rtestdata' 174 | 175 | DECLARE r int 176 | SET r = x FROM rtest.Gen 177 | SET r = r * 48271 % 2147483647 178 | 179 | DECLARE sql string, a int 180 | SET a = r % 2 181 | 182 | DECLARE tname string 183 | SET tname = 't' | ( r / 100 ) 
% 7 184 | 185 | DECLARE exists string 186 | SET exists = '' 187 | SET exists = Name FROM sys.Table WHERE Schema = rtestdata AND Name = tname 188 | 189 | SET sql = CASE 190 | WHEN r % 20 = 0 THEN 'SELECT VERIFYDB()' 191 | WHEN r % 20 = 19 THEN 'SELECT REPACKFILE(-4,'''','''')' 192 | WHEN r % 20 = 18 THEN 'SELECT REPACKFILE(-3,'''','''')' 193 | WHEN r % 20 = 17 THEN 'SELECT RENUMBER()' 194 | WHEN exists = '' THEN 195 | CASE WHEN r % 2 =1 THEN 'CREATE TABLE rtestdata.[' | tname | '](x string, y int(5))' 196 | ELSE 'CREATE TABLE rtestdata.[' | tname | '](x string, y int(3), z string )' 197 | END 198 | WHEN r % 5 = 0 THEN 'ALTER TABLE rtestdata.[' | tname | '] ADD [z' | r | '] binary' 199 | WHEN r % 21 = 1 THEN 'DROP TABLE rtestdata.[' | tname | ']' 200 | WHEN r % 2 = 1 THEN 'INSERT INTO rtestdata.[' | tname | '](x,y) VALUES ( rtest.repeat(''George Gordon Fairbrother Barwood'','|(r % 1000)|'),' | (r % 10) | ')' 201 | ELSE 'DELETE FROM rtestdata.[' | tname | '] WHERE y = ' | ( r%15) 202 | END 203 | SELECT ' sql=' | sql 204 | EXECUTE( sql ) 205 | UPDATE rtest.Gen SET x = r WHERE true 206 | END 207 | GO 208 | "; 209 | 210 | let stg = AtomicFile::new(MemFile::new(), MemFile::new()); 211 | 212 | let mut bmap = BuiltinMap::default(); 213 | standard_builtins(&mut bmap); 214 | let bmap = Arc::new(bmap); 215 | 216 | let spd = SharedPagedData::new(stg); 217 | let wapd = AccessPagedData::new_writer(spd.clone()); 218 | let db = Database::new(wapd, INITSQL, bmap.clone()); 219 | 220 | // To check things work with low mem_limit. 
221 | { 222 | // let mut s = spd.stash.lock().unwrap(); 223 | // s.mem_limit = 1; 224 | } 225 | 226 | for _i in 0..1000 * test_amount() { 227 | let mut tr = GenTransaction::default(); 228 | let sql = "EXEC rtest.OneTest()"; 229 | db.run(&sql, &mut tr); 230 | db.save(); 231 | let s = std::str::from_utf8(&tr.rp.output).unwrap(); 232 | if s.len() > 0 { 233 | // println!("output={}", s); 234 | } 235 | assert_eq!(tr.get_error(), ""); 236 | } 237 | // assert!(false); 238 | } 239 | 240 | #[test] 241 | pub fn rollback() { 242 | use crate::*; 243 | 244 | let stg = AtomicFile::new(MemFile::new(), MemFile::new()); 245 | 246 | let mut bmap = BuiltinMap::default(); 247 | standard_builtins(&mut bmap); 248 | let bmap = Arc::new(bmap); 249 | 250 | let spd = SharedPagedData::new(stg); 251 | 252 | let wapd = AccessPagedData::new_writer(spd.clone()); 253 | let db = Database::new(wapd, "", bmap.clone()); 254 | 255 | let mut tr = GenTransaction::default(); 256 | let sql = " 257 | CREATE TABLE sys.test(x int) 258 | DECLARE sql string SET sql = 'SELECT PARSEINT(''x'')' 259 | EXECUTE(sql) 260 | "; 261 | db.run(&sql, &mut tr); 262 | } 263 | 264 | #[test] 265 | pub fn insert_delete() { 266 | use crate::*; 267 | 268 | let stg = AtomicFile::new(MemFile::new(), MemFile::new()); 269 | 270 | let mut bmap = BuiltinMap::default(); 271 | standard_builtins(&mut bmap); 272 | let bmap = Arc::new(bmap); 273 | 274 | let spd = SharedPagedData::new(stg); 275 | let wapd = AccessPagedData::new_writer(spd.clone()); 276 | let db = Database::new(wapd, "", bmap.clone()); 277 | 278 | let mut tr = GenTransaction::default(); 279 | 280 | let sql = format!( 281 | " 282 | CREATE TABLE sys.test(x int,name string) 283 | GO 284 | DECLARE @i int 285 | WHILE @i < {} 286 | BEGIN 287 | INSERT INTO sys.test(x,name) VALUES(@i,'Hello World') 288 | SET @i += 1 289 | END 290 | DELETE FROM sys.test WHERE Id % 3 = 1 291 | DELETE FROM sys.test WHERE Id % 3 = 2 292 | DELETE FROM sys.test WHERE true 293 | ", 294 | test_amount() * 
100000 295 | ); 296 | db.run(&sql, &mut tr); 297 | db.save(); 298 | assert_eq!(tr.get_error(), ""); 299 | } 300 | 301 | //$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 302 | 303 | #[test] 304 | fn test_date_calc() { 305 | use crate::*; 306 | const INITSQL: &str = " 307 | CREATE SCHEMA [date] 308 | CREATE FN [date].[YearDayToYearMonthDay]( yd int ) RETURNS int AS 309 | BEGIN 310 | DECLARE y int, d int, leap bool, fdm int, m int, dim int 311 | SET y = yd / 512 312 | SET d = yd % 512 - 1 313 | SET leap = date.IsLeapYear( y ) 314 | -- Jan = 0..30, Feb = 0..27 or 0..28 315 | IF NOT leap AND d >= 59 SET d = d + 1 316 | SET fdm = CASE 317 | WHEN d < 31 THEN 0 -- Jan 318 | WHEN d < 60 THEN 31 -- Feb 319 | WHEN d < 91 THEN 60 -- Mar 320 | WHEN d < 121 THEN 91 -- Apr 321 | WHEN d < 152 THEN 121 -- May 322 | WHEN d < 182 THEN 152 -- Jun 323 | WHEN d < 213 THEN 182 -- Jul 324 | WHEN d < 244 THEN 213 -- Aug 325 | WHEN d < 274 THEN 244 -- Sep 326 | WHEN d < 305 THEN 274 -- Oct 327 | WHEN d < 335 THEN 305 -- Nov 328 | ELSE 335 -- Dec 329 | END 330 | SET dim = d - fdm 331 | SET m = ( d - dim + 28 ) / 31 332 | RETURN date.YearMonthDay( y, m+1, dim+1 ) 333 | END 334 | 335 | CREATE FN [date].[DaysToYearDay]( days int ) RETURNS int AS 336 | BEGIN 337 | -- Given a date represented by the number of days since 1 Jan 0000 338 | -- calculate a date in Year/Day representation stored as 339 | -- year * 512 + day where day is 1..366, the day in the year. 340 | 341 | DECLARE year int, day int, cycle int 342 | -- 146097 is the number of the days in a 400 year cycle ( 400 * 365 + 97 leap years ) 343 | SET cycle = days / 146097 344 | SET days = days - 146097 * cycle -- Same as days % 146097 345 | SET year = days / 365 346 | SET day = days - year * 365 -- Same as days % 365 347 | -- Need to adjust day to allow for leap years. 348 | -- Leap years are 0, 4, 8, 12 ... 96, not 100, 104 ... not 200... not 300, 400, 404 ... not 500. 
349 | -- Adjustment as function of y is 0 => 0, 1 => 1, 2 =>1, 3 => 1, 4 => 1, 5 => 2 .. 350 | SET day = day - ( year + 3 ) / 4 + ( year + 99 ) / 100 - ( year + 399 ) / 400 351 | 352 | IF day < 0 353 | BEGIN 354 | SET year -= 1 355 | SET day += CASE WHEN date.IsLeapYear( year ) THEN 366 ELSE 365 END 356 | END 357 | RETURN 512 * ( cycle * 400 + year ) + day + 1 358 | END 359 | 360 | CREATE FN [date].[YearMonthDay]( year int, month int, day int ) RETURNS int AS 361 | BEGIN 362 | RETURN year * 512 + month * 32 + day 363 | END 364 | 365 | CREATE FN [date].[IsLeapYear]( y int ) RETURNS bool AS 366 | BEGIN 367 | RETURN y % 4 = 0 AND ( y % 100 != 0 OR y % 400 = 0 ) 368 | END 369 | 370 | CREATE FN [date].[DaysToYearMonthDay]( days int ) RETURNS int AS 371 | BEGIN 372 | RETURN date.YearDayToYearMonthDay( date.DaysToYearDay( days ) ) 373 | END 374 | 375 | CREATE FN [date].[test]() AS 376 | BEGIN 377 | DECLARE days int, ymd int 378 | WHILE days < 1000 379 | BEGIN 380 | SET days += 1 381 | SET ymd = date.DaysToYearMonthDay(days) 382 | END 383 | END 384 | "; 385 | 386 | let stg = AtomicFile::new(MemFile::new(), MemFile::new()); 387 | 388 | let mut bmap = BuiltinMap::default(); 389 | standard_builtins(&mut bmap); 390 | let bmap = Arc::new(bmap); 391 | 392 | let spd = SharedPagedData::new(stg); 393 | let wapd = AccessPagedData::new_writer(spd.clone()); 394 | let db = Database::new(wapd, INITSQL, bmap.clone()); 395 | 396 | // To check things work with low mem_limit. 
397 | { 398 | // let mut s = spd.stash.lock().unwrap(); 399 | // s.mem_limit = 1; 400 | } 401 | 402 | let mut results = Vec::new(); 403 | for _i in 0..100 { 404 | let start = std::time::Instant::now(); 405 | let mut tr = GenTransaction::default(); 406 | let sql = "EXEC date.test()"; 407 | db.run(&sql, &mut tr); 408 | results.push(start.elapsed().as_micros() as u64); 409 | assert_eq!(tr.get_error(), ""); 410 | } 411 | crate::bench::print_results("date calc test", results); 412 | } 413 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use crate::{BTreeSet, Data, HashMap, Rc, RefCell}; 2 | 3 | /// In debug mode or feature unsafe-optim not enabled, same as debug_assert! otherwise unsafe compiler hint. 4 | #[cfg(any(debug_assertions, not(feature = "unsafe-optim")))] 5 | macro_rules! unsafe_assert { 6 | ( $cond: expr ) => { 7 | debug_assert!($cond) 8 | }; 9 | } 10 | 11 | /// In debug mode or feature unsafe-optim not enabled, same as debug_assert! otherwise unsafe compiler hint. 12 | #[cfg(all(not(debug_assertions), feature = "unsafe-optim"))] 13 | macro_rules! unsafe_assert { 14 | ( $cond: expr ) => { 15 | if !$cond { 16 | unsafe { std::hint::unreachable_unchecked() } 17 | } 18 | }; 19 | } 20 | 21 | /// In debug mode or feature unsafe-optim not enabled, same as panic! otherwise unsafe compiler hint. 22 | #[cfg(any(debug_assertions, not(feature = "unsafe-optim")))] 23 | macro_rules! unsafe_panic { 24 | () => { 25 | panic!() 26 | }; 27 | } 28 | 29 | /// In debug mode or feature unsafe-optim not enabled, same as debug_assert! otherwise unsafe compiler hint. 30 | #[cfg(all(not(debug_assertions), feature = "unsafe-optim"))] 31 | macro_rules! unsafe_panic { 32 | () => {{ 33 | unsafe { std::hint::unreachable_unchecked() } 34 | }}; 35 | } 36 | 37 | /// Wrap a type in Rc + RefCell. 
38 | pub fn new(x: T) -> std::rc::Rc> { 39 | Rc::new(RefCell::new(x)) 40 | } 41 | 42 | /// New Data ( `Arc::new(Vec::new())` ). 43 | pub fn nd() -> Data { 44 | Data::default() 45 | } 46 | 47 | /// Construct a new map wrapped in a RefCell. 48 | pub fn newmap() -> RefCell> { 49 | RefCell::new(HashMap::default()) 50 | } 51 | 52 | /// Extract u64 from byte data. 53 | pub fn getu64(data: &[u8], off: usize) -> u64 { 54 | unsafe_assert!(off + 8 <= data.len()); 55 | let data = &data[off..off + 8]; 56 | u64::from_le_bytes(data.try_into().unwrap()) 57 | } 58 | 59 | /// Store u64 to byte data. 60 | pub fn setu64(data: &mut [u8], val: u64) { 61 | unsafe_assert!(data.len() >= 8); 62 | data[0..8].copy_from_slice(&val.to_le_bytes()); 63 | } 64 | 65 | /// Extract f64 from byte data. 66 | pub fn getf64(data: &[u8], off: usize) -> f64 { 67 | let data = &data[off..off + 8]; 68 | f64::from_le_bytes(data.try_into().unwrap()) 69 | } 70 | 71 | /// Extract f32 from byte data. 72 | pub fn getf32(data: &[u8], off: usize) -> f32 { 73 | let data = &data[off..off + 4]; 74 | f32::from_le_bytes(data.try_into().unwrap()) 75 | } 76 | 77 | /// Extract unsigned value of n bytes from data. 78 | pub fn get(data: &[u8], off: usize, n: usize) -> u64 { 79 | let mut buf = [0_u8; 8]; 80 | unsafe_assert!(off + n <= data.len()); 81 | buf[0..n].copy_from_slice(&data[off..off + n]); 82 | u64::from_le_bytes(buf) 83 | } 84 | 85 | /// Extract signed value of n bytes from data. 86 | pub fn iget(data: &[u8], off: usize, n: usize) -> i64 { 87 | let mut x: u64 = get(data, off, n); 88 | if n < 8 { 89 | let sign_bit = 1 << (n * 8 - 1); 90 | if (sign_bit & x) != 0 { 91 | x += u64::MAX << (n * 8); 92 | } 93 | } 94 | x as i64 95 | } 96 | 97 | /// Store signed value of n bytes to data ( with overflow check ). 
98 | pub fn iset(data: &mut [u8], off: usize, val: i64, n: usize) { 99 | if n < 8 { 100 | let chk = val + (1 << ((n * 8) - 1)); 101 | if chk < 0 || chk >= (1 << (n * 8)) { 102 | panic!("overflow storing value {} in {} bytes", val, n); 103 | } 104 | } 105 | let bytes = val.to_le_bytes(); 106 | data[off..off + n].copy_from_slice(&bytes[0..n]); 107 | } 108 | 109 | /// Store unsigned value of n bytes to data. 110 | pub fn set(data: &mut [u8], off: usize, val: u64, n: usize) { 111 | let bytes = val.to_le_bytes(); 112 | data[off..off + n].copy_from_slice(&bytes[0..n]); 113 | } 114 | 115 | // Bitfield macros 116 | 117 | /// The mask to extract $len bits at bit offset $off. 118 | macro_rules! bitmask { 119 | ( $off: expr, $len: expr ) => { 120 | ((1 << $len) - 1) << $off 121 | }; 122 | } 123 | 124 | /// Extract $len bits from $val at bit offset $off. 125 | macro_rules! getbits { 126 | ( $val: expr, $off: expr, $len: expr ) => { 127 | ($val & bitmask!($off, $len)) >> $off 128 | }; 129 | } 130 | 131 | /// Update $len bits in $var at bit offset $off to $val. 132 | macro_rules! setbits { 133 | ( $var: expr, $off: expr, $len: expr, $val: expr ) => { 134 | $var = ($var & !bitmask!($off, $len)) | (($val << $off) & bitmask!($off, $len)) 135 | }; 136 | } 137 | 138 | /// Convert a hex char byte to a byte in range 0..15. 139 | pub fn hex(c: u8) -> u8 // 140 | { 141 | match c { 142 | b'0'..=b'9' => c - b'0', 143 | b'A'..=b'F' => c + 10 - b'A', 144 | b'a'..=b'f' => c + 10 - b'a', 145 | _ => { 146 | panic!() 147 | } 148 | } 149 | } 150 | 151 | /// Convert hex literal to bytes. 152 | pub fn parse_hex(s: &[u8]) -> Vec { 153 | let n = s.len() / 2; 154 | let mut result = Vec::::with_capacity(n); 155 | for i in 0..n { 156 | result.push(hex(s[i * 2]) * 16 + hex(s[i * 2 + 1])); 157 | } 158 | result 159 | } 160 | 161 | /// Convert bytes to hex string. 
162 | pub fn to_hex(bytes: &[u8]) -> String { 163 | const HEX: &[u8; 16] = b"0123456789abcdef"; 164 | let mut s = vec![b'0', b'x']; 165 | for b in bytes { 166 | let b = *b as usize; 167 | s.push(HEX[b / 16]); 168 | s.push(HEX[b % 16]); 169 | } 170 | String::from_utf8(s).unwrap() 171 | } 172 | 173 | /// Set of usize, optimised for elements < 64. default() is empty set. 174 | #[derive(Default)] 175 | pub struct SmallSet { 176 | /// Holds elements < 64 as a bitmap. 177 | bitset: u64, 178 | /// Holds elements >= 64. 179 | overflow: BTreeSet, 180 | } 181 | 182 | impl SmallSet { 183 | /* 184 | /// The set is empty. 185 | pub fn is_empty(&self) -> bool { 186 | self.bitset == 0 && self.overflow.len() == 0 187 | } 188 | */ 189 | 190 | /// Insert x into set. 191 | pub fn insert(&mut self, x: usize) { 192 | if x < 64 { 193 | self.bitset |= 1 << x; 194 | } else { 195 | self.overflow.insert(x); 196 | } 197 | } 198 | 199 | /// Test whether set contains x. 200 | pub fn contains(&self, x: usize) -> bool { 201 | if x < 64 { 202 | self.bitset & (1 << x) != 0 203 | } else { 204 | self.overflow.contains(&x) 205 | } 206 | } 207 | 208 | /// Remove x from set, result is whether set contained x. 209 | pub fn remove(&mut self, x: usize) -> bool { 210 | if x < 64 { 211 | let bit: u64 = 1 << x; 212 | let result = self.bitset & bit != 0; 213 | self.bitset &= u64::MAX - bit; 214 | result 215 | } else { 216 | self.overflow.remove(&x) 217 | } 218 | } 219 | } 220 | 221 | /// Function to compare bytes. Length is taken from a. Calls d for each range that is different. 222 | /// Interior equal ranges less than min_eq are taken as different. 
223 | pub fn _diff(a: &[u8], b: &[u8], min_eq: usize, mut d: F) 224 | where 225 | F: FnMut(usize, usize), 226 | { 227 | let mut check = 0; 228 | let mut i = 0; 229 | let n = a.len(); 230 | while i < n && a[i] == b[i] { 231 | i += 1; 232 | } 233 | while i < n { 234 | let start = i; 235 | let mut end; 236 | loop { 237 | loop { 238 | i += 1; 239 | if i == n || a[i] == b[i] { 240 | break; 241 | } 242 | } 243 | end = i; 244 | // Check that following equal range is at least min_eq. 245 | while i < n && a[i] == b[i] { 246 | i += 1; 247 | } 248 | if i - end >= min_eq || i == n { 249 | break; 250 | } 251 | } 252 | assert_eq!(a[check..start], b[check..start]); 253 | check = end; 254 | d(start, end - start); 255 | } 256 | assert_eq!(a[check..n], b[check..n]); 257 | } 258 | 259 | #[test] 260 | fn difftest() { 261 | use rand::Rng; 262 | let mut rng = rand::thread_rng(); 263 | for _ in 0..1000 { 264 | let mut v = Vec::new(); 265 | for _i in 0..100 { 266 | v.push(0); 267 | } 268 | let mut v2 = v.clone(); 269 | for _ in 0..rng.gen::() % 50 { 270 | v2[rng.gen::() % 100] = 1; 271 | } 272 | let mut x = 0; 273 | _diff(&v, &v2, 2, |off, len| { 274 | //println!("off={off} len={len}"); 275 | assert_eq!(v[x..off], v2[x..off]); 276 | x = off + len; 277 | }); 278 | assert_eq!(v[x..], v2[x..]); 279 | //println!("Done a test"); 280 | } 281 | } 282 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | use crate::*; 2 | 3 | #[derive(Clone, Copy)] 4 | /// Code for variable length values. 5 | pub struct Code { 6 | /// ByteStorage Id. 7 | pub id: u64, 8 | /// Fragment type. 9 | pub ft: usize, 10 | } 11 | 12 | #[derive(Clone)] 13 | /// Simple value ( Binary, String, Int, Float, Bool ). 14 | /// 15 | /// When stored in a database record, binary(n) and string(n) values are allocated (n+1) bytes (8<=n<=249). 
16 | /// If the value is more than n bytes, the first (n-8) bytes are stored inline, and the rest are coded. 17 | #[non_exhaustive] 18 | pub enum Value { 19 | /// No value. 20 | None, 21 | /// Binary. 22 | RcBinary(Rc>), 23 | /// Arc Binary. 24 | ArcBinary(Arc>), 25 | /// String. 26 | String(Rc), 27 | /// Integer. 28 | Int(i64), 29 | /// Float. 30 | Float(f64), 31 | /// Bool. 32 | Bool(bool), 33 | /// For expression. 34 | For(Rc>), 35 | /// For expression ( sorted case ). 36 | ForSort(Rc>), 37 | } 38 | 39 | impl Value { 40 | /// Get the default Value for a DataType. 41 | pub fn default(t: DataType) -> Value { 42 | match data_kind(t) { 43 | DataKind::Bool => Value::Bool(false), 44 | DataKind::Float => Value::Float(0.0), 45 | DataKind::String => Value::String(Rc::new(String::new())), 46 | DataKind::Binary => Value::RcBinary(Rc::new(Vec::new())), 47 | _ => Value::Int(0), 48 | } 49 | } 50 | 51 | /// Get a Value from byte data. 52 | pub fn load(db: &DB, typ: DataType, data: &[u8], off: usize) -> (Value, Code) { 53 | let mut code = Code { 54 | id: u64::MAX, 55 | ft: 0, 56 | }; 57 | let size = data_size(typ); 58 | let val = match data_kind(typ) { 59 | DataKind::Binary => { 60 | let (bytes, u) = get_bytes(db, &data[off..], size); 61 | code = u; 62 | Value::RcBinary(Rc::new(bytes)) 63 | } 64 | DataKind::String => { 65 | let (bytes, u) = get_bytes(db, &data[off..], size); 66 | code = u; 67 | let str = String::from_utf8(bytes).unwrap(); 68 | Value::String(Rc::new(str)) 69 | } 70 | DataKind::Bool => Value::Bool(data[off] != 0), 71 | DataKind::Float => { 72 | let f = if size == 4 { 73 | util::getf32(data, off) as f64 74 | } else { 75 | util::getf64(data, off) 76 | }; 77 | Value::Float(f) 78 | } 79 | _ => Value::Int(util::iget(data, off, size) as i64), 80 | }; 81 | (val, code) 82 | } 83 | 84 | /// Save a Value to byte data. 
85 | pub fn save(&self, typ: DataType, data: &mut [u8], off: usize, code: Code) { 86 | let size = data_size(typ); 87 | match self { 88 | Value::Bool(x) => { 89 | data[off] = if *x { 1 } else { 0 }; 90 | } 91 | Value::Int(x) => util::iset(data, off, *x, size), 92 | Value::Float(x) => { 93 | if size == 8 { 94 | let bytes = (*x).to_le_bytes(); 95 | data[off..off + 8].copy_from_slice(&bytes); 96 | } else { 97 | debug_assert!(size == 4); 98 | let val = *x as f32; 99 | let bytes = val.to_le_bytes(); 100 | data[off..off + 4].copy_from_slice(&bytes); 101 | } 102 | } 103 | Value::String(s) => { 104 | save_bytes(s.as_bytes(), &mut data[off..], code, size); 105 | } 106 | Value::RcBinary(b) => { 107 | save_bytes(b, &mut data[off..], code, size); 108 | } 109 | Value::ArcBinary(b) => { 110 | save_bytes(b, &mut data[off..], code, size); 111 | } 112 | _ => {} 113 | } 114 | } 115 | 116 | /// Convert a Value to a String. 117 | pub fn str(&self) -> Rc { 118 | match self { 119 | Value::String(s) => s.clone(), 120 | Value::Int(x) => Rc::new(x.to_string()), 121 | Value::Bool(x) => Rc::new(x.to_string()), 122 | Value::Float(x) => Rc::new(x.to_string()), 123 | Value::RcBinary(x) => Rc::new(util::to_hex(x)), 124 | Value::ArcBinary(x) => Rc::new(util::to_hex(x)), 125 | _ => panic!("str not implemented"), 126 | } 127 | } 128 | 129 | /// Get integer value. 130 | pub fn int(&self) -> i64 { 131 | match self { 132 | Value::Int(x) => *x, 133 | _ => panic!(), 134 | } 135 | } 136 | 137 | /// Get float value. 138 | pub fn float(&self) -> f64 { 139 | match self { 140 | Value::Float(x) => *x, 141 | _ => panic!(), 142 | } 143 | } 144 | 145 | /// Append to a String. 
146 | pub fn append(&mut self, val: &Value) { 147 | if let Value::String(s) = self { 148 | let val = val.str(); 149 | if let Some(ms) = Rc::get_mut(s) { 150 | ms.push_str(&val); 151 | } else { 152 | let mut ns = String::with_capacity(s.len() + val.len()); 153 | ns.push_str(s); 154 | ns.push_str(&val); 155 | *self = Value::String(Rc::new(ns)); 156 | } 157 | } else { 158 | panic!() 159 | } 160 | } 161 | 162 | /// Inc an integer or float. 163 | pub fn inc(&mut self, val: &Value) { 164 | match self { 165 | Value::Int(x) => *x += val.int(), 166 | Value::Float(x) => *x += val.float(), 167 | _ => panic!(), 168 | } 169 | } 170 | 171 | /// Dec an integer or float. 172 | pub fn dec(&mut self, val: &Value) { 173 | match self { 174 | Value::Int(x) => *x -= val.int(), 175 | Value::Float(x) => *x -= val.float(), 176 | _ => panic!(), 177 | } 178 | } 179 | 180 | /// Convert a Value to a Binary. 181 | pub fn bin(&self) -> Rc> { 182 | match self { 183 | Value::ArcBinary(x) => Rc::new(x.to_vec()), 184 | Value::RcBinary(x) => x.clone(), 185 | Value::String(s) => Rc::new(s.as_bytes().to_vec()), 186 | Value::Float(x) => Rc::new(x.to_le_bytes().to_vec()), 187 | Value::Int(x) => Rc::new(x.to_le_bytes().to_vec()), 188 | _ => panic!("bin not implemented"), 189 | } 190 | } 191 | 192 | /// Borrow address of Binary value. 193 | pub fn bina(&self) -> &[u8] { 194 | match self { 195 | Value::RcBinary(data) => data, 196 | Value::ArcBinary(data) => data, 197 | _ => panic!(), 198 | } 199 | } 200 | } 201 | 202 | /// Value comparison. 
203 | impl std::cmp::Ord for Value { 204 | fn cmp(&self, other: &Self) -> Ordering { 205 | match self { 206 | Value::String(s1) => { 207 | if let Value::String(s2) = other { 208 | return s1.cmp(s2); 209 | } 210 | } 211 | Value::Int(x1) => { 212 | if let Value::Int(x2) = other { 213 | return x1.cmp(x2); 214 | } 215 | } 216 | Value::Float(x1) => { 217 | if let Value::Float(x2) = other { 218 | return x1.partial_cmp(x2).unwrap(); 219 | } 220 | } 221 | Value::RcBinary(b1) => { 222 | if let Value::RcBinary(b2) = other { 223 | return b1.cmp(b2); 224 | } 225 | } 226 | _ => {} 227 | } 228 | panic!() 229 | } 230 | } 231 | 232 | impl PartialOrd for Value { 233 | fn partial_cmp(&self, other: &Self) -> Option { 234 | Some(self.cmp(other)) 235 | } 236 | } 237 | 238 | impl PartialEq for Value { 239 | fn eq(&self, other: &Self) -> bool { 240 | if let Some(eq) = self.partial_cmp(other) { 241 | eq == Ordering::Equal 242 | } else { 243 | false 244 | } 245 | } 246 | } 247 | 248 | impl Eq for Value {} 249 | 250 | /// Decode bytes. Result is bytes and code ( or u64::MAX if no code ). 251 | pub fn get_bytes(db: &DB, data: &[u8], size: usize) -> (Vec, Code) { 252 | let n = data[0] as usize; 253 | if n < size { 254 | let mut bytes = vec![0_u8; n]; 255 | bytes[0..n].copy_from_slice(&data[1..=n]); 256 | ( 257 | bytes, 258 | Code { 259 | id: u64::MAX, 260 | ft: 0, 261 | }, 262 | ) 263 | } else { 264 | let id = util::getu64(data, size - 8); 265 | let ft = 255 - n; 266 | let code = Code { id, ft }; 267 | let mut bytes = db.decode(code, size - 9); 268 | bytes[0..size - 9].copy_from_slice(&data[1..size - 8]); 269 | (bytes, code) 270 | } 271 | } 272 | 273 | /// Save bytes. 274 | pub fn save_bytes(bytes: &[u8], data: &mut [u8], code: Code, size: usize) { 275 | let n = bytes.len(); 276 | if n < size { 277 | data[0] = n as u8; 278 | data[1..=n].copy_from_slice(&bytes[0..n]); 279 | } else { 280 | // Store first (size-9) bytes and code. 
281 | data[0] = 255 - code.ft as u8; 282 | data[1..size - 8].copy_from_slice(&bytes[0..size - 9]); 283 | util::setu64(&mut data[size - 8..], code.id); 284 | } 285 | } 286 | -------------------------------------------------------------------------------- /src/wmap.rs: -------------------------------------------------------------------------------- 1 | use crate::{BTreeMap, Data, Storage}; 2 | use std::cmp::min; 3 | 4 | #[derive(Default)] 5 | /// Slice of Data to be written to storage. 6 | pub struct DataSlice { 7 | /// Slice data. 8 | pub data: Data, 9 | /// Start of slice. 10 | pub off: usize, 11 | /// Length of slice. 12 | pub len: usize, 13 | } 14 | 15 | impl DataSlice { 16 | /// Get reference to the whole slice. 17 | pub fn all(&self) -> &[u8] { 18 | &self.data[self.off..self.off + self.len] 19 | } 20 | /// Get reference to part of slice. 21 | pub fn part(&self, off: usize, len: usize) -> &[u8] { 22 | &self.data[self.off + off..self.off + off + len] 23 | } 24 | /// Trim specified amount from start of slice. 25 | pub fn trim(&mut self, trim: usize) { 26 | self.off += trim; 27 | self.len -= trim; 28 | } 29 | /// Take the data. 30 | pub fn take(&mut self) -> Data { 31 | std::mem::take(&mut self.data) 32 | } 33 | } 34 | 35 | #[derive(Default)] 36 | /// Updateable storage based on some underlying storage. 37 | pub struct WMap { 38 | /// Map of writes. Key is the end of the slice. 39 | map: BTreeMap, 40 | } 41 | 42 | impl WMap { 43 | /// Is the map empty? 44 | pub fn is_empty(&self) -> bool { 45 | self.map.is_empty() 46 | } 47 | 48 | /// Number of key-value pairs in the map. 49 | pub fn len(&self) -> usize { 50 | self.map.len() 51 | } 52 | 53 | /// Take the map and convert it to a Vec. 
54 | pub fn to_vec(&mut self) -> Vec<(u64, DataSlice)> { 55 | let map = std::mem::take(&mut self.map); 56 | let mut result = Vec::with_capacity(map.len()); 57 | for (end, v) in map { 58 | let start = end - v.len as u64; 59 | result.push((start, v)); 60 | } 61 | result 62 | } 63 | 64 | /// Write the map into storage. 65 | pub fn to_storage(&self, stg: &mut dyn Storage) { 66 | for (end, v) in self.map.iter() { 67 | let start = end - v.len as u64; 68 | stg.write_data(start, v.data.clone(), v.off, v.len); 69 | } 70 | } 71 | 72 | #[cfg(not(feature = "pstd"))] 73 | /// Write to storage, existing writes which overlap with new write need to be trimmed or removed. 74 | pub fn write(&mut self, start: u64, data: Data, off: usize, len: usize) { 75 | if len != 0 { 76 | let (mut insert, mut remove) = (Vec::new(), Vec::new()); 77 | let end = start + len as u64; 78 | for (ee, v) in self.map.range_mut(start + 1..) { 79 | let ee = *ee; 80 | let es = ee - v.len as u64; // Existing write Start. 81 | if es >= end { 82 | // Existing write starts after end of new write, nothing to do. 83 | break; 84 | } else if start <= es { 85 | if end < ee { 86 | // New write starts before existing write, but doesn't subsume it. Trim existing write. 87 | v.trim((end - es) as usize); 88 | break; 89 | } 90 | // New write subsumes existing write entirely, remove existing write. 91 | remove.push(ee); 92 | } else if end < ee { 93 | // New write starts in middle of existing write, ends before end of existing write, 94 | // put start of existing write in insert list, trim existing write. 95 | insert.push((es, v.data.clone(), v.off, (start - es) as usize)); 96 | v.trim((end - es) as usize); 97 | break; 98 | } else { 99 | // New write starts in middle of existing write, ends after existing write, 100 | // put start of existing write in insert list, remove existing write. 
101 | insert.push((es, v.take(), v.off, (start - es) as usize)); 102 | remove.push(ee); 103 | } 104 | } 105 | for end in remove { 106 | self.map.remove(&end); 107 | } 108 | for (start, data, off, len) in insert { 109 | self.map 110 | .insert(start + len as u64, DataSlice { data, off, len }); 111 | } 112 | self.map 113 | .insert(start + len as u64, DataSlice { data, off, len }); 114 | } 115 | } 116 | 117 | #[cfg(feature = "pstd")] 118 | /// Write to storage, existing writes which overlap with new write need to be trimmed or removed. 119 | pub fn write(&mut self, start: u64, data: Data, off: usize, len: usize) { 120 | if len != 0 { 121 | let end = start + len as u64; 122 | let mut c = self 123 | .map 124 | .lower_bound_mut(std::ops::Bound::Excluded(&start)) 125 | .with_mutable_key(); 126 | while let Some((eend, v)) = c.next() { 127 | let ee = *eend; 128 | let es = ee - v.len as u64; // Existing write Start. 129 | if es >= end { 130 | // Existing write starts after end of new write, nothing to do. 131 | c.prev(); 132 | break; 133 | } else if start <= es { 134 | if end < ee { 135 | // New write starts before existing write, but doesn't subsume it. Trim existing write. 136 | v.trim((end - es) as usize); 137 | c.prev(); 138 | break; 139 | } 140 | // New write subsumes existing write entirely, remove existing write. 141 | c.remove_prev(); 142 | } else if end < ee { 143 | // New write starts in middle of existing write, ends before end of existing write, 144 | // trim existing write, insert start of existing write. 145 | let (data, off, len) = (v.data.clone(), v.off, (start - es) as usize); 146 | v.trim((end - es) as usize); 147 | c.prev(); 148 | c.insert_before_unchecked(es + len as u64, DataSlice { data, off, len }); 149 | break; 150 | } else { 151 | // New write starts in middle of existing write, ends after existing write, 152 | // Trim existing write ( modifies key, but this is ok as ordering is not affected ). 
153 | v.len = (start - es) as usize; 154 | *eend = es + v.len as u64; 155 | } 156 | } 157 | // Insert the new write. 158 | c.insert_after_unchecked(start + len as u64, DataSlice { data, off, len }); 159 | } 160 | } 161 | 162 | /// Read from storage, taking map of existing writes into account. Unwritten ranges are read from underlying storage. 163 | pub fn read(&self, start: u64, data: &mut [u8], u: &dyn Storage) { 164 | let len = data.len(); 165 | if len != 0 { 166 | let mut done = 0; 167 | for (&end, v) in self.map.range(start + 1..) { 168 | let es = end - v.len as u64; // Existing write Start. 169 | let doff = start + done as u64; 170 | if es > doff { 171 | // Read from underlying storage. 172 | let a = min(len - done, (es - doff) as usize); 173 | u.read(doff, &mut data[done..done + a]); 174 | done += a; 175 | if done == len { 176 | return; 177 | } 178 | } 179 | // Use existing write. 180 | let skip = (start + done as u64 - es) as usize; 181 | let a = min(len - done, v.len - skip); 182 | data[done..done + a].copy_from_slice(v.part(skip, a)); 183 | done += a; 184 | if done == len { 185 | return; 186 | } 187 | } 188 | u.read(start + done as u64, &mut data[done..]); 189 | } 190 | } 191 | } 192 | --------------------------------------------------------------------------------