├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── cdb.rs ├── cdb.txt ├── src ├── hash.rs ├── lib.rs ├── reader.rs ├── uint32.rs └── writer.rs └── tests ├── make.rs ├── read.rs ├── test1.cdb ├── test1.txt ├── test2.cdb └── test2.sh /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: false 3 | cache: cargo 4 | rust: 5 | - nightly 6 | - beta 7 | - stable 8 | os: 9 | - linux 10 | - osx 11 | matrix: 12 | allow_failures: 13 | - rust: nightly 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cdb" 3 | version = "0.6.0" 4 | authors = ["Bruce Guenter "] 5 | description = "Pure Rust library to read and write CDB files" 6 | homepage = "https://github.com/bruceg/cdb-rs" 7 | repository = "https://github.com/bruceg/cdb-rs" 8 | readme = "README.md" 9 | license = "Unlicense" 10 | edition = "2018" 11 | 12 | [dependencies] 13 | filebuffer = "0.4" 14 | libc = "0.2.4" 15 | 16 | [dev-dependencies] 17 | criterion = "0.2" 18 | 19 | [[bench]] 20 | name = "cdb" 21 | harness = false 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | Anyone is free to copy, modify, publish, use, compile, sell, or 3 | distribute this software, either in source code form or as a compiled 4 | binary, for any purpose, commercial or non-commercial, and by any means. 
5 | 6 | In jurisdictions that recognize copyright laws, the author or authors of 7 | this software dedicate any and all copyright interest in the software to 8 | the public domain. We make this dedication for the benefit of the public 9 | at large and to the detriment of our heirs and successors. We intend 10 | this dedication to be an overt act of relinquishment in perpetuity of 11 | all present and future rights to this software under copyright law. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | DEALINGS IN THE SOFTWARE. 20 | 21 | For more information, please refer to <https://unlicense.org> 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cdb 2 | ==== 3 | 4 | [![Crate](https://img.shields.io/crates/v/cdb.svg)](https://crates.io/crates/cdb) 5 | [![Build Status](https://travis-ci.org/bruceg/cdb-rs.svg?branch=master)](https://travis-ci.org/bruceg/cdb-rs) 6 | 7 | This library provides pure Rust support for reading and writing 8 | [CDB][cdb] files. A CDB file is a constant key-value on-disk hash 9 | table, designed for high-speed lookups. 
10 | 11 | [cdb]: http://cr.yp.to/cdb.html 12 | 13 | [Documentation](https://docs.rs/cdb) 14 | 15 | ## License 16 | 17 | Public Domain 18 | -------------------------------------------------------------------------------- /benches/cdb.rs: -------------------------------------------------------------------------------- 1 | extern crate cdb; 2 | #[macro_use] 3 | extern crate criterion; 4 | 5 | use cdb::CDB; 6 | use criterion::Criterion; 7 | 8 | fn test_cdb() -> CDB { 9 | CDB::open("tests/test2.cdb").expect("Could not open tests/test2.cdb") 10 | } 11 | 12 | fn reader_benchmark(c: &mut Criterion) { 13 | c.bench_function("CDB::open", |b| { 14 | b.iter(|| { 15 | test_cdb(); 16 | }) 17 | }); 18 | c.bench_function("CDB::find", |b| { 19 | let cdb = test_cdb(); 20 | b.iter(|| cdb.find(b"two")) 21 | }); 22 | c.bench_function("CDB::find long", |b| { 23 | let cdb = test_cdb(); 24 | b.iter(|| cdb.find(b"this key will be split across two reads")) 25 | }); 26 | c.bench_function("CDB::find result", |b| { 27 | let cdb = test_cdb(); 28 | b.iter(|| cdb.find(b"two").next().unwrap()) 29 | }); 30 | c.bench_function("CDB::find result loop", |b| { 31 | let cdb = test_cdb(); 32 | b.iter(|| { 33 | for result in cdb.find(b"one") { 34 | result.unwrap(); 35 | } 36 | }) 37 | }); 38 | c.bench_function("CDB::open + find result loop", |b| { 39 | b.iter(|| { 40 | let cdb = test_cdb(); 41 | for result in cdb.find(b"one") { 42 | result.unwrap(); 43 | } 44 | }) 45 | }); 46 | c.bench_function("CDB::iter result loop", |b| { 47 | let cdb = test_cdb(); 48 | b.iter(|| { 49 | for result in cdb.iter() { 50 | result.unwrap(); 51 | } 52 | }) 53 | }); 54 | c.bench_function("CDB::open + iter result loop", |b| { 55 | b.iter(|| { 56 | let cdb = test_cdb(); 57 | for result in cdb.iter() { 58 | result.unwrap(); 59 | } 60 | }) 61 | }); 62 | } 63 | 64 | criterion_group!(benches, reader_benchmark); 65 | criterion_main!(benches); 66 | -------------------------------------------------------------------------------- 
/cdb.txt: -------------------------------------------------------------------------------- 1 | A structure for constant databases 2 | 19960914 3 | Copyright 1996 4 | D. J. Bernstein, djb@pobox.com 5 | 6 | A cdb is an associative array: it maps strings (``keys'') to strings 7 | (``data''). 8 | 9 | A cdb contains 256 pointers to linearly probed open hash tables. The 10 | hash tables contain pointers to (key,data) pairs. A cdb is stored in 11 | a single file on disk: 12 | 13 | +----------------+---------+-------+-------+-----+---------+ 14 | | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 | 15 | +----------------+---------+-------+-------+-----+---------+ 16 | 17 | Each of the 256 initial pointers states a position and a length. The 18 | position is the starting byte position of the hash table. The length 19 | is the number of slots in the hash table. 20 | 21 | Records are stored sequentially, without special alignment. A record 22 | states a key length, a data length, the key, and the data. 23 | 24 | Each hash table slot states a hash value and a byte position. If the 25 | byte position is 0, the slot is empty. Otherwise, the slot points to 26 | a record whose key has that hash value. 27 | 28 | Positions, lengths, and hash values are 32-bit quantities, stored in 29 | little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes. 30 | 31 | A record is located as follows. Compute the hash value of the key in 32 | the record. The hash value modulo 256 is the number of a hash table. 33 | The hash value divided by 256, modulo the length of that table, is a 34 | slot number. Probe that slot, the next higher slot, and so on, until 35 | you find the record or run into an empty slot. 36 | 37 | The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting 38 | hash of 5381. 
/// Starting value of the CDB hash (5381, i.e. `0x1505`).
const HASHSTART: u32 = 5381;

/// Folds one byte into a running CDB hash.
///
/// Implements `h = ((h << 5) + h) ^ c`, with the addition wrapping modulo
/// 2^32 so that long keys never overflow.
pub fn add(h: u32, c: u8) -> u32 {
    // A left shift by 5 simply discards the high bits; only the addition
    // needs explicit wrapping semantics.
    (h << 5).wrapping_add(h) ^ u32::from(c)
}

/// Computes the CDB hash of `buf`.
///
/// The hash of the empty slice is the starting value, 5381.
pub fn hash(buf: &[u8]) -> u32 {
    buf.iter().fold(HASHSTART, |h, &c| add(h, c))
}

#[test]
fn samples() {
    assert_eq!(hash(b""), 0x0000_1505);
    assert_eq!(hash(b"Hello, world!"), 0x5643_69e8);
    assert_eq!(hash(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), 0x4003_2705);
}
Bernstein's original software](https://cr.yp.to/cdb.html) 36 | //! * [Constant Database (cdb) Internals](https://www.unixuser.org/~euske/doc/cdbinternals/index.html) 37 | //! * [Wikipedia](https://en.wikipedia.org/wiki/Cdb_(software)) 38 | 39 | extern crate filebuffer; 40 | 41 | mod hash; 42 | mod reader; 43 | mod uint32; 44 | mod writer; 45 | 46 | pub use crate::reader::{CDBIter, CDBKeyValueIter, CDBValueIter, Result, CDB}; 47 | pub use crate::writer::{CDBMake, CDBWriter}; 48 | -------------------------------------------------------------------------------- /src/reader.rs: -------------------------------------------------------------------------------- 1 | use filebuffer::FileBuffer; 2 | use std::cmp::min; 3 | use std::io; 4 | use std::path; 5 | 6 | use crate::hash::hash; 7 | use crate::uint32; 8 | 9 | pub use std::io::Result; 10 | 11 | const KEYSIZE: usize = 32; 12 | 13 | /// CDB file reader 14 | /// 15 | /// # Example 16 | /// 17 | /// ``` 18 | /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 19 | /// 20 | /// for result in cdb.find(b"one") { 21 | /// println!("{:?}", result.unwrap()); 22 | /// } 23 | /// ``` 24 | pub struct CDB { 25 | file: FileBuffer, 26 | size: usize, 27 | } 28 | 29 | fn err_badfile() -> Result { 30 | Err(io::Error::new(io::ErrorKind::Other, "Invalid file format")) 31 | } 32 | 33 | impl CDB { 34 | /// Opens the named file and returns the CDB reader. 
35 | /// 36 | /// # Examples 37 | /// 38 | /// ``` 39 | /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 40 | /// ``` 41 | pub fn open>(filename: P) -> Result { 42 | let file = FileBuffer::open(&filename)?; 43 | if file.len() < 2048 + 8 + 8 || file.len() > 0xffffffff { 44 | return err_badfile(); 45 | } 46 | let size = file.len(); 47 | Ok(CDB { file, size }) 48 | } 49 | 50 | fn read(&self, buf: &mut [u8], pos: u32) -> Result { 51 | let len = buf.len(); 52 | let pos = pos as usize; 53 | if pos + len > self.size { 54 | return err_badfile(); 55 | } 56 | buf.copy_from_slice(&self.file[pos..pos + len]); 57 | Ok(len) 58 | } 59 | 60 | fn hash_table(&self, khash: u32) -> (u32, u32, u32) { 61 | let x = ((khash as usize) & 0xff) << 3; 62 | let (hpos, hslots) = uint32::unpack2(&self.file[x..x + 8]); 63 | let kpos = if hslots > 0 { 64 | hpos + (((khash >> 8) % hslots) << 3) 65 | } else { 66 | 0 67 | }; 68 | (hpos, hslots, kpos) 69 | } 70 | 71 | fn match_key(&self, key: &[u8], pos: u32) -> Result { 72 | let mut buf = [0 as u8; KEYSIZE]; 73 | let mut len = key.len(); 74 | let mut pos = pos; 75 | let mut keypos = 0; 76 | 77 | while len > 0 { 78 | let n = min(len, buf.len()); 79 | self.read(&mut buf[..n], pos)?; 80 | if buf[..n] != key[keypos..keypos + n] { 81 | return Ok(false); 82 | } 83 | pos += n as u32; 84 | keypos += n; 85 | len -= n; 86 | } 87 | Ok(true) 88 | } 89 | 90 | /// Find the first record with the named key. 91 | /// 92 | /// # Examples 93 | /// 94 | /// ``` 95 | /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 96 | /// if let Some(record) = cdb.get(b"one") { 97 | /// println!("{:?}", record.unwrap()); 98 | /// } 99 | /// ``` 100 | pub fn get(&self, key: &[u8]) -> Option>> { 101 | self.find(key).next() 102 | } 103 | 104 | /// Find all records with the named key. The returned iterator 105 | /// produces each value associated with the key. 
106 | /// 107 | /// # Examples 108 | /// 109 | /// ``` 110 | /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 111 | /// 112 | /// for result in cdb.find(b"one") { 113 | /// println!("{:?}", result.unwrap()); 114 | /// } 115 | /// ``` 116 | pub fn find(&self, key: &[u8]) -> CDBValueIter { 117 | CDBValueIter::find(self, key) 118 | } 119 | 120 | /// Iterate over all the `(key, value)` pairs in the database. 121 | /// 122 | /// # Examples 123 | /// 124 | /// ``` 125 | /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 126 | /// for result in cdb.iter() { 127 | /// let (key, value) = result.unwrap(); 128 | /// println!("{:?} => {:?}", key, value); 129 | /// } 130 | /// ```` 131 | pub fn iter(&self) -> CDBKeyValueIter { 132 | CDBKeyValueIter::start(&self) 133 | } 134 | } 135 | 136 | /// Type alias for [`CDBValueiter`](struct.CDBValueIter.html) 137 | pub type CDBIter<'a> = CDBValueIter<'a>; 138 | 139 | /// Iterator over a set of records in the CDB with the same key. 140 | /// 141 | /// See [`CDB::find`](struct.CDB.html#method.find) 142 | pub struct CDBValueIter<'a> { 143 | cdb: &'a CDB, 144 | key: Vec, 145 | khash: u32, 146 | kloop: u32, 147 | kpos: u32, 148 | hpos: u32, 149 | hslots: u32, 150 | dpos: u32, 151 | dlen: u32, 152 | } 153 | 154 | impl<'a> CDBValueIter<'a> { 155 | fn find(cdb: &'a CDB, key: &[u8]) -> Self { 156 | let khash = hash(key); 157 | let (hpos, hslots, kpos) = cdb.hash_table(khash); 158 | 159 | CDBValueIter { 160 | cdb: cdb, 161 | key: key.into_iter().map(|x| *x).collect(), 162 | khash: khash, 163 | kloop: 0, 164 | kpos: kpos, 165 | hpos: hpos, 166 | hslots: hslots, 167 | dpos: 0, 168 | dlen: 0, 169 | } 170 | } 171 | 172 | fn read_vec(&self) -> Result> { 173 | let mut result = vec![0; self.dlen as usize]; 174 | self.cdb.read(&mut result[..], self.dpos)?; 175 | Ok(result) 176 | } 177 | } 178 | 179 | macro_rules! 
iter_try { 180 | ( $e:expr ) => { 181 | match $e { 182 | Err(x) => { 183 | return Some(Err(x)); 184 | } 185 | Ok(y) => y, 186 | } 187 | }; 188 | } 189 | 190 | impl<'a> Iterator for CDBValueIter<'a> { 191 | type Item = Result>; 192 | fn next(&mut self) -> Option { 193 | while self.kloop < self.hslots { 194 | let mut buf = [0 as u8; 8]; 195 | let kpos = self.kpos; 196 | iter_try!(self.cdb.read(&mut buf, kpos)); 197 | let (khash, pos) = uint32::unpack2(&buf); 198 | if pos == 0 { 199 | return None; 200 | } 201 | self.kloop += 1; 202 | self.kpos += 8; 203 | if self.kpos == self.hpos + (self.hslots << 3) { 204 | self.kpos = self.hpos; 205 | } 206 | if khash == self.khash { 207 | iter_try!(self.cdb.read(&mut buf, pos)); 208 | let (klen, dlen) = uint32::unpack2(&buf); 209 | if klen as usize == self.key.len() { 210 | if iter_try!(self.cdb.match_key(&self.key[..], pos + 8)) { 211 | self.dlen = dlen; 212 | self.dpos = pos + 8 + self.key.len() as u32; 213 | return Some(self.read_vec()); 214 | } 215 | } 216 | } 217 | } 218 | None 219 | } 220 | } 221 | 222 | /// Iterator over all the records in the CDB. 
223 | /// 224 | /// See [`CDB::iter`](struct.CDB.html#method.iter) 225 | pub struct CDBKeyValueIter<'a> { 226 | cdb: &'a CDB, 227 | pos: u32, 228 | data_end: u32, 229 | } 230 | 231 | impl<'a> CDBKeyValueIter<'a> { 232 | fn start(cdb: &'a CDB) -> Self { 233 | let data_end = uint32::unpack(&cdb.file[0..4]).min(cdb.size as u32); 234 | Self { 235 | cdb, 236 | pos: 2048, 237 | data_end, 238 | } 239 | } 240 | } 241 | 242 | impl<'a> Iterator for CDBKeyValueIter<'a> { 243 | type Item = Result<(Vec, Vec)>; 244 | fn next(&mut self) -> Option { 245 | if self.pos + 8 >= self.data_end { 246 | None 247 | } else { 248 | let (klen, dlen) = 249 | uint32::unpack2(&self.cdb.file[self.pos as usize..self.pos as usize + 8]); 250 | if self.pos + klen + dlen >= self.data_end { 251 | Some(err_badfile()) 252 | } else { 253 | let kpos = (self.pos + 8) as usize; 254 | let dpos = kpos + klen as usize; 255 | let mut key = vec![0; klen as usize]; 256 | let mut value = vec![0; dlen as usize]; 257 | // Copied from CDB::read 258 | key.copy_from_slice(&self.cdb.file[kpos..kpos + klen as usize]); 259 | value.copy_from_slice(&self.cdb.file[dpos..dpos + dlen as usize]); 260 | self.pos += 8 + klen + dlen; 261 | Some(Ok((key, value))) 262 | } 263 | } 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /src/uint32.rs: -------------------------------------------------------------------------------- 1 | use std::u32; 2 | 3 | pub fn unpack(data: &[u8]) -> u32 { 4 | assert!(data.len() >= 4); 5 | // Use u32::from_bytes when it stabilizes 6 | // Rust compiles this down to an efficient word copy 7 | (data[0] as u32) | ((data[1] as u32) << 8) | ((data[2] as u32) << 16) | ((data[3] as u32) << 24) 8 | } 9 | 10 | pub fn unpack2(buf: &[u8]) -> (u32, u32) { 11 | (unpack(&buf[0..4]), unpack(&buf[4..8])) 12 | } 13 | 14 | fn _pack(src: u32) -> [u8; 4] { 15 | // Use u32::to_bytes when it stabilizes 16 | // Rust compiles this down to an efficient word copy 17 | [ 18 | src 
/// Decodes a little-endian `u32` from the first four bytes of `data`.
///
/// # Panics
///
/// Panics if `data` holds fewer than four bytes.
pub fn unpack(data: &[u8]) -> u32 {
    let mut bytes = [0u8; 4];
    bytes.copy_from_slice(&data[..4]);
    u32::from_le_bytes(bytes)
}

/// Decodes two consecutive little-endian `u32` values from the first eight
/// bytes of `buf`.
///
/// # Panics
///
/// Panics if `buf` holds fewer than eight bytes.
pub fn unpack2(buf: &[u8]) -> (u32, u32) {
    (unpack(&buf[0..4]), unpack(&buf[4..8]))
}

/// Encodes `src` as little-endian into the first four bytes of `data`,
/// leaving any remaining bytes untouched.
///
/// # Panics
///
/// Panics if `data` holds fewer than four bytes.
pub fn pack(data: &mut [u8], src: u32) {
    data[..4].copy_from_slice(&src.to_le_bytes());
}

/// Encodes `src0` and then `src1` as little-endian into the first eight
/// bytes of `data`.
///
/// # Panics
///
/// Panics if `data` holds fewer than eight bytes; nothing is written in
/// that case.
pub fn pack2(data: &mut [u8], src0: u32, src1: u32) {
    assert!(data.len() >= 8);
    pack(&mut data[0..4], src0);
    pack(&mut data[4..8], src1);
}
52 | pub fn new(file: fs::File) -> Result { 53 | let mut w = io::BufWriter::new(file); 54 | let buf = [0; 2048]; 55 | w.seek(io::SeekFrom::Start(0))?; 56 | w.write(&buf)?; 57 | Ok(CDBMake { 58 | entries: iter::repeat(vec![]).take(256).collect::>(), 59 | pos: 2048, 60 | file: w, 61 | }) 62 | } 63 | 64 | fn pos_plus(&mut self, len: u32) -> Result<()> { 65 | if self.pos + len < len { 66 | err_toobig() 67 | } else { 68 | self.pos += len; 69 | Ok(()) 70 | } 71 | } 72 | 73 | fn add_end(&mut self, keylen: u32, datalen: u32, hash: u32) -> Result<()> { 74 | self.entries[(hash & 0xff) as usize].push(HashPos { 75 | hash: hash, 76 | pos: self.pos, 77 | }); 78 | self.pos_plus(8)?; 79 | self.pos_plus(keylen)?; 80 | self.pos_plus(datalen)?; 81 | Ok(()) 82 | } 83 | 84 | fn add_begin(&mut self, keylen: u32, datalen: u32) -> Result<()> { 85 | let mut buf = [0; 8]; 86 | uint32::pack2(&mut buf[0..8], keylen, datalen); 87 | self.file.write(&buf)?; 88 | Ok(()) 89 | } 90 | 91 | /// Add a record to the CDB file. 92 | pub fn add(&mut self, key: &[u8], data: &[u8]) -> Result<()> { 93 | if key.len() >= 0xffffffff || data.len() >= 0xffffffff { 94 | return Err(io::Error::new(io::ErrorKind::Other, "Key or data too big")); 95 | } 96 | self.add_begin(key.len() as u32, data.len() as u32)?; 97 | self.file.write(key)?; 98 | self.file.write(data)?; 99 | self.add_end(key.len() as u32, data.len() as u32, hash(&key[..])) 100 | } 101 | 102 | /// Set the permissions on the underlying file. 103 | pub fn set_permissions(&self, perm: fs::Permissions) -> Result<()> { 104 | self.file.get_ref().set_permissions(perm) 105 | } 106 | 107 | /// Finish writing to the CDB file and flush its contents. 
108 | pub fn finish(mut self) -> Result<()> { 109 | let mut buf = [0; 8]; 110 | 111 | let maxsize = self.entries.iter().fold(1, |acc, e| max(acc, e.len() * 2)); 112 | let count = self.entries.iter().fold(0, |acc, e| acc + e.len()); 113 | if maxsize + count > (0xffffffff / 8) { 114 | return err_toobig(); 115 | } 116 | 117 | let mut table = vec![HashPos { hash: 0, pos: 0 }; maxsize]; 118 | 119 | let mut header = [0 as u8; 2048]; 120 | for i in 0..256 { 121 | let len = self.entries[i].len() * 2; 122 | let j = i * 8; 123 | uint32::pack2(&mut header[j..j + 8], self.pos, len as u32); 124 | 125 | for e in self.entries[i].iter() { 126 | let mut wh = (e.hash as usize >> 8) % len; 127 | while table[wh].pos != 0 { 128 | wh += 1; 129 | if wh == len { 130 | wh = 0; 131 | } 132 | } 133 | table[wh] = *e; 134 | } 135 | 136 | for hp in table.iter_mut().take(len) { 137 | hp.pack(&mut buf); 138 | self.file.write(&buf)?; 139 | self.pos_plus(8)?; 140 | *hp = HashPos { hash: 0, pos: 0 }; 141 | } 142 | } 143 | 144 | self.file.flush()?; 145 | self.file.seek(io::SeekFrom::Start(0))?; 146 | self.file.write(&header)?; 147 | self.file.flush()?; 148 | Ok(()) 149 | } 150 | } 151 | 152 | /// A CDB file writer which handles atomic updating. 153 | /// 154 | /// Using this type, a CDB file is safely written by first creating a 155 | /// temporary file, building the CDB structure into that temporary file, 156 | /// and finally renaming that temporary file over the final file name. 157 | /// If the temporary file is not properly finished (ie due to an error), 158 | /// the temporary file is deleted when this writer is dropped. 
159 | /// 160 | /// # Example 161 | /// 162 | /// ```no_run 163 | /// use cdb::CDBWriter; 164 | /// 165 | /// fn main() -> std::io::Result<()> { 166 | /// let mut cdb = CDBWriter::create("temporary.cdb")?; 167 | /// cdb.add(b"one", b"Hello")?; 168 | /// cdb.finish()?; 169 | /// Ok(()) 170 | /// } 171 | /// ``` 172 | pub struct CDBWriter { 173 | dstname: String, 174 | tmpname: String, 175 | cdb: Option, 176 | } 177 | 178 | impl CDBWriter { 179 | /// Safely create a new CDB file. 180 | /// 181 | /// The suffix for the temporary file defaults to `".tmp"`. 182 | pub fn create + string::ToString>(filename: P) -> Result { 183 | CDBWriter::with_suffix(filename, ".tmp") 184 | } 185 | 186 | /// Safely create a new CDB file, using a specific suffix for the temporary file. 187 | pub fn with_suffix + string::ToString>( 188 | filename: P, 189 | suffix: &str, 190 | ) -> Result { 191 | let mut tmpname = filename.to_string(); 192 | tmpname.push_str(suffix); 193 | CDBWriter::with_filenames(filename, &tmpname) 194 | } 195 | 196 | /// Safely create a new CDB file, using two specific file names. 197 | /// 198 | /// Note that the temporary file name must be on the same filesystem 199 | /// as the destination, or else the final rename will fail. 200 | pub fn with_filenames< 201 | P: AsRef + string::ToString, 202 | Q: AsRef + string::ToString, 203 | >( 204 | filename: P, 205 | tmpname: Q, 206 | ) -> Result { 207 | let file = fs::File::create(&tmpname)?; 208 | let cdb = CDBMake::new(file)?; 209 | Ok(CDBWriter { 210 | dstname: filename.to_string(), 211 | tmpname: tmpname.to_string(), 212 | cdb: Some(cdb), 213 | }) 214 | } 215 | 216 | /// Add a record to the CDB file. 217 | pub fn add(&mut self, key: &[u8], data: &[u8]) -> Result<()> { 218 | // The unwrap() is safe here, as the internal cdb is only ever 219 | // None during finish(), which does not call this. 220 | self.cdb.as_mut().unwrap().add(key, data) 221 | } 222 | 223 | /// Set permissions on the temporary file. 
224 | /// 225 | /// This must be done before the file is finished, as the temporary 226 | /// file will no longer exist at that point. 227 | pub fn set_permissions(&self, perm: fs::Permissions) -> Result<()> { 228 | self.cdb.as_ref().unwrap().set_permissions(perm) 229 | } 230 | 231 | pub fn finish(mut self) -> Result<()> { 232 | self.cdb.take().unwrap().finish()?; 233 | fs::rename(&self.tmpname, &self.dstname)?; 234 | Ok(()) 235 | } 236 | } 237 | 238 | impl Drop for CDBWriter { 239 | #[allow(unused_must_use)] 240 | fn drop(&mut self) { 241 | if let Some(_) = self.cdb { 242 | fs::remove_file(&self.tmpname); 243 | } 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /tests/make.rs: -------------------------------------------------------------------------------- 1 | extern crate cdb; 2 | use std::fs; 3 | 4 | macro_rules! noerr { 5 | ( $e:expr ) => { 6 | if let Err(x) = $e { 7 | panic!("{}", x); 8 | } 9 | }; 10 | } 11 | 12 | #[test] 13 | fn test_make() { 14 | let filename = "tests/make.cdb"; 15 | 16 | let mut cdb = cdb::CDBWriter::create(filename).unwrap(); 17 | noerr!(cdb.add(b"one", b"Hello")); 18 | noerr!(cdb.add(b"two", b"Goodbye")); 19 | noerr!(cdb.add(b"one", b", World!")); 20 | noerr!(cdb.add(b"this key will be split across two reads", b"Got it.")); 21 | noerr!(cdb.finish()); 22 | 23 | let cdb = cdb::CDB::open(filename).unwrap(); 24 | assert_eq!(cdb.find(b"two").next().unwrap().unwrap(), b"Goodbye"); 25 | assert_eq!( 26 | cdb.find(b"this key will be split across two reads") 27 | .next() 28 | .unwrap() 29 | .unwrap(), 30 | b"Got it." 
31 | ); 32 | let mut i = cdb.find(b"one"); 33 | assert_eq!(i.next().unwrap().unwrap(), b"Hello"); 34 | assert_eq!(i.next().unwrap().unwrap(), b", World!"); 35 | 36 | let mut i = cdb.iter(); 37 | let next = i.next().unwrap().unwrap(); 38 | assert_eq!(next.0, b"one"); 39 | assert_eq!(next.1, b"Hello"); 40 | let next = i.next().unwrap().unwrap(); 41 | assert_eq!(next.0, b"two"); 42 | assert_eq!(next.1, b"Goodbye"); 43 | let next = i.next().unwrap().unwrap(); 44 | assert_eq!(next.0, b"one"); 45 | assert_eq!(next.1, b", World!"); 46 | let next = i.next().unwrap().unwrap(); 47 | // Can't do this key easily due to missing trait for [u8; 39] 48 | //assert_eq!(next.0, b"this key will be split across two reads"); 49 | assert_eq!(next.1, b"Got it."); 50 | 51 | noerr!(fs::remove_file(filename)); 52 | } 53 | -------------------------------------------------------------------------------- /tests/read.rs: -------------------------------------------------------------------------------- 1 | extern crate cdb; 2 | 3 | #[test] 4 | fn test_one() { 5 | let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 6 | let mut i = cdb.find(b"one"); 7 | assert_eq!(i.next().unwrap().unwrap(), b"Hello"); 8 | assert_eq!(i.next().unwrap().unwrap(), b", World!"); 9 | } 10 | 11 | #[test] 12 | fn test_two() { 13 | let cdb = cdb::CDB::open("tests/test1.cdb").unwrap(); 14 | assert_eq!(cdb.find(b"two").next().unwrap().unwrap(), b"Goodbye"); 15 | assert_eq!( 16 | cdb.find(b"this key will be split across two reads") 17 | .next() 18 | .unwrap() 19 | .unwrap(), 20 | b"Got it." 
21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /tests/test1.cdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bruceg/cdb-rs/c3c544d332728b8cf51eadbcdd4f045abd15a40e/tests/test1.cdb -------------------------------------------------------------------------------- /tests/test1.txt: -------------------------------------------------------------------------------- 1 | +3,5:one->Hello 2 | +3,7:two->Goodbye 3 | +3,8:one->, World! 4 | +39,7:this key will be split across two reads->Got it. 5 | 6 | -------------------------------------------------------------------------------- /tests/test2.cdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bruceg/cdb-rs/c3c544d332728b8cf51eadbcdd4f045abd15a40e/tests/test2.cdb -------------------------------------------------------------------------------- /tests/test2.sh: -------------------------------------------------------------------------------- 1 | for i in $( seq 1000 ) 2 | do 3 | key=$(( $i * 3141592654 % 1000000 )) 4 | val=$(( $i * 2718281828459045 )) 5 | echo "+${#key},${#val}:${key}->${val}" 6 | echo "+3,${#i}:one->${i}" 7 | done 8 | echo "+3,7:two->Goodbye" 9 | echo 10 | --------------------------------------------------------------------------------