├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── benches
    └── cdb.rs
├── cdb.txt
├── src
    ├── hash.rs
    ├── lib.rs
    ├── reader.rs
    ├── uint32.rs
    └── writer.rs
└── tests
    ├── make.rs
    ├── read.rs
    ├── test1.cdb
    ├── test1.txt
    ├── test2.cdb
    └── test2.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: rust
 2 | sudo: false
 3 | cache: cargo
 4 | rust:
 5 |   - nightly
 6 |   - beta
 7 |   - stable
 8 | os:
 9 |   - linux
10 |   - osx
11 | matrix:
12 |   allow_failures:
13 |    - rust: nightly
14 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "cdb"
 3 | version = "0.6.0"
 4 | authors = ["Bruce Guenter <bruce@untroubled.org>"]
 5 | description = "Pure Rust library to read and write CDB files"
 6 | homepage = "https://github.com/bruceg/cdb-rs"
 7 | repository = "https://github.com/bruceg/cdb-rs"
 8 | readme = "README.md"
 9 | license = "Unlicense"
10 | edition = "2018"
11 | 
12 | [dependencies]
13 | filebuffer = "0.4"
14 | libc = "0.2.4"
15 | 
16 | [dev-dependencies]
17 | criterion = "0.2"
18 | 
19 | [[bench]]
20 | name = "cdb"
21 | harness = false
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | Anyone is free to copy, modify, publish, use, compile, sell, or
 3 | distribute this software, either in source code form or as a compiled
 4 | binary, for any purpose, commercial or non-commercial, and by any means.
 5 | 
 6 | In jurisdictions that recognize copyright laws, the author or authors of
 7 | this software dedicate any and all copyright interest in the software to
 8 | the public domain. We make this dedication for the benefit of the public
 9 | at large and to the detriment of our heirs and successors. We intend
10 | this dedication to be an overt act of relinquishment in perpetuity of
11 | all present and future rights to this software under copyright law.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19 | DEALINGS IN THE SOFTWARE.
20 | 
21 | For more information, please refer to <http://unlicense.org/>
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | cdb
 2 | ====
 3 | 
 4 | [![Crate](https://img.shields.io/crates/v/cdb.svg)](https://crates.io/crates/cdb)
 5 | [![Build Status](https://travis-ci.org/bruceg/cdb-rs.svg?branch=master)](https://travis-ci.org/bruceg/cdb-rs)
 6 | 
 7 | This library provides pure Rust support for reading and writing
 8 | [CDB][cdb] files.  A CDB file is a constant key-value on-disk hash
 9 | table, designed for high-speed lookups.
10 | 
11 | [cdb]: http://cr.yp.to/cdb.html
12 | 
13 | [Documentation](https://docs.rs/cdb)
14 | 
15 | ## License
16 | 
17 | Public Domain
18 | 


--------------------------------------------------------------------------------
/benches/cdb.rs:
--------------------------------------------------------------------------------
 1 | extern crate cdb;
 2 | #[macro_use]
 3 | extern crate criterion;
 4 | 
 5 | use cdb::CDB;
 6 | use criterion::Criterion;
 7 | 
/// Opens the `tests/test2.cdb` fixture used by every benchmark below.
///
/// Panics with a descriptive message if the file cannot be opened.
fn test_cdb() -> CDB {
    CDB::open("tests/test2.cdb").expect("Could not open tests/test2.cdb")
}
11 | 
/// Registers the reader benchmarks with Criterion.
///
/// Benchmarks whose name contains `open` re-open the fixture inside the
/// measured closure; the others open it once, outside the measured closure.
fn reader_benchmark(c: &mut Criterion) {
    // Cost of opening the database alone.
    c.bench_function("CDB::open", |b| {
        b.iter(|| {
            test_cdb();
        })
    });
    // Cost of constructing the lookup iterator, without pulling any value.
    c.bench_function("CDB::find", |b| {
        let cdb = test_cdb();
        b.iter(|| cdb.find(b"two"))
    });
    // Same as above with a key longer than the reader's 32-byte compare buffer.
    c.bench_function("CDB::find long", |b| {
        let cdb = test_cdb();
        b.iter(|| cdb.find(b"this key will be split across two reads"))
    });
    // Lookup plus retrieval of the first matching value.
    c.bench_function("CDB::find result", |b| {
        let cdb = test_cdb();
        b.iter(|| cdb.find(b"two").next().unwrap())
    });
    // Lookup plus retrieval of every value stored under one key.
    c.bench_function("CDB::find result loop", |b| {
        let cdb = test_cdb();
        b.iter(|| {
            for result in cdb.find(b"one") {
                result.unwrap();
            }
        })
    });
    // Open followed by retrieval of every value stored under one key.
    c.bench_function("CDB::open + find result loop", |b| {
        b.iter(|| {
            let cdb = test_cdb();
            for result in cdb.find(b"one") {
                result.unwrap();
            }
        })
    });
    // Sequential scan over every record.
    c.bench_function("CDB::iter result loop", |b| {
        let cdb = test_cdb();
        b.iter(|| {
            for result in cdb.iter() {
                result.unwrap();
            }
        })
    });
    // Open followed by a sequential scan over every record.
    c.bench_function("CDB::open + iter result loop", |b| {
        b.iter(|| {
            let cdb = test_cdb();
            for result in cdb.iter() {
                result.unwrap();
            }
        })
    });
}
63 | 
64 | criterion_group!(benches, reader_benchmark);
65 | criterion_main!(benches);
66 | 


--------------------------------------------------------------------------------
/cdb.txt:
--------------------------------------------------------------------------------
 1 | A structure for constant databases
 2 | 19960914
 3 | Copyright 1996
 4 | D. J. Bernstein, djb@pobox.com
 5 | 
 6 | A cdb is an associative array: it maps strings (``keys'') to strings
 7 | (``data'').
 8 | 
 9 | A cdb contains 256 pointers to linearly probed open hash tables. The
10 | hash tables contain pointers to (key,data) pairs. A cdb is stored in
11 | a single file on disk:
12 | 
13 |     +----------------+---------+-------+-------+-----+---------+
14 |     | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 |
15 |     +----------------+---------+-------+-------+-----+---------+
16 | 
17 | Each of the 256 initial pointers states a position and a length. The
18 | position is the starting byte position of the hash table. The length
19 | is the number of slots in the hash table.
20 | 
21 | Records are stored sequentially, without special alignment. A record
22 | states a key length, a data length, the key, and the data.
23 | 
24 | Each hash table slot states a hash value and a byte position. If the
25 | byte position is 0, the slot is empty. Otherwise, the slot points to
26 | a record whose key has that hash value.
27 | 
28 | Positions, lengths, and hash values are 32-bit quantities, stored in
29 | little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes.
30 | 
31 | A record is located as follows. Compute the hash value of the key in
32 | the record. The hash value modulo 256 is the number of a hash table.
33 | The hash value divided by 256, modulo the length of that table, is a
34 | slot number. Probe that slot, the next higher slot, and so on, until
35 | you find the record or run into an empty slot.
36 | 
37 | The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
38 | hash of 5381.
39 | 


--------------------------------------------------------------------------------
/src/hash.rs:
--------------------------------------------------------------------------------
/// Starting value of the CDB hash (5381 in decimal).
const HASHSTART: u32 = 0x1505;
 2 | 
 3 | pub fn add(h: u32, c: u8) -> u32 {
 4 |     //(h + (h << 5)) ^ (c as u32)
 5 |     h.wrapping_shl(5).wrapping_add(h) ^ (c as u32)
 6 | }
 7 | 
 8 | pub fn hash(buf: &[u8]) -> u32 {
 9 |     let mut h = HASHSTART;
10 |     for c in buf {
11 |         h = add(h, *c);
12 |     }
13 |     h
14 | }
15 | 
/// Checks `hash` against known input/output pairs.
#[test]
fn samples() {
    let cases: [(&[u8], u32); 3] = [
        (b"", 0x0001505),
        (b"Hello, world!", 0x564369e8),
        (b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0x40032705),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(hash(input), expected);
    }
}
22 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! This crate provides support for reading and writing
 2 | //! [CDB](https://cr.yp.to/cdb.html) files. A CDB is a "constant
 3 | //! database" that acts as an on-disk associative array mapping keys to
 4 | //! values, allowing multiple values for each key. It provides for fast
 5 | //! lookups and low overheads. A constant database has no provision for
 6 | //! updating, only rewriting from scratch.
 7 | //!
 8 | //! # Examples
 9 | //!
10 | //! Reading a set of records:
11 | //!
12 | //! ```
13 | //! let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
14 | //!
15 | //! for result in cdb.find(b"one") {
16 | //!     println!("{:?}", result.unwrap());
17 | //! }
18 | //! ```
19 | //!
20 | //! Creating a database with safe atomic updating:
21 | //!
22 | //! ```no_run
23 | //! fn main() -> std::io::Result<()> {
24 | //!     let mut cdb = cdb::CDBWriter::create("temporary.cdb")?;
25 | //!     cdb.add(b"one", b"Hello, ")?;
26 | //!     cdb.add(b"one", b"world!\n")?;
27 | //!     cdb.add(b"two", &[1, 2, 3, 4])?;
28 | //!     cdb.finish()?;
29 | //!     Ok(())
30 | //! }
31 | //! ```
32 | //!
33 | //! # References
34 | //!
35 | //!  * [D. J. Bernstein's original software](https://cr.yp.to/cdb.html)
36 | //!  * [Constant Database (cdb) Internals](https://www.unixuser.org/~euske/doc/cdbinternals/index.html)
37 | //!  * [Wikipedia](https://en.wikipedia.org/wiki/Cdb_(software))
38 | 
39 | extern crate filebuffer;
40 | 
41 | mod hash;
42 | mod reader;
43 | mod uint32;
44 | mod writer;
45 | 
46 | pub use crate::reader::{CDBIter, CDBKeyValueIter, CDBValueIter, Result, CDB};
47 | pub use crate::writer::{CDBMake, CDBWriter};
48 | 


--------------------------------------------------------------------------------
/src/reader.rs:
--------------------------------------------------------------------------------
  1 | use filebuffer::FileBuffer;
  2 | use std::cmp::min;
  3 | use std::io;
  4 | use std::path;
  5 | 
  6 | use crate::hash::hash;
  7 | use crate::uint32;
  8 | 
  9 | pub use std::io::Result;
 10 | 
/// Size in bytes of the stack buffer used to compare keys chunk by chunk.
const KEYSIZE: usize = 32;
 12 | 
/// CDB file reader
///
/// # Example
///
/// ```
/// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
///
/// for result in cdb.find(b"one") {
///     println!("{:?}", result.unwrap());
/// }
/// ```
pub struct CDB {
    /// Contents of the opened database file.
    file: FileBuffer,
    /// Length of `file` in bytes, recorded when the database is opened.
    size: usize,
}
 28 | 
 29 | fn err_badfile<T>() -> Result<T> {
 30 |     Err(io::Error::new(io::ErrorKind::Other, "Invalid file format"))
 31 | }
 32 | 
 33 | impl CDB {
 34 |     /// Opens the named file and returns the CDB reader.
 35 |     ///
 36 |     /// # Examples
 37 |     ///
 38 |     /// ```
 39 |     /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
 40 |     /// ```
 41 |     pub fn open<P: AsRef<path::Path>>(filename: P) -> Result<CDB> {
 42 |         let file = FileBuffer::open(&filename)?;
 43 |         if file.len() < 2048 + 8 + 8 || file.len() > 0xffffffff {
 44 |             return err_badfile();
 45 |         }
 46 |         let size = file.len();
 47 |         Ok(CDB { file, size })
 48 |     }
 49 | 
 50 |     fn read(&self, buf: &mut [u8], pos: u32) -> Result<usize> {
 51 |         let len = buf.len();
 52 |         let pos = pos as usize;
 53 |         if pos + len > self.size {
 54 |             return err_badfile();
 55 |         }
 56 |         buf.copy_from_slice(&self.file[pos..pos + len]);
 57 |         Ok(len)
 58 |     }
 59 | 
 60 |     fn hash_table(&self, khash: u32) -> (u32, u32, u32) {
 61 |         let x = ((khash as usize) & 0xff) << 3;
 62 |         let (hpos, hslots) = uint32::unpack2(&self.file[x..x + 8]);
 63 |         let kpos = if hslots > 0 {
 64 |             hpos + (((khash >> 8) % hslots) << 3)
 65 |         } else {
 66 |             0
 67 |         };
 68 |         (hpos, hslots, kpos)
 69 |     }
 70 | 
 71 |     fn match_key(&self, key: &[u8], pos: u32) -> Result<bool> {
 72 |         let mut buf = [0 as u8; KEYSIZE];
 73 |         let mut len = key.len();
 74 |         let mut pos = pos;
 75 |         let mut keypos = 0;
 76 | 
 77 |         while len > 0 {
 78 |             let n = min(len, buf.len());
 79 |             self.read(&mut buf[..n], pos)?;
 80 |             if buf[..n] != key[keypos..keypos + n] {
 81 |                 return Ok(false);
 82 |             }
 83 |             pos += n as u32;
 84 |             keypos += n;
 85 |             len -= n;
 86 |         }
 87 |         Ok(true)
 88 |     }
 89 | 
 90 |     /// Find the first record with the named key.
 91 |     ///
 92 |     /// # Examples
 93 |     ///
 94 |     /// ```
 95 |     /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
 96 |     /// if let Some(record) = cdb.get(b"one") {
 97 |     ///     println!("{:?}", record.unwrap());
 98 |     /// }
 99 |     /// ```
100 |     pub fn get(&self, key: &[u8]) -> Option<Result<Vec<u8>>> {
101 |         self.find(key).next()
102 |     }
103 | 
104 |     /// Find all records with the named key. The returned iterator
105 |     /// produces each value associated with the key.
106 |     ///
107 |     /// # Examples
108 |     ///
109 |     /// ```
110 |     /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
111 |     ///
112 |     /// for result in cdb.find(b"one") {
113 |     ///     println!("{:?}", result.unwrap());
114 |     /// }
115 |     /// ```
116 |     pub fn find(&self, key: &[u8]) -> CDBValueIter {
117 |         CDBValueIter::find(self, key)
118 |     }
119 | 
120 |     /// Iterate over all the `(key, value)` pairs in the database.
121 |     ///
122 |     /// # Examples
123 |     ///
124 |     /// ```
125 |     /// let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
126 |     /// for result in cdb.iter() {
127 |     ///     let (key, value) = result.unwrap();
128 |     ///     println!("{:?} => {:?}", key, value);
129 |     /// }
130 |     /// ````
131 |     pub fn iter(&self) -> CDBKeyValueIter {
132 |         CDBKeyValueIter::start(&self)
133 |     }
134 | }
135 | 
/// Type alias for [`CDBValueIter`](struct.CDBValueIter.html)
pub type CDBIter<'a> = CDBValueIter<'a>;
138 | 
/// Iterator over a set of records in the CDB with the same key.
///
/// See [`CDB::find`](struct.CDB.html#method.find)
pub struct CDBValueIter<'a> {
    /// Database being searched.
    cdb: &'a CDB,
    /// Owned copy of the key being looked up.
    key: Vec<u8>,
    /// Hash of `key`.
    khash: u32,
    /// Number of hash table slots examined so far.
    kloop: u32,
    /// Byte position of the next hash table slot to examine.
    kpos: u32,
    /// Byte position of the hash table for `khash`.
    hpos: u32,
    /// Number of slots in that hash table.
    hslots: u32,
    /// Byte position of the most recently matched value.
    dpos: u32,
    /// Length in bytes of the most recently matched value.
    dlen: u32,
}
153 | 
154 | impl<'a> CDBValueIter<'a> {
155 |     fn find(cdb: &'a CDB, key: &[u8]) -> Self {
156 |         let khash = hash(key);
157 |         let (hpos, hslots, kpos) = cdb.hash_table(khash);
158 | 
159 |         CDBValueIter {
160 |             cdb: cdb,
161 |             key: key.into_iter().map(|x| *x).collect(),
162 |             khash: khash,
163 |             kloop: 0,
164 |             kpos: kpos,
165 |             hpos: hpos,
166 |             hslots: hslots,
167 |             dpos: 0,
168 |             dlen: 0,
169 |         }
170 |     }
171 | 
172 |     fn read_vec(&self) -> Result<Vec<u8>> {
173 |         let mut result = vec![0; self.dlen as usize];
174 |         self.cdb.read(&mut result[..], self.dpos)?;
175 |         Ok(result)
176 |     }
177 | }
178 | 
/// `?`-like helper for `Iterator::next` implementations whose items are
/// `Result`s: evaluates to the `Ok` value, or returns `Some(Err(..))` from the
/// enclosing function.
macro_rules! iter_try {
    ( $e:expr ) => {
        match $e {
            Ok(value) => value,
            Err(error) => return Some(Err(error)),
        }
    };
}
189 | 
190 | impl<'a> Iterator for CDBValueIter<'a> {
191 |     type Item = Result<Vec<u8>>;
192 |     fn next(&mut self) -> Option<Self::Item> {
193 |         while self.kloop < self.hslots {
194 |             let mut buf = [0 as u8; 8];
195 |             let kpos = self.kpos;
196 |             iter_try!(self.cdb.read(&mut buf, kpos));
197 |             let (khash, pos) = uint32::unpack2(&buf);
198 |             if pos == 0 {
199 |                 return None;
200 |             }
201 |             self.kloop += 1;
202 |             self.kpos += 8;
203 |             if self.kpos == self.hpos + (self.hslots << 3) {
204 |                 self.kpos = self.hpos;
205 |             }
206 |             if khash == self.khash {
207 |                 iter_try!(self.cdb.read(&mut buf, pos));
208 |                 let (klen, dlen) = uint32::unpack2(&buf);
209 |                 if klen as usize == self.key.len() {
210 |                     if iter_try!(self.cdb.match_key(&self.key[..], pos + 8)) {
211 |                         self.dlen = dlen;
212 |                         self.dpos = pos + 8 + self.key.len() as u32;
213 |                         return Some(self.read_vec());
214 |                     }
215 |                 }
216 |             }
217 |         }
218 |         None
219 |     }
220 | }
221 | 
/// Iterator over all the records in the CDB.
///
/// See [`CDB::iter`](struct.CDB.html#method.iter)
pub struct CDBKeyValueIter<'a> {
    /// Database being scanned.
    cdb: &'a CDB,
    /// Byte position of the next record header to read.
    pos: u32,
    /// Byte position at which the scan stops.
    data_end: u32,
}
230 | 
231 | impl<'a> CDBKeyValueIter<'a> {
232 |     fn start(cdb: &'a CDB) -> Self {
233 |         let data_end = uint32::unpack(&cdb.file[0..4]).min(cdb.size as u32);
234 |         Self {
235 |             cdb,
236 |             pos: 2048,
237 |             data_end,
238 |         }
239 |     }
240 | }
241 | 
242 | impl<'a> Iterator for CDBKeyValueIter<'a> {
243 |     type Item = Result<(Vec<u8>, Vec<u8>)>;
244 |     fn next(&mut self) -> Option<Self::Item> {
245 |         if self.pos + 8 >= self.data_end {
246 |             None
247 |         } else {
248 |             let (klen, dlen) =
249 |                 uint32::unpack2(&self.cdb.file[self.pos as usize..self.pos as usize + 8]);
250 |             if self.pos + klen + dlen >= self.data_end {
251 |                 Some(err_badfile())
252 |             } else {
253 |                 let kpos = (self.pos + 8) as usize;
254 |                 let dpos = kpos + klen as usize;
255 |                 let mut key = vec![0; klen as usize];
256 |                 let mut value = vec![0; dlen as usize];
257 |                 // Copied from CDB::read
258 |                 key.copy_from_slice(&self.cdb.file[kpos..kpos + klen as usize]);
259 |                 value.copy_from_slice(&self.cdb.file[dpos..dpos + dlen as usize]);
260 |                 self.pos += 8 + klen + dlen;
261 |                 Some(Ok((key, value)))
262 |             }
263 |         }
264 |     }
265 | }
266 | 


--------------------------------------------------------------------------------
/src/uint32.rs:
--------------------------------------------------------------------------------
 1 | use std::u32;
 2 | 
 3 | pub fn unpack(data: &[u8]) -> u32 {
 4 |     assert!(data.len() >= 4);
 5 |     // Use u32::from_bytes when it stabilizes
 6 |     // Rust compiles this down to an efficient word copy
 7 |     (data[0] as u32) | ((data[1] as u32) << 8) | ((data[2] as u32) << 16) | ((data[3] as u32) << 24)
 8 | }
 9 | 
10 | pub fn unpack2(buf: &[u8]) -> (u32, u32) {
11 |     (unpack(&buf[0..4]), unpack(&buf[4..8]))
12 | }
13 | 
/// Encodes `src` as four little-endian bytes.
fn _pack(src: u32) -> [u8; 4] {
    src.to_le_bytes()
}
24 | 
25 | pub fn pack(data: &mut [u8], src: u32) {
26 |     assert!(data.len() >= 4);
27 |     data[..4].copy_from_slice(&_pack(src));
28 | }
29 | 
30 | pub fn pack2(data: &mut [u8], src0: u32, src1: u32) {
31 |     assert!(data.len() >= 8);
32 |     pack(&mut data[0..4], src0);
33 |     pack(&mut data[4..8], src1);
34 | }
35 | 


--------------------------------------------------------------------------------
/src/writer.rs:
--------------------------------------------------------------------------------
  1 | use std::cmp::max;
  2 | use std::fs;
  3 | use std::io;
  4 | use std::io::prelude::*;
  5 | use std::iter;
  6 | use std::path;
  7 | use std::string;
  8 | 
  9 | use crate::hash::hash;
 10 | use crate::uint32;
 11 | 
 12 | pub use std::io::Result;
 13 | 
/// One hash table slot: the hash of a key and the position of its record.
#[derive(Clone, Copy, Debug)]
struct HashPos {
    /// Hash of the record's key.
    hash: u32,
    /// Byte position of the record in the file; 0 marks an unused slot.
    pos: u32,
}
 19 | 
 20 | impl HashPos {
 21 |     fn pack(&self, buf: &mut [u8]) {
 22 |         uint32::pack2(buf, self.hash, self.pos);
 23 |     }
 24 | }
 25 | 
 26 | fn err_toobig<T>() -> Result<T> {
 27 |     Err(io::Error::new(io::ErrorKind::Other, "File too big"))
 28 | }
 29 | 
/// Base interface for making a CDB file.
///
/// # Example
///
/// ```no_run
/// fn main() -> std::io::Result<()> {
///     let file = std::fs::File::create("temporary.cdb")?;
///     let mut cdb = cdb::CDBMake::new(file)?;
///     cdb.add(b"one", b"Hello,")?;
///     cdb.add(b"two", b"world!")?;
///     cdb.finish()?;
///     Ok(())
/// }
/// ```
pub struct CDBMake {
    /// Hash/position pairs of the records added so far, grouped into 256
    /// buckets by the low 8 bits of the key hash.
    entries: Vec<Vec<HashPos>>,
    /// Byte position in the file at which the next write lands.
    pos: u32,
    /// Buffered writer over the output file.
    file: io::BufWriter<fs::File>,
}
 49 | 
 50 | impl CDBMake {
 51 |     /// Create a new CDB maker.
 52 |     pub fn new(file: fs::File) -> Result<CDBMake> {
 53 |         let mut w = io::BufWriter::new(file);
 54 |         let buf = [0; 2048];
 55 |         w.seek(io::SeekFrom::Start(0))?;
 56 |         w.write(&buf)?;
 57 |         Ok(CDBMake {
 58 |             entries: iter::repeat(vec![]).take(256).collect::<Vec<_>>(),
 59 |             pos: 2048,
 60 |             file: w,
 61 |         })
 62 |     }
 63 | 
 64 |     fn pos_plus(&mut self, len: u32) -> Result<()> {
 65 |         if self.pos + len < len {
 66 |             err_toobig()
 67 |         } else {
 68 |             self.pos += len;
 69 |             Ok(())
 70 |         }
 71 |     }
 72 | 
 73 |     fn add_end(&mut self, keylen: u32, datalen: u32, hash: u32) -> Result<()> {
 74 |         self.entries[(hash & 0xff) as usize].push(HashPos {
 75 |             hash: hash,
 76 |             pos: self.pos,
 77 |         });
 78 |         self.pos_plus(8)?;
 79 |         self.pos_plus(keylen)?;
 80 |         self.pos_plus(datalen)?;
 81 |         Ok(())
 82 |     }
 83 | 
 84 |     fn add_begin(&mut self, keylen: u32, datalen: u32) -> Result<()> {
 85 |         let mut buf = [0; 8];
 86 |         uint32::pack2(&mut buf[0..8], keylen, datalen);
 87 |         self.file.write(&buf)?;
 88 |         Ok(())
 89 |     }
 90 | 
 91 |     /// Add a record to the CDB file.
 92 |     pub fn add(&mut self, key: &[u8], data: &[u8]) -> Result<()> {
 93 |         if key.len() >= 0xffffffff || data.len() >= 0xffffffff {
 94 |             return Err(io::Error::new(io::ErrorKind::Other, "Key or data too big"));
 95 |         }
 96 |         self.add_begin(key.len() as u32, data.len() as u32)?;
 97 |         self.file.write(key)?;
 98 |         self.file.write(data)?;
 99 |         self.add_end(key.len() as u32, data.len() as u32, hash(&key[..]))
100 |     }
101 | 
102 |     /// Set the permissions on the underlying file.
103 |     pub fn set_permissions(&self, perm: fs::Permissions) -> Result<()> {
104 |         self.file.get_ref().set_permissions(perm)
105 |     }
106 | 
107 |     /// Finish writing to the CDB file and flush its contents.
108 |     pub fn finish(mut self) -> Result<()> {
109 |         let mut buf = [0; 8];
110 | 
111 |         let maxsize = self.entries.iter().fold(1, |acc, e| max(acc, e.len() * 2));
112 |         let count = self.entries.iter().fold(0, |acc, e| acc + e.len());
113 |         if maxsize + count > (0xffffffff / 8) {
114 |             return err_toobig();
115 |         }
116 | 
117 |         let mut table = vec![HashPos { hash: 0, pos: 0 }; maxsize];
118 | 
119 |         let mut header = [0 as u8; 2048];
120 |         for i in 0..256 {
121 |             let len = self.entries[i].len() * 2;
122 |             let j = i * 8;
123 |             uint32::pack2(&mut header[j..j + 8], self.pos, len as u32);
124 | 
125 |             for e in self.entries[i].iter() {
126 |                 let mut wh = (e.hash as usize >> 8) % len;
127 |                 while table[wh].pos != 0 {
128 |                     wh += 1;
129 |                     if wh == len {
130 |                         wh = 0;
131 |                     }
132 |                 }
133 |                 table[wh] = *e;
134 |             }
135 | 
136 |             for hp in table.iter_mut().take(len) {
137 |                 hp.pack(&mut buf);
138 |                 self.file.write(&buf)?;
139 |                 self.pos_plus(8)?;
140 |                 *hp = HashPos { hash: 0, pos: 0 };
141 |             }
142 |         }
143 | 
144 |         self.file.flush()?;
145 |         self.file.seek(io::SeekFrom::Start(0))?;
146 |         self.file.write(&header)?;
147 |         self.file.flush()?;
148 |         Ok(())
149 |     }
150 | }
151 | 
/// A CDB file writer which handles atomic updating.
///
/// Using this type, a CDB file is safely written by first creating a
/// temporary file, building the CDB structure into that temporary file,
/// and finally renaming that temporary file over the final file name.
/// If the temporary file is not properly finished (i.e. due to an error),
/// the temporary file is deleted when this writer is dropped.
///
/// # Example
///
/// ```no_run
/// use cdb::CDBWriter;
///
/// fn main() -> std::io::Result<()> {
///     let mut cdb = CDBWriter::create("temporary.cdb")?;
///     cdb.add(b"one", b"Hello")?;
///     cdb.finish()?;
///     Ok(())
/// }
/// ```
pub struct CDBWriter {
    /// Final name the finished file is renamed to.
    dstname: String,
    /// Name of the temporary file the database is built in.
    tmpname: String,
    /// Underlying maker; `None` once `finish` has taken it.
    cdb: Option<CDBMake>,
}
177 | 
178 | impl CDBWriter {
179 |     /// Safely create a new CDB file.
180 |     ///
181 |     /// The suffix for the temporary file defaults to `".tmp"`.
182 |     pub fn create<P: AsRef<path::Path> + string::ToString>(filename: P) -> Result<CDBWriter> {
183 |         CDBWriter::with_suffix(filename, ".tmp")
184 |     }
185 | 
186 |     /// Safely create a new CDB file, using a specific suffix for the temporary file.
187 |     pub fn with_suffix<P: AsRef<path::Path> + string::ToString>(
188 |         filename: P,
189 |         suffix: &str,
190 |     ) -> Result<CDBWriter> {
191 |         let mut tmpname = filename.to_string();
192 |         tmpname.push_str(suffix);
193 |         CDBWriter::with_filenames(filename, &tmpname)
194 |     }
195 | 
196 |     /// Safely create a new CDB file, using two specific file names.
197 |     ///
198 |     /// Note that the temporary file name must be on the same filesystem
199 |     /// as the destination, or else the final rename will fail.
200 |     pub fn with_filenames<
201 |         P: AsRef<path::Path> + string::ToString,
202 |         Q: AsRef<path::Path> + string::ToString,
203 |     >(
204 |         filename: P,
205 |         tmpname: Q,
206 |     ) -> Result<CDBWriter> {
207 |         let file = fs::File::create(&tmpname)?;
208 |         let cdb = CDBMake::new(file)?;
209 |         Ok(CDBWriter {
210 |             dstname: filename.to_string(),
211 |             tmpname: tmpname.to_string(),
212 |             cdb: Some(cdb),
213 |         })
214 |     }
215 | 
216 |     /// Add a record to the CDB file.
217 |     pub fn add(&mut self, key: &[u8], data: &[u8]) -> Result<()> {
218 |         // The unwrap() is safe here, as the internal cdb is only ever
219 |         // None during finish(), which does not call this.
220 |         self.cdb.as_mut().unwrap().add(key, data)
221 |     }
222 | 
223 |     /// Set permissions on the temporary file.
224 |     ///
225 |     /// This must be done before the file is finished, as the temporary
226 |     /// file will no longer exist at that point.
227 |     pub fn set_permissions(&self, perm: fs::Permissions) -> Result<()> {
228 |         self.cdb.as_ref().unwrap().set_permissions(perm)
229 |     }
230 | 
231 |     pub fn finish(mut self) -> Result<()> {
232 |         self.cdb.take().unwrap().finish()?;
233 |         fs::rename(&self.tmpname, &self.dstname)?;
234 |         Ok(())
235 |     }
236 | }
237 | 
238 | impl Drop for CDBWriter {
239 |     #[allow(unused_must_use)]
240 |     fn drop(&mut self) {
241 |         if let Some(_) = self.cdb {
242 |             fs::remove_file(&self.tmpname);
243 |         }
244 |     }
245 | }
246 | 


--------------------------------------------------------------------------------
/tests/make.rs:
--------------------------------------------------------------------------------
 1 | extern crate cdb;
 2 | use std::fs;
 3 | 
 4 | macro_rules! noerr {
 5 |     ( $e:expr ) => {
 6 |         if let Err(x) = $e {
 7 |             panic!("{}", x);
 8 |         }
 9 |     };
10 | }
11 | 
/// Writes a small database with `CDBWriter`, then reads it back through
/// both the keyed lookup and the sequential iterator.
#[test]
fn test_make() {
    let filename = "tests/make.cdb";

    let mut cdb = cdb::CDBWriter::create(filename).unwrap();
    noerr!(cdb.add(b"one", b"Hello"));
    noerr!(cdb.add(b"two", b"Goodbye"));
    noerr!(cdb.add(b"one", b", World!"));
    noerr!(cdb.add(b"this key will be split across two reads", b"Got it."));
    noerr!(cdb.finish());

    let cdb = cdb::CDB::open(filename).unwrap();
    assert_eq!(cdb.find(b"two").next().unwrap().unwrap(), b"Goodbye");
    assert_eq!(
        cdb.find(b"this key will be split across two reads")
            .next()
            .unwrap()
            .unwrap(),
        b"Got it."
    );

    // Both values stored under "one" come back in insertion order, and
    // nothing else does.
    let mut i = cdb.find(b"one");
    assert_eq!(i.next().unwrap().unwrap(), b"Hello");
    assert_eq!(i.next().unwrap().unwrap(), b", World!");
    assert!(i.next().is_none());

    // Sequential iteration yields the records in the order they were added.
    let mut i = cdb.iter();
    let next = i.next().unwrap().unwrap();
    assert_eq!(next.0, b"one");
    assert_eq!(next.1, b"Hello");
    let next = i.next().unwrap().unwrap();
    assert_eq!(next.0, b"two");
    assert_eq!(next.1, b"Goodbye");
    let next = i.next().unwrap().unwrap();
    assert_eq!(next.0, b"one");
    assert_eq!(next.1, b", World!");
    let next = i.next().unwrap().unwrap();
    // Compare as slices: the 39-byte array literal cannot be compared to a
    // `Vec<u8>` directly on older compilers.
    let long_key: &[u8] = b"this key will be split across two reads";
    assert_eq!(&next.0[..], long_key);
    assert_eq!(next.1, b"Got it.");
    assert!(i.next().is_none());

    noerr!(fs::remove_file(filename));
}
53 | 


--------------------------------------------------------------------------------
/tests/read.rs:
--------------------------------------------------------------------------------
 1 | extern crate cdb;
 2 | 
 3 | #[test]
 4 | fn test_one() {
 5 |     let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();
 6 |     let mut i = cdb.find(b"one");
 7 |     assert_eq!(i.next().unwrap().unwrap(), b"Hello");
 8 |     assert_eq!(i.next().unwrap().unwrap(), b", World!");
 9 | }
10 | 
/// Looks up a short key and a key longer than the reader's compare buffer.
#[test]
fn test_two() {
    let cdb = cdb::CDB::open("tests/test1.cdb").unwrap();

    let short = cdb.find(b"two").next().unwrap().unwrap();
    assert_eq!(short, b"Goodbye");

    let long = cdb
        .find(b"this key will be split across two reads")
        .next()
        .unwrap()
        .unwrap();
    assert_eq!(long, b"Got it.");
}
23 | 


--------------------------------------------------------------------------------
/tests/test1.cdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bruceg/cdb-rs/c3c544d332728b8cf51eadbcdd4f045abd15a40e/tests/test1.cdb


--------------------------------------------------------------------------------
/tests/test1.txt:
--------------------------------------------------------------------------------
1 | +3,5:one->Hello
2 | +3,7:two->Goodbye
3 | +3,8:one->, World!
4 | +39,7:this key will be split across two reads->Got it.
5 | 
6 | 


--------------------------------------------------------------------------------
/tests/test2.cdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bruceg/cdb-rs/c3c544d332728b8cf51eadbcdd4f045abd15a40e/tests/test2.cdb


--------------------------------------------------------------------------------
/tests/test2.sh:
--------------------------------------------------------------------------------
# Prints records in the same `+klen,dlen:key->data` text format as
# tests/test1.txt, terminated by an empty line.
# NOTE(review): presumably the input tests/test2.cdb was built from - confirm.
for i in $( seq 1000 )
do
	# A numeric key (at most six digits) and a numeric value derived from i.
	key=$(( $i * 3141592654 % 1000000 ))
	val=$(( $i * 2718281828459045 ))
	echo "+${#key},${#val}:${key}->${val}"
	# One more value under the shared key "one" per iteration.
	echo "+3,${#i}:one->${i}"
done
echo "+3,7:two->Goodbye"
# Empty line marking the end of the records.
echo
10 | 


--------------------------------------------------------------------------------