├── .gitignore ├── LICENSE ├── README.md ├── atomic-hashmap ├── Cargo.toml └── src │ ├── lib.rs │ ├── sponge.rs │ ├── table.rs │ └── table.rs.orig ├── cbloom ├── Cargo.toml └── src │ └── lib.rs ├── chashmap ├── Cargo.toml └── src │ ├── lib.rs │ └── tests.rs ├── conc ├── Cargo.toml └── src │ ├── atomic.rs │ ├── debug.rs │ ├── garbage.rs │ ├── global.rs │ ├── guard.rs │ ├── hazard.rs │ ├── lib.rs │ ├── local.rs │ ├── mpsc.rs │ ├── settings.rs │ └── sync │ ├── mod.rs │ ├── stm.rs │ └── treiber.rs ├── control-flow ├── Cargo.toml └── src │ └── lib.rs ├── core ├── Cargo.toml └── src │ ├── alloc │ ├── dedup.rs │ ├── mod.rs │ ├── page.rs │ └── state_block.rs │ ├── disk │ ├── cache.rs │ ├── cluster.rs │ ├── crypto.rs │ ├── header.rs │ ├── mod.rs │ ├── todo │ └── vdev.rs │ ├── error.rs │ ├── fs │ ├── array.rs │ ├── mod.rs │ └── object.rs │ ├── lib.rs │ └── macros.rs ├── icon.svg ├── little-endian ├── Cargo.toml └── src │ └── lib.rs ├── lz4 ├── Cargo.toml └── src │ ├── compress.rs │ ├── decompress.rs │ ├── lib.rs │ ├── main.rs │ └── tests.rs ├── mlcr ├── Cargo.toml ├── src │ └── lib.rs └── tests │ └── simple.rs ├── notes ├── compression.md ├── vdevs.md └── zmicro.md ├── seahash ├── Cargo.toml ├── README.md ├── benches │ └── gigabyte.rs ├── logo.png └── src │ ├── buffer.rs │ ├── helper.rs │ ├── lib.rs │ ├── reference.rs │ └── stream.rs ├── spec └── specification.tex ├── speck ├── Cargo.toml ├── benches │ └── lib.rs └── src │ └── lib.rs ├── thread-object ├── Cargo.toml └── src │ └── lib.rs ├── type-name ├── Cargo.toml └── src │ └── lib.rs └── zmicro ├── Cargo.toml └── src ├── lib.rs └── range.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *.aux 4 | *.pdf 5 | *.log 6 | *.out 7 | *.pdf_tex 8 | *.toc 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Ticki 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TFS was replaced by [RedoxFS](https://gitlab.redox-os.org/redox-os/redoxfs) and is no longer maintained, most of the features of TFS have been incorporated into RedoxFS 2 | 3 | ![TFS](https://rawgit.com/ticki/tfs/master/icon.svg) 4 | 5 | # TFS: Next-generation file system 6 | 7 | TFS is a modular, fast, and feature rich next-gen file system, employing 8 | modern techniques for high performance, high space efficiency, and high 9 | scalability. 10 | 11 | TFS was created out of the need for a modern file system for Redox OS, 12 | as a replacement for ZFS, which proved to be slow to implement because 13 | of its monolithic design. 14 | 15 | TFS is inspired by the ideas behind ZFS, but at the same time it aims to 16 | be modular and easier to implement. 17 | 18 | TFS is not related to the file system of the same name by 19 | *terminalcloud*. 20 | 21 | *While many components are complete, TFS itself is not ready for use.* 22 | 23 | [![MIT/X11 permissive license.](https://img.shields.io/github/license/ticki/tfs.svg)](https://en.wikipedia.org/wiki/MIT_License) 24 | 25 | ![GitHub Stars](https://img.shields.io/github/stars/ticki/tfs.svg?style=social&label=Star) 26 | 27 | ## Design goals 28 | 29 | TFS is designed with the following goals in mind: 30 | 31 | - Concurrent 32 | 33 | TFS contains very few locks and aims to be as suitable for 34 | multithreaded systems as possible. It makes use of multiple truly 35 | concurrent structures to manage the data, and scales linearly by the 36 | number of cores. **This is perhaps the most important feature 37 | of TFS.** 38 | 39 | - Asynchronous 40 | 41 | TFS is asynchronous: operations can happen independently; writes and 42 | reads from the disk need not block. 43 | 44 | - Full-disk compression 45 | 46 | TFS is the first file system to incorporate complete full-disk 47 | compression through a scheme we call RACC (random-access 48 | cluster compression). This means that every cluster is compressed 49 | only affecting performance slightly. It is estimated that you get 50 | 60-120% more usable space. 51 | 52 | - Revision history 53 | 54 | TFS stores a revision history of every file without imposing 55 | extra overhead. This means that you can revert any file into an 56 | earlier version, backing up the system automatically and without 57 | imposed overhead from copying. 58 | 59 | - Data integrity 60 | 61 | TFS, like ZFS, stores full checksums of the file (not just 62 | metadata), and on top of that, it is done in the parent block. That 63 | means that almost all data corruption will be detected upon read. 64 | 65 | - Copy-on-write semantics 66 | 67 | Similarly to Btrfs and ZFS, TFS uses CoW semantics, meaning that no 68 | cluster is ever overwritten directly, but instead it is copied and 69 | written to a new cluster. 70 | 71 | - O(1) recursive copies 72 | 73 | Like some other file systems, TFS can do recursive copies in 74 | constant time, but there is an unique addition: TFS doesn't copy 75 | even after it is mutated. How? It maintains segments of the file 76 | individually, such that only the updated segment needs copying. 77 | 78 | - Guaranteed atomicity 79 | 80 | The system will never enter an inconsistent state (unless there is 81 | hardware failure), meaning that unexpected power-off won't ever 82 | damage the system. 
83 | 84 | - Improved caching 85 | 86 | TFS puts a lot of effort into caching the disk to speed up 87 | disk accesses. It uses machine learning to learn patterns and 88 | predict future uses to reduce the number of cache misses. TFS also 89 | compresses the in-memory cache, reducing the amount of 90 | memory needed. 91 | 92 | - Better file monitoring 93 | 94 | CoW is very suitable for high-performance, scalable file monitoring, 95 | but unfortunately only few file systems incorporate that. TFS is one 96 | of those. 97 | 98 | - All memory safe 99 | 100 | TFS uses only components written in Rust. As such, memory unsafety 101 | is only possible in code marked unsafe, which is checked 102 | extra carefully. 103 | 104 | - Full coverage testing 105 | 106 | TFS aims to be full coverage with respect to testing. This gives 107 | relatively strong guarantees on correctness by instantly revealing 108 | large classes of bugs. 109 | 110 | - SSD friendly 111 | 112 | TFS tries to avoid the write limitation in SSD by repositioning 113 | dead sectors. 114 | 115 | - Improved garbage collection 116 | 117 | TFS uses Bloom filters for space-efficient and fast 118 | garbage collection. TFS allows the FS garbage collector to run in 119 | the background without blocking the rest of the file system. 120 | 121 | ## FAQ 122 | 123 | ### Why do you use SPECK as the default cipher? 124 | 125 | - SPECK is a relatively young cipher, yet it has been subject to a lot 126 | of (ineffective) cryptanalysis, so it is relatively secure. It has 127 | really good performance and a simple implementation. Portability is 128 | an important part of the TFS design, and truly portable AES 129 | implementations without side-channel attacks is harder than many 130 | think (particularly, there are issues with SubBytes in most 131 | portable implementations). SPECK does not have this issue, and can 132 | thus be securely implemented portably with minimal effort. 133 | 134 | ### How similar is TFS and ZFS? 135 | 136 | - Not that similar, actually. They share many of the basic ideas, but 137 | otherwise they are essentially unconnected. But ZFS' design has 138 | shaped TFS' a lot. 139 | 140 | ### Is TFS Redox-only? 141 | 142 | - No, and it was never planned to be Redox-only. 143 | 144 | ### How does whole-disk compression work? 145 | 146 | - Whole-disk compression is -- to my knowledge -- exclusive to TFS. It 147 | works by collecting as many "pages" (virtual data blocks) into a 148 | "cluster" (allocation unit). By doing this, the pages can be read by 149 | simply decompressing the respective cluster. 150 | 151 | ### Why is ZMicro so slow? Will it affect the performance of TFS? 152 | 153 | - The reason ZMicro is so slow is because it works on a bit level, 154 | giving excellent compression ratio on the cost of performance. This 155 | horribly slow performance is paid back by the reduced number 156 | of writes. In fact, more than 50% of the allocations with ZMicro 157 | will only write one sector, as opposed to 3. Secondly, no matter how 158 | fast your disk is, it will not get anywhere near the performance of 159 | ZMicro because disk operations are inherently slow, and when put in 160 | perspective, the performance of the compression is 161 | really unimportant. 162 | 163 | ### Extendible hashing or B+ trees? 164 | 165 | - Neither. TFS uses a combination of trees and hash tables: Nested hash tables, a form of hash trees. The idea is that instead of reallocating, a new subtable is created in the bucket. 
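To make the nested-hash-table answer above concrete, here is a minimal, single-threaded sketch of the idea. It is not the actual `atomic-hashmap`/`chashmap` code (which is lock-free and CAS-based); `Node`, `Table` and `SUBTABLE_SIZE` are illustrative names only.

```rust
const SUBTABLE_SIZE: usize = 16;

enum Node<K, V> {
    /// Nothing stored in this bucket.
    Empty,
    /// A single key/value pair.
    Leaf(K, V),
    /// On collision, the bucket is replaced by a nested subtable
    /// instead of reallocating and rehashing the whole table.
    Subtable(Box<Table<K, V>>),
}

struct Table<K, V> {
    buckets: Vec<Node<K, V>>,
}

impl<K: Eq, V> Table<K, V> {
    /// Look up `key`, consuming one byte of the hash sequence per level of nesting.
    fn get(&self, key: &K, hash_seq: &mut impl Iterator<Item = u8>) -> Option<&V> {
        let i = hash_seq.next()? as usize % SUBTABLE_SIZE;
        match &self.buckets[i] {
            Node::Empty => None,
            Node::Leaf(k, v) => if k == key { Some(v) } else { None },
            Node::Subtable(sub) => sub.get(key, hash_seq),
        }
    }
}
```

The sponge in `atomic-hashmap/src/sponge.rs` guarantees that the squeeze streams of two distinct keys eventually diverge, so a collision can always be resolved by adding one more level of nesting rather than growing and rehashing the table.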
166 | 167 | ## Resources on design 168 | 169 | I've written a number of pieces on the design of TFS: 170 | 171 | - [SeaHash: Explained](http://ticki.github.io/blog/seahash-explained/) - This describes the default checksum algorithm designed for TFS. 172 | - [On Random-Access Compression](http://ticki.github.io/blog/on-random-access-compression/) - This post describes the algorithm used for random-access compression. 173 | - [Ternary as a prediction residue code](http://ticki.github.io/blog/ternary-as-a-prediction-residue-code/) - The use of this is related to creating a good adaptive (headerless) entropy compressor. 174 | - [How LZ4 works](http://ticki.github.io/blog/how-lz4-works/) - This describes how the LZ4 compression algorithm works. 175 | - [Collision Resolution with Nested Hash Tables](https://ticki.github.io/blog/collision-resolution-with-nested-hash-tables/) - This describes the method of nested hash tables we use for the directory structure. 176 | - [An Atomic Hash Table](https://ticki.github.io/blog/an-atomic-hash-table/) - This describes the concurrent, in-memory hash table/key-value store. 177 | 178 | ## Specification 179 | 180 | The full specification can be found in `specification.tex`, to render it install `texlive` or another distribution with XeTeX, and run: 181 | 182 | ```sh 183 | xelatex --shell-escape specification.tex 184 | ``` 185 | 186 | Then open the file named `specification.pdf` -------------------------------------------------------------------------------- /atomic-hashmap/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "atomic-hashmap" 3 | version = "0.1.0" 4 | authors = ["ticki "] 5 | keywords = ["conc", "concurrent", "hashmap", "map", "table"] 6 | 7 | [dependencies] 8 | conc = "0.2" 9 | -------------------------------------------------------------------------------- /atomic-hashmap/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of a lock-free, atomic hash table. 2 | //! 3 | //! This crate provides a high-performance implementation of a completely 4 | //! lock-free (no mutexes, no spin-locks, or the alike) hash table. 5 | //! 6 | //! The only instruction we use is CAS, which allows us to atomically update 7 | //! the table. 8 | //! 9 | //! # Design 10 | //! 11 | //! It is structured as a 256-radix tree with a pseudorandom permutation 12 | //! applied to the key. Contrary to open addressing, this approach is entirely 13 | //! lock-free and need not reallocation. 14 | //! 15 | //! The permutation is a simple table+XOR based length-padded function, which 16 | //! is applied to avoid excessive depth (this is what makes it a "hash table"). 17 | //! 18 | //! See [this blog post](https://ticki.github.io/blog/an-atomic-hash-table/) 19 | //! for details. 20 | 21 | #![feature(box_patterns)] 22 | 23 | extern crate conc; 24 | 25 | mod sponge; 26 | mod table; 27 | 28 | use std::hash::Hash; 29 | use sponge::Sponge; 30 | 31 | /// A lock-free, concurrent hash map. 32 | // TODO: Make assumptions about `Hash` clear. 33 | pub struct HashMap { 34 | /// The root table of the hash map. 35 | table: table::Table, 36 | } 37 | 38 | impl HashMap { 39 | /// Get a value from the map. 40 | pub fn get(&self, key: &K) -> Option> { 41 | self.table.get(key, Sponge::new(&key)) 42 | } 43 | 44 | /// Insert a key with a certain value into the map. 45 | /// 46 | /// If it already exists, the value is replaced and the old value is returned. 
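    ///
    /// # Example
    ///
    /// A hedged sketch of the intended usage. It assumes a constructor for the map (e.g. a
    /// `Default` impl living in `table.rs`, which is not shown here) and that the returned
    /// guard dereferences to the replaced value.
    ///
    /// ```rust,ignore
    /// let map: HashMap<String, u32> = HashMap::default();
    /// assert!(map.insert("key".to_string(), 1).is_none());
    /// // Inserting the same key again replaces the value and hands back the old one.
    /// assert_eq!(*map.insert("key".to_string(), 2).unwrap(), 1);
    /// ```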
47 | pub fn insert(&self, key: K, val: V) -> Option> { 48 | let sponge = Sponge::new(&key); 49 | self.table.insert(table::Pair { 50 | key: key, 51 | val: val, 52 | }, sponge) 53 | } 54 | 55 | /// Remove a key from the hash map. 56 | /// 57 | /// If any, the removed value is returned. 58 | pub fn remove(&self, key: &K) -> Option> { 59 | self.table.remove(key, Sponge::new(&key)) 60 | } 61 | 62 | /// Apply a closure to every entry in the map. 63 | pub fn for_each(&self, f: F) { 64 | self.table.for_each(&f); 65 | } 66 | 67 | /// Remove and apply a closure to every entry in the map. 68 | pub fn take_each(&self, f: F) { 69 | self.table.take_each(&f); 70 | } 71 | 72 | /// Remove every entry from the map. 73 | pub fn clear(&self) { 74 | self.take_each(|_, _| ()); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /atomic-hashmap/src/sponge.rs: -------------------------------------------------------------------------------- 1 | //! The traversal sequence generator (bijective sponge). 2 | 3 | // TODO: Get rid of this all-together. 4 | 5 | use std::hash::{Hash, Hasher}; 6 | 7 | /// Permute an integer pseudorandomly. 8 | /// 9 | /// This is a bijective function emitting chaotic behavior. Such functions are used as building 10 | /// blocks for hash functions. 11 | fn sigma(x: u8) -> u8 { 12 | /// A permutation table. 13 | /// 14 | /// It is generated by generating a cycle by going through all the integers, several thousand 15 | /// times, then permuting them based on a pseudorandom stream based on 1024 of PCG round, then 16 | /// the table is reordered to be full-cycle. 17 | static TABLE: [u8; 256] = [ 18 | 13, 103, 249, 200, 212, 207, 40, 84, 229, 204, 219, 135, 92, 148, 106, 139, 95, 152, 19 | 49, 55, 132, 2, 30, 186, 108, 213, 159, 224, 111, 68, 37, 178, 129, 157, 247, 54, 20 | 81, 56, 223, 28, 174, 87, 166, 6, 217, 41, 67, 161, 4, 205, 201, 211, 254, 171, 21 | 208, 52, 18, 115, 194, 142, 7, 104, 164, 234, 126, 203, 233, 138, 97, 222, 124, 168, 22 | 151, 91, 112, 63, 78, 144, 98, 119, 246, 26, 187, 150, 145, 64, 51, 136, 96, 85, 23 | 46, 12, 218, 154, 25, 53, 240, 206, 192, 215, 160, 60, 190, 42, 116, 127, 110, 114, 24 | 21, 9, 199, 173, 133, 1, 243, 73, 20, 176, 181, 184, 209, 237, 225, 180, 189, 86, 25 | 72, 128, 43, 39, 123, 163, 248, 167, 44, 188, 29, 24, 179, 221, 8, 47, 255, 36, 26 | 15, 228, 83, 149, 70, 153, 35, 16, 65, 216, 121, 88, 227, 183, 197, 118, 109, 232, 27 | 99, 245, 120, 230, 155, 38, 79, 27, 71, 252, 147, 140, 244, 195, 3, 241, 34, 14, 28 | 214, 23, 202, 77, 250, 31, 238, 158, 62, 226, 19, 231, 235, 172, 75, 50, 253, 89, 29 | 101, 107, 102, 169, 177, 76, 69, 57, 74, 134, 170, 146, 198, 48, 61, 80, 182, 66, 30 | 22, 17, 117, 0, 236, 10, 122, 82, 196, 58, 131, 94, 162, 191, 220, 100, 242, 130, 31 | 5, 125, 59, 137, 90, 175, 141, 251, 239, 185, 32, 193, 33, 210, 143, 105, 93, 165, 32 | 113, 156, 11, 45, 33 | ]; 34 | 35 | // Simply permute based on the lookup table. 36 | TABLE[x as usize] 37 | } 38 | 39 | /// A sponge. 40 | /// 41 | /// Sponges can be written to and then squeezed, which extracts a pseudorandom number. Given enough 42 | /// of the squeezing stream, it should be possible to reconstruct the written input in its entire. 43 | /// In other words, given enough of the output streams of two distinct inputs, they are not equal. 44 | /// Hence, there will eventually be no collisions. 
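///
/// A hedged illustration of that property (the real call sites are in `table.rs`, which is
/// not shown here):
///
/// ```rust,ignore
/// // The squeeze stream is a deterministic function of the written key...
/// let mut a = Sponge::new(&"key");
/// let mut b = Sponge::new(&"key");
/// assert!((0..16).all(|_| a.squeeze() == b.squeeze()));
/// // ...while the streams of two distinct keys are guaranteed to diverge eventually.
/// ```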
45 | /// 46 | /// The idea of this is that we want to randomly generate hash values, but we want to avoid 47 | /// collisions, which can otherwise make the hash tables incorrect (there is no exact collision 48 | /// resolution). 49 | /// 50 | /// # Example of a bijective sponge 51 | /// 52 | /// Suppose `1` and `2` both generate the stream 53 | /// 54 | /// 1, 2 → 233, 21, 34, 54 55 | /// 56 | /// If we read more of the stream of `1` and the stream of `2`, the two streams will eventually 57 | /// diverge: 58 | /// 59 | /// 1 → 233, 21, 34, 54, 242 60 | /// 2 → 233, 21, 34, 54, 32 61 | /// 62 | /// # How the this bijective sponge works 63 | /// 64 | /// The way it works is reading the input and bijectively updating the state based on the input. An 65 | /// internal buffer with all the states are held. To make sure output depends on the higher input, 66 | /// and not just the lower, we do the same in reverse: The output extracted updates a state, 67 | /// defining a permutation, which is applied to the byte read from the internal buffer. 68 | /// 69 | /// To avoid ambiguities with different lengths, we length pad the stream. 70 | /// 71 | /// ## Example 72 | /// 73 | /// Suppose our input stream is 74 | /// 75 | /// 1, 2, 3 76 | /// 77 | /// (we ignore length padding, which simply means appending the length here) 78 | /// 79 | /// Then we start with state `0`, we then write into our internal buffer 80 | /// 81 | /// σ(0 ⊕ 1) = σ(1) 82 | /// 83 | /// Next, we read `2`, yielding internal buffer of 84 | /// 85 | /// σ(1), σ(σ(1) ⊕ 2) 86 | /// 87 | /// Then `3`, giving us 88 | /// 89 | /// σ(1), σ(σ(1) ⊕ 2), σ(σ(σ(1) ⊕ 2) ⊕ 3) 90 | /// 91 | /// This can be reverted by taking the first, inverting the permutation, then inverting the next, 92 | /// and XORing with the first, and then using that information to invert the last. 93 | /// 94 | /// However, it fails to have the lower items depend on the higher. To combat this, we have a 95 | /// special way of squeezing: 96 | /// 97 | /// When we start squeezing, we set the state to `0` again. Then we pop the highest byte from the 98 | /// internal buffer, which is then XOR'd with the state and permuted by σ. This gives the new state 99 | /// and the extracted byte. 100 | #[derive(Default, Clone)] 101 | pub struct Sponge { 102 | /// The state of the sponge. 103 | /// 104 | /// This is equal to the last outputted byte (and if none, the last byte in the buffer). 105 | state: u8, 106 | /// The internal buffer. 107 | buffer: Vec, 108 | } 109 | 110 | impl Sponge { 111 | /// Create a new sponge for hashing a particular key. 112 | pub fn new(key: &T) -> Sponge { 113 | // Initialize the sponge. 114 | let mut sponge = Sponge::default(); 115 | 116 | // Write the key into the sponge. 117 | key.hash(&mut sponge); 118 | 119 | // Switch to squeezing. 120 | sponge.begin_squeeze(); 121 | 122 | sponge 123 | } 124 | 125 | /// Extract an output byte from the sponge. 126 | pub fn squeeze(&mut self) -> u8 { 127 | // We the popped byte XOR by the state and then permute through the table. 128 | self.state = sigma(self.state ^ self.buffer.pop().unwrap_or(0)); 129 | 130 | self.state 131 | } 132 | 133 | /// Truncate the sponge to match another sponge. 134 | /// 135 | /// This truncates and sets state of the sponge under the assumption that the two sponges have 136 | /// outputted matching streams so far. 
As a result, the sponge is put in a state, which is 137 | /// supposed to be equal to the state, which would otherwise be achieved by squeezing the 138 | /// sponge as much as `other`. 139 | pub fn matching(&mut self, other: &Sponge) { 140 | // These are matching, as the two sponges are assumed to have outputted the same bytes so 141 | // far. 142 | self.state = other.state; 143 | // Next, truncate the buffer, so the internal buffers are of same length. 144 | self.buffer.truncate(other.buffer.len()); 145 | } 146 | 147 | /// Go to squeezing state. 148 | /// 149 | /// This does length padding, thus ensuring bijectivity. 150 | fn begin_squeeze(&mut self) { 151 | // Pad with the length. 152 | // TODO: When non-lexical lifetimes land, merge the two following lines. 153 | let len = self.buffer.len(); 154 | self.write_usize(len); 155 | // Zero the state. 156 | self.state = 0; 157 | } 158 | } 159 | 160 | impl Hasher for Sponge { 161 | fn finish(&self) -> u64 { 162 | // You can't produce a u64 from a sponge. It is designed to produce an endless sequence of 163 | // bytes. 164 | unreachable!(); 165 | } 166 | 167 | fn write(&mut self, bytes: &[u8]) { 168 | // Write each byte one-by-one. 169 | // TODO: This could be faster. 170 | for &i in bytes { 171 | self.write_u8(i); 172 | } 173 | } 174 | 175 | fn write_u8(&mut self, i: u8) { 176 | // Mix in the state and permute. 177 | self.state = sigma(self.state ^ i); 178 | // The new state is then pushed to the buffer to be extracted later. 179 | self.buffer.push(self.state); 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /cbloom/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cbloom" 3 | version = "0.1.1" 4 | authors = ["ticki "] 5 | description = "Concurrent implementation of Bloom filters." 6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/cbloom" 8 | license = "MIT" 9 | keywords = ["bloom", "concurrent", "set", "filter", "garbage-collection"] 10 | exclude = ["target", "Cargo.lock"] 11 | -------------------------------------------------------------------------------- /cbloom/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A concurrent implementation of Bloom filters. 2 | //! 3 | //! Bloom filters is a simple data structure, which is used in many different situations. It can 4 | //! neatly solve certain problems heaurustically without need for extreme memory usage. 5 | //! 6 | //! This implementation is fairly standard, except that it uses atomic integers to work 7 | //! concurrently. 8 | 9 | #![feature(integer_atomics)] 10 | 11 | use std::cmp; 12 | use std::sync::atomic::{self, AtomicU64}; 13 | 14 | /// The atomic ordering used throughout the crate. 15 | const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed; 16 | 17 | /// Hash an integer. 18 | /// 19 | /// This is a pseudorandom permutation of `u64` with high statistical quality. It can thus be used 20 | /// as a hash function. 21 | fn hash(mut x: u64) -> u64 { 22 | // The following is copied from SeaHash. 23 | 24 | x = x.wrapping_mul(0x6eed0e9da4d94a4f); 25 | let a = x >> 32; 26 | let b = x >> 60; 27 | x ^= a >> b; 28 | x = x.wrapping_mul(0x6eed0e9da4d94a4f); 29 | 30 | // We XOR with some constant to make it zero-sensitive. 31 | x ^ 0x11c92f7574d3e84f 32 | } 33 | 34 | /// A concurrent Bloom filter. 
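///
/// A hedged usage sketch (it mirrors the tests at the bottom of this file):
///
/// ```rust,ignore
/// let filter = Filter::new(400, 4);
/// filter.insert(42);
/// // An inserted element is always reported as possibly present...
/// assert!(filter.maybe_contains(42));
/// // ...while a `false` answer is exact: that element was definitely never inserted.
/// if !filter.maybe_contains(1337) { println!("1337 was never inserted"); }
/// ```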
35 | /// 36 | /// Bloom filters are a probabilistic data structure, which allows you to insert elements, and 37 | /// later test if they were inserted. The filter will either know it doesn't contain the element, 38 | /// or that it might. It will never be "sure", hence the name "filter". 39 | /// 40 | /// It works by having an array of bits. Every element is hashed into a sequence of these bits. The 41 | /// bits of the inserted elements are set to 1. When testing for membership, we simply AND the 42 | /// bits. 43 | pub struct Filter { 44 | /// The bit array. 45 | /// 46 | /// We use `u64` to improve performance of `Filter::clear()`. 47 | bits: Vec, 48 | /// The number of hash functions. 49 | hashers: usize, 50 | } 51 | 52 | impl Filter { 53 | /// Get the chunk of a particular hash. 54 | #[inline] 55 | fn get(&self, hash: u64) -> &AtomicU64 { 56 | &self.bits[(hash as usize / 64) % self.bits.len()] 57 | } 58 | 59 | /// Create a new Bloom filter with the optimal number of hash functions. 60 | /// 61 | /// This creates a Bloom filter with `bytes` bytes of internal data, and optimal number (for 62 | /// `expected_elements` number of elements) of hash functions. 63 | pub fn new(bytes: usize, expected_elements: usize) -> Filter { 64 | // The number of hashers are calculated by multiplying the bits per element by ln(2), which 65 | // we approximate through multiplying by an integer, then shifting. To make things more 66 | // precise, we add 0x8000 to round the shift. 67 | Filter::with_size_and_hashers(bytes, (bytes / expected_elements * 45426 + 0x8000) >> 16) 68 | } 69 | 70 | /// Create a new Bloom filter with some number of bytes and hashers. 71 | /// 72 | /// This creates a Bloom filter with at least `bytes` bytes of internal data and `hashers` 73 | /// number of hash functions. 74 | /// 75 | /// If `hashers` is 0, it will be rounded to 1. 76 | pub fn with_size_and_hashers(bytes: usize, hashers: usize) -> Filter { 77 | // Convert `bytes` to number of `u64`s, and ceil to avoid case where the output is 0. 78 | let len = (bytes + 7) / 8; 79 | // Initialize a vector with zeros. 80 | let mut vec = Vec::with_capacity(len); 81 | for _ in 0..len { 82 | vec.push(AtomicU64::new(0)); 83 | } 84 | 85 | Filter { 86 | bits: vec, 87 | // Set hashers to 1, if it is 0, as there must be at least one hash function. 88 | hashers: cmp::max(hashers, 1), 89 | } 90 | } 91 | 92 | /// Clear the Bloom filter. 93 | /// 94 | /// This removes every element from the Bloom filter. 95 | /// 96 | /// Note that it will not do so atomically, and it can remove elements inserted simulatenously 97 | /// to this function being called. 98 | pub fn clear(&self) { 99 | for i in &self.bits { 100 | // Clear the bits of this chunk. 101 | i.store(0, ORDERING); 102 | } 103 | } 104 | 105 | /// Insert an element into the Bloom filter. 106 | pub fn insert(&self, x: u64) { 107 | // Start at `x`. 108 | let mut h = x; 109 | // Run over the hashers. 110 | for _ in 0..self.hashers { 111 | // We use the hash function to generate a pseudorandom sequence, defining the different 112 | // hashes. 113 | h = hash(h); 114 | // Create a mask and OR the chunk chosen by `hash`. 115 | self.get(h).fetch_or(1 << (h % 8), ORDERING); 116 | } 117 | } 118 | 119 | /// Check if the Bloom filter potentially contains an element. 120 | /// 121 | /// This returns `true` if we're not sure if the filter contains `x` or not, and `false` if we 122 | /// know that the filter does not contain `x`. 123 | pub fn maybe_contains(&self, x: u64) -> bool { 124 | // Start at `x`. 
125 | let mut h = x; 126 | 127 | // Go over the hashers. 128 | for _ in 0..self.hashers { 129 | // Again, the hashes are defined by a cuckoo sequence of repeatedly hashing. 130 | h = hash(h); 131 | // Short-circuit if the bit is not set. 132 | if self.get(h).load(ORDERING) & 1 << (h % 8) == 0 { 133 | // Since the bit of this hash value was not set, it is impossible that the filter 134 | // contains `x`, so we return `false`. 135 | return false; 136 | } 137 | } 138 | 139 | // Every bit was set, so the element might be in the filter. 140 | true 141 | } 142 | } 143 | 144 | #[cfg(test)] 145 | mod tests { 146 | use super::*; 147 | 148 | use std::sync::Arc; 149 | use std::thread; 150 | 151 | #[test] 152 | fn insert() { 153 | let filter = Filter::new(400, 4); 154 | filter.insert(3); 155 | filter.insert(5); 156 | filter.insert(7); 157 | filter.insert(13); 158 | 159 | assert!(!filter.maybe_contains(0)); 160 | assert!(!filter.maybe_contains(1)); 161 | assert!(!filter.maybe_contains(2)); 162 | assert!(filter.maybe_contains(3)); 163 | assert!(filter.maybe_contains(5)); 164 | assert!(filter.maybe_contains(7)); 165 | assert!(filter.maybe_contains(13)); 166 | 167 | for i in 14..60 { 168 | assert!(!filter.maybe_contains(!i)); 169 | } 170 | } 171 | 172 | #[test] 173 | fn clear() { 174 | let filter = Filter::new(400, 4); 175 | filter.insert(3); 176 | filter.insert(5); 177 | filter.insert(7); 178 | filter.insert(13); 179 | 180 | filter.clear(); 181 | 182 | assert!(!filter.maybe_contains(0)); 183 | assert!(!filter.maybe_contains(1)); 184 | assert!(!filter.maybe_contains(2)); 185 | assert!(!filter.maybe_contains(3)); 186 | assert!(!filter.maybe_contains(5)); 187 | assert!(!filter.maybe_contains(7)); 188 | assert!(!filter.maybe_contains(13)); 189 | } 190 | 191 | #[test] 192 | fn spam() { 193 | let filter = Arc::new(Filter::new(2000, 100)); 194 | let mut joins = Vec::new(); 195 | 196 | for _ in 0..16 { 197 | let filter = filter.clone(); 198 | joins.push(thread::spawn(move || for i in 0..100 { 199 | filter.insert(i) 200 | })); 201 | } 202 | 203 | for i in joins { 204 | i.join().unwrap(); 205 | } 206 | 207 | for i in 0..100 { 208 | assert!(filter.maybe_contains(i)); 209 | } 210 | for i in 100..200 { 211 | assert!(!filter.maybe_contains(i)); 212 | } 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /chashmap/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "chashmap" 3 | version = "2.2.1" 4 | authors = ["ticki "] 5 | description = "Fast, concurrent hash maps with extensive API." 6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/chashmap" 8 | license = "MIT" 9 | keywords = ["hashmap", "concurrent", "parking_lot", "lock", "map"] 10 | exclude = ["target", "Cargo.lock"] 11 | 12 | [dependencies] 13 | parking_lot = "0.4" 14 | owning_ref = "0.3" 15 | -------------------------------------------------------------------------------- /conc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "conc" 3 | version = "0.5.1" 4 | authors = ["ticki "] 5 | description = "Hazard-pointer-based concurrent memory reclamation." 
6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/conc" 8 | license = "MIT" 9 | keywords = ["crossbeam", "hazard", "concurrent", "stm", "treiber"] 10 | exclude = ["target", "Cargo.lock"] 11 | 12 | [dependencies] 13 | lazy_static = "0.2" 14 | rand = "0.3" 15 | parking_lot = "0.4" 16 | 17 | [dependencies.backtrace] 18 | version = "0.3" 19 | optional = true 20 | 21 | [features] 22 | debug-tools = ["backtrace"] 23 | -------------------------------------------------------------------------------- /conc/src/debug.rs: -------------------------------------------------------------------------------- 1 | //! Runtime debugging tools. 2 | 3 | #[cfg(feature = "debug-tools")] 4 | extern crate backtrace; 5 | 6 | /// Execute closure when the environment variable, `CONC_DEBUG_MODE`, is set. 7 | /// 8 | /// When compiled in release mode, this is a NOP. 9 | #[cfg(feature = "debug-tools")] 10 | pub fn exec(f: F) { 11 | use self::backtrace::Backtrace; 12 | use std::env; 13 | 14 | thread_local! { 15 | /// Is `CONC_DEBUG_MODE` set? 16 | /// 17 | /// This is cached to avoid expensive repeated syscalls or similar things. 18 | static DEBUG_MODE_ENABLED: bool = env::var("CONC_DEBUG_MODE").is_ok(); 19 | /// Is `CONC_DEBUG_STACKTRACE` set? 20 | /// 21 | /// This is cached to avoid expensive repeated syscalls or similar things. 22 | static STACK_TRACE_ENABLED: bool = env::var("CONC_DEBUG_STACKTRACE").is_ok(); 23 | } 24 | 25 | // If enabled, run the closure. 26 | if DEBUG_MODE_ENABLED.with(|&x| x) { 27 | f(); 28 | if STACK_TRACE_ENABLED.with(|&x| x) { 29 | println!("{:?}", Backtrace::new()); 30 | } 31 | } 32 | } 33 | 34 | /// Do nothing. 35 | /// 36 | /// When compiled in debug mode, this will execute the closure when envvar `CONC_DEBUG_MODE` is 37 | /// set. 38 | #[inline] 39 | #[cfg(not(feature = "debug-tools"))] 40 | pub fn exec(_: F) {} 41 | -------------------------------------------------------------------------------- /conc/src/garbage.rs: -------------------------------------------------------------------------------- 1 | //! Literal garbage. 2 | 3 | use debug; 4 | 5 | /// An object to be deleted eventually. 6 | /// 7 | /// Garbage refers to objects which are waiting to be destroyed, at some point after all references 8 | /// to them are gone. 9 | /// 10 | /// When it's dropped, the destructor of the garbage runs. 11 | /// 12 | /// See also: ideology. 13 | #[derive(Debug)] 14 | pub struct Garbage { 15 | /// The pointer to the object. 16 | ptr: *const u8, 17 | /// The destructor of the object. 18 | /// 19 | /// The argument given when called is the `self.ptr` field. 20 | dtor: unsafe fn(*const u8), 21 | } 22 | 23 | impl Garbage { 24 | /// Create a new garbage item given its parameters. 25 | /// 26 | /// This takes the pointer and destructor (which takes pointer as argument) and construct the 27 | /// corresponding garbage item. 28 | pub fn new(ptr: *const u8, dtor: fn(*const u8)) -> Garbage { 29 | debug_assert!(ptr as usize > 0, "Creating garbage with invalid pointer."); 30 | 31 | Garbage { 32 | ptr: ptr, 33 | dtor: dtor, 34 | } 35 | } 36 | 37 | /// Create a garbage item deallocating and dropping a box. 38 | /// 39 | /// Assuming `item` is a pointer representing a `Box`, this creates a garbage item, which has 40 | /// a destructor dropping and deallocating the box represented by `item`. 41 | /// 42 | /// Due to the affine type system, we must pass a pointer rather than the box directly. 
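    ///
    /// A hedged sketch of the call pattern (it mirrors the `new_box` test at the bottom of
    /// this file):
    ///
    /// ```rust,ignore
    /// let garbage = unsafe { Garbage::new_box(Box::into_raw(Box::new(2))) };
    /// // Dropping the garbage item runs its destructor, which reconstructs and drops the box.
    /// drop(garbage);
    /// ```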
43 | /// 44 | /// # Safety 45 | /// 46 | /// This is unsafe as there is no way to verify that `item` is indeed a box, nor is it possible 47 | /// to secure against double-drops and other issues arising from the fact that we're passing a 48 | /// pointer. 49 | // TODO: Find a way to do this safely. 50 | // FIXME: This might actually be unsound, as it takes `T` and runs its destructor potentially 51 | // in another thread. In other words, an (unaliased) `&mut T` is available in another 52 | // thread through the destructor, meaning that it should be `Sync`, I think. I can't 53 | // however think of any cases where this would lead to safety issues, but I think it is 54 | // theoretically unsound. Investigate further. 55 | pub unsafe fn new_box(item: *const T) -> Garbage { 56 | unsafe fn dtor(ptr: *const u8) { 57 | // Drop the box represented by `ptr`. 58 | Box::from_raw(ptr as *mut u8 as *mut T); 59 | } 60 | 61 | Garbage { 62 | ptr: item as *const u8, 63 | dtor: dtor::, 64 | } 65 | } 66 | 67 | /// Get the inner pointer of the garbage. 68 | pub fn ptr(&self) -> *const u8 { 69 | self.ptr 70 | } 71 | } 72 | 73 | impl Drop for Garbage { 74 | fn drop(&mut self) { 75 | // Print message in debug mode. 76 | debug::exec(|| println!("Destroying garbage: {:?}", self)); 77 | 78 | unsafe { (self.dtor)(self.ptr); } 79 | } 80 | } 81 | 82 | // We must do this manually due to the raw pointer. 83 | unsafe impl Send for Garbage {} 84 | 85 | #[cfg(test)] 86 | mod tests { 87 | use super::*; 88 | use std::ptr; 89 | 90 | fn nop(_: *const u8) {} 91 | 92 | #[test] 93 | fn ptr() { 94 | let g = Garbage::new(0x2 as *const u8, nop); 95 | assert_eq!(g.ptr() as usize, 2); 96 | } 97 | 98 | #[test] 99 | fn new_box() { 100 | for _ in 0..1000 { 101 | unsafe { Garbage::new_box(Box::into_raw(Box::new(2))); } 102 | } 103 | } 104 | 105 | #[cfg(debug_assertions)] 106 | #[test] 107 | #[should_panic] 108 | fn debug_invalid_pointer() { 109 | Garbage::new(ptr::null(), nop); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /conc/src/global.rs: -------------------------------------------------------------------------------- 1 | //! The global state. 2 | 3 | use parking_lot::Mutex; 4 | use std::collections::HashSet; 5 | use std::{mem, panic}; 6 | use {rand, hazard, mpsc, debug, settings}; 7 | use garbage::Garbage; 8 | 9 | lazy_static! { 10 | /// The global state. 11 | /// 12 | /// This state is shared between all the threads. 13 | static ref STATE: State = State::new(); 14 | } 15 | 16 | /// Create a new hazard. 17 | /// 18 | /// This creates a new hazard and registers it in the global state. It's secondary, writer part is 19 | /// returned. 20 | pub fn create_hazard() -> hazard::Writer { 21 | STATE.create_hazard() 22 | } 23 | 24 | /// Export garbage into the global state. 25 | /// 26 | /// This adds the garbage, which will eventually be destroyed, to the global state. Note that this 27 | /// does not tick, and thus cannot cause garbage collection. 28 | pub fn export_garbage(garbage: Vec) { 29 | STATE.export_garbage(garbage) 30 | } 31 | 32 | /// Attempt to garbage collect. 33 | /// 34 | /// If another garbage collection is currently running, the thread will do nothing, and `Err(())` 35 | /// will be returned. Otherwise, it returns `Ok(())`. 36 | /// 37 | /// # Panic 38 | /// 39 | /// If a destructor panics, this will panic as well. 40 | pub fn try_gc() -> Result<(), ()> { 41 | STATE.try_gc() 42 | } 43 | 44 | /// Tick the clock. 
45 | /// 46 | /// This shall be called when new garbage is added, as it will trigger a GC by some probability. 47 | pub fn tick() { 48 | // Generate a random number and compare it against the probability. 49 | if rand::random::() < settings::get().gc_probability { 50 | // The outfall was to (attempt at) GC. 51 | let _ = try_gc(); 52 | } 53 | } 54 | 55 | /// A message to the global state. 56 | enum Message { 57 | /// Add new garbage. 58 | Garbage(Vec), 59 | /// Add a new hazard. 60 | NewHazard(hazard::Reader), 61 | } 62 | 63 | /// The global state. 64 | /// 65 | /// The global state is shared between all threads and keeps track of the garbage and the active 66 | /// hazards. 67 | /// 68 | /// It is divided into two parts: The channel and the garbo. The channel buffers messages, which 69 | /// will eventually be executed at garbo, which holds all the data structures and is protected by a 70 | /// mutex. The garbo holds the other end to the channel. 71 | struct State { 72 | /// The message-passing channel. 73 | chan: mpsc::Sender, 74 | /// The garbo part of the state. 75 | garbo: Mutex, 76 | } 77 | 78 | impl State { 79 | /// Initialize a new state. 80 | fn new() -> State { 81 | // Create the message-passing channel. 82 | let (send, recv) = mpsc::channel(); 83 | 84 | // Construct the state from the two halfs of the channel. 85 | State { 86 | chan: send, 87 | garbo: Mutex::new(Garbo { 88 | chan: recv, 89 | garbage: Vec::new(), 90 | hazards: Vec::new(), 91 | }) 92 | } 93 | } 94 | 95 | /// Create a new hazard. 96 | /// 97 | /// This creates a new hazard and registers it in the global state. It's secondary, writer part 98 | /// is returned. 99 | fn create_hazard(&self) -> hazard::Writer { 100 | // Create the hazard. 101 | let (writer, reader) = hazard::create(); 102 | // Communicate the new hazard to the global state through the channel. 103 | self.chan.send(Message::NewHazard(reader)); 104 | // Return the other half of the hazard. 105 | writer 106 | } 107 | 108 | /// Export garbage into the global state. 109 | /// 110 | /// This adds the garbage, which will eventually be destroyed, to the global state. 111 | fn export_garbage(&self, garbage: Vec) { 112 | // Send the garbage to the message-passing channel of the state. 113 | self.chan.send(Message::Garbage(garbage)); 114 | } 115 | 116 | /// Try to collect the garbage. 117 | /// 118 | /// This will handle all of the messages in the channel and then attempt at collect the 119 | /// garbage. If another thread is currently collecting garbage, `Err(())` is returned, 120 | /// otherwise it returns `Ok(())`. 121 | /// 122 | /// Garbage collection works by scanning the hazards and dropping all the garbage which is not 123 | /// currently active in the hazards. 124 | fn try_gc(&self) -> Result<(), ()> { 125 | // Lock the "garbo" (the part of the state needed to GC). 126 | if let Some(mut garbo) = self.garbo.try_lock() { 127 | // Collect the garbage. 128 | garbo.gc(); 129 | 130 | Ok(()) 131 | } else { 132 | // Another thread is collecting. 133 | Err(()) 134 | } 135 | } 136 | } 137 | 138 | impl panic::RefUnwindSafe for State {} 139 | 140 | /// The garbo part of the state. 141 | /// 142 | /// This part is supposed to act like the garbage collecting part. It handles hazards, garbage, and 143 | /// the receiving point of the message-passing channel. 144 | struct Garbo { 145 | /// The channel of messages. 146 | chan: mpsc::Receiver, 147 | /// The to-be-destroyed garbage. 148 | garbage: Vec, 149 | /// The current hazards. 
150 | hazards: Vec, 151 | } 152 | 153 | impl Garbo { 154 | /// Handle a given message. 155 | /// 156 | /// "Handle" in this case refers to applying the operation defined by the message to the state, 157 | /// effectually executing the instruction of the message. 158 | fn handle(&mut self, msg: Message) { 159 | match msg { 160 | // Append the garbage bulk to the garbage list. 161 | Message::Garbage(mut garbage) => self.garbage.append(&mut garbage), 162 | // Register the new hazard into the state. 163 | Message::NewHazard(hazard) => self.hazards.push(hazard), 164 | } 165 | } 166 | 167 | /// Handle all the messages and garbage collect all unused garbage. 168 | /// 169 | /// # Panic 170 | /// 171 | /// If a destructor panics, this will panic as well. 172 | fn gc(&mut self) { 173 | // Print message in debug mode. 174 | debug::exec(|| println!("Collecting garbage.")); 175 | 176 | // Handle all the messages sent. 177 | for msg in self.chan.recv_all() { 178 | self.handle(msg); 179 | } 180 | 181 | // Create the set which will keep the _active_ hazards. 182 | let mut active = HashSet::with_capacity(self.hazards.len()); 183 | 184 | // Take out the hazards and go over them one-by-one. 185 | let len = self.hazards.len(); // TODO: This should be substituted into next line. 186 | for hazard in mem::replace(&mut self.hazards, Vec::with_capacity(len)) { 187 | match hazard.get() { 188 | // The hazard is dead, so the other end (the writer) is not available anymore, 189 | // hence we can safely destroy it. 190 | hazard::State::Dead => unsafe { hazard.destroy() }, 191 | // The hazard is free and must thus be put back to the hazard list. 192 | hazard::State::Free => self.hazards.push(hazard), 193 | hazard::State::Protect(ptr) => { 194 | // This hazard is active, hence we insert the pointer it contains in our 195 | // "active" set. 196 | active.insert(ptr); 197 | // Since the hazard is still alive, we must put it back to the hazard list for 198 | // future use. 199 | self.hazards.push(hazard); 200 | }, 201 | } 202 | } 203 | 204 | // Scan the garbage for unused objects. 205 | self.garbage.retain(|garbage| active.contains(&garbage.ptr())) 206 | } 207 | } 208 | 209 | impl Drop for Garbo { 210 | fn drop(&mut self) { 211 | // Do a final GC. 
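        // This drains the message channel one last time and drops any garbage that is no
        // longer protected, so nothing queued in the channel leaks when the global state
        // itself goes away (compare the `clean_up_state` test below).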
212 | self.gc(); 213 | } 214 | } 215 | 216 | #[cfg(test)] 217 | mod tests { 218 | use super::*; 219 | use garbage::Garbage; 220 | use std::{panic, ptr}; 221 | 222 | #[test] 223 | fn dtor_runs() { 224 | fn dtor(x: *const u8) { 225 | unsafe { 226 | *(x as *mut u8) = 1; 227 | } 228 | } 229 | 230 | let s = State::new(); 231 | for _ in 0..1000 { 232 | let b = Box::new(0); 233 | let h = s.create_hazard(); 234 | h.protect(&*b); 235 | s.export_garbage(vec![Garbage::new(&*b, dtor)]); 236 | while s.try_gc().is_err() {} 237 | assert_eq!(*b, 0); 238 | while s.try_gc().is_err() {} 239 | h.free(); 240 | while s.try_gc().is_err() {} 241 | assert_eq!(*b, 1); 242 | h.kill(); 243 | } 244 | } 245 | 246 | #[test] 247 | fn clean_up_state() { 248 | fn dtor(x: *const u8) { 249 | unsafe { 250 | *(x as *mut u8) = 1; 251 | } 252 | } 253 | 254 | for _ in 0..1000 { 255 | let b = Box::new(0); 256 | { 257 | let s = State::new(); 258 | s.export_garbage(vec![Garbage::new(&*b, dtor)]); 259 | } 260 | 261 | assert_eq!(*b, 1); 262 | } 263 | } 264 | 265 | #[test] 266 | fn panic_invalidate_state() { 267 | fn panic(_: *const u8) { 268 | panic!(); 269 | } 270 | 271 | fn dtor(x: *const u8) { 272 | unsafe { 273 | *(x as *mut u8) = 1; 274 | } 275 | } 276 | 277 | let s = State::new(); 278 | let b = Box::new(0); 279 | let h = create_hazard(); 280 | h.protect(&*b); 281 | s.export_garbage(vec![Garbage::new(&*b, dtor), Garbage::new(0x2 as *const u8, panic)]); 282 | let _ = panic::catch_unwind(|| { 283 | while s.try_gc().is_err() {} 284 | }); 285 | assert_eq!(*b, 0); 286 | h.free(); 287 | while s.try_gc().is_err() {} 288 | assert_eq!(*b, 1); 289 | } 290 | 291 | #[test] 292 | #[should_panic] 293 | fn panic_in_dtor() { 294 | fn dtor(_: *const u8) { 295 | panic!(); 296 | } 297 | 298 | let s = State::new(); 299 | s.export_garbage(vec![Garbage::new(ptr::null(), dtor)]); 300 | while s.try_gc().is_err() {} 301 | } 302 | 303 | #[cfg(debug_assertions)] 304 | #[test] 305 | #[should_panic] 306 | fn debug_more_hazards() { 307 | let s = State::new(); 308 | let h = s.create_hazard(); 309 | h.free(); 310 | mem::forget(h); 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /conc/src/guard.rs: -------------------------------------------------------------------------------- 1 | //! RAII guards for hazards. 2 | 3 | use std::ops; 4 | use std::sync::atomic; 5 | use {hazard, local}; 6 | 7 | #[cfg(debug_assertions)] 8 | use std::cell::Cell; 9 | #[cfg(debug_assertions)] 10 | thread_local! { 11 | /// Number of guards the current thread is creating. 12 | static CURRENT_CREATING: Cell = Cell::new(0); 13 | } 14 | 15 | /// Assert (in debug mode) that no guards are currently being created in this thread. 16 | /// 17 | /// This shall be used when you want to ensure, that a function called within the guard constructor 18 | /// doesn't cause endless looping, due to the blocked hazard. 19 | /// 20 | /// In particular, it should be called in functions that could trigger a garbage collection, thus 21 | /// requiring that hazards are eventually unblocked. 22 | pub fn debug_assert_no_create() { 23 | #[cfg(debug_assertions)] 24 | CURRENT_CREATING.with(|x| assert_eq!(x.get(), 0)); 25 | } 26 | 27 | /// A RAII guard protecting from garbage collection. 28 | /// 29 | /// This "guards" the held pointer against garbage collection. First when all guards of said 30 | /// pointer is gone (the data is unreachable), it can be collected. 31 | // TODO: Remove this `'static` bound. 
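// A hedged sketch of how a guard typically reaches user code (compare the `map_field`
// test below, where `Atomic::load` yields one); `atomic_value` and `expected` are
// illustrative names only, not part of this module:
//
//     let guard = atomic_value.load(atomic::Ordering::Relaxed).unwrap();
//     assert_eq!(*guard, expected);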
32 | #[must_use = "\ 33 | You are getting a `conc::Guard` without using it, which means it is potentially \ 34 | unnecessary overhead. Consider replacing the method with something that doesn't \ 35 | return a guard.\ 36 | "] 37 | #[derive(Debug)] 38 | pub struct Guard { 39 | /// The inner hazard. 40 | hazard: hazard::Writer, 41 | /// The pointer to the protected object. 42 | pointer: &'static T, 43 | } 44 | 45 | impl Guard { 46 | /// Failably create a new guard. 47 | /// 48 | /// This has all the same restrictions and properties as `Guard::new()` (please read its 49 | /// documentation before using), with the exception of being failable. 50 | /// 51 | /// This means that the closure can return and error and abort the creation of the guard. 52 | pub fn try_new(ptr: F) -> Result, E> 53 | where F: FnOnce() -> Result<&'static T, E> { 54 | // Increment the number of guards currently being created. 55 | #[cfg(debug_assertions)] 56 | CURRENT_CREATING.with(|x| x.set(x.get() + 1)); 57 | 58 | // Get a hazard in blocked state. 59 | let hazard = local::get_hazard(); 60 | 61 | // This fence is necessary for ensuring that `hazard` does not get reordered to after `ptr` 62 | // has run. 63 | // TODO: Is this fence even necessary? 64 | atomic::fence(atomic::Ordering::SeqCst); 65 | 66 | // Right here, any garbage collection is blocked, due to the hazard above. This ensures 67 | // that between the potential read in `ptr` and it being protected by the hazard, there 68 | // will be no premature free. 69 | 70 | // Evaluate the pointer through the closure. 71 | let res = ptr(); 72 | 73 | // Decrement the number of guards currently being created. 74 | #[cfg(debug_assertions)] 75 | CURRENT_CREATING.with(|x| x.set(x.get() - 1)); 76 | 77 | match res { 78 | Ok(ptr) => { 79 | // Now that we have the pointer, we can protect it by the hazard, unblocking a pending 80 | // garbage collection if it exists. 81 | hazard.protect(ptr as *const T as *const u8); 82 | 83 | Ok(Guard { 84 | hazard: hazard, 85 | pointer: ptr, 86 | }) 87 | }, 88 | Err(err) => { 89 | // Set the hazard to free to ensure that the hazard doesn't remain blocking. 90 | hazard.free(); 91 | 92 | Err(err) 93 | } 94 | } 95 | } 96 | 97 | /// Create a new guard. 98 | /// 99 | /// Because it must ensure that no garbage collection happens until the pointer is read, it 100 | /// takes a closure, which is evaluated to the pointer the guard will hold. During the span of 101 | /// this closure, garbage collection is ensured to not happen, making it safe to read from an 102 | /// atomic pointer without risking the ABA problem. 103 | /// 104 | /// # Important! 105 | /// 106 | /// It is very important that this closure does not contain anything which might cause a 107 | /// garbage collection, as garbage collecting inside this closure will cause the current thread 108 | /// to be blocked infinitely (because the hazard is blocked) and stop all other threads from 109 | /// collecting garbage, leading to memory leaks in those — unless it is compiled in debug mode, 110 | /// in which case it will likely panic. 111 | pub fn new(ptr: F) -> Guard 112 | where F: FnOnce() -> &'static T { 113 | Guard::try_new::<_, ()>(|| Ok(ptr())).unwrap() 114 | } 115 | 116 | /// Conditionally create a new guard. 117 | /// 118 | /// This acts `try_new`, but with `Option` instead of `Result`. 119 | pub fn maybe_new(ptr: F) -> Option> 120 | where F: FnOnce() -> Option<&'static T> { 121 | Guard::try_new(|| ptr().ok_or(())).ok() 122 | } 123 | 124 | /// Map the pointer to another. 
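    ///
    /// A hedged sketch (compare the `map_field` test below, where `a` is the atomic pair from
    /// that test): map a guard protecting a pair into a guard protecting only its second field.
    ///
    /// ```rust,ignore
    /// let g = a.load(atomic::Ordering::Relaxed).unwrap();
    /// let second = g.map(|&(_, ref b)| b);
    /// assert_eq!(*second, 13);
    /// ```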
125 | /// 126 | /// This allows one to map a pointer to a pointer e.g. to an object referenced by the old. It 127 | /// is very convenient for creating APIs without the need for creating a wrapper type. 128 | // TODO: Is this sound? 129 | pub fn map(self, f: F) -> Guard 130 | where F: FnOnce(&T) -> &U { 131 | Guard { 132 | hazard: self.hazard, 133 | pointer: f(self.pointer), 134 | } 135 | } 136 | 137 | /// (Failably) map the pointer to another. 138 | /// 139 | /// This corresponds to `map`, but when the closure returns `Err`, this does as well. In other 140 | /// words, the closure can fail. 141 | pub fn try_map(self, f: F) -> Result, E> 142 | where F: FnOnce(&T) -> Result<&U, E> { 143 | Ok(Guard { 144 | hazard: self.hazard, 145 | pointer: f(self.pointer)?, 146 | }) 147 | } 148 | 149 | /// Conditionally map the pointer to another. 150 | /// 151 | /// This acts `try_map`, but with `Option` instead of `Result`. 152 | pub fn maybe_map(self, f: F) -> Option> 153 | where F: FnOnce(&T) -> Option<&U> { 154 | let hazard = self.hazard; 155 | f(self.pointer).map(|res| Guard { 156 | hazard: hazard, 157 | pointer: res, 158 | }) 159 | } 160 | 161 | /// Get the raw pointer of this guard. 162 | pub fn as_ptr(&self) -> *const T { 163 | self.pointer 164 | } 165 | } 166 | 167 | impl ops::Deref for Guard { 168 | type Target = T; 169 | 170 | fn deref(&self) -> &T { 171 | self.pointer 172 | } 173 | } 174 | 175 | #[cfg(test)] 176 | mod tests { 177 | use super::*; 178 | use std::mem; 179 | 180 | use Atomic; 181 | use std::sync::atomic; 182 | 183 | #[test] 184 | fn new() { 185 | assert_eq!(&*Guard::new(|| "blah"), "blah"); 186 | } 187 | 188 | #[test] 189 | fn maybe_new() { 190 | assert_eq!(&*Guard::maybe_new(|| Some("blah")).unwrap(), "blah"); 191 | assert!(Guard::::maybe_new(|| None).is_none()); 192 | } 193 | 194 | #[test] 195 | fn try_new() { 196 | assert_eq!(&*Guard::try_new::<_, u8>(|| Ok("blah")).unwrap(), "blah"); 197 | assert_eq!(Guard::::try_new(|| Err(2)).unwrap_err(), 2); 198 | } 199 | 200 | #[test] 201 | fn map() { 202 | let g = Guard::new(|| "blah"); 203 | assert_eq!(&*g.map(|x| { 204 | assert_eq!(x, "blah"); 205 | "blah2" 206 | }), "blah2"); 207 | } 208 | 209 | #[test] 210 | fn maybe_map() { 211 | let g = Guard::new(|| "blah"); 212 | assert_eq!(&*g.maybe_map(|x| { 213 | assert_eq!(x, "blah"); 214 | Some("blah2") 215 | }).unwrap(), "blah2"); 216 | let g = Guard::new(|| "blah"); 217 | assert_eq!(&*g, "blah"); 218 | assert!(g.maybe_map::(|_| None).is_none()); 219 | } 220 | 221 | #[test] 222 | fn try_map() { 223 | let g = Guard::new(|| "blah"); 224 | assert_eq!(&*g.try_map::<_, u8, _>(|x| { 225 | assert_eq!(x, "blah"); 226 | Ok("blah2") 227 | }).unwrap(), "blah2"); 228 | let g = Guard::new(|| "blah"); 229 | assert_eq!(&*g, "blah"); 230 | assert_eq!(g.try_map::(|_| Err(2)).unwrap_err(), 2); 231 | } 232 | 233 | #[test] 234 | fn map_field() { 235 | let a = Atomic::new(Some(Box::new((7, 13)))); 236 | let g = a.load(atomic::Ordering::Relaxed).unwrap().map(|&(_, ref b)| b); 237 | drop(a); 238 | ::gc(); 239 | assert_eq!(*g, 13); 240 | } 241 | 242 | #[test] 243 | #[should_panic] 244 | fn panic_during_guard_creation() { 245 | let _ = Guard::new(|| -> &'static u8 { panic!() }); 246 | } 247 | 248 | #[test] 249 | fn nested_guard_creation() { 250 | for _ in 0..100 { 251 | let _ = Guard::new(|| { 252 | mem::forget(Guard::new(|| "blah")); 253 | "blah" 254 | }); 255 | } 256 | } 257 | 258 | #[cfg(debug_assertions)] 259 | #[test] 260 | #[should_panic] 261 | fn debug_catch_infinite_blockage() { 262 | let _ = Guard::new(|| 
{ 263 | local::export_garbage(); 264 | "blah" 265 | }); 266 | } 267 | } 268 | -------------------------------------------------------------------------------- /conc/src/local.rs: -------------------------------------------------------------------------------- 1 | //! The thread-local state. 2 | 3 | use std::{mem, thread}; 4 | use std::cell::RefCell; 5 | use {global, hazard, guard, debug, settings}; 6 | use garbage::Garbage; 7 | 8 | thread_local! { 9 | /// The state of this thread. 10 | static STATE: RefCell = RefCell::new(State::default()); 11 | } 12 | 13 | /// Add new garbage to be deleted. 14 | /// 15 | /// This garbage is pushed to a thread-local queue. When enough garbage is accumulated in the 16 | /// thread, it is exported to the global state. 17 | pub fn add_garbage(garbage: Garbage) { 18 | // Print message in debug mode. 19 | debug::exec(|| println!("Adding garbage: {:?}", garbage)); 20 | // Since this function can trigger a GC, it must not be called inside a guard constructor. 21 | guard::debug_assert_no_create(); 22 | 23 | if STATE.state() == thread::LocalKeyState::Destroyed { 24 | // The state was deinitialized, so we must rely on the global state for queueing garbage. 25 | global::export_garbage(vec![garbage]); 26 | } else { 27 | // Add the garbage. 28 | if STATE.with(|s| s.borrow_mut().add_garbage(garbage)) { 29 | // The local state exported garbage to the global state, hence we must tick in order to 30 | // ensure that the garbage is periodically collected. 31 | global::tick(); 32 | } 33 | } 34 | } 35 | 36 | /// Get a blocked hazard. 37 | /// 38 | /// If possible, this will simply pop one of the thread-local cache of hazards. Otherwise, one must 39 | /// be registered in the global state. 40 | /// 41 | /// # Fence 42 | /// 43 | /// This does not fence, and you must thus be careful with updating the value afterwards, as 44 | /// reordering can happen, meaning that the hazard has not been blocked yet. 45 | pub fn get_hazard() -> hazard::Writer { 46 | if STATE.state() == thread::LocalKeyState::Destroyed { 47 | // The state was deinitialized, so we must rely on the global state for creating new 48 | // hazards. 49 | global::create_hazard() 50 | } else { 51 | STATE.with(|s| s.borrow_mut().get_hazard()) 52 | } 53 | } 54 | 55 | /// Free a hazard. 56 | /// 57 | /// This frees a hazard to the thread-local cache of hazards. 58 | /// 59 | /// It is important that the hazard is **not** in blocked state, as such thing can cause infinite 60 | /// looping. 61 | /// 62 | /// # Panics 63 | /// 64 | /// This might panic in debug mode if the hazard given is in blocked state, as such thing can cause 65 | /// infinite garbage collection cycle, or if the hazard is in dead state, as that means that it may 66 | /// not be reusable (it could be destroyed). 67 | pub fn free_hazard(hazard: hazard::Writer) { 68 | // Print message in debug mode. 69 | debug::exec(|| println!("Freeing hazard: {:?}", hazard)); 70 | // Since this function can trigger a GC, it must not be called inside a guard constructor. 71 | guard::debug_assert_no_create(); 72 | 73 | debug_assert!(!hazard.is_blocked(), "Illegally freeing a blocked hazards."); 74 | 75 | if STATE.state() == thread::LocalKeyState::Destroyed { 76 | // Since the state was deinitialized, we cannot store it for later reuse, so we are forced 77 | // to simply kill the hazard. 78 | hazard.kill(); 79 | } else { 80 | STATE.with(|s| s.borrow_mut().free_hazard(hazard)); 81 | } 82 | } 83 | 84 | /// Export the garbage of this thread to the global state. 
85 | /// 86 | /// This is useful for propagating accumulated garbage such that it can be destroyed by the next 87 | /// garbage collection. 88 | pub fn export_garbage() { 89 | // Since this function can trigger a GC, it must not be called inside a guard constructor. 90 | guard::debug_assert_no_create(); 91 | 92 | // We can only export when the TLS variable isn't destroyed. Otherwise, there would be nothing 93 | // to export! 94 | if STATE.state() != thread::LocalKeyState::Destroyed { 95 | STATE.with(|s| s.borrow_mut().export_garbage()); 96 | // We tick after the state is no longer reserved, as the tick could potentially call 97 | // destructor that access the TLS variable. 98 | global::tick(); 99 | } 100 | } 101 | 102 | /// A thread-local state. 103 | #[derive(Default)] 104 | struct State { 105 | /// The cached garbage waiting to be exported to the global state. 106 | garbage: Vec, 107 | /// The cache of currently available hazards. 108 | /// 109 | /// We maintain this cache to avoid the performance hit of creating new hazards. 110 | /// 111 | /// The hazards in this vector are not necessarily in state "free". Only when a sufficient 112 | /// amount of available hazards has accumulated, they will be set to free. This means that we 113 | /// don't have to reset the state of a hazard after usage, giving a quite significant speed-up. 114 | available_hazards: Vec, 115 | /// The hazards in the cache before this index are free. 116 | /// 117 | /// This number keeps track what hazards in `self.available_hazard` are set to state "free". 118 | /// Before this index, every hazard must be set to "free". 119 | /// 120 | /// It is useful for knowing when to free the hazards to allow garbage collection. 121 | available_hazards_free_before: usize, 122 | } 123 | 124 | impl State { 125 | /// Get the number of hazards in the cache which are not in state "free". 126 | fn non_free_hazards(&self) -> usize { 127 | self.available_hazards.len() - self.available_hazards_free_before 128 | } 129 | 130 | /// See `get_hazard()`. 131 | fn get_hazard(&mut self) -> hazard::Writer { 132 | // Check if there is hazards in the cache. 133 | if let Some(hazard) = self.available_hazards.pop() { 134 | // There is; we don't need to create a new hazard. 135 | 136 | // Since the hazard popped from the cache is not blocked, we must block the hazard to 137 | // satisfy the requirements of this function. 138 | hazard.block(); 139 | hazard 140 | } else { 141 | // There is not; we must create a new hazard. 142 | global::create_hazard() 143 | } 144 | } 145 | 146 | /// See `free_hazard()`. 147 | fn free_hazard(&mut self, hazard: hazard::Writer) { 148 | // FIXME: This can lead to some subtle bugs, since the dtor is unpredictable as there is no 149 | // way of predicting when the hazard is cleared. 150 | 151 | // Push the given hazard to the cache. 152 | self.available_hazards.push(hazard); 153 | 154 | // Check if we exceeded the limit. 155 | if self.non_free_hazards() > settings::get().max_non_free_hazards { 156 | // We did; we must now set the non-free hazards to "free". 157 | for i in &self.available_hazards[self.available_hazards_free_before..] { 158 | i.free(); 159 | } 160 | 161 | // Update the counter such that we mark the new hazards set to "free". 162 | self.available_hazards_free_before = self.available_hazards.len(); 163 | } 164 | } 165 | 166 | /// Queues garbage to destroy. 167 | /// 168 | /// Eventually the added garbage will be exported to the global state through 169 | /// `global::add_garbage()`. 
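/// The export is triggered once the local queue holds more than
/// `settings::get().max_garbage_before_export` items (see the body below).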
170 | /// 171 | /// See `add_garbage` for more information. 172 | /// 173 | /// When this happens (i.e. the global state gets the garbage), it returns `true`. Otherwise, 174 | /// it returns `false`. 175 | fn add_garbage(&mut self, garbage: Garbage) -> bool { 176 | // Push the garbage to the cache of garbage. 177 | self.garbage.push(garbage); 178 | 179 | // Export the garbage if it exceeds the limit. 180 | // TODO: use memory instead of items as a metric. 181 | if self.garbage.len() > settings::get().max_garbage_before_export { 182 | self.export_garbage(); 183 | true 184 | } else { false } 185 | } 186 | 187 | /// See `export_garbage()` for more information. 188 | fn export_garbage(&mut self) { 189 | // Print message in debug mode. 190 | debug::exec(|| println!("Exporting garbage.")); 191 | 192 | // Clear the vector and export the garbage. 193 | global::export_garbage(mem::replace(&mut self.garbage, Vec::new())); 194 | } 195 | } 196 | 197 | impl Drop for State { 198 | fn drop(&mut self) { 199 | // Clear every hazard to "dead" state. 200 | for hazard in self.available_hazards.drain(..) { 201 | hazard.kill(); 202 | } 203 | 204 | // The thread is exiting, thus we must export the garbage to the global state to avoid 205 | // memory leaks. It is very important that this does indeed not tick, as causing garbage 206 | // collection means accessing RNG state, a TLS variable, which cannot be done when, we are 207 | // here, after it has deinitialized. 208 | // TODO: Figure out a way we can tick anyway. 209 | self.export_garbage(); 210 | } 211 | } 212 | 213 | #[cfg(test)] 214 | mod tests { 215 | use super::*; 216 | use garbage::Garbage; 217 | use hazard; 218 | use std::thread; 219 | 220 | #[test] 221 | fn dtor_runs() { 222 | fn dtor(x: *const u8) { 223 | unsafe { 224 | *(x as *mut u8) = 1; 225 | } 226 | } 227 | 228 | for _ in 0..1000 { 229 | let b = Box::new(0); 230 | let h = get_hazard(); 231 | h.protect(&*b); 232 | add_garbage(Garbage::new(&*b, dtor)); 233 | ::gc(); 234 | assert_eq!(*b, 0); 235 | ::gc(); 236 | h.free(); 237 | ::gc(); 238 | assert_eq!(*b, 1); 239 | } 240 | } 241 | 242 | #[test] 243 | fn dtor_runs_cross_thread() { 244 | fn dtor(x: *const u8) { 245 | unsafe { 246 | *(x as *mut u8) = 1; 247 | } 248 | } 249 | 250 | for _ in 0..1000 { 251 | let b = Box::new(0); 252 | let bptr = &*b as *const _ as usize; 253 | let h = thread::spawn(move || { 254 | let h = get_hazard(); 255 | h.protect(bptr as *const u8); 256 | h 257 | }).join().unwrap(); 258 | add_garbage(Garbage::new(&*b, dtor)); 259 | ::gc(); 260 | assert_eq!(*b, 0); 261 | ::gc(); 262 | h.free(); 263 | ::gc(); 264 | assert_eq!(*b, 1); 265 | } 266 | } 267 | 268 | #[test] 269 | fn clear_hazards() { 270 | let mut s = State::default(); 271 | let mut v = Vec::new(); 272 | for _ in 0..100 { 273 | let (w, r) = hazard::create(); 274 | w.protect(0x1 as *const u8); 275 | v.push(r); 276 | s.free_hazard(w); 277 | } 278 | 279 | for i in &v[0..16] { 280 | assert_eq!(i.get(), hazard::State::Free); 281 | } 282 | 283 | mem::forget(v); 284 | } 285 | 286 | #[test] 287 | fn kill_hazards() { 288 | fn dtor(x: *const u8) { 289 | unsafe { 290 | *(x as *mut u8) = 1; 291 | } 292 | } 293 | 294 | for _ in 0..1000 { 295 | let b = thread::spawn(move || { 296 | let b = Box::new(0); 297 | let h = get_hazard(); 298 | h.protect(&*b); 299 | add_garbage(Garbage::new(&*b, dtor)); 300 | ::gc(); 301 | assert_eq!(*b, 0); 302 | b 303 | }).join().unwrap(); 304 | ::gc(); 305 | assert_eq!(*b, 1); 306 | } 307 | } 308 | 309 | #[cfg(debug_assertions)] 310 | #[test] 311 | 
#[should_panic] 312 | fn debug_free_blocked() { 313 | use std::mem; 314 | 315 | let (writer, reader) = hazard::create(); 316 | mem::forget(reader); 317 | 318 | free_hazard(writer); 319 | } 320 | } 321 | -------------------------------------------------------------------------------- /conc/src/mpsc.rs: -------------------------------------------------------------------------------- 1 | //! Multi-producer single-consumer queues. 2 | //! 3 | //! Since the standard library's implementation of `mpsc` requires us to clone the senders in 4 | //! advance, such that we cannot store them in our global state outside a lock, we must implement 5 | //! our own `mpsc` queue. 6 | //! 7 | //! Right now, the implementation is really nothing but a wrapper around `Mutex>`, and 8 | //! although this is reasonably fast as the lock is only held for very short time, it is 9 | //! sub-optimal, and blocking. 10 | 11 | use parking_lot::Mutex; 12 | use std::sync::Arc; 13 | use std::mem; 14 | 15 | /// Create a MPSC pair. 16 | /// 17 | /// This creates a "channel", i.e. a pair of sender and receiver connected to each other. 18 | pub fn channel() -> (Sender, Receiver) { 19 | // Create a new ARC. 20 | let end = Arc::new(Mutex::new(Vec::new())); 21 | 22 | (Sender { 23 | inner: end.clone(), 24 | }, Receiver { 25 | inner: end, 26 | }) 27 | } 28 | 29 | /// The sender of a MPSC channel. 30 | pub struct Sender { 31 | /// The wrapped end. 32 | inner: Arc>>, 33 | } 34 | 35 | impl Sender { 36 | /// Send an item to this channel. 37 | pub fn send(&self, item: T) { 38 | // Lock the vector, and push. 39 | self.inner.lock().push(item); 40 | } 41 | } 42 | 43 | /// The receiver of a MPSC channel. 44 | pub struct Receiver { 45 | /// The wrapped end. 46 | inner: Arc>>, 47 | } 48 | 49 | impl Receiver { 50 | /// Receive all the elements in the queue. 51 | /// 52 | /// This takes all the elements and applies the given closure to them in an unspecified order. 53 | pub fn recv_all(&self) -> Vec { 54 | // Lock the vector, and replace it by an empty vector, then iterate. 55 | mem::replace(&mut *self.inner.lock(), Vec::new()) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /conc/src/settings.rs: -------------------------------------------------------------------------------- 1 | //! Settings and presets. 2 | 3 | use std::cell::Cell; 4 | 5 | thread_local! { 6 | /// The settings for the current thread. 7 | static LOCAL_SETTINGS: Cell = Cell::new(Settings::default()) 8 | } 9 | 10 | /// Settings for the system. 11 | #[derive(Copy, Clone, PartialEq, Eq, Debug)] 12 | pub struct Settings { 13 | /// The probability of triggering a GC when ticking. 14 | /// 15 | /// Whenever the system "ticks" it generates a random number. If the number is below this 16 | /// setting, it will try to collect the garbage. 17 | /// 18 | /// So, this probability is given such that `0` corresponds to never and `!0` corresponds to 19 | /// nearly always. 20 | pub gc_probability: usize, 21 | /// The maximal amount of garbage before exportation to the global state. 22 | /// 23 | /// When the local state's garbage queue exceeds this limit, it exports it to the global 24 | /// garbage queue. 25 | pub max_garbage_before_export: usize, 26 | /// The maximal amount of non-free hazards in the thread-local cache. 27 | /// 28 | /// When it exceeds this limit, it will clean up the cached hazards. 
With "cleaning up" we mean 29 | /// setting the state of the hazards to "free" in order to allow garbage collection of the 30 | /// object it is currently protecting. 31 | pub max_non_free_hazards: usize, 32 | } 33 | 34 | impl Default for Settings { 35 | fn default() -> Settings { 36 | Settings { 37 | gc_probability: (!0) / 128, 38 | max_garbage_before_export: 64, 39 | max_non_free_hazards: 16, 40 | } 41 | } 42 | } 43 | 44 | impl Settings { 45 | /// Preset for low memory, high CPU usage. 46 | pub fn low_memory() -> Settings { 47 | Settings { 48 | gc_probability: (!0) / 32, 49 | max_garbage_before_export: 16, 50 | max_non_free_hazards: 4, 51 | } 52 | } 53 | 54 | /// Preset for high memory, low CPU usage. 55 | pub fn low_cpu() -> Settings { 56 | Settings { 57 | gc_probability: (!0) / 256, 58 | max_garbage_before_export: 128, 59 | max_non_free_hazards: 32, 60 | } 61 | } 62 | 63 | /// Disable GC for this settings instance. 64 | /// 65 | /// This ensures that the current thread will not be blocked to collect garbage. The garbage 66 | /// can still be propagated and destroyed, it will just not happen in this thread. 67 | pub fn disable_automatic_gc(&mut self) { 68 | self.gc_probability = 0; 69 | } 70 | 71 | /// Disable automatic exportation. 72 | /// 73 | /// This ensures that no destructors gets exported to the global state before the thread exits. 74 | /// In particular, no destructor will be run unless exportation is explicitly done. 75 | pub fn disable_automatic_export(&mut self) { 76 | // Set to the max value. This will prevent exportation, as the garbage (which is of more 77 | // than one byte) queue would have to fill more than the whole memory space, which is 78 | // obviously impossible. 79 | self.max_garbage_before_export = !0; 80 | } 81 | } 82 | 83 | /// Get the settings of the current thread. 84 | pub fn get() -> Settings { 85 | LOCAL_SETTINGS.with(|x| x.get()) 86 | } 87 | 88 | /// Set the settings for the current thread. 89 | /// 90 | /// # Important 91 | /// 92 | /// This is not global. That is, if you call this in thread A, the setting change won't affect 93 | /// thread B. If you want to have the same settings in multiple threads, you should call this 94 | /// function in the start of every thread you spawn with the `Settings`, you want. 95 | pub fn set_local(settings: Settings) { 96 | LOCAL_SETTINGS.with(|x| x.set(settings)) 97 | } 98 | 99 | #[cfg(test)] 100 | mod tests { 101 | use super::*; 102 | use std::thread; 103 | use {Garbage, local}; 104 | 105 | #[test] 106 | fn set_get() { 107 | set_local(Settings { 108 | max_garbage_before_export: 22, 109 | .. Default::default() 110 | }); 111 | assert_eq!(get().max_garbage_before_export, 22); 112 | } 113 | 114 | #[test] 115 | fn default() { 116 | thread::spawn(|| { 117 | assert_eq!(get(), Settings::default()); 118 | }).join().unwrap(); 119 | } 120 | 121 | #[test] 122 | fn disable_automatic_gc() { 123 | thread_local! { 124 | static X: Cell = Cell::default(); 125 | } 126 | 127 | fn dtor(_: *const u8) { 128 | X.with(|x| x.set(true)); 129 | } 130 | 131 | let mut settings = get(); 132 | settings.disable_automatic_gc(); 133 | set_local(settings); 134 | 135 | for _ in 0..100000 { 136 | local::add_garbage(Garbage::new(0x1 as *const u8, dtor)); 137 | assert!(!X.with(|x| x.get())); 138 | } 139 | 140 | // Avoid messing with other tests. 
141 | set_local(Settings::default()); 142 | } 143 | 144 | #[test] 145 | fn disable_automatic_exportation() { 146 | fn dtor(x: *const u8) { 147 | unsafe { 148 | *(x as *mut u8) = 1; 149 | } 150 | } 151 | 152 | let mut settings = get(); 153 | settings.disable_automatic_export(); 154 | set_local(settings); 155 | 156 | for _ in 0..100000 { 157 | let b = Box::new(0); 158 | local::add_garbage(Garbage::new(&*b, dtor)); 159 | assert_eq!(*b, 0); 160 | } 161 | 162 | // Avoid messing with other tests. 163 | set_local(Settings::default()); 164 | } 165 | 166 | #[test] 167 | fn compare_presets() { 168 | let low = Settings::low_memory(); 169 | let high = Settings::low_cpu(); 170 | 171 | assert!(low.gc_probability > high.gc_probability); 172 | assert!(high.max_garbage_before_export > low.max_garbage_before_export); 173 | assert!(high.max_non_free_hazards > low.max_non_free_hazards); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /conc/src/sync/mod.rs: -------------------------------------------------------------------------------- 1 | //! Various simple lock-free data structures built on `conc`. 2 | 3 | mod stm; 4 | mod treiber; 5 | 6 | pub use self::stm::Stm; 7 | pub use self::treiber::Treiber; 8 | -------------------------------------------------------------------------------- /conc/src/sync/stm.rs: -------------------------------------------------------------------------------- 1 | //! Software transactional memory. 2 | 3 | use {Atomic, Guard}; 4 | use std::sync::atomic; 5 | 6 | /// A software transactional memory container. 7 | pub struct Stm { 8 | /// The inner data. 9 | inner: Atomic, 10 | } 11 | 12 | impl Stm { 13 | /// Create a new STM container. 14 | pub fn new(data: Option>) -> Stm { 15 | Stm { 16 | inner: Atomic::new(data), 17 | } 18 | } 19 | 20 | /// Update the data. 21 | /// 22 | /// This applies closure `f` to the data of `self`. If the data isn't updated in the meantime, 23 | /// the change will applied. Otherwise, the closure is reevaluated. 24 | pub fn update(&self, f: F) 25 | where 26 | F: Fn(Option>) -> Option>, 27 | T: 'static, 28 | { 29 | loop { 30 | // Read a snapshot of the current data. 31 | let snapshot = self.inner.load(atomic::Ordering::Acquire); 32 | // Construct a pointer from this guard. 33 | let snapshot_ptr = snapshot.as_ref().map(Guard::as_ptr); 34 | // Evaluate the closure on the snapshot. 35 | let ret = f(snapshot); 36 | 37 | // If the snapshot pointer is still the same, update the data to the closure output. 38 | if self.inner.compare_and_store(snapshot_ptr, ret, atomic::Ordering::Release).is_ok() { 39 | break; 40 | } 41 | } 42 | } 43 | 44 | /// Read the container. 
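/// This takes a relaxed-ordering snapshot and returns a guard to it, if any. A minimal sketch,
/// mirroring the single-threaded test below:
///
/// ```ignore
/// let stm = Stm::new(None);
/// stm.update(|_| Some(Box::new(4)));
/// assert_eq!(*stm.load().unwrap(), 4);
/// ```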
45 | pub fn load(&self) -> Option> { 46 | self.inner.load(atomic::Ordering::Relaxed) 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod tests { 52 | use super::*; 53 | use std::thread; 54 | use std::sync::Arc; 55 | 56 | #[test] 57 | fn single_threaded() { 58 | let stm = Stm::new(None); 59 | 60 | stm.update(|_| Some(Box::new(4))); 61 | stm.update(|x| Some(Box::new(*x.unwrap() + 1))); 62 | stm.update(|x| { 63 | assert!(*x.unwrap() == 5); 64 | None 65 | }); 66 | assert!(stm.load().is_none()); 67 | } 68 | 69 | #[test] 70 | fn multi_threaded() { 71 | let stm = Arc::new(Stm::new(Some(Box::new(0)))); 72 | 73 | let mut j = Vec::new(); 74 | for _ in 0..16 { 75 | let stm = stm.clone(); 76 | j.push(thread::spawn(move || { 77 | for _ in 0..1_000_000 { 78 | stm.update(|x| Some(Box::new(*x.unwrap() + 1))) 79 | } 80 | })) 81 | } 82 | 83 | for i in j { 84 | i.join().unwrap(); 85 | } 86 | 87 | assert_eq!(*stm.load().unwrap(), 16_000_000); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /control-flow/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "control-flow" 3 | version = "0.1.1" 4 | authors = ["ticki "] 5 | description = "A hack to control control-flow outside closures." 6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/control-flow" 8 | license = "MIT" 9 | keywords = ["closure", "control", "flow", "macro"] 10 | exclude = ["target", "Cargo.lock"] 11 | -------------------------------------------------------------------------------- /control-flow/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A hack to control control-flow outside closures. 2 | //! 3 | //! This crate allows one to do things like breaking loops outside a closure. It works through a 4 | //! a macro hack. Unless you really really need this, don't use it. 5 | //! 6 | //! # Example 7 | //! 8 | //! ```rust 9 | //! #[macro_use] 10 | //! extern crate control_flow; 11 | //! 12 | //! loop { 13 | //! let closure = || { 14 | //! defer!(break) 15 | //! }; 16 | //! 17 | //! // Breaks the loop. 18 | //! run_loop!(closure()); 19 | //! } 20 | //! ``` 21 | 22 | /// A deferred control-flow command. 23 | #[must_use = "Without using the `Command` it doesn't do anything. You should execute it through `run!()` or `run_loop!()`."] 24 | pub enum Command { 25 | /// Pass the value on. 26 | /// 27 | /// This is not the same as return. What it does is that instead of breaking the control flow, 28 | /// it passes on the value. That is, when `run!()` is called on this variant, the value that it 29 | /// holds is evaluated to. 30 | Give(T), 31 | /// Return the value. 32 | /// 33 | /// This (when eventually executed) returns the given value. 34 | Return(R), 35 | /// Break a loop. 36 | /// 37 | /// This (when eventually executed) breaks the loop. 38 | Break, 39 | /// Continue a loop. 40 | /// 41 | /// This (when eventually executed) continues the loop to next iteration. 42 | Continue, 43 | } 44 | 45 | /// Create a deferred control-flow command. 46 | /// 47 | /// This takes a command (e.g. `return value`, `break`, `continue`, etc.) and creates the command 48 | /// in the form of the `Command` enum. This is deferred (that is, it is not runned instantly) until 49 | /// one executes the `Command`, which is done through `run!()` and `run_loop!()` depending on 50 | /// whether or not you are in a loop. 51 | #[macro_export] 52 | macro_rules! 
defer { 53 | (return $val:expr) => { $crate::Command::Return($val) }; 54 | (return) => { defer!(return ()) }; 55 | (break) => { $crate::Command::Break }; 56 | (continue) => { $crate::Command::Continue }; 57 | ($val:expr) => { $crate::Command::Give($val) }; 58 | () => { defer!(()) } 59 | } 60 | 61 | /// Run a deferred control-flow command (outside a loop). 62 | /// 63 | /// This takes a `Command` and runs it. This only works when not using loop-specific commands. 64 | #[macro_export] 65 | macro_rules! run { 66 | ($command:expr) => { 67 | match $command { 68 | $crate::Command::Give(x) => x, 69 | $crate::Command::Return(x) => return x, 70 | _ => panic!("\ 71 | Using loop-dependent `Command` variants without loop mode enabled. Consider using \ 72 | `control_loop` instead.\ 73 | "), 74 | } 75 | } 76 | } 77 | 78 | /// Run a deferred control-flow command within a loop. 79 | /// 80 | /// This takes a `Command` and runs it. 81 | #[macro_export] 82 | macro_rules! run_loop { 83 | ($command:expr) => { 84 | match $command { 85 | $crate::Command::Give(x) => x, 86 | $crate::Command::Return(x) => return x, 87 | $crate::Command::Break => break, 88 | $crate::Command::Continue => continue, 89 | } 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | mod tests { 95 | #[test] 96 | fn loop_break() { 97 | let mut x = true; 98 | loop { 99 | run_loop!(defer!(break)); 100 | x = false; 101 | } 102 | assert!(x); 103 | } 104 | 105 | #[test] 106 | fn loop_continue() { 107 | let mut x = true; 108 | for _ in 0..100 { 109 | assert!(x); 110 | run_loop!(defer!(continue)); 111 | x = false; 112 | } 113 | } 114 | 115 | #[test] 116 | #[allow(unused_assignments)] 117 | fn return_early() { 118 | let x = false; 119 | run!(defer!(return)); 120 | assert!(x); 121 | } 122 | 123 | #[test] 124 | #[allow(unused_assignments)] 125 | fn store_ctrl() { 126 | assert!((|| { 127 | let mut x = defer!(return false); 128 | x = defer!(return true); 129 | 130 | run!(x); 131 | unreachable!(); 132 | })()); 133 | } 134 | 135 | 136 | #[test] 137 | fn direct_value() { 138 | assert!(run!(defer!(true))); 139 | assert_eq!(run!(defer!()), ()); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tfs-core" 3 | version = "0.1.0" 4 | authors = ["ticki "] 5 | 6 | [dependencies] 7 | cbloom = "0.1" 8 | crossbeam = "0.2" 9 | futures = "0.1" 10 | little-endian = "1.0" 11 | lz4-compress = "0.1" 12 | mlcr = "0.2" 13 | rand = "0.3" 14 | ring = "0.7" 15 | ring-pwhash = "0.1" 16 | seahash = "3.0" 17 | slog = "1.5" 18 | speck = "1.0" 19 | thread-object = "0.2" 20 | type-name = "0.1" 21 | 22 | [features] 23 | security = [] 24 | -------------------------------------------------------------------------------- /core/src/alloc/dedup.rs: -------------------------------------------------------------------------------- 1 | //! Data deduplication. 2 | //! 3 | //! This module provides data structures for eliminating duplicates at a page level, meaning that 4 | //! if two equal pages are allocated, they can be reduced to one, reducing the space used. 5 | 6 | use crossbeam::sync::AtomicOption; 7 | use ring::digest; 8 | use std::sync::atomic; 9 | 10 | use {little_endian, disk}; 11 | use alloc::page; 12 | 13 | /// The atomic ordering used in the table. 14 | const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed; 15 | 16 | /// A SHA-256 fingerprint of a page. 
17 | /// 18 | /// It is broken into two `u128` since `u256` isn't supported yet. 19 | // TODO: ^^^^ 20 | struct Fingerprint(u128, u128); 21 | 22 | impl Fingerprint { 23 | /// Fingerprint a page. 24 | /// 25 | /// This calculates the fingerprint of page `buf` through SHA-2. 26 | fn new(buf: &disk::SectorBuf) -> Fingerprint { 27 | // Hash it into a 256-bit value. 28 | let hash = digest::digest(digest::SHA256, buf).as_ref(); 29 | 30 | // Read it in two parts to get two `u128`s. 31 | (little_endian::read(hash), little_endian::read(hash[16..])) 32 | } 33 | } 34 | 35 | /// The maximal number of pagess the table can contain. 36 | const MAX_PAGES_IN_TABLE: usize = 1 << 16; 37 | 38 | /// A deduplication candidate. 39 | /// 40 | /// This is a potential match. It stores data to check if it is a complete match. 41 | #[derive(Copy, Clone)] 42 | struct Candidate { 43 | /// The candidate for deduplication. 44 | /// 45 | /// This is a page pointer of some page which is potentially containing the same data, as the 46 | /// page we're allocating. If it is indeed a match, it is sufficient to use this page instead 47 | /// of allocating a new. 48 | page: page::Pointer, 49 | /// The fingerprint of the page data. 50 | /// 51 | /// No fingerprint function mapping a domain to a smaller codomain is injective (gives unique 52 | /// fingerprints), but with wide enough fingerprints, finding collisions gets practically 53 | /// impossible. Even if an user had malicious intends, they cannot compute a collision. 54 | fingerprint: Fingerprint, 55 | } 56 | 57 | impl Candidate { 58 | /// Check if this candidate matches some data buffer. 59 | /// 60 | /// If not, `false` is returned. 61 | fn is_match(&self, buf: &disk::SectorBuf) -> bool { 62 | // Check the fingerprint against the hash of the buffer. Again, this is strictly speak 63 | // heuristic, but for all practical purposes, no collisions will ever be found. 64 | self.fingerprint == Fingerprint::new(buf) 65 | } 66 | } 67 | 68 | /// A deduplication table. 69 | /// 70 | /// Deduplication tables stores information needed to determine if some page already exist or the 71 | /// disk or not. They're heuristic in the sense that sometimes a duplicate may exists but not be 72 | /// deduplicated. This is due to the fact that there is no probing and thus checksum collisions 73 | /// cannot be resolved. Therefore, it will replace a random old candidate. 74 | #[derive(Default)] 75 | pub struct Table { 76 | /// The table of candidates. 77 | /// 78 | /// When looking up a particular candidate, the checksum modulo the table size is used. If this 79 | /// entry is `None`, there is no candidate. 80 | table: [AtomicOption; MAX_PAGES_IN_TABLE], 81 | } 82 | 83 | impl Table { 84 | /// Find a duplicate of some page. 85 | /// 86 | /// This searches for a duplicate of `buf` which has checksum `cksum`. If no duplicate is 87 | /// found, `None` is returned. 88 | fn dedup(&self, buf: &disk::SectorBuf, cksum: u32) -> Option { 89 | // We look up in the table with the checksum under some modulus, since that is faster to 90 | // calculate than a cryptographic hash, meaning that we can refine candidates based on a 91 | // rougher first-hand measure. 92 | let entry = self.table[cksum % MAX_PAGES_IN_TABLE]; 93 | 94 | // Temporarily remove the entry from the table. 95 | if let Some(candidate) = entry.take(ORDERING) { 96 | // A candidate exists. 97 | 98 | // Put it back into the entry. 99 | entry.swap(candidate); 100 | 101 | // Check if the checksum and fingerprint matches. 
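// (The table slot was only selected by the checksum modulo the table size, so two different
// pages can land in the same slot; the checksum equality plus the SHA-256 fingerprint
// comparison below is what actually establishes a match.)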
102 | if cksum == candidate.page.checksum && candidate.isMatch(buf) { 103 | // Yup. 104 | Some(candidate.page) 105 | } else { 106 | // Nup. 107 | None 108 | } 109 | } else { 110 | // No candidate was stored in the table. 111 | None 112 | } 113 | } 114 | 115 | /// Insert a page into the table. 116 | /// 117 | /// This inserts page `page` with data `buf` into the deduplication table. 118 | fn insert(&mut self, buf: &disk::SectorBuf, page: page::Pointer) { 119 | // Overwrite the old entry with the new updated entry. 120 | self.table[page.checksum % MAX_PAGES_IN_TABLE].swap(Candidate { 121 | page: page, 122 | // TODO: This fingerprint might be double-calculated due to the use in `dedup`. 123 | fingerprint: Fingerprint::new(buf), 124 | }, ORDERING); 125 | } 126 | } 127 | 128 | #[cfg(test)] 129 | mod tests { 130 | use super::*; 131 | 132 | #[test] 133 | fn duplicate() { 134 | let mut table = Table::default(); 135 | let p1 = page::Pointer { 136 | cksum: 7, 137 | .. Default::default() 138 | }; 139 | let p2 = page::Pointer { 140 | cksum: 13, 141 | .. Default::default() 142 | }; 143 | 144 | table.insert(&Default::default(), p1); 145 | table.insert(&Default::default(), p2); 146 | 147 | assert_eq!(table.dedup(&Default::default(), 7), p1); 148 | assert_eq!(table.dedup(&Default::default(), 13), p2); 149 | } 150 | 151 | #[test] 152 | fn checksum_collision() { 153 | let mut table = Table::default(); 154 | let p1 = page::Pointer { 155 | cksum: 7, 156 | .. Default::default() 157 | }; 158 | let p2 = page::Pointer { 159 | cksum: 7, 160 | cluster: cluster::Pointer::new(100).unwrap(), 161 | .. Default::default() 162 | }; 163 | 164 | table.insert([0; disk::SECTOR_SIZE], p1); 165 | table.insert([1; disk::SECTOR_SIZE], p2); 166 | 167 | assert_eq!(table.dedup(&Default::default(), 7), p2); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /core/src/alloc/page.rs: -------------------------------------------------------------------------------- 1 | //! Pages. 2 | //! 3 | //! Pages are like virtual clusters: they share the same size and can contain the same data, but 4 | //! they're represented specially on the disk. The distinction between pages and clusters is what 5 | //! allows for random-access full-disk compression. 6 | //! 7 | //! Clusters can contain at least one page, but often more: this is achieved by concatenating and 8 | //! compressing the pages. If they can be compressed enough to fit into a single cluster, the 9 | //! compressed data will be stored in the cluster. If not, the cluster can contain the page 10 | //! uncompressed, in which pages and clusters coincide. 11 | //! 12 | //! To distinguish between various pages in a cluster, the pointer contains offset information. 13 | //! There is a reserved offset value for representing uncompressed clusters as well. 14 | //! 15 | //! In other words, RACC (the name of the algorithm) greedily tries to fit as many pages into a 16 | //! cluster by compressing the pages together. To avoid storing metadata in the clusters, the 17 | //! pointers contains this information instead. 18 | 19 | use little_endian; 20 | use disk::cluster; 21 | 22 | /// The size (in bytes) of a serialized page pointer. 23 | pub const POINTER_SIZE: usize = 16; 24 | 25 | /// A page pointer. 26 | /// 27 | /// Page pointer contains information necessary for read and write pages on the disk. They're 28 | /// similar to clutter pointer in that sense, but they contain more information: 29 | /// 30 | /// 1. The cluster the page is stored in. 
31 | /// 2. _How_ to read the page from the cluster. 32 | /// 3. A checksum of the page. 33 | pub struct Pointer { 34 | /// The cluster in which the page is stored. 35 | cluster: cluster::Pointer, 36 | /// The offset into the decompressed stream. 37 | /// 38 | /// Clusters can be either uncompressed (containing one page) or compressed (containing some 39 | /// number of pages). This field contains information about _how_ to load the page, namely if 40 | /// the cluster is compressed or not, and if compressed, what the offset to read it from the 41 | /// decompressed stream. 42 | /// 43 | /// If this is `None`, the page can be read directly from the cluster without any 44 | /// decompression. 45 | /// 46 | /// If this is `Some(offset)`, the cluster must be decompressed and the page can be read 47 | /// `offset` pages into the decompressed stream. `offset` is assumed to never be `!0` in order 48 | /// to ensure the serialization to be injective. 49 | offset: Option, 50 | /// Checksum of the page. 51 | /// 52 | /// This checksum is calculated through the algorithm specified in the disk header, and when 53 | /// the page is read, it is compared against the page's expected checksum to detect possible 54 | /// data corruption. 55 | /// 56 | /// The reason for storing this in the pointer as opposed to in the cluster is somewhat 57 | /// complex: It has multiple benefits. For one, we avoid resizing the clusters so they match 58 | /// the standard sector size, but more importantly, we avoid the [self-validation 59 | /// problem](https://blogs.oracle.com/bonwick/entry/zfs_end_to_end_data). Namely, it is able to 60 | /// detect phantom writes. 61 | /// 62 | /// The idea was originally conceived by Bonwick (main author of ZFS), who thought that the 63 | /// file system could be organized like a Merkle tree of checksums. 64 | /// 65 | /// Most other approaches have the issue of not detecting phantom writes or not preserving 66 | /// consistency on crashes. 67 | checksum: u32, 68 | } 69 | 70 | impl little_endian::Encode for Pointer { 71 | fn write_le(self, into: &mut [u8]) { 72 | // The lowest bytes are dedicated to the cluster pointer. 73 | little_endian::write(into, self.cluster); 74 | // Next, we write the page offset, which is needed for knowing where the pointer points to 75 | // in the decompressed stream. 76 | little_endian::write(&mut into[cluster::POINTER_SIZE..], if let Some(offset) = self.offset { 77 | // TODO: Consider removing this. 78 | assert_ne!(offset, !0, "The page offset cannot be 0xFFFFFFFF, as it collides with \ 79 | the serialization of the uncompressed page offset."); 80 | 81 | offset 82 | } else { 83 | // When there is no offset, we use `!0` to represent that it is uncompressed. 84 | !0 85 | }); 86 | // Lastly, we write the checksum. 87 | little_endian::write(&mut into[cluster::POINTER_SIZE..][32..], self.checksum); 88 | } 89 | } 90 | 91 | impl little_endian::Decode for Option { 92 | fn read_le(from: &[u8]) -> Option { 93 | // The 64 lowest bits are used for the cluster. 94 | little_endian::read(from).map(|cluster| Pointer { 95 | cluster: cluster, 96 | // Next the page offset is stored. 97 | offset: match little_endian::read(&from[cluster::POINTER_SIZE..]) { 98 | // Again, the trap value !0 represents an uncompressed cluster. 99 | 0xFFFFFFFF => None, 100 | // This cluster was compressed and the offset is `n`. 101 | n => Some(n), 102 | }, 103 | // The highest 32 bit then store the checksum. 
104 | checksum: little_endian::read(&from[cluster::POINTER_SIZE..][32..]), 105 | }) 106 | } 107 | } 108 | 109 | impl little_endian::Encode for Option { 110 | fn write_le(self, into: &mut [u8]) { 111 | if let Some(ptr) = self { 112 | // Simply write the inner pointer into the buffer. 113 | little_endian::write(into, self) 114 | } else { 115 | // Zero the first `POINTER_SIZE` bytes of the buffer (null pointer). 116 | for i in &mut into[..POINTER_SIZE] { 117 | *i = 0; 118 | } 119 | } 120 | } 121 | } 122 | 123 | #[cfg(test)] 124 | mod tests { 125 | use super::*; 126 | 127 | fn assert_inverse(x: u128) { 128 | let mut buf = [0; 16]; 129 | little_endian::write(&mut buf, x); 130 | assert_eq!(little_endian::read(&buf), x); 131 | } 132 | 133 | #[test] 134 | fn inverse_identity() { 135 | assert_inverse(38); 136 | assert_inverse(0x0101010101010101FEFFFFFF21231234); 137 | assert_inverse(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF); 138 | 139 | // Randomized testing. 140 | for mut x in 0u128..1000000 { 141 | // I'm in fucking love with this permutation. 142 | x = x.wrapping_mul(0x6eed0e9da4d94a4f6eed0e9da4d94a4f); 143 | x ^= (x >> 64) >> (x >> 120); 144 | x = x.wrapping_mul(0x6eed0e9da4d94a4f6eed0e9da4d94a4f); 145 | 146 | assert_inverse(x) 147 | } 148 | } 149 | 150 | #[test] 151 | fn fixed_values() { 152 | let mut ptr = Pointer::from(&[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFE, 0xFF, 153 | 0xFF, 0xFF, 0xCC, 0xCC, 0xCC, 0xCC]); 154 | 155 | assert_eq!(ptr.cluster, 0x0101010101010101); 156 | assert_eq!(ptr.offset, Some(!0 - 1)); 157 | assert_eq!(ptr.checksum, 0xCCCCCCCC); 158 | 159 | ptr = Pointer::from(&[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF, 0xFF, 0xFF, 160 | 0xFF, 0xCC, 0xCC, 0xCC, 0xCC]); 161 | 162 | assert_eq!(ptr.cluster, 0x0101010101010101); 163 | assert_eq!(ptr.offset, None); 164 | assert_eq!(ptr.checksum, 0xCCCCCCCC); 165 | 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /core/src/alloc/state_block.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | use {little_endian, Error}; 3 | use alloc::page; 4 | use disk::{self, cluster}; 5 | 6 | /// A compression algorithm configuration option. 7 | pub enum CompressionAlgorithm { 8 | /// Identity function/compression disabled. 9 | Identity = 0, 10 | /// LZ4 compression. 11 | /// 12 | /// LZ4 is a very fast LZ77-family compression algorithm. Like other LZ77 compressors, it is 13 | /// based on streaming data reduplication. The details are described 14 | /// [here](http://ticki.github.io/blog/how-lz4-works/). 15 | Lz4 = 1, 16 | } 17 | 18 | impl TryFrom for CompressionAlgorithm { 19 | type Err = Error; 20 | 21 | fn try_from(from: u16) -> Result { 22 | match from { 23 | 0 => Ok(CompressionAlgorithm::Identity), 24 | 1 => Ok(CompressionAlgorithm::Lz4), 25 | 0x8000...0xFFFF => Err(err!(Corruption, "unknown implementation-defined compression algorithm option {:x}", from)), 26 | _ => Err(err!(Corruption, "invalid compression algorithm option {:x}", from)), 27 | } 28 | } 29 | } 30 | 31 | /// The freelist head. 32 | /// 33 | /// The freelist chains some number of blocks containing pointers to free blocks. This allows for 34 | /// simple and efficient allocation. This struct stores information about the head block in the 35 | /// freelist. 36 | struct FreelistHead { 37 | /// A pointer to the head of the freelist. 38 | /// 39 | /// This cluster contains pointers to other free clusters. If not full, it is padded with 40 | /// zeros. 
41 | cluster: cluster::Pointer, 42 | /// The checksum of the freelist head up to the last free cluster. 43 | /// 44 | /// This is the checksum of the metacluster (at `self.cluster`). 45 | checksum: u64, 46 | } 47 | 48 | /// The state sub-block. 49 | pub struct State { 50 | /// A pointer to the superpage. 51 | pub superpage: Option, 52 | /// The freelist head. 53 | /// 54 | /// If the freelist is empty, this is set to `None`. 55 | pub freelist_head: Option, 56 | } 57 | 58 | /// The options sub-block. 59 | pub struct Options { 60 | /// The chosen compression algorithm. 61 | pub compression_algorithm: CompressionAlgorithm, 62 | } 63 | 64 | /// The TFS state block. 65 | pub struct StateBlock { 66 | /// The static options section of the state block. 67 | pub options: Options, 68 | /// The dynamic state section of the state block. 69 | pub state: State, 70 | } 71 | 72 | impl StateBlock { 73 | /// Parse the binary representation of a state block. 74 | fn decode( 75 | buf: &disk::SectorBuf, 76 | checksum_algorithm: disk::header::ChecksumAlgorithm, 77 | ) -> Result { 78 | // Make sure that the checksum of the state block matches the 8 byte field in the start. 79 | let expected = little_endian::read(&buf); 80 | let found = checksum_algorithm.hash(&buf[8..]); 81 | if expected != found { 82 | return Err(err!(Corruption, "mismatching checksums in the state block - expected \ 83 | {:x}, found {:x}", expected, found)); 84 | } 85 | 86 | Ok(StateBlock { 87 | options: Options { 88 | // Load the compression algorithm config field. 89 | compression_algorithm: CompressionAlgorithm::try_from(little_endian::read(buf[8..]))?, 90 | }, 91 | state: State { 92 | // Load the superpage pointer. 93 | superpage: little_endian::read(buf[16..]), 94 | // Construct the freelist head metadata. If the pointer is 0, we return `None`. 95 | freelist_head: little_endian::read(&buf[32..]).map(|freelist_head| { 96 | FreelistHead { 97 | cluster: freelist_head, 98 | // Load the checksum of the freelist head. 99 | checksum: little_endian::read(&buf[40..]), 100 | } 101 | }), 102 | }, 103 | }) 104 | } 105 | 106 | /// Encode the state block into a sector-sized buffer. 107 | fn encode(&self, checksum_algorithm: disk::header::ChecksumAlgorithm) -> disk::SectorBuf { 108 | // Create a buffer to hold the data. 109 | let mut buf = disk::SectorBuf::default(); 110 | 111 | // Write the compression algorithm. 112 | little_endian::write(&mut buf[8..], self.options.compression_algorithm as u16); 113 | // Write the superpage pointer. If no superpage is initialized, we simply write a null 114 | // pointer. 115 | little_endian::write(&mut buf[16..], self.state.superpage); 116 | 117 | if let Some(freelist_head) = self.state.freelist_head { 118 | // Write the freelist head pointer. 119 | little_endian::write(&mut buf[32..], freelist_head.cluster); 120 | // Write the checksum of the freelist head. 121 | little_endian::write(&mut buf[40..], freelist_head.checksum); 122 | } 123 | // If the free list was empty, both the checksum, and pointer are zero, which matching the 124 | // buffer's current state. 125 | 126 | // Calculate and store the checksum. 
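// (The hash covers everything after the first 8 bytes; the result is then written into those
// first 8 bytes, mirroring the check performed in `decode` above.)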
127 | let cksum = checksum_algorithm.hash(&buf[8..]); 128 | little_endian::write(&mut buf, cksum); 129 | 130 | buf 131 | } 132 | } 133 | 134 | #[cfg(test)] 135 | mod tests { 136 | use super::*; 137 | use error; 138 | 139 | #[test] 140 | fn inverse_identity() { 141 | let mut block = StateBlock::default(); 142 | assert_eq!(StateBlock::decode(block.encode()).unwrap(), block); 143 | 144 | block.options.compression_algorithm = CompressionAlgorithm::Identity; 145 | assert_eq!(StateBlock::decode(block.encode()).unwrap(), block); 146 | 147 | block.state.superpage = 200; 148 | assert_eq!(StateBlock::decode(block.encode()).unwrap(), block); 149 | 150 | block.state.freelist_head = Some(FreelistHead { 151 | cluster: 22, 152 | checksum: 2, 153 | }); 154 | assert_eq!(StateBlock::decode(block.encode()).unwrap(), block); 155 | } 156 | 157 | #[test] 158 | fn manual_mutation() { 159 | let mut block = StateBlock::default(); 160 | let mut sector = block.encode(); 161 | 162 | block.options.compression_algorithm = CompressionAlgorithm::Identity; 163 | sector[9] = 0; 164 | little_endian::write(&mut sector, seahash::hash(sector[8..])); 165 | assert_eq!(sector, block.encode()); 166 | 167 | block.state.superpage = 29; 168 | sector[16] = 29; 169 | little_endian::write(&mut sector, seahash::hash(sector[8..])); 170 | assert_eq!(sector, block.encode()); 171 | 172 | block.state.freelist_head = Some(FreelistHead { 173 | cluster: 22, 174 | checksum: 2, 175 | }); 176 | sector[32] = 22; 177 | sector[40] = 2; 178 | little_endian::write(&mut sector, seahash::hash(sector[8..])); 179 | assert_eq!(sector, block.encode()); 180 | } 181 | 182 | #[test] 183 | fn mismatching_checksum() { 184 | let mut sector = StateBlock::default().encode(); 185 | sector[2] = 20; 186 | assert_eq!(StateBlock::decode(sector).unwrap_err().kind, error::Kind::Corruption); 187 | } 188 | 189 | #[test] 190 | fn unknown_invalid_options() { 191 | let mut sector = StateBlock::default().encode(); 192 | 193 | sector = StateBlock::default().encode(); 194 | 195 | sector[8] = 0xFF; 196 | assert_eq!(StateBlock::decode(sector).unwrap_err().kind, error::Kind::Corruption); 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /core/src/disk/cache.rs: -------------------------------------------------------------------------------- 1 | use futures::Future; 2 | use atomic_hashmap::AtomicHashMap; 3 | use {mlcr, Error}; 4 | use disk::{self, vdev, Disk}; 5 | use disk::header::DiskHeader; 6 | 7 | /// The default initial capacity of the sector map. 8 | const INITIAL_CAPACITY: usize = 256; 9 | 10 | /// A cached disk. 11 | /// 12 | /// This wrapper manages caching of the disk. 13 | pub struct Cached { 14 | /// The inner disk. 15 | disk: D, 16 | 17 | /// The cache replacement tracker. 18 | /// 19 | /// This tracks the state of the replacement algorithm, which chooses which cache block shall 20 | /// be replaced in favor of a new cache. It serves to estimate/guess which block is likely not 21 | /// used in the near future. 22 | tracker: mlcr::ConcurrentCache, 23 | /// The sector-number-to-data block map. 24 | sectors: AtomicHashMap, 25 | } 26 | 27 | impl Cached { 28 | /// Create a cache from a backing disk. 29 | fn new(disk: D) -> Cached { 30 | Cached { 31 | disk: disk, 32 | tracker: mlcr::ConcurrentCache::new(), 33 | sectors: AtomicHashMap::with_capacity(INITIAL_CAPACITY), 34 | } 35 | } 36 | 37 | /// Write a sector. 38 | /// 39 | /// This writes `buf` into sector `sector`. If it fails, the error is returned. 
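/// The cache is write-through: the buffer is inserted into the in-memory sector map before the
/// write is forwarded to the underlying disk, so subsequent reads of the sector are served from
/// the cache.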
40 | fn write( 41 | &self, 42 | sector: disk::Sector, 43 | buf: Box, 44 | ) -> future!(()) { 45 | debug!(self, "writing sector"; "sector" => sector); 46 | 47 | // Then insert it into the cache. 48 | self.sectors.insert(sector, buf); 49 | // Write the data to the disk. 50 | self.disk.write(sector, &buf) 51 | } 52 | 53 | /// Drop a sector from the cache and trim it. 54 | /// 55 | /// After this has been completed, the data of the sector shall not be read, until some other 56 | /// data has been written to the sector. 57 | /// 58 | /// Note that it doesn't necessarily "wipe" the data. 59 | fn trim(&self, sector: disk::Sector) -> future!(()) { 60 | debug!(self, "wiping sector"; "sector" => sector); 61 | 62 | // Update the cache tracker. 63 | self.tracker.remove(sector); 64 | // Update the sector map. 65 | self.sectors.remove(sector); 66 | // Finally, trim the sector. 67 | self.disk.trim(sector) 68 | } 69 | 70 | /// Read a sector. 71 | /// 72 | /// This reads sector `sector`, and applies the closure `map`. If `sector` needs to be fetched 73 | /// from the disk, and `map` fails, data recovery is attempted. 74 | /// 75 | /// If an I/O operation fails, the error is returned. Otherwise, the return value of `map` is 76 | /// returned. 77 | fn read_then(&self, sector: disk::Sector, map: F) -> future!(T) 78 | where F: Fn(atomic_hash_map::Value) -> future!(T) { 79 | debug!(self, "reading sector"; "sector" => sector); 80 | 81 | // Check if the sector is already available in the cache. 82 | if let Some(buf) = self.sectors.get(sector) { 83 | // Yup, we found the sector in the cache. 84 | trace!(self, "cache hit; reading from cache"; "sector" => sector); 85 | 86 | // Touch the sector. 87 | self.tracker.touch(sector); 88 | 89 | map(buf) 90 | } else { 91 | trace!(self, "cache miss; reading from disk"; "sector" => sector); 92 | 93 | // Insert the sector into the cache tracker. 94 | self.tracker.touch(sector); 95 | 96 | // Fetch the data from the disk. 97 | self.disk.read(sector).map(|buf| { 98 | // Insert the read data into the hash table. 99 | self.sectors.get_mut_or(sector, buf) 100 | }).and_then(map) 101 | // TODO: If the above failed, try to recover the data through the vdev redundancy. 102 | } 103 | } 104 | 105 | /// Reduce the cache. 106 | /// 107 | /// This reduces the cache to exactly `to` blocks. 108 | fn reduce(&self, to: usize) { 109 | info!(self, "reducing cache"; "to" => to); 110 | 111 | // Lock the cache tracker. 112 | let tracker = self.tracker.lock(); 113 | 114 | // Remove all the coldest sectors. 115 | for i in tracker.trim(to) { 116 | // Remove that piece of shit. 117 | self.sectors.remove(i); 118 | } 119 | } 120 | } 121 | 122 | delegate_log!(Cached.disk); 123 | 124 | // TODO: Add tests. 125 | -------------------------------------------------------------------------------- /core/src/disk/cluster.rs: -------------------------------------------------------------------------------- 1 | //! Clusters. 2 | 3 | use little_endian; 4 | 5 | /// The size (in bytes) of a cluster pointer. 6 | pub const POINTER_SIZE: usize = 8; 7 | 8 | /// A pointer to some cluster. 9 | // TODO: Use `NonZero`. 10 | pub struct Pointer(u64); 11 | 12 | impl little_endian::Encode for Pointer { 13 | fn write_le(self, into: &mut [u8]) { 14 | if let Some(ptr) = self { 15 | // Simply write the inner pointer into the buffer. 16 | little_endian::write(into, self) 17 | } else { 18 | // Zero the first `POINTER_SIZE` bytes of the buffer (null pointer). 
19 | for i in &mut into[..POINTER_SIZE] { 20 | *i = 0; 21 | } 22 | } 23 | } 24 | } 25 | 26 | impl little_endian::Decode for Option { 27 | fn read_le(from: &[u8]) -> Option { 28 | if &from[..POINTER_SIZE] == &[0; POINTER_SIZE] { 29 | // The pointer was null, so we return `None`. 30 | None 31 | } else { 32 | // The pointer wasn't null, so we can simply read it as an integer. Note that we have 33 | // already ensured that it is not null, so it is safe. 34 | Some(Pointer(little_endian::read(from))) 35 | } 36 | } 37 | } 38 | 39 | impl little_endian::Encode for Option { 40 | fn write_le(self, into: &mut [u8]) { 41 | if let Some(ptr) = self { 42 | // Simply write the inner pointer into the buffer. 43 | little_endian::write(into, self) 44 | } else { 45 | // Zero the first `POINTER_SIZE` bytes of the buffer (null pointer). 46 | for i in &mut into[..POINTER_SIZE] { 47 | *i = 0; 48 | } 49 | } 50 | } 51 | } 52 | 53 | #[cfg(test)] 54 | mod tests { 55 | use super::*; 56 | 57 | fn null_pointer() { 58 | assert!(little_endian::read(&[0; POINTER_SIZE]).is_none()); 59 | } 60 | 61 | fn non_null_pointer() { 62 | let original_buf = &[2, 0, 0, 0, 0, 0, 0, 0]; 63 | let ptr = little_endian::read(original_buf).unwrap(); 64 | let mut buf = [0; 8]; 65 | little_endian::write(&mut buf, ptr); 66 | 67 | assert_eq!(original_buf, buf); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /core/src/disk/crypto.rs: -------------------------------------------------------------------------------- 1 | //! Cryptography. 2 | 3 | use little_endian; 4 | use ring_pwhash::scrypt; 5 | 6 | /// Derive the key to use. 7 | pub fn derive_key(salt: u128, password: &[u8]) -> u128 { 8 | /// The `log n` parameter for scrypt. 9 | const SCRYPT_LOG_N: u8 = 20; 10 | /// The `r` parameter for scrypt. 11 | const SCRYPT_R: u32 = 8; 12 | /// The `p` parameter for scrypt. 13 | const SCRYPT_P: u32 = 1; 14 | 15 | // Use scrypt to generate the key from the password and salt. 16 | let mut key = [0; 16]; 17 | scrypt::scrypt(password, salt, &scrypt::ScryptParams::new(SCRYPT_LOG_N, SCRYPT_R, SCRYPT_P), &mut key); 18 | 19 | // Read the scrypt-generated pad into a single integer, used as the key for the cipher. 20 | little_endian::read(key) 21 | } 22 | -------------------------------------------------------------------------------- /core/src/disk/mod.rs: -------------------------------------------------------------------------------- 1 | mod cache; 2 | mod crypto; 3 | mod vdev; 4 | pub mod cluster; 5 | pub mod header; 6 | 7 | use futures::Future; 8 | use {slog, Error}; 9 | 10 | /// The logical sector size. 11 | pub const SECTOR_SIZE: usize = 512; 12 | /// The size of a sector pointer. 13 | pub const SECTOR_POINTER_SIZE: usize = 8; 14 | 15 | /// A disk sector number. 16 | pub type Sector = usize; 17 | /// A buffer of sector size. 18 | pub type SectorBuf = [u8; SECTOR_SIZE]; 19 | 20 | /// A cached disk with a TFS header. 21 | pub type TfsDisk = cache::Cached>; 22 | 23 | /// Load the TFS disk. 24 | /// 25 | /// This does not initialize or create the structure. It will merely load the disk. 26 | pub fn open(disk: D, password: &[u8]) -> future!(TfsDisk) { 27 | vdev::Driver::open(disk).cached() 28 | } 29 | 30 | /// Initialize/create the TFS disk. 31 | /// 32 | /// This creates the structure (given some options given in `options`) of the disk, and effectively 33 | /// initializes a system. 
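/// A minimal sketch of creating and later reopening a system (the backing `disk`, the
/// `options`, and the password are placeholders; both calls return futures, driven to
/// completion here with the blocking `wait()` from futures 0.1):
///
/// ```ignore
/// let fs = init(disk, options).wait()?;
/// let fs = open(disk, b"password").wait()?;
/// ```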
34 | pub fn init(disk: D, options: header::Options) -> future!(TfsDisk) { 35 | vdev::Driver::init(disk, options).cached() 36 | } 37 | 38 | /// A storage device. 39 | /// 40 | /// This trait acts similarly to `std::io::{Read, Write}`, but is designed specifically for disks. 41 | pub trait Disk: slog::Drain { 42 | /// The future returned from read operations. 43 | /// 44 | /// In order to avoid performance hit of copying a whole sector around, we allocate the data on 45 | /// the heap through `Box`. 46 | type ReadFuture: Future, Error = Error>; 47 | /// The future returned from write operations. 48 | type WriteFuture: Future; 49 | /// The future returned from the trim operations. 50 | type TrimFuture: Future; 51 | 52 | /// The number of sectors on this disk. 53 | fn number_of_sectors(&self) -> Sector; 54 | /// Read data from the disk directly into the return value. 55 | /// 56 | /// The result is wrapped in a future, which represents the operation, such that it can be 57 | /// done asynchronously. 58 | fn read(&self, sector: Sector) -> Self::ReadFuture; 59 | /// Write data to the disk. 60 | /// 61 | /// This returns a future, which carries the operation writing `buf` into sector `sector`. 62 | /// First when the future has completed, the operation has been executed. 63 | fn write(&self, sector: Sector, buf: &SectorBuf) -> Self::WriteFuture; 64 | /// Inform the disk that a sector is no longer in use. 65 | /// 66 | /// This returns a future, which carries the operation trimming sector `sector`. First when the 67 | /// future has completed, the operation has been executed. 68 | fn trim(&self, sector: Sector) -> Self::TrimFuture; 69 | 70 | /// Create a cached version of the disk. 71 | fn cached(self) -> cache::Cached { 72 | cache::Cached::new(self) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /core/src/disk/todo: -------------------------------------------------------------------------------- 1 | - Reorganize the state block to have sections similar to the disk header. 2 | ✔ Assign a secret ID and use it for salt. 3 | ✔ Move allocator stuff to its own module. 4 | ✔ Make a map over infrastructure 5 | ✔ Upsert 6 | - Entry API for chashmap 7 | ✔ Move the concurrent cache tracker to the mlcr crate. 8 | - Remove "sector" in favor of the term "cluster" 9 | - Remove the checksum config option 10 | 11 | -------------------------------------------------------------------------------- /core/src/disk/vdev.rs: -------------------------------------------------------------------------------- 1 | //! Virtual devices. 2 | //! 3 | //! A virtual device or "vdev" is a disk with some extra capabilities. It transforms operations to 4 | //! other operationss in order to provide new features. 5 | //! 6 | //! Vdevs themself can be seen as an image (transformation) of another disk. They might modify the 7 | //! sector enumeration or provide some redundancy, encryption, or similar features working on disk 8 | //! level. 9 | //! 10 | //! The term vdev has similar meaning in the context of ZFS. 11 | //! 12 | //! It is important that vdevs keep the invariants of the inner vdev. In particular, it may not 13 | //! leave to an inconsistent state, unless the inner vdev does. 14 | 15 | use std::mem; 16 | use futures::{future, Future}; 17 | 18 | use Error; 19 | use disk::{self, Disk}; 20 | use disk::header::{self, DiskHeader}; 21 | 22 | /// A driver transforming a normal disk into a disk respecting the vdev setup. 
23 | /// 24 | /// It reads the vdev setup from the disk header, which it fetches from the disk. Then it builds 25 | /// the vdev stack, which it stores. 26 | /// 27 | /// Importantly, this subtracts the disk header, so sector `0` is really sector `1` of the inner 28 | /// disk. 29 | pub struct Driver { 30 | /// The cached disk header. 31 | /// 32 | /// The disk header contains various very basic information about the disk and how to interact 33 | /// with it. 34 | /// 35 | /// In reality, we could fetch this from the `disk` field as-we-go, but that hurts performance, 36 | /// so we cache it in memory. 37 | pub header: header::DiskHeader, 38 | /// The inner disk. 39 | // TODO: Remove this vtable? 40 | disk: D, 41 | } 42 | 43 | impl Driver { 44 | /// Set up the driver from some disk. 45 | /// 46 | /// This will load the disk header from `disk` and construct the driver. It will also set the 47 | /// disk to be in open state. If any encryption is enabled, `password` will be used as the 48 | /// password. 49 | /// 50 | /// The result is wrapped in a future, which represents the operation, such that it can be 51 | /// executed asynchronously. 52 | fn open(disk: D, password: &[u8]) -> future!(Driver) { 53 | info!(disk, "loading the state and initializing the driver"); 54 | 55 | // Read the disk header. 56 | debug!(disk, "read the disk header"); 57 | disk.read(0).and_then(|header| { 58 | let driver = Driver { 59 | header: DiskHeader::decode(header)?, 60 | disk: disk, 61 | }; 62 | 63 | match driver.header.state_flag { 64 | // Throw a warning if it wasn't properly shut down. 65 | header::StateFlag::Open => { 66 | warn!(driver, "the disk's state flag is still open, likely wasn't properly shut \ 67 | down last time; beware of data loss"); 68 | }, 69 | // The state inconsistent; throw an error. 70 | header::StateFlag::Inconsistent => return Err(err!(Corruption, "the file system is in an inconsistent state, possibly due to crash")), 71 | } 72 | 73 | // Set the state flag to open. 74 | debug!(driver, "setting the state flag to 'open'"); 75 | driver.header.state_flag = header::StateFlag::Open; 76 | 77 | // Update the version. 78 | debug!(driver, "updating the version number"; 79 | "old version" => header.version_number, 80 | "new version" => header::VERSION_NUMBER); 81 | driver.header.version_number = header::VERSION_NUMBER; 82 | 83 | Ok(driver) 84 | }).and_then(|driver| { 85 | // Flush the updated header. 86 | driver.flush_header().map(|_| driver) 87 | }) 88 | } 89 | 90 | /// Initialize a disk with a new header. 91 | /// 92 | /// This sets the disk header (provided by the `header` argument) of disk `disk` and returns 93 | /// the driver representing the disk. 94 | /// 95 | /// It is used as an entry point to create a new file system. 96 | fn init(disk: D, options: header::Options) -> future!(Driver) { 97 | info!(disk, "creating a new system"); 98 | 99 | // Create the new header from the user-specified options. 100 | let header = DiskHeader::new(options); 101 | // Write the header to the disk. 102 | disk.write(0, header.encode()).map(|_| Driver { 103 | header: header, 104 | disk: disk, 105 | }) 106 | } 107 | 108 | /// Flush the stored disk header. 109 | /// 110 | /// This returns a future, which carries this operation. First when the future has completed, 111 | /// the operations has been executed. 112 | fn flush_header(&self) -> future!(()) { 113 | debug!(self, "flushing the disk header"); 114 | 115 | // Encode and write it to the disk. 
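// (The header always lives in raw sector 0 of the inner disk; this is the sector the driver
// hides from its users by shifting its public sector numbering by one.)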
116 | self.disk.write(0, &self.header.encode()) 117 | } 118 | } 119 | 120 | impl Drop for Driver { 121 | fn drop(&mut self) { 122 | info!(self, "closing the driver"); 123 | 124 | // Set the state flag to close so we know that it was a proper shutdown. 125 | debug!(self, "setting state flag to 'closed'"); 126 | self.header.state_flag = header::StateFlag::Closed; 127 | // Flush the header. 128 | self.flush_header().wait().unwrap(); 129 | } 130 | } 131 | 132 | delegate_log!(Driver.disk); 133 | 134 | impl Disk for Driver { 135 | type ReadFuture = D::ReadFuture; 136 | type WriteFuture = D::WriteFuture; 137 | type TrimFuture = D::TrimFuture; 138 | 139 | fn number_of_sectors(&self) -> disk::Sector { 140 | // Start out with the raw number of sectors. We subtract one to cut of the disk header. 141 | let mut sectors = self.disk.number_of_sectors() - 1; 142 | 143 | // Go over the vdev stack. 144 | for vdev in self.header.vdev_stack { 145 | match vdev { 146 | // Mirrors divide the disk in half, as the higher half must mirror the lower. 147 | header::Vdev::Mirror => sectors /= 2, 148 | header::Vdev::Speck => (), 149 | } 150 | } 151 | } 152 | 153 | fn read(&self, sector: disk::Sector) -> D::ReadFuture { 154 | // We start out by reading the inner buffer. We subtract one to cut of the disk header. 155 | let mut buf = self.disk.read(sector + 1); 156 | 157 | // Go over the vdev stack. 158 | for vdev in self.header.vdev_stack { 159 | // Note that it is very important that `sector` gets updated to account for changed 160 | // address space. 161 | 162 | match vdev { 163 | // TODO 164 | header::Vdev::Speck => unimplemented!(), 165 | _ => (), 166 | } 167 | } 168 | } 169 | 170 | fn write(&self, sector: disk::Sector, buf: &disk::SectorBuf) -> D::WriteFuture { 171 | // Start a vector to hold the writes. This allows us to rewrite the write operations for 172 | // every vdev transformation. 173 | let mut writes = vec![(sector, buf)]; 174 | 175 | // Go over the vdev stack. 176 | for vdev in self.header.vdev_stack { 177 | match vdev { 178 | // Mirror the higher and lower half. 179 | header::Vdev::Mirror => for i in 0..writes.len() { 180 | // Write the higher half. 181 | writes.push((writes[i].0 * 2, writes[i].1)); 182 | }, 183 | // TODO 184 | header::Vdev::Speck => unimplemented!(), 185 | } 186 | } 187 | 188 | // Execute all the writes, we've buffered. 189 | future::join_all(writes.into_iter().map(|(sector, buf)| { 190 | self.disk.write(sector, buf) 191 | })) 192 | } 193 | 194 | fn trim(&self, sector: disk::Sector) -> D::TrimFuture { 195 | // Start a vector to track what sectors to trim. 196 | let mut trims = vec![sector]; 197 | 198 | // Go over the vdev stack. 199 | for vdev in self.header.vdev_stack { 200 | match vdev { 201 | // Mirror the higher and lower half. 202 | header::Vdev::Mirror => for i in 0..writes.len() { 203 | // Trim the higher half's sector. 204 | trims.push(trims[i]); 205 | }, 206 | // Encryption doesn't matter for trimming. 207 | header::Vdev::Speck => (), 208 | } 209 | } 210 | 211 | // Execute all the trims, we've buffered. 212 | future::join_all(trims.into_iter().map(|sector| { 213 | self.disk.trim(sector) 214 | })) 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /core/src/error.rs: -------------------------------------------------------------------------------- 1 | /// The category of an error. 2 | /// 3 | /// This enum contains variants representing general categories of TFS errors. 
4 | #[derive(PartialEq)] 5 | pub enum Kind { 6 | /// Data corruption. 7 | Corruption, 8 | /// No more space to use. 9 | OutOfSpace, 10 | /// Implementation issue. 11 | Implementation, 12 | } 13 | 14 | /// A TFS error. 15 | #[derive(PartialEq)] 16 | pub struct Error { 17 | /// The type ("kind") of the error. 18 | pub kind: Kind, 19 | /// Description of the error. 20 | desc: Box, 21 | } 22 | 23 | /// Create a TFS error. 24 | /// 25 | /// This constructs a value of type `Error` defined by the given parameter. 26 | /// 27 | /// The first argument defines the kind (`Kind`) of the error. There is no need for importing the 28 | /// type, as it is already prefixed with the enum. 29 | /// 30 | /// The rest arguments are the usual formatting syntax (like `println!()`) representing the 31 | /// `Display` implementation of the error. If none, it will simply use the second argument (the 32 | /// description). 33 | #[macro_export] 34 | macro_rules! err { 35 | ($kind:ident, $($rest:tt)*) => { 36 | $crate::error::Error { 37 | kind: $crate::error::Kind::$kind, 38 | desc: format!($($rest)*), 39 | } 40 | }; 41 | } 42 | -------------------------------------------------------------------------------- /core/src/fs/array.rs: -------------------------------------------------------------------------------- 1 | use futures::Future; 2 | use std::marker::PhantomData; 3 | use std::ops::Range; 4 | 5 | use {disk, fs, Error}; 6 | use alloc::page; 7 | 8 | const POINTERS_IN_NODE: u64 = disk::SECTOR_SIZE / page::POINTER_SIZE; 9 | 10 | struct Array { 11 | root: page::Pointer, 12 | len: u64, 13 | _phantom: PhantomData, 14 | } 15 | 16 | impl Array { 17 | fn is_leaf(&self) -> bool { 18 | self.len <= POINTERS_IN_NODE 19 | } 20 | 21 | fn for_each(&self, fs: &fs::State, range: Range, f: F) -> future!(()) 22 | where F: Fn(usize, page::Pointer) { 23 | unimplemented!(); 24 | } 25 | } 26 | 27 | impl> fs::Object for Array { 28 | fn gc_visit(&self, fs: &fs::State) -> future!(()) { 29 | unimplemented!(); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/src/fs/mod.rs: -------------------------------------------------------------------------------- 1 | mod array; 2 | mod object; 3 | 4 | pub use self::object::Object; 5 | 6 | use {type_name, cbloom, alloc, Error}; 7 | use alloc::page; 8 | use futures::Future; 9 | use disk::{self, Disk}; 10 | 11 | struct State { 12 | alloc: alloc::Allocator, 13 | reachable: cbloom::Filter, 14 | } 15 | 16 | impl State { 17 | pub fn alloc( 18 | &self, 19 | buf: disk::SectorBuf, 20 | description: &'static str, 21 | ) -> future!(page::Pointer) { 22 | debug!(self, "allocating buffer"; "description" => description); 23 | 24 | // Allocate the buffer and insert it into the set of currently reachable pages in case that 25 | // it is reachable right now. 26 | Ok(self.alloc.alloc(buf).map(|ptr| self.visit(ptr))) 27 | } 28 | 29 | pub fn set_reachable(&self, ptr: page::Pointer) { 30 | self.reachable.insert(ptr); 31 | } 32 | 33 | pub fn visit(&self, obj: T) -> Result<(), Error> { 34 | trace!(self, "visting object"; "type" => type_name::get::()); 35 | 36 | obj.gc_visit(self) 37 | } 38 | } 39 | 40 | delegate_log!(State.alloc); 41 | -------------------------------------------------------------------------------- /core/src/fs/object.rs: -------------------------------------------------------------------------------- 1 | //! Unifying types and traits for on-disk structures. 2 | 3 | use futures::Future; 4 | 5 | use {fs, Error}; 6 | 7 | /// An on-disk object. 
8 | /// 9 | /// This trait encompasses types which represents on-disk objects. It defines certain operations 10 | /// which such objects have in common. 11 | pub trait Object { 12 | /// "Visit" the node as a part of the GC cycle. 13 | /// 14 | /// Garbage collection works by traversing a graph and creating a set of visited nodes. This 15 | /// visits the node (the object) and adds it to `visited`, and then visits its adjacent nodes. 16 | fn gc_visit(&self, fs: &fs::State) -> future!(()); 17 | } 18 | -------------------------------------------------------------------------------- /core/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! The TFS library. 2 | //! 3 | //! This is the official implementation of the TFS specification. It implements the specification 4 | //! in its full form, and is accessible as a library. 5 | 6 | #![feature(conservative_impl_trait, i128_type, try_from)] 7 | 8 | #[macro_use] 9 | extern crate slog; 10 | 11 | extern crate cbloom; 12 | extern crate crossbeam; 13 | extern crate futures; 14 | extern crate little_endian; 15 | extern crate lz4_compress; 16 | extern crate mlcr; 17 | extern crate rand; 18 | extern crate ring; 19 | extern crate ring_pwhash; 20 | extern crate seahash; 21 | extern crate speck; 22 | extern crate thread_object; 23 | extern crate type_name; 24 | 25 | #[macro_use] 26 | mod error; 27 | #[macro_use] 28 | mod macros; 29 | 30 | mod alloc; 31 | mod disk; 32 | mod fs; 33 | 34 | pub use error::Error; 35 | -------------------------------------------------------------------------------- /core/src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Delegate logging to a field of a struct. 2 | /// 3 | /// This implements `slog::Drain` for a type, by delegating the calls into some field of the type. 4 | /// 5 | /// # Example 6 | /// 7 | /// ```rust 8 | /// delegate_log!(MyType.my_field); 9 | /// ``` 10 | macro_rules! delegate_log { 11 | ($ty:ident.$field:ident) => { 12 | impl ::slog::Drain for $ty 13 | where L: ::slog::Drain { 14 | type Error = E; 15 | 16 | fn log(&self, info: &::slog::Record, o: &::slog::OwnedKeyValueList) -> Result<(), E> { 17 | // Redirect the call to the field. 18 | self.$field.log(info, o) 19 | } 20 | } 21 | } 22 | } 23 | 24 | /// Convenience macro for creating a future. 25 | /// 26 | /// This creates a type `impl Future` with `T` being the given argument. 27 | // TODO: Eventually replace by type alias. 28 | macro_rules! future { 29 | ($ok:ty) => { 30 | impl Future 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /little-endian/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "little-endian" 3 | version = "1.0.0" 4 | authors = ["ticki "] 5 | description = "Encoding and decoding of little-endian format." 6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/little-endian" 8 | license = "MIT" 9 | keywords = ["bit", "binary", "encode", "endian", "byteorder"] 10 | exclude = ["target", "Cargo.lock"] 11 | -------------------------------------------------------------------------------- /little-endian/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
Encoding and decoding of little-endian format. 2 | //! 3 | //! This was created out of fustration with the `byteorder` crate, which I felt had a heavy API, so 4 | //! I created this crate. 5 | 6 | #![feature(i128_type)] 7 | 8 | /// Read an integer from a buffer. 9 | /// 10 | /// This writes `buf` through the methods in `T`'s implementation of `Decode`. 11 | pub fn read(buf: &[u8]) -> T { 12 | T::read_le(buf) 13 | } 14 | 15 | /// Write some integer into a buffer. 16 | /// 17 | /// This writes `from` into `buf` through the methods in `T`'s implementation of `Encode`. 18 | pub fn write(buf: &mut [u8], from: T) { 19 | from.write_le(buf) 20 | } 21 | 22 | /// An encodable type. 23 | pub trait Encode { 24 | /// Write an integer in little-endian format. 25 | /// 26 | /// This writes `self` into the first n bytes (depending on the size of `Self`) of `into` in 27 | /// little-endian format (least significant byte first). 28 | /// 29 | /// # Panics 30 | /// 31 | /// This will potentially panic if `into` is not large enough. 32 | fn write_le(self, into: &mut [u8]); 33 | } 34 | 35 | /// A decodable type. 36 | pub trait Decode { 37 | /// Read an integer in little-endian format. 38 | /// 39 | /// This reads the first n bytes (depending on the size of `Self`) of `from` in little-endian 40 | /// (least significant byte first). 41 | /// 42 | /// # Panics 43 | /// 44 | /// This will potentially panic if `from` is not large enough. 45 | fn read_le(from: &[u8]) -> Self; 46 | } 47 | 48 | impl Decode for u8 { 49 | fn read_le(from: &[u8]) -> u8 { 50 | from[0] 51 | } 52 | } 53 | impl Encode for u8 { 54 | fn write_le(self, into: &mut [u8]) { 55 | into[0] = self; 56 | } 57 | } 58 | 59 | impl Decode for u16 { 60 | fn read_le(from: &[u8]) -> u16 { 61 | from[0] as u16 62 | | (from[1] as u16) << 8 63 | } 64 | } 65 | impl Encode for u16 { 66 | fn write_le(self, into: &mut [u8]) { 67 | into[0] = self as u8; 68 | into[1] = (self >> 8) as u8; 69 | } 70 | } 71 | 72 | 73 | impl Decode for u32 { 74 | fn read_le(from: &[u8]) -> u32 { 75 | from[0] as u32 76 | | (from[1] as u32) << 8 77 | | (from[2] as u32) << 16 78 | | (from[3] as u32) << 24 79 | } 80 | } 81 | impl Encode for u32 { 82 | fn write_le(self, into: &mut [u8]) { 83 | into[0] = self as u8; 84 | into[1] = (self >> 8) as u8; 85 | into[2] = (self >> 16) as u8; 86 | into[3] = (self >> 24) as u8; 87 | } 88 | } 89 | 90 | impl Decode for u64 { 91 | fn read_le(from: &[u8]) -> u64 { 92 | from[0] as u64 93 | | (from[1] as u64) << 8 94 | | (from[2] as u64) << 16 95 | | (from[3] as u64) << 24 96 | | (from[4] as u64) << 32 97 | | (from[5] as u64) << 40 98 | | (from[6] as u64) << 48 99 | | (from[7] as u64) << 56 100 | } 101 | } 102 | impl Encode for u64 { 103 | fn write_le(self, into: &mut [u8]) { 104 | into[0] = self as u8; 105 | into[1] = (self >> 8) as u8; 106 | into[2] = (self >> 16) as u8; 107 | into[3] = (self >> 24) as u8; 108 | into[4] = (self >> 32) as u8; 109 | into[5] = (self >> 40) as u8; 110 | into[6] = (self >> 48) as u8; 111 | into[7] = (self >> 56) as u8; 112 | } 113 | } 114 | 115 | impl Decode for u128 { 116 | fn read_le(from: &[u8]) -> u128 { 117 | from[0] as u128 118 | | (from[1] as u128) << 8 119 | | (from[2] as u128) << 16 120 | | (from[3] as u128) << 24 121 | | (from[4] as u128) << 32 122 | | (from[5] as u128) << 40 123 | | (from[6] as u128) << 48 124 | | (from[7] as u128) << 56 125 | | (from[8] as u128) << 64 126 | | (from[9] as u128) << 72 127 | | (from[10] as u128) << 80 128 | | (from[11] as u128) << 88 129 | | (from[12] as u128) << 96 130 | | (from[13] as 
u128) << 104 131 | | (from[14] as u128) << 112 132 | | (from[15] as u128) << 120 133 | } 134 | } 135 | impl Encode for u128 { 136 | fn write_le(self, into: &mut [u8]) { 137 | into[0] = self as u8; 138 | into[1] = (self >> 8) as u8; 139 | into[2] = (self >> 16) as u8; 140 | into[3] = (self >> 24) as u8; 141 | into[4] = (self >> 32) as u8; 142 | into[5] = (self >> 40) as u8; 143 | into[6] = (self >> 48) as u8; 144 | into[7] = (self >> 56) as u8; 145 | into[8] = (self >> 64) as u8; 146 | into[9] = (self >> 72) as u8; 147 | into[10] = (self >> 80) as u8; 148 | into[11] = (self >> 88) as u8; 149 | into[12] = (self >> 96) as u8; 150 | into[13] = (self >> 104) as u8; 151 | into[14] = (self >> 112) as u8; 152 | into[15] = (self >> 120) as u8; 153 | } 154 | } 155 | 156 | #[cfg(test)] 157 | mod tests { 158 | use super::*; 159 | use std::{ops, mem, fmt}; 160 | 161 | fn test_int(n: T) 162 | where T: Encode + Decode + Copy + PartialEq + From + fmt::Debug 163 | + ops::BitAnd + ops::Shr, 164 | { 165 | let len = mem::size_of::(); 166 | let mut buf = [0; 32]; 167 | write(&mut buf, n); 168 | 169 | for i in 0..len { 170 | assert_eq!(T::from(buf[i]), (n >> T::from(i as u8 * 8)) & T::from(0xFF)); 171 | } 172 | 173 | assert_eq!(read::(&buf), n); 174 | } 175 | 176 | #[test] 177 | fn u8() { 178 | test_int(255u8); 179 | test_int(130u8); 180 | test_int(12u8); 181 | test_int(1u8); 182 | test_int(0u8); 183 | } 184 | 185 | #[test] 186 | fn u16() { 187 | test_int::(0xFFFF); 188 | test_int::(0xABCD); 189 | test_int::(0xAB); 190 | test_int::(0xBA); 191 | test_int::(0); 192 | } 193 | 194 | #[test] 195 | fn u32() { 196 | test_int::(0xFFFFFFFF); 197 | test_int::(0xABCDEF01); 198 | test_int::(0xABCD); 199 | test_int::(0xDCBA); 200 | test_int::(0); 201 | } 202 | 203 | #[test] 204 | fn u64() { 205 | test_int::(0xFFFFFFFFFFFFFFFF); 206 | test_int::(0xABCDEF0123456789); 207 | test_int::(0xABCDEF0); 208 | test_int::(0x0FEDCBA); 209 | test_int::(0); 210 | } 211 | 212 | #[test] 213 | fn u128() { 214 | test_int::(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF); 215 | test_int::(0xABCDEF0123456789ABCDEF0123456789); 216 | test_int::(0xABCDEF012345678); 217 | test_int::(0x876543210FEDCBA); 218 | test_int::(0); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /lz4/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "lz4-compress" 3 | version = "0.1.1" 4 | authors = ["ticki "] 5 | description = "Pure Rust implementation of raw LZ4 compression/decompression." 6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/lz4-compress" 8 | license = "MIT" 9 | keywords = ["compression", "lz4", "compress", "decompression", "decompress"] 10 | exclude = ["target", "Cargo.lock"] 11 | 12 | [dependencies] 13 | byteorder = "1" 14 | quick-error = "1" 15 | -------------------------------------------------------------------------------- /lz4/src/compress.rs: -------------------------------------------------------------------------------- 1 | //! The compression algorithm. 2 | //! 3 | //! We make use of hash tables to find duplicates. This gives a reasonable compression ratio with a 4 | //! high performance. It has fixed memory usage, which contrary to other approachs, makes it less 5 | //! memory hungry. 6 | 7 | use byteorder::{NativeEndian, ByteOrder}; 8 | 9 | /// Duplication dictionary size. 10 | /// 11 | /// Every four bytes is assigned an entry. 
When this number is lower, fewer entries exists, and 12 | /// thus collisions are more likely, hurting the compression ratio. 13 | const DICTIONARY_SIZE: usize = 4096; 14 | 15 | /// A LZ4 block. 16 | /// 17 | /// This defines a single compression "unit", consisting of two parts, a number of raw literals, 18 | /// and possibly a pointer to the already encoded buffer from which to copy. 19 | #[derive(Debug)] 20 | struct Block { 21 | /// The length (in bytes) of the literals section. 22 | lit_len: usize, 23 | /// The duplicates section if any. 24 | /// 25 | /// Only the last block in a stream can lack of the duplicates section. 26 | dup: Option, 27 | } 28 | 29 | /// A consecutive sequence of bytes found in already encoded part of the input. 30 | #[derive(Copy, Clone, Debug)] 31 | struct Duplicate { 32 | /// The number of bytes before our cursor, where the duplicate starts. 33 | offset: u16, 34 | /// The length beyond the four first bytes. 35 | /// 36 | /// Adding four to this number yields the actual length. 37 | extra_bytes: usize, 38 | } 39 | 40 | /// An LZ4 encoder. 41 | struct Encoder<'a> { 42 | /// The raw uncompressed input. 43 | input: &'a [u8], 44 | /// The compressed output. 45 | output: &'a mut Vec, 46 | /// The number of bytes from the input that are encoded. 47 | cur: usize, 48 | /// The dictionary of previously encoded sequences. 49 | /// 50 | /// This is used to find duplicates in the stream so they are not written multiple times. 51 | /// 52 | /// Every four bytes are hashed, and in the resulting slot their position in the input buffer 53 | /// is placed. This way we can easily look up a candidate to back references. 54 | dict: [usize; DICTIONARY_SIZE], 55 | } 56 | 57 | impl<'a> Encoder<'a> { 58 | /// Go forward by some number of bytes. 59 | /// 60 | /// This will update the cursor and dictionary to reflect the now processed bytes. 61 | /// 62 | /// This returns `false` if all the input bytes are processed. 63 | fn go_forward(&mut self, steps: usize) -> bool { 64 | // Go over all the bytes we are skipping and update the cursor and dictionary. 65 | for _ in 0..steps { 66 | // Insert the cursor position into the dictionary. 67 | self.insert_cursor(); 68 | // Increment the cursor. 69 | self.cur += 1; 70 | } 71 | 72 | // Return `true` if there's more to read. 73 | self.cur <= self.input.len() 74 | } 75 | 76 | /// Insert the batch under the cursor into the dictionary. 77 | fn insert_cursor(&mut self) { 78 | // Make sure that there is at least one batch remaining. 79 | if self.remaining_batch() { 80 | // Insert the cursor into the table. 81 | self.dict[self.get_cur_hash()] = self.cur; 82 | } 83 | } 84 | 85 | /// Check if there are any remaining batches. 86 | fn remaining_batch(&self) -> bool { 87 | self.cur + 4 < self.input.len() 88 | } 89 | 90 | /// Get the hash of the current four bytes below the cursor. 91 | /// 92 | /// This is guaranteed to be below `DICTIONARY_SIZE`. 93 | fn get_cur_hash(&self) -> usize { 94 | // Use PCG transform to generate a relatively good hash of the four bytes batch at the 95 | // cursor. 96 | let mut x = self.get_batch_at_cursor().wrapping_mul(0xa4d94a4f); 97 | let a = x >> 16; 98 | let b = x >> 30; 99 | x ^= a >> b; 100 | x = x.wrapping_mul(0xa4d94a4f); 101 | 102 | x as usize % DICTIONARY_SIZE 103 | } 104 | 105 | /// Read a 4-byte "batch" from some position. 106 | /// 107 | /// This will read a native-endian 4-byte integer from some position. 
108 | fn get_batch(&self, n: usize) -> u32 { 109 | debug_assert!(self.remaining_batch(), "Reading a partial batch."); 110 | 111 | NativeEndian::read_u32(&self.input[n..]) 112 | } 113 | 114 | /// Read the batch at the cursor. 115 | fn get_batch_at_cursor(&self) -> u32 { 116 | self.get_batch(self.cur) 117 | } 118 | 119 | /// Find a duplicate of the current batch. 120 | /// 121 | /// If any duplicate is found, a tuple `(position, size - 4)` is returned. 122 | fn find_duplicate(&self) -> Option { 123 | // If there is no remaining batch, we return none. 124 | if !self.remaining_batch() { 125 | return None; 126 | } 127 | 128 | // Find a candidate in the dictionary by hashing the current four bytes. 129 | let candidate = self.dict[self.get_cur_hash()]; 130 | 131 | // Three requirements to the candidate exists: 132 | // - The candidate is not the trap value (0xFFFFFFFF), which represents an empty bucket. 133 | // - We should not return a position which is merely a hash collision, so w that the 134 | // candidate actually matches what we search for. 135 | // - We can address up to 16-bit offset, hence we are only able to address the candidate if 136 | // its offset is less than or equals to 0xFFFF. 137 | if candidate != !0 138 | && self.get_batch(candidate) == self.get_batch_at_cursor() 139 | && self.cur - candidate <= 0xFFFF { 140 | // Calculate the "extension bytes", i.e. the duplicate bytes beyond the batch. These 141 | // are the number of prefix bytes shared between the match and needle. 142 | let ext = self.input[self.cur + 4..] 143 | .iter() 144 | .zip(&self.input[candidate + 4..]) 145 | .take_while(|&(a, b)| a == b) 146 | .count(); 147 | 148 | Some(Duplicate { 149 | offset: (self.cur - candidate) as u16, 150 | extra_bytes: ext, 151 | }) 152 | } else { None } 153 | } 154 | 155 | /// Write an integer to the output in LSIC format. 156 | fn write_integer(&mut self, mut n: usize) { 157 | // Write the 0xFF bytes as long as the integer is higher than said value. 158 | while n >= 0xFF { 159 | n -= 0xFF; 160 | self.output.push(0xFF); 161 | } 162 | 163 | // Write the remaining byte. 164 | self.output.push(n as u8); 165 | } 166 | 167 | /// Read the block of the top of the stream. 168 | fn pop_block(&mut self) -> Block { 169 | // The length of the literals section. 170 | let mut lit = 0; 171 | 172 | loop { 173 | // Search for a duplicate. 174 | if let Some(dup) = self.find_duplicate() { 175 | // We found a duplicate, so the literals section is over... 176 | 177 | // Move forward. Note that `ext` is actually the steps minus 4, because of the 178 | // minimum matchlenght, so we need to add 4. 179 | self.go_forward(dup.extra_bytes + 4); 180 | 181 | return Block { 182 | lit_len: lit, 183 | dup: Some(dup), 184 | }; 185 | } 186 | 187 | // Try to move forward. 188 | if !self.go_forward(1) { 189 | // We reached the end of the stream, and no duplicates section follows. 190 | return Block { 191 | lit_len: lit, 192 | dup: None, 193 | }; 194 | } 195 | 196 | // No duplicates found yet, so extend the literals section. 197 | lit += 1; 198 | } 199 | } 200 | 201 | /// Complete the encoding into `self.output`. 202 | fn complete(&mut self) { 203 | // Construct one block at a time. 204 | loop { 205 | // The start of the literals section. 206 | let start = self.cur; 207 | 208 | // Read the next block into two sections, the literals and the duplicates. 209 | let block = self.pop_block(); 210 | 211 | // Generate the higher half of the token. 
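            // Worked example (illustrative only, not part of the original code): for a
            // block with lit_len = 18 and a duplicate whose extra_bytes = 2, the high
            // nibble saturates to 0xF and the low nibble is 0x2, giving the token 0xF2;
            // the leftover literal length, 18 - 15 = 3, is then emitted as a single
            // LSIC byte by `write_integer` below.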
212 | let mut token = if block.lit_len < 0xF { 213 | // Since we can fit the literals length into it, there is no need for saturation. 214 | (block.lit_len as u8) << 4 215 | } else { 216 | // We were unable to fit the literals into it, so we saturate to 0xF. We will later 217 | // write the extensional value through LSIC encoding. 218 | 0xF0 219 | }; 220 | 221 | // Generate the lower half of the token, the duplicates length. 222 | let dup_extra_len = block.dup.map_or(0, |x| x.extra_bytes); 223 | token |= if dup_extra_len < 0xF { 224 | // We could fit it in. 225 | dup_extra_len as u8 226 | } else { 227 | // We were unable to fit it in, so we default to 0xF, which will later be extended 228 | // by LSIC encoding. 229 | 0xF 230 | }; 231 | 232 | // Push the token to the output stream. 233 | self.output.push(token); 234 | 235 | // If we were unable to fit the literals length into the token, write the extensional 236 | // part through LSIC. 237 | if block.lit_len >= 0xF { 238 | self.write_integer(block.lit_len - 0xF); 239 | } 240 | 241 | // Now, write the actual literals. 242 | self.output.extend_from_slice(&self.input[start..start + block.lit_len]); 243 | 244 | if let Some(Duplicate { offset, .. }) = block.dup { 245 | // Wait! There's more. Now, we encode the duplicates section. 246 | 247 | // Push the offset in little endian. 248 | self.output.push(offset as u8); 249 | self.output.push((offset >> 8) as u8); 250 | 251 | // If we were unable to fit the duplicates length into the token, write the 252 | // extensional part through LSIC. 253 | if dup_extra_len >= 0xF { 254 | self.write_integer(dup_extra_len - 0xF); 255 | } 256 | } else { 257 | break; 258 | } 259 | } 260 | } 261 | } 262 | 263 | /// Compress all bytes of `input` into `output`. 264 | pub fn compress_into(input: &[u8], output: &mut Vec) { 265 | Encoder { 266 | input: input, 267 | output: output, 268 | cur: 0, 269 | dict: [!0; DICTIONARY_SIZE], 270 | }.complete(); 271 | } 272 | 273 | /// Compress all bytes of `input`. 274 | pub fn compress(input: &[u8]) -> Vec { 275 | // In most cases, the compression won't expand the size, so we set the input size as capacity. 276 | let mut vec = Vec::with_capacity(input.len()); 277 | 278 | compress_into(input, &mut vec); 279 | 280 | vec 281 | } 282 | -------------------------------------------------------------------------------- /lz4/src/decompress.rs: -------------------------------------------------------------------------------- 1 | //! The decompression algorithm. 2 | 3 | use byteorder::{LittleEndian, ByteOrder}; 4 | 5 | quick_error! { 6 | /// An error representing invalid compressed data. 7 | #[derive(Debug)] 8 | pub enum Error { 9 | /// Expected another byte, but none found. 10 | ExpectedAnotherByte { 11 | description("Expected another byte, found none.") 12 | } 13 | /// Deduplication offset out of bounds (not in buffer). 14 | OffsetOutOfBounds { 15 | description("The offset to copy is not contained in the decompressed buffer.") 16 | } 17 | } 18 | } 19 | 20 | /// A LZ4 decoder. 21 | /// 22 | /// This will decode in accordance to the LZ4 format. It represents a particular state of the 23 | /// decompressor. 24 | struct Decoder<'a> { 25 | /// The compressed input. 26 | input: &'a [u8], 27 | /// The decompressed output. 28 | output: &'a mut Vec, 29 | /// The current block's "token". 30 | /// 31 | /// This token contains to 4-bit "fields", a higher and a lower, representing the literals' 32 | /// length and the back reference's length, respectively. 
LSIC is used if either are their 33 | /// maximal values. 34 | token: u8, 35 | } 36 | 37 | impl<'a> Decoder<'a> { 38 | /// Internal (partial) function for `take`. 39 | #[inline] 40 | fn take_imp(input: &mut &'a [u8], n: usize) -> Result<&'a [u8], Error> { 41 | // Check if we have enough bytes left. 42 | if input.len() < n { 43 | // No extra bytes. This is clearly not expected, so we return an error. 44 | Err(Error::ExpectedAnotherByte) 45 | } else { 46 | // Take the first n bytes. 47 | let res = Ok(&input[..n]); 48 | // Shift the stream to left, so that it is no longer the first byte. 49 | *input = &input[n..]; 50 | 51 | // Return the former first byte. 52 | res 53 | } 54 | } 55 | 56 | /// Pop n bytes from the start of the input stream. 57 | fn take(&mut self, n: usize) -> Result<&[u8], Error> { 58 | Self::take_imp(&mut self.input, n) 59 | } 60 | 61 | /// Write a buffer to the output stream. 62 | /// 63 | /// The reason this doesn't take `&mut self` is that we need partial borrowing due to the rules 64 | /// of the borrow checker. For this reason, we instead take some number of segregated 65 | /// references so we can read and write them independently. 66 | fn output(output: &mut Vec, buf: &[u8]) { 67 | // We use simple memcpy to extend the vector. 68 | output.extend_from_slice(&buf[..buf.len()]); 69 | } 70 | 71 | /// Write an already decompressed match to the output stream. 72 | /// 73 | /// This is used for the essential part of the algorithm: deduplication. We start at some 74 | /// position `start` and then keep pushing the following element until we've added 75 | /// `match_length` elements. 76 | fn duplicate(&mut self, start: usize, match_length: usize) { 77 | // We cannot simply use memcpy or `extend_from_slice`, because these do not allow 78 | // self-referential copies: http://ticki.github.io/img/lz4_runs_encoding_diagram.svg 79 | for i in start..start + match_length { 80 | let b = self.output[i]; 81 | self.output.push(b); 82 | } 83 | } 84 | 85 | /// Read an integer LSIC (linear small integer code) encoded. 86 | /// 87 | /// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In 88 | /// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add 89 | /// this byte to our sum and terminate the loop. 90 | /// 91 | /// # Example 92 | /// 93 | /// ```notest 94 | /// 255, 255, 255, 4, 2, 3, 4, 6, 7 95 | /// ``` 96 | /// 97 | /// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 is ignored, because 98 | /// 4 is the first non-0xFF byte. 99 | #[inline] 100 | fn read_integer(&mut self) -> Result { 101 | // We start at zero and count upwards. 102 | let mut n = 0; 103 | // If this byte takes value 255 (the maximum value it can take), another byte is read 104 | // and added to the sum. This repeats until a byte lower than 255 is read. 105 | while { 106 | // We add the next byte until we get a byte which we add to the counting variable. 107 | let extra = self.take(1)?[0]; 108 | n += extra as usize; 109 | 110 | // We continue if we got 255. 111 | extra == 0xFF 112 | } {} 113 | 114 | Ok(n) 115 | } 116 | 117 | /// Read a little-endian 16-bit integer from the input stream. 118 | #[inline] 119 | fn read_u16(&mut self) -> Result { 120 | // We use byteorder to read an u16 in little endian. 121 | Ok(LittleEndian::read_u16(self.take(2)?)) 122 | } 123 | 124 | /// Read the literals section of a block. 
125 | /// 126 | /// The literals section encodes some bytes which are to be copied to the output without any 127 | /// modification. 128 | /// 129 | /// It consists of two parts: 130 | /// 131 | /// 1. An LSIC integer extension to the literals length as defined by the first part of the 132 | /// token, if it takes the highest value (15). 133 | /// 2. The literals themself. 134 | fn read_literal_section(&mut self) -> Result<(), Error> { 135 | // The higher token is the literals part of the token. It takes a value from 0 to 15. 136 | let mut literal = (self.token >> 4) as usize; 137 | // If the initial value is 15, it is indicated that another byte will be read and added to 138 | // it. 139 | if literal == 15 { 140 | // The literal length took the maximal value, indicating that there is more than 15 141 | // literal bytes. We read the extra integer. 142 | literal += self.read_integer()?; 143 | } 144 | 145 | // Now we know the literal length. The number will be used to indicate how long the 146 | // following literal copied to the output buffer is. 147 | 148 | // Read the literals segment and output them without processing. 149 | Self::output(&mut self.output, Self::take_imp(&mut self.input, literal)?); 150 | 151 | Ok(()) 152 | } 153 | 154 | /// Read the duplicates section of the block. 155 | /// 156 | /// The duplicates section serves to reference an already decoded segment. This consists of two 157 | /// parts: 158 | /// 159 | /// 1. A 16-bit little-endian integer defining the "offset", i.e. how long back we need to go 160 | /// in the decoded buffer and copy. 161 | /// 2. An LSIC integer extension to the duplicate length as defined by the first part of the 162 | /// token, if it takes the highest value (15). 163 | fn read_duplicate_section(&mut self) -> Result<(), Error> { 164 | // Now, we will obtain the offset which we will use to copy from the output. It is an 165 | // 16-bit integer. 166 | let offset = self.read_u16()?; 167 | 168 | // Obtain the initial match length. The match length is the length of the duplicate segment 169 | // which will later be copied from data previously decompressed into the output buffer. The 170 | // initial length is derived from the second part of the token (the lower nibble), we read 171 | // earlier. Since having a match length of less than 4 would mean negative compression 172 | // ratio, we start at 4. 173 | let mut match_length = (4 + (self.token & 0xF)) as usize; 174 | 175 | // The intial match length can maximally be 19. As with the literal length, this indicates 176 | // that there are more bytes to read. 177 | if match_length == 4 + 15 { 178 | // The match length took the maximal value, indicating that there is more bytes. We 179 | // read the extra integer. 180 | match_length += self.read_integer()?; 181 | } 182 | 183 | // We now copy from the already decompressed buffer. This allows us for storing duplicates 184 | // by simply referencing the other location. 185 | 186 | // Calculate the start of this duplicate segment. We use wrapping subtraction to avoid 187 | // overflow checks, which we will catch later. 188 | let start = self.output.len().wrapping_sub(offset as usize); 189 | 190 | // We'll do a bound check to avoid panicking. 191 | if start < self.output.len() { 192 | // Write the duplicate segment to the output buffer. 193 | self.duplicate(start, match_length); 194 | 195 | Ok(()) 196 | } else { 197 | Err(Error::OffsetOutOfBounds) 198 | } 199 | } 200 | 201 | /// Complete the decompression by reading all the blocks. 
202 | /// 203 | /// # Decompressing a block 204 | /// 205 | /// Blocks consists of: 206 | /// - A 1 byte token 207 | /// * A 4 bit integer $t_1$. 208 | /// * A 4 bit integer $t_2$. 209 | /// - A $n$ byte sequence of 0xFF bytes (if $t_1 \neq 15$, then $n = 0$). 210 | /// - $x$ non-0xFF 8-bit integers, L (if $t_1 = 15$, $x = 1$, else $x = 0$). 211 | /// - $t_1 + 15n + L$ bytes of uncompressed data (literals). 212 | /// - 16-bits offset (little endian), $a$. 213 | /// - A $m$ byte sequence of 0xFF bytes (if $t_2 \neq 15$, then $m = 0$). 214 | /// - $y$ non-0xFF 8-bit integers, $c$ (if $t_2 = 15$, $y = 1$, else $y = 0$). 215 | /// 216 | /// First, the literals are copied directly and unprocessed to the output buffer, then (after 217 | /// the involved parameters are read) $t_2 + 15m + c$ bytes are copied from the output buffer 218 | /// at position $a + 4$ and appended to the output buffer. Note that this copy can be 219 | /// overlapping. 220 | #[inline] 221 | fn complete(&mut self) -> Result<(), Error> { 222 | // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer 223 | // is empty. 224 | while !self.input.is_empty() { 225 | // Read the token. The token is the first byte in a block. It is divided into two 4-bit 226 | // subtokens, the higher and the lower. 227 | self.token = self.take(1)?[0]; 228 | 229 | // Now, we read the literals section. 230 | self.read_literal_section()?; 231 | 232 | // If the input stream is emptied, we break out of the loop. This is only the case 233 | // in the end of the stream, since the block is intact otherwise. 234 | if self.input.is_empty() { break; } 235 | 236 | // Now, we read the duplicates section. 237 | self.read_duplicate_section()?; 238 | } 239 | 240 | Ok(()) 241 | } 242 | } 243 | 244 | /// Decompress all bytes of `input` into `output`. 245 | pub fn decompress_into(input: &[u8], output: &mut Vec) -> Result<(), Error> { 246 | // Decode into our vector. 247 | Decoder { 248 | input: input, 249 | output: output, 250 | token: 0, 251 | }.complete()?; 252 | 253 | Ok(()) 254 | } 255 | 256 | /// Decompress all bytes of `input`. 257 | pub fn decompress(input: &[u8]) -> Result, Error> { 258 | // Allocate a vector to contain the decompressed stream. 259 | let mut vec = Vec::with_capacity(4096); 260 | 261 | decompress_into(input, &mut vec)?; 262 | 263 | Ok(vec) 264 | } 265 | 266 | #[cfg(test)] 267 | mod test { 268 | use super::*; 269 | 270 | #[test] 271 | fn aaaaaaaaaaa_lots_of_aaaaaaaaa() { 272 | assert_eq!(decompress(&[0x11, b'a', 1, 0]).unwrap(), b"aaaaaa"); 273 | } 274 | 275 | #[test] 276 | fn multiple_repeated_blocks() { 277 | assert_eq!(decompress(&[0x11, b'a', 1, 0, 0x22, b'b', b'c', 2, 0]).unwrap(), b"aaaaaabcbcbcbc"); 278 | } 279 | 280 | #[test] 281 | fn all_literal() { 282 | assert_eq!(decompress(&[0x30, b'a', b'4', b'9']).unwrap(), b"a49"); 283 | } 284 | 285 | #[test] 286 | fn offset_oob() { 287 | decompress(&[0x10, b'a', 2, 0]).unwrap_err(); 288 | decompress(&[0x40, b'a', 1, 0]).unwrap_err(); 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /lz4/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Pure Rust implementation of LZ4 compression. 2 | //! 3 | //! A detailed explanation of the algorithm can be found [here](http://ticki.github.io/blog/how-lz4-works/). 
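//!
//! A minimal round-trip sketch using the two functions this crate re-exports
//! (`compress` and `decompress`, defined in the modules below); the input bytes are an
//! arbitrary example:
//!
//! ```no_run
//! extern crate lz4_compress as lz4;
//!
//! fn main() {
//!     let data = b"yada yada yada yada";
//!     let compressed = lz4::compress(data);
//!     let restored = lz4::decompress(&compressed).unwrap();
//!     assert_eq!(&restored[..], &data[..]);
//! }
//! ```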
4 | 5 | #![warn(missing_docs)] 6 | 7 | extern crate byteorder; 8 | #[macro_use] 9 | extern crate quick_error; 10 | 11 | mod decompress; 12 | mod compress; 13 | #[cfg(test)] 14 | mod tests; 15 | 16 | pub use decompress::decompress; 17 | pub use compress::compress; 18 | -------------------------------------------------------------------------------- /lz4/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate lz4_compress as lz4; 2 | 3 | use std::{env, process}; 4 | use std::io::{self, Write, Read}; 5 | 6 | /// The help page for this command. 7 | const HELP: &'static [u8] = br#" 8 | Introduction: 9 | lz4 - an utility to decompress or compress a raw, headerless LZ4 stream. 10 | Usage: 11 | lz4 [option] 12 | Options: 13 | -c : Compress stdin and write the result to stdout. 14 | -d : Decompress stdin and write the result to stdout. 15 | -h : Write this manpage to stderr. 16 | "#; 17 | 18 | fn main() { 19 | let mut iter = env::args().skip(1); 20 | let mut flag = iter.next().unwrap_or(String::new()); 21 | // If another argument is provided (e.g. the user passes a file name), we need to make sure we 22 | // issue an error properly, so we set back the flag to `""`. 23 | if iter.next().is_some() { 24 | flag = String::new(); 25 | } 26 | 27 | match &*flag { 28 | "-c" => { 29 | // Read stream from stdin. 30 | let mut vec = Vec::new(); 31 | io::stdin().read_to_end(&mut vec).expect("Failed to read stdin"); 32 | 33 | // Compress it and write the result to stdout. 34 | io::stdout().write(&lz4::compress(&vec)).expect("Failed to write to stdout"); 35 | }, 36 | "-d" => { 37 | // Read stream from stdin. 38 | let mut vec = Vec::new(); 39 | io::stdin().read_to_end(&mut vec).expect("Failed to read stdin"); 40 | 41 | // Decompress the input. 42 | let decompressed = lz4::decompress(&vec).expect("Compressed data contains errors"); 43 | 44 | // Write the decompressed buffer to stdout. 45 | io::stdout().write(&decompressed).expect("Failed to write to stdout"); 46 | }, 47 | // If no valid arguments are given, we print the help page. 48 | _ => { 49 | io::stdout().write(HELP).expect("Failed to write to stdout"); 50 | 51 | process::exit(1); 52 | }, 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /lz4/src/tests.rs: -------------------------------------------------------------------------------- 1 | //! Tests. 2 | 3 | use std::str; 4 | 5 | use {decompress, compress}; 6 | 7 | /// Test that the compressed string decompresses to the original string. 8 | fn inverse(s: &str) { 9 | let compressed = compress(s.as_bytes()); 10 | println!("Compressed '{}' into {:?}", s, compressed); 11 | let decompressed = decompress(&compressed).unwrap(); 12 | println!("Decompressed it into {:?}", str::from_utf8(&decompressed).unwrap()); 13 | assert_eq!(decompressed, s.as_bytes()); 14 | } 15 | 16 | #[test] 17 | fn shakespear() { 18 | inverse("to live or not to live"); 19 | inverse("Love is a wonderful terrible thing"); 20 | inverse("There is nothing either good or bad, but thinking makes it so."); 21 | inverse("I burn, I pine, I perish."); 22 | } 23 | 24 | #[test] 25 | fn totally_not_edgy_antifa_propaganda() { 26 | // extra edginess 27 | inverse("The only good fascist is a dead fascist."); 28 | inverse("bash the fash"); 29 | inverse("the fash deserves no bash, only smash"); 30 | inverse("Dead fascists can't vote."); 31 | inverse("Good night, white pride."); 32 | inverse("Some say fascism started with gas chambers. 
I say that's where it ends."); 33 | } 34 | 35 | #[test] 36 | fn not_compressible() { 37 | inverse("as6yhol.;jrew5tyuikbfewedfyjltre22459ba"); 38 | inverse("jhflkdjshaf9p8u89ybkvjsdbfkhvg4ut08yfrr"); 39 | } 40 | 41 | #[test] 42 | fn short() { 43 | inverse("ahhd"); 44 | inverse("ahd"); 45 | inverse("x-29"); 46 | inverse("x"); 47 | inverse("k"); 48 | inverse("."); 49 | inverse("ajsdh"); 50 | } 51 | 52 | #[test] 53 | fn empty_string() { 54 | inverse(""); 55 | } 56 | 57 | #[test] 58 | fn nulls() { 59 | inverse("\0\0\0\0\0\0\0\0\0\0\0\0\0"); 60 | } 61 | 62 | #[test] 63 | fn compression_works() { 64 | let s = "micah (Micah Cohen, politics editor): Clinton’s lead has shrunk to a hair above 4 percentage points in our polls-only model, down from about 7 points two weeks ago. So we find ourselves in an odd position where Clinton still holds a clear lead, but it’s shrinking by the day. I’ve been getting questions from Clinton supporters wondering how panicked they should be, and while we advise everyone of all political stripes to always remain calm, let’s try to answer that question today. How safe is Clinton’s lead/how panicked should Democrats be? As tacky as it is to cite your own tweet, I’m going to do it anyway — here’s a handy scale: natesilver: It’s uncertain, in part, because of the risk of a popular vote-Electoral College split. And, in part, because there are various reasons to think polling error could be high this year, such as the number of undecided voters. You can see those forces at play in the recent tightening. Clinton hasn’t really declined very much in these latest polls. But she was at only 46 percent in national polls, and that left a little bit of wiggle room for Trump."; 65 | 66 | inverse(s); 67 | 68 | assert!(compress(s.as_bytes()).len() < s.len()); 69 | } 70 | 71 | #[test] 72 | fn big_compression() { 73 | let mut s = Vec::with_capacity(80_000000); 74 | 75 | for n in 0..80_000000 { 76 | s.push((n as u8).wrapping_mul(0xA).wrapping_add(33) ^ 0xA2); 77 | } 78 | 79 | assert_eq!(&decompress(&compress(&s)).unwrap(), &s); 80 | } 81 | -------------------------------------------------------------------------------- /mlcr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mlcr" 3 | version = "0.2.0" 4 | authors = ["ticki "] 5 | description = "An adaptive machine-learning-based cache tracker/replacement policy." 6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/mlcr" 8 | license = "MIT" 9 | keywords = ["cache", "replacement", "machine", "neural", "lru"] 10 | exclude = ["target", "Cargo.lock"] 11 | 12 | [dependencies] 13 | crossbeam = "0.2" 14 | nn = "0.1" 15 | parking_lot = "0.3" 16 | -------------------------------------------------------------------------------- /mlcr/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! MLCR: Machine-Learning-based Cache Replacement 2 | //! 3 | //! MLCR trains a neural network to "guess" how long time will pass before the cache block is 4 | //! accessed again. In other words, it provides a qualified guess to approximate the ideal Bélády's 5 | //! algorithm without a time machine. 6 | //! 7 | //! MLCR is slow, because it needs to train a neural network, but in many cases, the added 8 | //! precision pays off by greatly reducing the number of cache misses. As such, it should only be 9 | //! used when the cached medium is significantly slower than training the network (e.g. hard disks or 10 | //! 
internet downloads). 11 | 12 | extern crate crossbeam; 13 | extern crate nn; 14 | extern crate parking_lot; 15 | 16 | use crossbeam::sync::SegQueue; 17 | use nn::NN; 18 | use parking_lot::{Mutex, MutexGuard}; 19 | 20 | use std::{cmp, f64}; 21 | use std::collections::{BinaryHeap, HashMap}; 22 | 23 | /// A clock tick count. 24 | /// 25 | /// Every touch (i.e. read) increments the _clock_ yielding a new _tick_. This tick is roughly used 26 | /// as a measure for the time passed (the actual time is irrelevant as it doesn't change the state 27 | /// of the cache). 28 | /// 29 | /// This tick count is used in the neural network model for the next hit prediction. 30 | type Tick = u32; 31 | /// The ID of a cache block. 32 | /// 33 | /// The ID uniquely identifies a particular cache block inhabitant. It is used in the prediction 34 | /// model and should thus be chosen carefully as representing the inner data (e.g. the disk 35 | /// address) in order to achieve least cache misses. 36 | pub type Id = u64; 37 | 38 | /// A cache block. 39 | /// 40 | /// This represents the state of a particular cache block. 41 | struct Block { 42 | /// The two last times the block was used. 43 | last_used: [Tick; 2], 44 | /// The tick where the block was added. 45 | instated: Tick, 46 | /// The number of times the block has been touched. 47 | times_used: u32, 48 | } 49 | 50 | impl Block { 51 | /// Convert the block data into a vector. 52 | fn as_vec(&self, id: Id) -> Vec { 53 | vec![id as f64, self.instated as f64, self.last_used[0] as f64, self.last_used[1] as f64, 54 | self.times_used as f64] 55 | } 56 | } 57 | 58 | /// A next usage prediction. 59 | /// 60 | /// This contains a prediction produced by the neural network, estimating when is the next tick, 61 | /// the block will be touched. 62 | #[derive(PartialEq)] 63 | struct Prediction { 64 | /// The ID of the block we're predicting. 65 | id: Id, 66 | /// The prediction produced by the neural network. 67 | /// 68 | /// Note that this does not represent a tick, but rather a monotone function thereof. 69 | prediction: f64, 70 | } 71 | 72 | impl cmp::Ord for Prediction { 73 | fn cmp(&self, other: &Prediction) -> cmp::Ordering { 74 | if self.prediction < other.prediction { 75 | cmp::Ordering::Less 76 | } else { 77 | cmp::Ordering::Greater 78 | } 79 | } 80 | } 81 | 82 | impl cmp::PartialOrd for Prediction { 83 | fn partial_cmp(&self, other: &Prediction) -> Option { 84 | Some(self.cmp(other)) 85 | } 86 | } 87 | 88 | impl cmp::Eq for Prediction {} 89 | 90 | /// An iterator over the coldest (best candidates for replacement) to hotter cache objects. 91 | /// 92 | /// This iterators from the objects predicted to be used in the farthest future to the nearest 93 | /// future. 94 | /// 95 | /// In other words, this goes over the best to worse candidates for replacement, trimming, or 96 | /// clearing. 97 | pub struct ColdIter { 98 | /// A binary heap over the predictions ordered by distance into the future. 99 | heap: BinaryHeap, 100 | } 101 | 102 | impl Iterator for ColdIter { 103 | type Item = Id; 104 | 105 | fn next(&mut self) -> Option { 106 | self.heap.pop().map(|Prediction { id, .. }| id) 107 | } 108 | } 109 | 110 | /// A learning cache tracker. 111 | /// 112 | /// This keeps track of cache blocks. 113 | /// 114 | /// A cache block represents some data, which is not managed by the cache tracker. The cache block 115 | /// is said to be _touched_ when this data is used in some way. 
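///
/// A minimal usage sketch (mirroring the crate's own `simple` test; the IDs are
/// arbitrary and stand for whatever the caller caches, e.g. disk addresses):
///
/// ```ignore
/// let mut cache = Cache::new();
/// cache.insert(1);
/// cache.insert(2);
/// cache.touch(2);
/// // Coldest block first, i.e. the best candidate for replacement.
/// let coldest = cache.cold().next();
/// ```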
116 | /// 117 | /// The _ideal replacement_ is the block which is used in the most distant future. As this is not 118 | /// possible to know in advance, we make a prediction or a _approximate ideal replacement_, which 119 | /// is based around various data points of the block such as the time of the last uses, or the 120 | /// number of touches. 121 | /// 122 | /// The aim of the cache tracker is to provided _approximate ideal replacements_. Numerous 123 | /// algorithms for making these predictions exists (examples are LRU, PLRU, LFU, MFU, MRU, ARC, 124 | /// etc.), but MLCR uses an approach which is radically different: It feeds the data points into a 125 | /// neural network and lets this estimate the tick of the next touch. 126 | pub struct Cache { 127 | /// The blocks in this cache tracker. 128 | blocks: HashMap, 129 | /// The neural network mapping blocks to the ticks of next touch. 130 | nn: NN, 131 | /// The clock. 132 | /// 133 | /// This increments on every touch. 134 | clock: Tick, 135 | } 136 | 137 | impl Cache { 138 | /// Tick the clock. 139 | fn tick(&mut self) { 140 | self.clock += 1; 141 | } 142 | 143 | /// Create a new cache tracker. 144 | pub fn new() -> Cache { 145 | Cache { 146 | blocks: HashMap::new(), 147 | nn: NN::new(&[5, 6, 1]), 148 | clock: 0, 149 | } 150 | } 151 | 152 | /// Touch a cache block. 153 | /// 154 | /// This should be called whenever the object `id` represents is used (read, written, etc.). 155 | /// 156 | /// This will train the neural network with the new data. 157 | pub fn touch(&mut self, id: Id) { 158 | { 159 | // Get the block we need. 160 | let block = self.blocks.get_mut(&id).unwrap(); 161 | 162 | // Apply a bijective map from the clock to a float on the range (0,1), which can be 163 | // fed to the network. 164 | let goal = (self.clock as f64 * 0.01).tanh(); 165 | // Train the neural network with the existing data against the clock. 166 | self.nn.train(&[(block.as_vec(id), vec![goal])]); 167 | 168 | // Update the block with last used data. 169 | block.last_used[0] = block.last_used[1]; 170 | block.last_used[1] = self.clock; 171 | // Increment the frequency counter. 172 | block.times_used += 1; 173 | } 174 | 175 | // Tick the clock. 176 | self.tick(); 177 | } 178 | 179 | /// Insert a new cache block into the cache tracker. 180 | pub fn insert(&mut self, id: Id) { 181 | self.blocks.insert(id, Block { 182 | last_used: [!0; 2], 183 | instated: self.clock, 184 | times_used: 0, 185 | }); 186 | } 187 | 188 | /// Remove a cache block. 189 | pub fn remove(&mut self, id: Id) { 190 | self.blocks.remove(&id); 191 | } 192 | 193 | /// Get an iterator over blocks from cold to hot. 194 | pub fn cold(&mut self) -> ColdIter { 195 | // Build a heap over the predictions. 196 | let mut heap = BinaryHeap::new(); 197 | for (&id, block) in self.blocks.iter() { 198 | // Predict the next use. 199 | let prediction = self.nn.run(&block.as_vec(id))[0]; 200 | // Push the prediction to the heap. 201 | heap.push(Prediction { 202 | id: id, 203 | prediction: prediction, 204 | }); 205 | } 206 | 207 | ColdIter { 208 | heap: heap, 209 | } 210 | } 211 | 212 | /// Get at iterator over blocks to remove to trim the cache tracker to `to`. 213 | /// 214 | /// Note that this won't remove the blocks, and this should be handled manually with the 215 | /// `remove` method. 216 | pub fn trim(&mut self, to: usize) -> ::std::iter::Take { 217 | self.cold().take(self.blocks.len() - to) 218 | } 219 | } 220 | 221 | /// A cache operation. 
222 | enum CacheOperation { 223 | /// Create a new cache block with some ID. 224 | Insert(Id), 225 | /// Remove a cache block. 226 | Remove(Id), 227 | /// Touch some block. 228 | Touch(Id), 229 | } 230 | 231 | /// A concurrent cache tracker. 232 | /// 233 | /// This has two parts to it: 234 | /// 235 | /// - A normal cache tracker, protected by a lock. 236 | /// - A queue of cache operations that will be executed when the lock is acquired. 237 | pub struct ConcurrentCache { 238 | /// The inner cache tracker, protected by a lock. 239 | inner: Mutex, 240 | /// The cache tracker operation queue. 241 | /// 242 | /// In order to avoid excessively locking and unlocking the cache tracker, we buffer the 243 | /// operations, which will then be executed in one go, when needed. 244 | queue: SegQueue, 245 | } 246 | 247 | impl ConcurrentCache { 248 | /// Create a new concurrent cache tracker. 249 | pub fn new() -> ConcurrentCache { 250 | ConcurrentCache { 251 | inner: Mutex::new(Cache::new()), 252 | queue: SegQueue::new(), 253 | } 254 | } 255 | 256 | /// Lock the inner cache. 257 | pub fn lock(&self) -> MutexGuard { 258 | // Lock the cache tracker. 259 | let mut lock = self.inner.lock(); 260 | // Commit the buffered operations to the tracker. 261 | while let Some(op) = self.queue.try_pop() { 262 | match op { 263 | CacheOperation::Insert(id) => lock.insert(id), 264 | CacheOperation::Remove(id) => lock.remove(id), 265 | CacheOperation::Touch(id) => lock.touch(id), 266 | } 267 | } 268 | 269 | lock 270 | } 271 | 272 | /// Insert a new cache block. 273 | pub fn insert(&mut self, id: Id) { 274 | self.queue.push(CacheOperation::Insert(id)); 275 | } 276 | 277 | /// Remove a cache block. 278 | pub fn remove(&mut self, id: Id) { 279 | self.queue.push(CacheOperation::Remove(id)); 280 | } 281 | 282 | /// Touch a cache block. 283 | pub fn touch(&mut self, id: Id) { 284 | self.queue.push(CacheOperation::Touch(id)); 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /mlcr/tests/simple.rs: -------------------------------------------------------------------------------- 1 | extern crate mlcr; 2 | 3 | #[test] 4 | fn simple() { 5 | let mut cache = mlcr::Cache::new(); 6 | 7 | cache.insert(1); 8 | cache.insert(2); 9 | cache.insert(3); 10 | cache.insert(4); 11 | cache.insert(100); 12 | cache.insert(200); 13 | 14 | cache.touch(100); 15 | cache.touch(100); 16 | cache.touch(1); 17 | cache.touch(2); 18 | cache.touch(2); 19 | cache.touch(2); 20 | cache.touch(2); 21 | cache.touch(2); 22 | cache.touch(100); 23 | cache.touch(2); 24 | cache.touch(2); 25 | cache.touch(2); 26 | cache.touch(100); 27 | cache.touch(100); 28 | cache.touch(100); 29 | cache.touch(1); 30 | cache.touch(2); 31 | 32 | assert_eq!(cache.cold().next(), Some(200)); 33 | assert_eq!(cache.cold().next(), Some(100)); 34 | assert_eq!(cache.cold().next(), Some(1)); 35 | } 36 | -------------------------------------------------------------------------------- /notes/compression.md: -------------------------------------------------------------------------------- 1 | TFS uses an unique form of file system compression, namely it does full-disk compression, possibly at the first file system ever. 2 | 3 | Obviously, compressing the full disk in one piece is practically impossible, however, dividing it into smaller chunks (each of a couple of clusters) means that you can have random access. 4 | 5 | We call our approach RACC: Random access cluster compression. 
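
To make the chunking idea concrete, here is a hypothetical sketch (not actual TFS code; the names and the `decompress` parameter are stand-ins for whatever codec is used, e.g. the `lz4-compress` crate in this repository):

```rust
/// Read one byte at `offset` from a disk stored as independently compressed chunks.
/// Only the chunk containing the byte has to be decompressed, which is what makes
/// the access random rather than sequential.
fn read_byte<D>(chunks: &[Vec<u8>], chunk_size: usize, offset: usize, decompress: D) -> u8
    where D: Fn(&[u8]) -> Vec<u8>
{
    let chunk = offset / chunk_size;   // which compressed chunk holds the byte
    let within = offset % chunk_size;  // position inside the decompressed chunk
    decompress(&chunks[chunk])[within]
}
```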
6 | 7 | The core idea is to fit as many "pages" (virtual clusters) into a cluster as possible. If we have N pages, which can be compressed to fit into one cluster, we can simply pass an offset into that cluster. 8 | 9 | Depending on the cluster size, this can be somewhat space inefficient with many modern compression algorithms, which is why it is important that the compression curve isn't steep. 10 | 11 | In other words, the ideal compression algorithm shouldn't need a minimum base of data like a header, but should decode linearly. This is why adaptive algorithms are best. 12 | 13 | # Ideas that are being considered 14 | 15 | - Eliminate the compression flag by regularly compressing the clusters, instead of doing it on-the-go. 16 | -------------------------------------------------------------------------------- /notes/vdevs.md: -------------------------------------------------------------------------------- 1 | Vdevs or virtual devices are nodes in the disk configuration stack. Each vdev transforms a virtual disk to another virtual disk. 2 | 3 | Vdevs might provide various features, such as error correction and RAID, encryption, and more. 4 | 5 | The vdev configuration is stored in the disk header. 6 | -------------------------------------------------------------------------------- /notes/zmicro.md: -------------------------------------------------------------------------------- 1 | ZMicro is a compression algorithm, which was specifically designed for use 2 | in RACC. 3 | 4 | ZMicro uses a standard bit-by-bit arithmetic encoder, but has an adaptive 5 | model. 6 | 7 | The model consists of various submodels: 8 | 9 | - Prediction by partial matching with context congruence classes modulo N of order M. 10 | - Average bit on same congruence class modulo N. 11 | - Bit dependency table per N bits. 12 | - Fifty-fifty (identity) 13 | - Repeat last 14 | 15 | These are combined through adaptive context mixing, which works by weighting 16 | with the match rate. 17 | 18 | If model M reads a bit b=1 with a prediction P(b=1) of it being one, then P(b=1) is 19 | the error. If b=0, then P(b=0) = 1 - P(b=1) is the error. 20 | 21 | Each model gets an accumulated error rate, which consists of a weighted sum of 22 | the individual errors. For every new error, the sum is multiplied by some 23 | number K≤1, called the cool-down factor, then the error is added. 24 | 25 | Models are compared by ratio. For example, model M1 is better than M2 by a 26 | factor of M1/M2. In other words, a model M is judged on its ratio to the sum of 27 | all error accumulations. 28 | 29 | If a model makes up a k'th (for some factor k called the exit factor) of the 30 | accumulated error sum, it is dropped and its error accumulation is subtracted 31 | from the overall sum. 32 | -------------------------------------------------------------------------------- /seahash/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "seahash" 3 | version = "3.0.5" 4 | authors = ["ticki "] 5 | description = "A blazingly fast, portable hash function with proven statistical guarantees."
6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/seahash" 8 | license = "MIT" 9 | keywords = ["hash", "hashing", "checksum", "checksumming", "portable"] 10 | exclude = ["target", "Cargo.lock"] 11 | -------------------------------------------------------------------------------- /seahash/README.md: -------------------------------------------------------------------------------- 1 |
Logo
2 | =================== 3 | 4 | SeaHash: A bizarrely fast hash function. 5 | 6 | SeaHash is a hash function with performance better than (around 3-20% improvement) xxHash and 7 | MetroHash. Furthermore, SeaHash has mathematically provable statistical guarantees. 8 | 9 | In action: 10 | 11 | [![The hash function in action.](http://ticki.github.io/img/seahash_construction_diagram.svg)](http://ticki.github.io/img/seahash_construction_diagram.svg) 12 | -------------------------------------------------------------------------------- /seahash/benches/gigabyte.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | extern crate seahash; 5 | 6 | #[bench] 7 | fn gigabyte(b: &mut test::Bencher) { 8 | b.iter(|| { 9 | let mut x = 0; 10 | let mut buf = [15; 4096]; 11 | 12 | for _ in 0..250000 { 13 | x ^= seahash::hash(&buf); 14 | buf[0] += buf[0].wrapping_add(1); 15 | } 16 | 17 | x 18 | }) 19 | } 20 | -------------------------------------------------------------------------------- /seahash/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/redox-os/tfs/344ce0f56dee996f098d239ccc3515df0f6bb8ad/seahash/logo.png -------------------------------------------------------------------------------- /seahash/src/helper.rs: -------------------------------------------------------------------------------- 1 | //! Helper functions. 2 | 3 | /// Read a buffer smaller than 8 bytes into an integer in little-endian. 4 | /// 5 | /// This assumes that `buf.len() < 8`. If this is not satisfied, the behavior is unspecified. 6 | #[inline(always)] 7 | pub fn read_int(buf: &[u8]) -> u64 { 8 | // Because we want to make sure that it is register allocated, we fetch this into a variable. 9 | // It will likely make no difference anyway, though. 10 | let ptr = buf.as_ptr(); 11 | 12 | unsafe { 13 | // Break it down to reads of integers with widths in total spanning the buffer. This minimizes 14 | // the number of reads 15 | match buf.len() { 16 | // u8. 17 | 1 => *ptr as u64, 18 | // u16. 19 | 2 => (*(ptr as *const u16)).to_le() as u64, 20 | // u16 + u8. 21 | 3 => { 22 | let a = (*(ptr as *const u16)).to_le() as u64; 23 | let b = *ptr.offset(2) as u64; 24 | 25 | a | (b << 16) 26 | }, 27 | // u32. 28 | 4 => (*(ptr as *const u32)).to_le() as u64, 29 | // u32 + u8. 30 | 5 => { 31 | let a = (*(ptr as *const u32)).to_le() as u64; 32 | let b = *ptr.offset(4) as u64; 33 | 34 | a | (b << 32) 35 | }, 36 | // u32 + u16. 37 | 6 => { 38 | let a = (*(ptr as *const u32)).to_le() as u64; 39 | let b = (*(ptr.offset(4) as *const u16)).to_le() as u64; 40 | 41 | a | (b << 32) 42 | }, 43 | // u32 + u16 + u8. 44 | 7 => { 45 | let a = (*(ptr as *const u32)).to_le() as u64; 46 | let b = (*(ptr.offset(4) as *const u16)).to_le() as u64; 47 | let c = *ptr.offset(6) as u64; 48 | 49 | a | (b << 32) | (c << 48) 50 | }, 51 | _ => 0, 52 | } 53 | } 54 | } 55 | 56 | /// Read a little-endian 64-bit integer from some buffer. 57 | #[inline(always)] 58 | pub unsafe fn read_u64(ptr: *const u8) -> u64 { 59 | #[cfg(target_pointer_width = "32")] 60 | { 61 | // We cannot be sure about the memory layout of a potentially emulated 64-bit integer, so 62 | // we read it manually. If possible, the compiler should emit proper instructions. 
63 | (*(ptr as *const u32)).to_le() as u64 | ((*(ptr as *const u32)).to_le() as u64) << 32 64 | } 65 | 66 | #[cfg(target_pointer_width = "64")] 67 | { 68 | (*(ptr as *const u64)).to_le() 69 | } 70 | } 71 | 72 | /// The diffusion function. 73 | /// 74 | /// This is a bijective function emitting chaotic behavior. Such functions are used as building 75 | /// blocks for hash functions. 76 | pub fn diffuse(mut x: u64) -> u64 { 77 | // These are derived from the PCG RNG's round. Thanks to @Veedrac for proposing this. The basic 78 | // idea is that we use dynamic shifts, which are determined by the input itself. The shift is 79 | // chosen by the higher bits, which means that changing those flips the lower bits, which 80 | // scatters upwards because of the multiplication. 81 | 82 | x = x.wrapping_mul(0x6eed0e9da4d94a4f); 83 | let a = x >> 32; 84 | let b = x >> 60; 85 | x ^= a >> b; 86 | x = x.wrapping_mul(0x6eed0e9da4d94a4f); 87 | 88 | x 89 | } 90 | 91 | /// Reverse the `diffuse` function. 92 | pub fn undiffuse(mut x: u64) -> u64 { 93 | // 0x2f72b4215a3d8caf is the modular multiplicative inverse of the constant used in `diffuse`. 94 | 95 | x = x.wrapping_mul(0x2f72b4215a3d8caf); 96 | let a = x >> 32; 97 | let b = x >> 60; 98 | x ^= a >> b; 99 | x = x.wrapping_mul(0x2f72b4215a3d8caf); 100 | 101 | x 102 | } 103 | 104 | #[cfg(test)] 105 | mod tests { 106 | use super::*; 107 | 108 | fn diffuse_test(x: u64, y: u64) { 109 | assert_eq!(diffuse(x), y); 110 | assert_eq!(x, undiffuse(y)); 111 | assert_eq!(undiffuse(diffuse(x)), x); 112 | } 113 | 114 | #[test] 115 | fn read_int_() { 116 | assert_eq!(read_int(&[2, 3]), 770); 117 | assert_eq!(read_int(&[3, 2]), 515); 118 | assert_eq!(read_int(&[3, 2, 5]), 328195); 119 | } 120 | 121 | #[test] 122 | fn read_u64_() { 123 | unsafe { 124 | assert_eq!(read_u64([1, 0, 0, 0, 0, 0, 0, 0].as_ptr()), 1); 125 | assert_eq!(read_u64([2, 1, 0, 0, 0, 0, 0, 0].as_ptr()), 258); 126 | } 127 | } 128 | 129 | #[test] 130 | fn diffuse_test_vectors() { 131 | diffuse_test(94203824938, 17289265692384716055); 132 | diffuse_test(0xDEADBEEF, 12110756357096144265); 133 | diffuse_test(0, 0); 134 | diffuse_test(1, 15197155197312260123); 135 | diffuse_test(2, 1571904453004118546); 136 | diffuse_test(3, 16467633989910088880); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /seahash/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! SeaHash: A blazingly fast, portable hash function with proven statistical guarantees. 2 | //! 3 | //! SeaHash is a hash function with performance better than (around 3-20% improvement) xxHash and 4 | //! MetroHash. Furthermore, SeaHash has mathematically provable statistical guarantees. 5 | //! 6 | //! SeaHash is a portable hash function, meaning that the output is not dependent on the hosting 7 | //! architecture, and makes no assumptions on endianness or the alike. This stable layout allows it 8 | //! to be used for on-disk/permanent storage (e.g. checksums). 9 | //! 10 | //! # Design, advantages, and features 11 | //! 12 | //! - **High quality**: It beats most other general purpose hash functions because it provides full 13 | //! avalanche inbetween state updates. 14 | //! - **Performance**: SeaHash beats every high-quality (grading 10/10 in smhasher) hash function 15 | //! that I know of. 16 | //! - **Provable quality guarantees**: Contrary to most other non-cryptographic hash function, 17 | //! 
SeaHash can be proved to satisfy the avalanche criterion as well as BIC. 18 | //! - **Parallelizable**: Consists of multiple, independent states to take advantage of ILP and/or 19 | //! software threads. 20 | //! - **Bulk reads**: Reads 8 or 4 bytes a time. 21 | //! - **Stable and portable**: Does not depend on the target architecture, and produces a stable 22 | //! value, which is only changed in major version bumps. 23 | //! - **Keyed**: Designed to not leak the seed/key. Note that it has not gone through 24 | //! cryptoanalysis yet, so the keyed version shouldn't be relied on when security is needed. 25 | //! - **Hardware accelerateable**: SeaHash is designed such that ASICs can implement it with really 26 | //! high performance. 27 | //! 28 | //! # A word of warning! 29 | //! 30 | //! This is **not** a cryptographic function, and it certainly should not be used as one. If you 31 | //! want a good cryptographic hash function, you should use SHA-3 (Keccak) or BLAKE2. 32 | //! 33 | //! It is not secure, nor does it aim to be. It aims to have high quality pseudorandom output and 34 | //! few collisions, as well as being fast. 35 | //! 36 | //! # Benchmark 37 | //! 38 | //! On normal hardware, it is expected to run with a rate around 5.9-6.7 GB/S on a 2.5 GHz CPU. 39 | //! Further improvement can be seen when hashing very big buffers in parallel. 40 | //! 41 | //! | Function | Quality | Cycles per byte (lower is better) | Author 42 | //! |-------------|---------------|-----------------------------------|------------------- 43 | //! | **SeaHash** | **Excellent** | **0.24** | **Ticki** 44 | //! | xxHash | Excellent | 0.31 | Collet 45 | //! | MetroHash | Excellent | 0.35 | Rogers 46 | //! | Murmur | Excellent | 0.64 | Appleby 47 | //! | Rabin | Medium | 1.51 | Rabin 48 | //! | CityHash | Excellent | 1.62 | Pike, Alakuijala 49 | //! | LoseLose | Terrible | 2.01 | Kernighan, Ritchie 50 | //! | FNV | Poor | 3.12 | Fowler, Noll, Vo 51 | //! | SipHash | Pseudorandom | 3.21 | Aumasson, Bernstein 52 | //! | CRC | Good | 3.91 | Peterson 53 | //! | DJB2 | Poor | 4.13 | Bernstein 54 | //! 55 | //! ## Ideal architecture 56 | //! 57 | //! SeaHash is designed and optimized for the most common architecture in use: 58 | //! 59 | //! - Little-endian 60 | //! - 64-bit 61 | //! - 64 or more bytes cache lines 62 | //! - 4 or more instruction pipelines 63 | //! - 4 or more 64-bit registers 64 | //! 65 | //! Anything that does not hold the above requirements will perform worse by up to 30-40%. Note that 66 | //! this means it is still faster than CityHash (~1 GB/S), MurMurHash (~2.6 GB/S), FNV (~0.5 GB/S), 67 | //! etc. 68 | //! 69 | //! # Achieving the performance 70 | //! 71 | //! Like any good general-purpose hash function, SeaHash reads 8 bytes at once effectively reducing 72 | //! the running time by an order of ~5. 73 | //! 74 | //! Secondly, SeaHash achieves the performance by heavily exploiting Instruction-Level Parallelism. 75 | //! In particular, it fetches 4 integers in every round and independently diffuses them. This 76 | //! yields four different states, which are finally combined. 77 | //! 78 | //! # Statistical guarantees 79 | //! 80 | //! SeaHash comes with certain proven guarantees about the statistical properties of the output: 81 | //! 82 | //! 1. Pick some _n_-byte sequence, _s_. The number of _n_-byte sequence colliding with _s_ is 83 | //! independent of the choice of _s_ (all equivalence class have equal size). 84 | //! 2. 
If you flip any bit in the input, the probability for any bit in the output to be flipped is 85 | //! 0.5. 86 | //! 3. The hash value of a sequence of uniformly distributed bytes is itself uniformly distributed. 87 | //! 88 | //! The first guarantee can be derived through deduction, by proving that the diffusion function is 89 | //! bijective (reverse the XORs and find the congruence inverses to the primes). 90 | //! 91 | //! The second guarantee requires more complex calculations: Construct a matrix of probabilities 92 | //! and set one to certain (1), then apply transformations through the respective operations. The 93 | //! proof is a bit long, but relatively simple. 94 | //! 95 | //! The third guarantee requires proving that the hash value is a tree, such that: 96 | //! - Leafs represents the input values. 97 | //! - Single-child nodes reduce to the diffusion of the child. 98 | //! - Multiple-child nodes reduce to the sum of the children. 99 | //! 100 | //! Then simply show that each of these reductions transform uniformly distributed variables to 101 | //! uniformly distributed variables. 102 | //! 103 | //! # Inner workings 104 | //! 105 | //! In technical terms, SeaHash follows a alternating 4-state length-padded Merkle–Damgård 106 | //! construction with an XOR-diffuse compression function (click to enlarge): 107 | //! 108 | //! [![A diagram.](http://ticki.github.io/img/seahash_construction_diagram.svg)] 109 | //! (http://ticki.github.io/img/seahash_construction_diagram.svg) 110 | //! 111 | //! It starts with 4 initial states, then it alternates between them (increment, wrap on 4) and 112 | //! does XOR with the respective block. When a state has been visited the diffusion function (f) is 113 | //! applied. The very last block is padded with zeros. 114 | //! 115 | //! After all the blocks have been gone over, all the states are XOR'd to the number of bytes 116 | //! written. The sum is then passed through the diffusion function, which produces the final hash 117 | //! value. 118 | //! 119 | //! The diffusion function is drawn below. 120 | //! 121 | //! ```notest 122 | //! x ← px 123 | //! x ← x ⊕ ((x ≫ 32) ≫ (x ≫ 60)) 124 | //! x ← px 125 | //! ``` 126 | //! 127 | //! The advantage of having four completely segregated (note that there is no mix round, so they're 128 | //! entirely independent) states is that fast parallelism is possible. For example, if I were to 129 | //! hash 1 TB, I can spawn up four threads which can run independently without _any_ 130 | //! intercommunication or synchronization before the last round. 131 | //! 132 | //! If the diffusion function (f) was cryptographically secure, it would pass cryptoanalysis 133 | //! trivially. This might seem irrelevant, as it clearly isn't cryptographically secure, but it 134 | //! tells us something about the inner semantics. In particular, any diffusion function with 135 | //! sufficient statistical quality will make up a good hash function in this construction. 136 | //! 137 | //! Read [the blog post](http://ticki.github.io/blog/seahash-explained/) for more details. 138 | //! 139 | //! # ASIC version 140 | //! 141 | //! SeaHash is specifically designed such that it can be efficiently implemented in the form of 142 | //! ASIC while only using very few transistors. 143 | //! 144 | //! # Specification 145 | //! 146 | //! See the [`reference`](./reference) module. 147 | //! 148 | //! # Credits 149 | //! 150 | //! Aside for myself (@ticki), there are couple of other people who have helped creating this. 151 | //! 
Joshua Landau suggested using the [PCG family of diffusions](http://www.pcg-random.org/), 152 | //! created by Melissa E. O'Neill. Sokolov Yura spotted multiple bugs in SeaHash. 153 | 154 | #![no_std] 155 | #![warn(missing_docs)] 156 | 157 | pub use buffer::{hash, hash_seeded, State}; 158 | pub use stream::SeaHasher; 159 | 160 | pub mod reference; 161 | mod buffer; 162 | mod helper; 163 | mod stream; 164 | -------------------------------------------------------------------------------- /seahash/src/reference.rs: -------------------------------------------------------------------------------- 1 | //! A slow, but clear reference implementation of SeaHash. 2 | //! 3 | //! # Specification 4 | //! 5 | //! The input buffer is padded with null bytes until the length is divisible by 8. 6 | //! 7 | //! We start out with state 8 | //! 9 | //! ```notest 10 | //! a = 0x16f11fe89b0d677c 11 | //! b = 0xb480a793d8e6c86c 12 | //! c = 0x6fe2e5aaf078ebc9 13 | //! d = 0x14f994a4c5259381 14 | //! ``` 15 | //! 16 | //! If a seed is given, each of the initial state component are modularly multiplied by the seed. 17 | //! 18 | //! From the stream, we read one 64-bit block (in little-endian) at a time. This number, `n`, 19 | //! determines the new state by: 20 | //! 21 | //! ```notest 22 | //! a' = b 23 | //! b' = c 24 | //! c' = d 25 | //! d' = g(a ⊕ n) 26 | //! ``` 27 | //! 28 | //! `g(x)` is defined as `g(x) = j(h(j(x)))` with `h(x) = (x ≫ 32) ≫ (x ≫ 60)` and `j(x) ≡ px (mod 29 | //! 2^64)` with `p = 0x7ed0e9fa0d94a33`. 30 | //! 31 | //! Let the final state be `(x, y, z, w)`. Then the final result is given by `H = g(x ⊕ y ⊕ z ⊕ w ⊕ 32 | //! l)` where `l` is the number of bytes in the original buffer. 33 | 34 | use helper; 35 | 36 | /// Read an integer in little-endian. 37 | fn read_int(int: &[u8]) -> u64 { 38 | debug_assert!(int.len() <= 8, "The buffer length of the integer must be less than or equal to \ 39 | the one of an u64."); 40 | 41 | // Start at 0. 42 | let mut x = 0; 43 | for &i in int.iter().rev() { 44 | // Shift up a byte. 45 | x <<= 8; 46 | // Set the lower byte. 47 | x |= i as u64; 48 | } 49 | 50 | x 51 | } 52 | 53 | /// A hash state. 54 | struct State { 55 | /// The `a` substate. 56 | a: u64, 57 | /// The `b` substate. 58 | b: u64, 59 | /// The `c` substate. 60 | c: u64, 61 | /// The `d` substate. 62 | d: u64, 63 | } 64 | 65 | impl State { 66 | /// Write a 64-bit integer to the state. 67 | fn write_u64(&mut self, x: u64) { 68 | let mut a = self.a; 69 | 70 | // Mix `x` into `a`. 71 | a = helper::diffuse(a ^ x); 72 | 73 | // Rotate around. 74 | // _______________________ 75 | // | v 76 | // a <---- b <---- c <---- d 77 | self.a = self.b; 78 | self.b = self.c; 79 | self.c = self.d; 80 | self.d = a; 81 | } 82 | 83 | /// Calculate the final hash. 84 | fn finish(self, total: usize) -> u64 { 85 | // Even though XORing is commutative, it doesn't matter, because the state vector's initial 86 | // components are mutually distinct, and thus swapping even and odd chunks will affect the 87 | // result, because it is sensitive to the initial condition. To add discreteness, we 88 | // diffuse. 89 | helper::diffuse( 90 | self.a ^ self.b ^ self.c ^ self.d 91 | // We XOR in the number of written bytes to make it zero-sensitive when excessive bytes 92 | // are written (0u32.0u8 ≠ 0u16.0u8). 93 | ^ total as u64 94 | ) 95 | } 96 | 97 | /// Create a new state with some initial values (seed). 98 | fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State { 99 | State { 100 | // These values are randomly generated. 
101 | a: k1, 102 | b: k2, 103 | c: k3, 104 | d: k4, 105 | } 106 | } 107 | } 108 | 109 | /// A reference implementation of SeaHash. 110 | /// 111 | /// This is bloody slow when compared to the optimized version. This is because SeaHash was 112 | /// specifically designed to take all sorts of hardware and software hacks into account to achieve 113 | /// maximal performance, but this makes code significantly less readable. As such, this version has 114 | /// only one goal: to make the algorithm readable and understandable. 115 | pub fn hash(buf: &[u8]) -> u64 { 116 | hash_seeded( 117 | buf, 118 | 0x16f11fe89b0d677c, 119 | 0xb480a793d8e6c86c, 120 | 0x6fe2e5aaf078ebc9, 121 | 0x14f994a4c5259381 122 | ) 123 | } 124 | 125 | /// The seeded version of the reference implementation. 126 | pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 { 127 | // Initialize the state. 128 | let mut state = State::with_seeds(k1, k2, k3, k4); 129 | 130 | // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last 131 | // block might not be 8 bytes long. 132 | for int in buf.chunks(8) { 133 | // Read the chunk into an integer and write into the state. 134 | state.write_u64(read_int(int)); 135 | } 136 | 137 | // Finish the hash state and return the final value. 138 | state.finish(buf.len()) 139 | } 140 | 141 | #[cfg(test)] 142 | mod tests { 143 | use super::*; 144 | 145 | #[test] 146 | fn shakespear() { 147 | assert_eq!(hash(b"to be or not to be"), 1988685042348123509); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /seahash/src/stream.rs: -------------------------------------------------------------------------------- 1 | use core::hash::Hasher; 2 | 3 | use {hash_seeded, helper}; 4 | 5 | /// The streaming version of the algorithm. 6 | pub struct SeaHasher { 7 | /// The state of the hasher. 8 | state: u64, 9 | /// The first key. 10 | k1: u64, 11 | /// The second key. 12 | k2: u64, 13 | /// The third key. 14 | k3: u64, 15 | /// The fourth key. 16 | k4: u64, 17 | } 18 | 19 | impl Default for SeaHasher { 20 | fn default() -> SeaHasher { 21 | SeaHasher::with_seeds(0xe7b0c93ca8525013, 0x011d02b854ae8182, 0x7bcc5cf9c39cec76, 0xfa336285d102d083) 22 | } 23 | } 24 | 25 | impl SeaHasher { 26 | /// Create a new `SeaHasher` with default state. 27 | pub fn new() -> SeaHasher { 28 | SeaHasher::default() 29 | } 30 | 31 | /// Construct a new `SeaHasher` given some seed. 32 | /// 33 | /// For maximum quality, these seeds should be chosen at random. 34 | pub fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> SeaHasher { 35 | SeaHasher { 36 | state: k1 ^ k3, 37 | k1: k1, 38 | k2: k2, 39 | k3: k3, 40 | k4: k4, 41 | } 42 | } 43 | 44 | /// Write some integer in. 45 | /// 46 | /// This applies XEX key whitening with the keys given as argument. 
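    //
    // Concretely (see the body below): the word is mixed in as
    //
    //     state = diffuse(state ^ n ^ k1) ^ k2
    //
    // so the first key whitens the input of the diffusion and the second key whitens its
    // output; this is the XOR-encrypt-XOR (XEX) pattern with `diffuse` standing in for
    // the "encrypt" step.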
47 | fn write(&mut self, n: u64, k1: u64, k2: u64) { 48 | self.state ^= n ^ k1; 49 | self.state = helper::diffuse(self.state) ^ k2; 50 | } 51 | } 52 | 53 | impl Hasher for SeaHasher { 54 | fn finish(&self) -> u64 { 55 | helper::diffuse(self.state ^ self.k3) ^ self.k4 56 | } 57 | 58 | fn write(&mut self, bytes: &[u8]) { 59 | self.state ^= hash_seeded(bytes, self.k1, self.k2, self.k3, self.k4); 60 | self.state = helper::diffuse(self.state); 61 | } 62 | 63 | fn write_u64(&mut self, n: u64) { 64 | let k1 = self.k1; 65 | let k2 = self.k2; 66 | self.write(n, k1, k2) 67 | } 68 | 69 | fn write_u8(&mut self, n: u8) { 70 | let k1 = self.k1; 71 | let k3 = self.k3; 72 | self.write(n as u64, k1, k3) 73 | } 74 | 75 | fn write_u16(&mut self, n: u16) { 76 | let k1 = self.k1; 77 | let k2 = self.k2; 78 | self.write(n as u64, k2, k1) 79 | } 80 | 81 | fn write_u32(&mut self, n: u32) { 82 | let k2 = self.k2; 83 | let k3 = self.k3; 84 | self.write(n as u64, k2, k3) 85 | } 86 | 87 | fn write_usize(&mut self, n: usize) { 88 | let k2 = self.k2; 89 | let k3 = self.k3; 90 | self.write(n as u64, k3, k2) 91 | } 92 | 93 | fn write_i64(&mut self, n: i64) { 94 | let k1 = self.k1; 95 | let k2 = self.k2; 96 | self.write(n as u64, !k1, !k2) 97 | } 98 | 99 | fn write_i8(&mut self, n: i8) { 100 | let k1 = self.k1; 101 | let k3 = self.k3; 102 | self.write(n as u64, !k1, !k3) 103 | } 104 | 105 | fn write_i16(&mut self, n: i16) { 106 | let k1 = self.k1; 107 | let k2 = self.k2; 108 | self.write(n as u64, !k2, !k1) 109 | } 110 | 111 | fn write_i32(&mut self, n: i32) { 112 | let k2 = self.k2; 113 | let k3 = self.k3; 114 | self.write(n as u64, !k2, !k3) 115 | } 116 | 117 | fn write_isize(&mut self, n: isize) { 118 | let k2 = self.k2; 119 | let k3 = self.k3; 120 | self.write(n as u64, !k3, !k2) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /speck/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "speck" 3 | version = "1.1.0" 4 | authors = ["ticki "] 5 | description = "Implementation of the SPECK block cipher." 
6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/speck" 8 | license = "MIT" 9 | keywords = ["crypto", "cipher", "speck", "block", "simple"] 10 | categories = ["cryptography", "no-std"] 11 | exclude = ["target", "Cargo.lock"] 12 | 13 | [dev-dependencies] 14 | rand = { version = "0.3.16", features = ["i128_support"] } 15 | -------------------------------------------------------------------------------- /speck/benches/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(test, i128_type)] 2 | 3 | extern crate test; 4 | use test::Bencher; 5 | 6 | extern crate rand; 7 | extern crate speck; 8 | 9 | use rand::Rng; 10 | use rand::OsRng; 11 | 12 | use speck::Key; 13 | 14 | #[bench] 15 | fn generate_key(mut bencher: &mut Bencher) { 16 | let mut rng = OsRng::new().unwrap(); 17 | 18 | let key_input = rng.gen(); 19 | 20 | bencher.iter(|| test::black_box(Key::new(key_input))); 21 | } 22 | 23 | #[bench] 24 | fn encrypt(mut bencher: &mut Bencher) { 25 | let (key, block) = gen_test(); 26 | 27 | bencher.iter(|| test::black_box(key.encrypt_block(block))); 28 | } 29 | 30 | #[bench] 31 | fn decrypt(mut bencher: &mut Bencher) { 32 | let (key, block) = gen_test(); 33 | 34 | bencher.iter(|| test::black_box(key.decrypt_block(block))); 35 | } 36 | 37 | fn gen_test() -> (Key, u128) { 38 | let mut rng = OsRng::new().unwrap(); 39 | 40 | (Key::new(rng.gen()), rng.gen()) 41 | } 42 | -------------------------------------------------------------------------------- /speck/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the SPECK block cipher. 2 | //! 3 | //! SPECK is a really simple block cipher designed by the NSA. It is famous for its simple 4 | //! structure and code size, which can fit in just a couple of lines, while still preserving 5 | //! security. 6 | #![no_std] 7 | #![forbid(unsafe_code)] 8 | 9 | use core::fmt; 10 | 11 | /// The number of rounds. 12 | const ROUNDS: u64 = 32; 13 | 14 | /// A single round of SPECK. 15 | /// 16 | /// This is a keyed ARX transformation. 17 | macro_rules! round { 18 | ($x:ident, $y:ident, $k:ident) => { 19 | $x = $x.rotate_right(8); 20 | $x = $x.wrapping_add($y); 21 | $x ^= $k; 22 | $y = $y.rotate_left(3); 23 | $y ^= $x; 24 | } 25 | } 26 | 27 | /// Revert a SPECK round given some subkey. 28 | macro_rules! inv_round { 29 | ($x:ident, $y:ident, $k:ident) => { 30 | $y ^= $x; 31 | $y = $y.rotate_right(3); 32 | $x ^= $k; 33 | $x = $x.wrapping_sub($y); 34 | $x = $x.rotate_left(8); 35 | } 36 | } 37 | 38 | /// Encrypt a block with key schedule generated on-the-go. 39 | /// 40 | /// This works great for one-time use of a key (such as usages other than encryption), because it 41 | /// should never read from memory (both the message and the keys are stored in the registers). As 42 | /// such, this should be really fast for such usage. 43 | /// 44 | /// If you want to reuse the key, however, it is recommended that you use the precomputed schedule 45 | /// provided by the `Key` struct. 46 | pub fn encrypt_block(m: u128, k: u128) -> u128 { 47 | let mut m1 = (m >> 64) as u64; 48 | let mut m2 = m as u64; 49 | let mut k1 = (k >> 64) as u64; 50 | let mut k2 = k as u64; 51 | 52 | // Run the initial round (similar to the loop below, but doesn't update the key schedule). 53 | round!(m1, m2, k2); 54 | 55 | for i in 0..ROUNDS - 1 { 56 | // Progress the key schedule. 57 | round!(k1, k2, i); 58 | // Run a round over the message. 
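        // Note that the subkey used here is the `k2` that was just produced by the
        // schedule step above; the same `round!` transformation drives both the key
        // schedule and the encryption, which is what keeps this on-the-fly variant small.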
59 | round!(m1, m2, k2); 60 | } 61 | 62 | m2 as u128 | (m1 as u128) << 64 63 | } 64 | 65 | /// A precomputed key. 66 | /// 67 | /// This precomputes a key schedule, which can then be used for both encrypting and decrypting 68 | /// messages. 69 | #[derive(Copy, Clone, PartialEq, Eq, Hash)] 70 | pub struct Key { 71 | /// The computed schedule. 72 | /// 73 | /// Each of these subkeys are used in a round of the cipher. The first subkey is used in the 74 | /// first round of the cipher and so on. 75 | schedule: [u64; ROUNDS as usize], 76 | } 77 | 78 | impl fmt::Debug for Key { 79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 80 | write!(f, "[REDACTED]") 81 | } 82 | } 83 | 84 | impl Key { 85 | /// Generate a new key from some seed. 86 | pub fn new(k: u128) -> Key { 87 | let mut k1 = (k >> 64) as u64; 88 | let mut k2 = k as u64; 89 | 90 | let mut ret = Key { 91 | schedule: [0; ROUNDS as usize], 92 | }; 93 | 94 | // Run `ROUNDS - 1` rounds to generate the key's endpoint (the last key in the schedule). 95 | for i in 0..ROUNDS { 96 | // Insert the key into the schedule. 97 | ret.schedule[i as usize] = k2; 98 | 99 | // The beautiful thing about SPECK is that it reuses its round function to generate the 100 | // key schedule. 101 | round!(k1, k2, i); 102 | } 103 | 104 | ret 105 | } 106 | 107 | /// Encrypt a 128-bit block with this key. 108 | pub fn encrypt_block(&self, m: u128) -> u128 { 109 | let mut m1 = (m >> 64) as u64; 110 | let mut m2 = m as u64; 111 | 112 | // We run a round for every subkey in the generated key schedule. 113 | for &k in &self.schedule { 114 | // Run a round on the message. 115 | round!(m1, m2, k); 116 | } 117 | 118 | m2 as u128 | (m1 as u128) << 64 119 | } 120 | 121 | /// Decrypt a 128-bit block with this key. 122 | pub fn decrypt_block(&self, c: u128) -> u128 { 123 | let mut c1 = (c >> 64) as u64; 124 | let mut c2 = c as u64; 125 | 126 | // We run a round for every subkey in the generated key schedule. 127 | for &k in self.schedule.iter().rev() { 128 | // Run a round on the message. 129 | inv_round!(c1, c2, k); 130 | } 131 | 132 | c2 as u128 | (c1 as u128) << 64 133 | } 134 | } 135 | 136 | #[cfg(test)] 137 | mod tests { 138 | use super::*; 139 | 140 | #[test] 141 | fn encrypt_decrypt() { 142 | for mut x in 0u128..90000 { 143 | // <3 144 | x = x.wrapping_mul(0x6eed0e9da4d94a4f6eed0e9da4d94a4f); 145 | x ^= (x >> 6) >> (x >> 122); 146 | x = x.wrapping_mul(0x6eed0e9da4d94a4f6eed0e9da4d94a4f); 147 | 148 | let key = Key::new(!x); 149 | 150 | assert_eq!(key.decrypt_block(key.encrypt_block(x)), x); 151 | assert_eq!(key.encrypt_block(x), encrypt_block(x, !x)); 152 | } 153 | } 154 | 155 | #[test] 156 | fn test_vectors() { 157 | // These test vectors are taken from the SPECK paper. 158 | assert_eq!( 159 | encrypt_block( 160 | 0x6c617669757165207469206564616d20, 161 | 0x0f0e0d0c0b0a09080706050403020100 162 | ), 163 | 0xa65d9851797832657860fedf5c570d18 164 | ); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /thread-object/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "thread-object" 3 | version = "0.2.0" 4 | authors = ["ticki "] 5 | description = "Abstraction over thread-local storage allowing dynamic creation of TLS variables." 
6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/thread-object" 8 | license = "MIT" 9 | keywords = ["tls", "thread-local", "storage", "object", "thread"] 10 | exclude = ["target", "Cargo.lock"] 11 | -------------------------------------------------------------------------------- /thread-object/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Thread-specific objects. 2 | //! 3 | //! This is an abstraction over usual thread-local storage, adding a special type which has a value 4 | //! for every thread. 5 | //! 6 | //! This means that you can dynamically create TLS variables, as opposed to the classical fixed 7 | //! static variable. This means that you can store the object reference in a struct, and have many 8 | //! in the same thread. 9 | //! 10 | //! It works by holding a TLS variable with a binary tree map associating unique object IDs with 11 | //! pointers to the object. 12 | //! 13 | //! Performance wise, this is suboptimal, but it is portable contrary to most other approaches. 14 | 15 | #![feature(const_fn)] 16 | 17 | use std::any::Any; 18 | use std::cell::RefCell; 19 | use std::collections::BTreeMap; 20 | use std::mem; 21 | use std::sync::atomic; 22 | 23 | /// The ID counter. 24 | /// 25 | /// This is incremented when a new object is created, associating an unique value with the object. 26 | static ID_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::new(0); 27 | 28 | thread_local! { 29 | /// This thread's thread object maps. 30 | /// 31 | /// This maps IDs to pointers to the associated object. 32 | static THREAD_OBJECTS: RefCell>> = RefCell::new(BTreeMap::new()); 33 | } 34 | 35 | /// A multi-faced object. 36 | /// 37 | /// An initial value is chosen upon creation. This value will be copied once the thread reads it 38 | /// for the first time. The value can be read and written, but will only be presented for the 39 | /// current thread. As such, it is "many-faced" meaning that different threads view different 40 | /// values. 41 | #[derive(Copy, Clone)] 42 | pub struct Object { 43 | /// The initial value cloned when read by a new thread. 44 | initial: T, 45 | /// The ID of the object. 46 | id: usize, 47 | } 48 | 49 | impl Object { 50 | /// Create a new thread object with some initial value. 51 | /// 52 | /// The specified value `initial` will be the value assigned when new threads read the object. 53 | pub fn new(initial: T) -> Object { 54 | Object { 55 | initial: initial, 56 | // Increment the ID counter and use the previous value. Relaxed ordering is fine as it 57 | // guarantees uniqueness, which is the only constraint we need. 58 | id: ID_COUNTER.fetch_add(1, atomic::Ordering::Relaxed), 59 | } 60 | } 61 | } 62 | 63 | impl Object { 64 | /// Read and/or modify the value associated with this thread. 65 | /// 66 | /// This reads the object's value associated with the current thread, and initializes it if 67 | /// necessary. The mutable reference to the object is passed through the closure `f` and the 68 | /// return value of said closure is then returned. 69 | /// 70 | /// The reason we use a closure is to prevent the programmer leaking the pointer to another 71 | /// thread, causing memory safety issues as the pointer is only valid in the current thread. 72 | pub fn with(&self, f: F) -> R 73 | where F: FnOnce(&mut T) -> R { 74 | // We'll fetch it from the thread object map. 75 | THREAD_OBJECTS.with(|map| { 76 | // TODO: Eliminate this `RefCell`. 
77 | let mut guard = map.borrow_mut(); 78 | // Fetch the pointer to the object, and initialize if it doesn't exist. 79 | let ptr = guard.entry(self.id).or_insert_with(|| Box::new(self.initial.clone())); 80 | // Run it through the provided closure. 81 | f(ptr.downcast_mut().unwrap()) 82 | }) 83 | } 84 | 85 | /// Replace the inner value. 86 | /// 87 | /// This replaces the inner value with `new` and returns the old value. 88 | pub fn replace(&self, new: T) -> T { 89 | self.with(|x| mem::replace(x, new)) 90 | } 91 | 92 | /// Copy the inner value. 93 | pub fn get(&self) -> T 94 | where T: Copy { 95 | self.with(|x| *x) 96 | } 97 | } 98 | 99 | impl Default for Object { 100 | fn default() -> Object { 101 | Object::new(T::default()) 102 | } 103 | } 104 | 105 | #[cfg(test)] 106 | mod tests { 107 | use super::*; 108 | 109 | use std::thread; 110 | use std::sync::{Mutex, Arc}; 111 | 112 | #[test] 113 | fn initial_value() { 114 | let obj = Object::new(23); 115 | obj.with(|&mut x| assert_eq!(x, 23)); 116 | assert_eq!(obj.with(|&mut x| x), 23); 117 | } 118 | 119 | #[test] 120 | fn string() { 121 | let obj = Object::new(String::new()); 122 | 123 | obj.with(|x| { 124 | assert!(x.is_empty()); 125 | 126 | x.push('b'); 127 | }); 128 | 129 | obj.with(|x| { 130 | assert_eq!(x, "b"); 131 | 132 | x.push('a'); 133 | }); 134 | 135 | obj.with(|x| { 136 | assert_eq!(x, "ba"); 137 | }); 138 | } 139 | 140 | #[test] 141 | fn multiple_objects() { 142 | let obj1 = Object::new(0); 143 | let obj2 = Object::new(0); 144 | 145 | obj2.with(|x| *x = 1); 146 | 147 | obj1.with(|&mut x| assert_eq!(x, 0)); 148 | obj2.with(|&mut x| assert_eq!(x, 1)); 149 | } 150 | 151 | #[test] 152 | fn multi_thread() { 153 | let obj = Object::new(0); 154 | thread::spawn(move || { 155 | obj.with(|x| *x = 1); 156 | }).join().unwrap(); 157 | 158 | obj.with(|&mut x| assert_eq!(x, 0)); 159 | 160 | thread::spawn(move || { 161 | obj.with(|&mut x| assert_eq!(x, 0)); 162 | obj.with(|x| *x = 2); 163 | }).join().unwrap(); 164 | 165 | obj.with(|&mut x| assert_eq!(x, 0)); 166 | } 167 | 168 | #[test] 169 | fn replace() { 170 | let obj = Object::new(420); // blaze it 171 | assert_eq!(obj.replace(42), 420); 172 | assert_eq!(obj.replace(32), 42); 173 | assert_eq!(obj.replace(0), 32); 174 | } 175 | 176 | #[test] 177 | fn default() { 178 | assert_eq!(Object::::default().get(), 0); 179 | } 180 | 181 | #[derive(Clone)] 182 | struct Dropper { 183 | is_dropped: Arc>, 184 | } 185 | 186 | impl Drop for Dropper { 187 | fn drop(&mut self) { 188 | *self.is_dropped.lock().unwrap() = true; 189 | } 190 | } 191 | 192 | #[test] 193 | fn drop() { 194 | let is_dropped = Arc::new(Mutex::new(false)); 195 | let arc = is_dropped.clone(); 196 | thread::spawn(move || { 197 | let obj = Object::new(Dropper { 198 | is_dropped: arc, 199 | }); 200 | 201 | obj.with(|_| {}); 202 | 203 | mem::forget(obj); 204 | }).join().unwrap(); 205 | 206 | assert!(*is_dropped.lock().unwrap()); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /type-name/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "type-name" 3 | version = "0.1.0" 4 | authors = ["ticki "] 5 | description = "A safe wrapper around the type_name API." 
6 | repository = "https://github.com/ticki/tfs" 7 | documentation = "https://docs.rs/type-name" 8 | license = "MIT" 9 | keywords = ["type", "types", "type-name", "dynamic", "debug"] 10 | exclude = ["target", "Cargo.lock"] 11 | -------------------------------------------------------------------------------- /type-name/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A safe wrapper around the `type_name` API. 2 | 3 | #![no_std] 4 | #![feature(core_intrinsics)] 5 | 6 | use core::intrinsics; 7 | 8 | /// Get the type name of `T`. 9 | pub fn get() -> &'static str { 10 | unsafe { 11 | intrinsics::type_name::() 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /zmicro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zmicro" 3 | version = "0.1.0" 4 | authors = ["ticki "] 5 | 6 | [dependencies] 7 | -------------------------------------------------------------------------------- /zmicro/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod range; 2 | -------------------------------------------------------------------------------- /zmicro/src/range.rs: -------------------------------------------------------------------------------- 1 | //! Ranges. 2 | //! 3 | //! This module contains the main primitive in zmicro. 4 | 5 | struct Range { 6 | start: u32, 7 | end: u32, 8 | } 9 | 10 | impl Range { 11 | fn write(&mut self, bit: bool, pr_0: u32) { 12 | let size_0 = (((self.end - self.start) as u64 << 32 + 0x80000000) / pr_0 as u64) as u32; 13 | 14 | if bit { 15 | self.start += size_0; 16 | self.end -= size_0; 17 | } else { 18 | self.end = self.start + size_0; 19 | } 20 | } 21 | } 22 | 23 | #[cfg(test)] 24 | mod tests { 25 | use super::*; 26 | 27 | #[test] 28 | fn write_read() { 29 | let mut range = Range::full(); 30 | 31 | range.write(true, 5000000); 32 | range.write(true, 2999); 33 | range.write(false, 500000); 34 | range.write(false, 50000000); 35 | range.write(true, 333333); 36 | 37 | assert!( range.read(5000000).unwrap()); 38 | assert!( range.read(2999).unwrap()); 39 | assert!(!range.read(500000).unwrap()); 40 | assert!(!range.read(50000000).unwrap()); 41 | assert!( range.read(333333).unwrap()); 42 | } 43 | 44 | #[test] 45 | fn write_ones() { 46 | let mut range = Range::full(); 47 | 48 | let mut n = 0; 49 | while range.write(true, 500) { 50 | n += 1; 51 | } 52 | 53 | for _ in 0..n { 54 | assert_eq!(range.read(500), Some(true)); 55 | } 56 | 57 | assert_eq!(range.read(500), None); 58 | } 59 | 60 | #[test] 61 | fn balanced_ones() { 62 | let mut range = Range::full(); 63 | 64 | while range.write(true, 0x80000000) {} 65 | 66 | assert_eq!(range.start, 0xFFFFFFFF); 67 | } 68 | 69 | #[test] 70 | fn balanced_zeros() { 71 | let mut range = Range::full(); 72 | 73 | while range.write(false, 0x80000000) {} 74 | 75 | assert_eq!(range.start, 0); 76 | } 77 | 78 | #[test] 79 | fn unbalanced_ones() { 80 | let mut range = Range::full(); 81 | 82 | while range.write(true, 30482) {} 83 | 84 | assert_eq!(range.start, 0xFFFFFFFF); 85 | } 86 | } 87 | --------------------------------------------------------------------------------
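The `range.rs` above is clearly work in progress: the tests call `Range::full` and `Range::read`, which the file does not yet define, and the shift in `write` parses as `<< (32 + 0x80000000)` because `<<` binds more loosely than `+` in Rust. As a hedged sketch of the interval-narrowing step behind the bit-by-bit coder that notes/zmicro.md refers to (the fixed-point meaning of `pr_0` is an assumption here, and this is not the author's intended implementation), the write half of a binary range coder can look like this:

```rust
/// Sketch of a binary range coder's narrowing step; an illustration of the technique,
/// not the zmicro implementation. `pr_0` is taken to be the model's probability of a
/// zero bit as a 32-bit fixed-point fraction, so 0x8000_0000 means one half.
struct Range {
    start: u32,
    end: u32,
}

impl Range {
    fn full() -> Range {
        Range { start: 0, end: u32::max_value() }
    }

    /// Narrow the interval to the sub-interval corresponding to `bit`.
    fn write(&mut self, bit: bool, pr_0: u32) {
        // The zero bit is assigned the lower `pr_0` fraction of the current width.
        let width = (self.end - self.start) as u64;
        let size_0 = ((width * pr_0 as u64) >> 32) as u32;

        if bit {
            // A one occupies the remaining upper part of the interval.
            self.start += size_0;
        } else {
            // A zero occupies the lower part.
            self.end = self.start + size_0;
        }
    }
}
```

A decoder mirrors this: compare the encoded value against `start + size_0` to recover the bit, then narrow the interval the same way, which is presumably what the `read` used by the tests is meant to do.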