├── .gitignore ├── README.md ├── bloom_filter.hpp ├── detail.hpp ├── frequency_sketch.hpp ├── test └── test.cpp └── wtinylfu.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | TODO 2 | a.out 3 | misc 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Window-TinyLFU Cache 2 | This is a barebones C++11 header-only implementation of the state-of-the-art cache admission policy proposed in [this paper](https://arxiv.org/abs/1512.00727) with details borrowed from [Caffeine](https://github.com/ben-manes/caffeine)'s own implementation. 3 | 4 | ### Note 5 | My original use case for this cache was very specific, so some features are absent - most notably thread-safety and hash collision protection. 6 | -------------------------------------------------------------------------------- /bloom_filter.hpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 https://github.com/mandreyel 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | * software and associated documentation files (the "Software"), to deal in the Software 5 | * without restriction, including without limitation the rights to use, copy, modify, 6 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | * permit persons to whom the Software is furnished to do so, subject to the following 8 | * conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in all copies 11 | * or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | */ 20 | 21 | #ifndef BLOOM_FILTER_HEADER 22 | #define BLOOM_FILTER_HEADER 23 | 24 | #include "detail.hpp" 25 | 26 | #include 27 | #include 28 | 29 | /** 30 | * Standard 1 bit Bloom filter. 31 | * http://www.cs.princeton.edu/courses/archive/spr05/cos598E/bib/bloom_filters.pdf 32 | */ 33 | template< 34 | typename T, 35 | typename Hash = std::hash 36 | > class bloom_filter 37 | { 38 | std::vector bitset_; 39 | int capacity_; 40 | int num_hashes_; 41 | 42 | public: 43 | explicit bloom_filter(int capacity, double false_positive_error_rate = 0.01) 44 | : bloom_filter(capacity, false_positive_error_rate, 45 | best_bitset_size(capacity, false_positive_error_rate), 46 | best_num_hashes(capacity, false_positive_error_rate)) 47 | {} 48 | 49 | bloom_filter(int capacity, double false_positive_error_rate, 50 | int bitset_size, int num_hashes) 51 | : bitset_(bitset_size) 52 | , capacity_(capacity) 53 | , num_hashes_(num_hashes) 54 | {} 55 | 56 | /** 57 | * A truthy return value indicates that the item may or may not have been accessed. 58 | * A falsy return value guarantees that the item has not been accessed. 
59 | */ 60 | bool contains(const T& t) const noexcept 61 | { 62 | // idea use a single 64bit hash and use the upper and lower parts as the two 63 | // base hashes TODO 64 | const uint32_t hash1 = detail::hash(t); 65 | const uint32_t hash2 = Hash()(t); 66 | for(auto i = 0; i < num_hashes_; ++i) 67 | { 68 | if(!bitset_[double_hash(hash1, hash2, i)]) { return false; } 69 | } 70 | return true; 71 | } 72 | 73 | void record_access(const T& t) 74 | { 75 | const uint32_t hash1 = detail::hash(t); 76 | const uint32_t hash2 = Hash()(t); 77 | for(auto i = 0; i < num_hashes_; ++i) 78 | { 79 | bitset_[double_hash(hash1, hash2, i)] = true; 80 | } 81 | } 82 | 83 | void clear() noexcept 84 | { 85 | bitset_.clear(); 86 | } 87 | 88 | protected: 89 | // From: http://matthias.vallentin.net/blog/2011/06/a-garden-variety-of-bloom-filters/ 90 | static int best_bitset_size(const int capacity, const double error_rate) noexcept 91 | { 92 | return std::ceil(-1 * capacity * std::log(error_rate) / std::pow(std::log(2), 2)); 93 | } 94 | 95 | static int best_num_hashes(const int capacity, const double error_rate) noexcept 96 | { 97 | const auto bitset_size = best_bitset_size(capacity, error_rate); 98 | return std::round(std::log(2) * bitset_size / double(capacity)); 99 | } 100 | 101 | uint32_t double_hash(uint32_t hash1, uint32_t hash2, int i) const noexcept 102 | { 103 | return (hash1 + i * hash2) % bitset_.size(); 104 | } 105 | }; 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /detail.hpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 https://github.com/mandreyel 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | * software and associated documentation files (the "Software"), to deal in the Software 5 | * without restriction, including without limitation the rights to use, copy, modify, 6 | * merge, publish, distribute, sublicense, 
// ---- /detail.hpp ----
// Copyright 2017 https://github.com/mandreyel. Distributed under the MIT license.

#ifndef DETAIL_HEADER
#define DETAIL_HEADER

#include <climits>
#include <cstddef>
#include <cstdint>
#include <type_traits>

namespace detail
{
    /**
     * Bob Jenkins' One-at-a-Time hash, see:
     * http://www.burtleburtle.net/bob/hash/doobs.html
     *
     * Hashes the object representation (the raw bytes) of $t, so two values only
     * hash equally if their bytes are equal. It is therefore only meaningful for
     * trivially copyable types without padding; for types that own memory (e.g.
     * std::string) the bytes hashed are those of the handle, not of the content.
     *
     * Bytes are read as unsigned char, as in the reference algorithm, so the
     * result does not depend on whether plain char is signed.
     *
     * Not constexpr: reinterpret_cast is never permitted in a constant expression.
     */
    template<typename T>
    std::uint32_t hash(const T& t) noexcept
    {
        const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&t);
        std::uint32_t h = 0;

        for(std::size_t i = 0; i < sizeof(T); ++i)
        {
            h += bytes[i];
            h += h << 10;
            h ^= h >> 6;
        }

        h += h << 3;
        h ^= h >> 11;
        h += h << 15;

        return h;
    }

    /** Counts set bits by repeatedly clearing the lowest one (C++11 constexpr). */
    constexpr int count_set_bits(unsigned long long x) noexcept
    {
        return x == 0 ? 0 : 1 + count_set_bits(x & (x - 1));
    }

    /**
     * Returns the number of set bits in x. Also known as Hamming Weight.
     *
     * All sizeof(T) * CHAR_BIT bits of $x are considered, so a negative value of
     * a signed type counts its sign bit as well.
     */
    template<
        typename T,
        typename std::enable_if<std::is_integral<T>::value, int>::type = 0
    > constexpr int popcount(T x) noexcept
    {
        static_assert(sizeof(T) <= sizeof(unsigned long long),
            "popcount supports integral types of at most unsigned long long's width");
        // The mask drops the sign-extension bits that the widening conversion adds
        // for negative values of types narrower than unsigned long long.
        return count_set_bits(static_cast<unsigned long long>(x)
            & (~0ULL >> ((sizeof(unsigned long long) - sizeof(T)) * CHAR_BIT)));
    }

    /** Sets every bit below the highest set bit of $x; helper of the function below. */
    constexpr std::uint32_t smear_right(std::uint32_t x, int shift) noexcept
    {
        return shift > 16 ? x : smear_right(x | (x >> shift), shift * 2);
    }

    // From: http://graphics.stanford.edu/~seander/bithacks.html
    /**
     * Rounds $x up to a power of two: returns the smallest power of two that is
     * not less than $x. A power of two is returned unchanged. Returns 0 for 0 and
     * for values above 2^31, where the result would not fit in 32 bits.
     */
    constexpr std::uint32_t nearest_power_of_two(std::uint32_t x) noexcept
    {
        return smear_right(x - 1, 1) + 1;
    }
} // namespace detail

#endif
19 | */ 20 | 21 | #ifndef FREQUENCY_SKETCH_HEADER 22 | #define FREQUENCY_SKETCH_HEADER 23 | 24 | #include "detail.hpp" 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | /** 32 | * A probabilistic set for estimating the popularity (frequency) of an element within an 33 | * access frequency based time window. The maximum frequency of an element is limited 34 | * to 15 (4-bits). 35 | * 36 | * NOTE: the capacity will be the nearest power of two of the input capacity (for various 37 | * efficiency and hash distribution gains). 38 | * 39 | * This is a slightly altered version of Caffeine's implementation: 40 | * https://github.com/ben-manes/caffeine 41 | * 42 | * The white paper: 43 | * http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf 44 | */ 45 | template 46 | class frequency_sketch 47 | { 48 | // Holds 64 bit blocks, each of which holds sixteen 4 bit counters. For simplicity's 49 | // sake, the 64 bit blocks are partitioned into four 16 bit sub-blocks, and the four 50 | // counters corresponding to some T is within a single such sub-block. 51 | std::vector table_; 52 | 53 | // Incremented with each call to record_access, if the frequency of the item could 54 | // be incremented, and halved when sampling size is reached. 
55 | int size_; 56 | 57 | public: 58 | explicit frequency_sketch(int capacity) 59 | { 60 | change_capacity(capacity); 61 | } 62 | 63 | void change_capacity(const int n) 64 | { 65 | if(n <= 0) 66 | { 67 | throw std::invalid_argument("frequency_sketch capacity must be larger than 0"); 68 | } 69 | table_.resize(detail::nearest_power_of_two(n)); 70 | size_ = 0; 71 | } 72 | 73 | bool contains(const T& t) const noexcept 74 | { 75 | return frequency(t) > 0; 76 | } 77 | 78 | int frequency(const T& t) const noexcept 79 | { 80 | const uint32_t hash = detail::hash(t); 81 | int frequency = std::numeric_limits::max(); 82 | 83 | for(auto i = 0; i < 4; ++i) 84 | { 85 | frequency = std::min(frequency, get_count(hash, i)); 86 | } 87 | 88 | return frequency; 89 | } 90 | 91 | void record_access(const T& t) noexcept 92 | { 93 | const uint32_t hash = detail::hash(t); 94 | bool was_added = false; 95 | 96 | for(auto i = 0; i < 4; ++i) 97 | { 98 | was_added |= try_increment_counter_at(hash, i); 99 | } 100 | 101 | if(was_added && (++size_ == sampling_size())) 102 | { 103 | reset(); 104 | } 105 | } 106 | 107 | private: 108 | int get_count(const uint32_t hash, const int counter_index) const noexcept 109 | { 110 | const int table_index = this->table_index(hash, counter_index); 111 | const int offset = counter_offset(hash, counter_index); 112 | return (table_[table_index] >> offset) & 0xfL; 113 | } 114 | 115 | /** 116 | * Returns the table index where the counter associated with $hash at 117 | * $counter_index resides (since each item is mapped to four different counters in 118 | * $table_, an index is necessary to differentiate between each). 
119 | */ 120 | int table_index(const uint32_t hash, const int counter_index) const noexcept 121 | { 122 | static constexpr uint64_t seeds[] = { 123 | 0xc3a5c85c97cb3127L, 124 | 0xb492b66fbe98f273L, 125 | 0x9ae16a3b2f90404fL, 126 | 0xcbf29ce484222325L 127 | }; 128 | uint64_t h = seeds[counter_index] * hash; 129 | h += h >> 32; 130 | return h & (table_.size() - 1); 131 | } 132 | 133 | /** 134 | * Increments ${counter_index}th counter by 1 if it's below the maximum value (15). 135 | * Returns true if the counter was incremented. 136 | */ 137 | bool try_increment_counter_at(const uint32_t hash, const int counter_index) 138 | { 139 | const int index = table_index(hash, counter_index); 140 | const int offset = counter_offset(hash, counter_index); 141 | if(can_increment_counter_at(index, offset)) 142 | { 143 | table_[index] += 1L << offset; 144 | return true; 145 | } 146 | return false; 147 | } 148 | 149 | /** 150 | * $table_ holds 64 bit blocks, while counters are 4 bit wide, i.e. there are 16 151 | * counters in a block. 152 | * This function determines the start offset of the ${counter_index}th counter 153 | * associated with $hash. 154 | * Offset may be [0, 60] and is a multiple of 4. $counter_index must be [0, 3]. 155 | */ 156 | int counter_offset(const uint32_t hash, const int counter_index) const noexcept 157 | { 158 | return (offset_multiplier(hash) + counter_index) << 2; 159 | } 160 | 161 | /** 162 | * $table_ holds 64 bit blocks, and each block is partitioned into four 16 bit 163 | * parts, starting at 0, 16, 32 and 48. Each part is further divided into four 4 bit 164 | * sub-parts (e.g. 0, 4, 8, 12), which are the start offsets of the counters. 165 | * 166 | * All counters of an item are within the same logical 16 bit part (though most 167 | * likely not in the same 64 bit block if the hash does its job). Which 16 bit part 168 | * an item is placed into is determined by its two least significant bits, which 169 | * this function determines. 
170 | * 171 | * The return value may be 0, 4, 8 or 12. 172 | */ 173 | int offset_multiplier(const uint32_t hash) const noexcept 174 | { 175 | return (hash & 3) << 2; 176 | } 177 | 178 | /** Returns true if the counter has not reached the limit of 15. */ 179 | bool can_increment_counter_at(const int table_index, const int offset) const noexcept 180 | { 181 | const uint64_t mask = 0xfL << offset; 182 | return (table_[table_index] & mask) != mask; 183 | } 184 | 185 | /** Halves every counter and adjusts $size_. */ 186 | void reset() noexcept 187 | { 188 | for(auto& counters : table_) 189 | { 190 | // Do a 'bitwise_and' on each (4 bit) counter with 0111 (7) so as to 191 | // eliminate the bit that got shifted over from the counter to the left to 192 | // the leftmost position of the current counter. 193 | counters = (counters >> 1) & 0x7777777777777777L; 194 | } 195 | size_ /= 2; 196 | } 197 | 198 | /** 199 | * The reset operation is launched when $size_ reaches the value returned by this 200 | * function. 201 | */ 202 | int sampling_size() const noexcept 203 | { 204 | return table_.size() * 10; 205 | } 206 | }; 207 | 208 | #endif 209 | -------------------------------------------------------------------------------- /test/test.cpp: -------------------------------------------------------------------------------- 1 | #include "../wtinylfu.hpp" 2 | #include "../bloom_filter.hpp" 3 | #include 4 | 5 | struct big_object 6 | { 7 | char data[4096]; 8 | }; 9 | 10 | int main() 11 | { 12 | #define NUM_ENTRIES 1024 13 | #define SELECTED_BEGIN 100 14 | #define SELECTED_END 120 15 | 16 | wtinylfu_cache cache(NUM_ENTRIES); 17 | 18 | for(auto i = 0; i < NUM_ENTRIES; ++i) { 19 | cache.insert(i, big_object()); 20 | } 21 | 22 | for(auto i = 0; i < NUM_ENTRIES; ++i) { 23 | assert(cache[i]); 24 | } 25 | 26 | // Repeatedly access a few elements, pumping up their access frequencies. 
27 | for(auto i = 0; i < 10; ++i) { 28 | for(auto s = SELECTED_BEGIN; s < SELECTED_END; ++s) { 29 | cache[s]; 30 | } 31 | } 32 | 33 | // Insert enough new entries (with new keys) to leave just num_selected in cache. 34 | for(auto i = 0; i < NUM_ENTRIES - (SELECTED_END - SELECTED_BEGIN); ++i) { 35 | cache.insert(i + NUM_ENTRIES, big_object()); 36 | } 37 | 38 | // Make sure selected entries were not evicted. 39 | for(auto s = SELECTED_BEGIN; s < SELECTED_END; ++s) { 40 | assert(cache[s]); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /wtinylfu.hpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 https://github.com/mandreyel 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | * software and associated documentation files (the "Software"), to deal in the Software 5 | * without restriction, including without limitation the rights to use, copy, modify, 6 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | * permit persons to whom the Software is furnished to do so, subject to the following 8 | * conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in all copies 11 | * or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | */ 20 | 21 | #ifndef WTINYLFU_HEADER 22 | #define WTINYLFU_HEADER 23 | 24 | #include "frequency_sketch.hpp" 25 | #include "detail.hpp" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | /** 34 | * Window-TinyLFU Cache as per: https://arxiv.org/pdf/1512.00727.pdf 35 | * 36 | * 37 | * Window Cache Victim .---------. Main Cache Victim 38 | * .------------------->| TinyLFU |<-----------------. 39 | * | `---------' | 40 | * .-------------------. | .------------------. 41 | * | Window Cache (1%) | | | Main Cache (99%) | 42 | * | (LRU) | | | (SLRU) | 43 | * `-------------------' | `------------------' 44 | * ^ | ^ 45 | * | `---------------' 46 | * new item Winner 47 | * 48 | * 49 | * New entries are first placed in the window cache where they remain as long as they 50 | * have high temporal locality. An entry that's pushed out of the window cache gets a 51 | * chance to be admitted in the front of the main cache. If the main cache is full, 52 | * the TinyLFU admission policy determines whether this entry is to replace the main 53 | * cache's next victim based on TinyLFU's implementation defined historic frequency 54 | * filter. Currently a 4 bit frequency sketch is employed. 55 | * 56 | * TinyLFU's periodic reset operation ensures that lingering entries that are no longer 57 | * accessed are evicted. 58 | * 59 | * Values are stored in shared_ptr instances in order to ensure memory safety when 60 | * a cache entry is evicted while it is still being used by user. 61 | * 62 | * It is advised that trivially copiable, small keys be used as there persist two 63 | * copies of each within the cache. 64 | * 65 | * NOTE: it is NOT thread-safe! 
66 | */ 67 | template< 68 | typename K, 69 | typename V 70 | > class wtinylfu_cache 71 | { 72 | enum class cache_slot 73 | { 74 | window, 75 | probationary, 76 | eden 77 | }; 78 | 79 | struct page 80 | { 81 | K key; 82 | enum cache_slot cache_slot; 83 | std::shared_ptr data; 84 | 85 | page(K key_, enum cache_slot cache_slot_, std::shared_ptr data_) 86 | : key(std::move(key_)) 87 | , cache_slot(cache_slot_) 88 | , data(data_) 89 | {} 90 | }; 91 | 92 | class lru 93 | { 94 | std::list lru_; 95 | int capacity_; 96 | 97 | public: 98 | using page_position = typename std::list::iterator; 99 | using const_page_position = typename std::list::const_iterator; 100 | 101 | explicit lru(int capacity) : capacity_(capacity) {} 102 | 103 | int size() const noexcept { return lru_.size(); } 104 | int capacity() const noexcept { return capacity_; } 105 | bool is_full() const noexcept { return size() >= capacity(); } 106 | 107 | /** 108 | * NOTE: doesn't actually remove any pages, it only sets the capacity. 109 | * 110 | * This is because otherwise there'd be no way to delete the corresponding 111 | * entries from the page map outside of this LRU instance, so this is handled 112 | * externally. 113 | */ 114 | void set_capacity(const int n) noexcept { capacity_ = n; } 115 | 116 | /** Returns the position of the hottest (most recently used) page. */ 117 | page_position mru_pos() noexcept { return lru_.begin(); } 118 | const_page_position mru_pos() const noexcept { return lru_.begin(); } 119 | 120 | /** Returns the position of the coldest (least recently used) page. 
*/ 121 | page_position lru_pos() noexcept { return --lru_.end(); } 122 | const_page_position lru_pos() const noexcept { return --lru_.end(); } 123 | 124 | const K& victim_key() const noexcept 125 | { 126 | return lru_pos()->key; 127 | } 128 | 129 | void evict() 130 | { 131 | erase(lru_pos()); 132 | } 133 | 134 | void erase(page_position page) 135 | { 136 | lru_.erase(page); 137 | } 138 | 139 | /** Inserts new page at the MRU position of the cache. */ 140 | template 141 | page_position insert(Args&&... args) 142 | { 143 | return lru_.emplace(mru_pos(), std::forward(args)...); 144 | } 145 | 146 | /** Moves page to the MRU position. */ 147 | void handle_hit(page_position page) 148 | { 149 | transfer_page_from(page, *this); 150 | } 151 | 152 | /** Moves page from $source to the MRU position of this cache. */ 153 | void transfer_page_from(page_position page, lru& source) 154 | { 155 | lru_.splice(mru_pos(), source.lru_, page); 156 | } 157 | }; 158 | 159 | /** 160 | * A cache which is divided into two segments, a probationary and an eden 161 | * segment. Both are LRU caches. 162 | * 163 | * Pages that are cache hits are promoted to the top (MRU position) of the eden 164 | * segment, regardless of the segment in which they currently reside. Thus, pages 165 | * within the eden segment have been accessed at least twice. 166 | * 167 | * Pages that are cache misses are added to the cache at the MRU position of the 168 | * probationary segment. 169 | * 170 | * Each segment is finite in size, so the migration of a page from the probationary 171 | * segment may force the LRU page of the eden segment into the MRU position of 172 | * the probationary segment, giving it another chance. Likewise, if both segments 173 | * reach their capacity, a new entry is replaced with the LRU victim of the 174 | * probationary segment. 175 | * 176 | * In this implementation 80% of the capacity is allocated to the eden (or "hot") 177 | * pages and 20% for pages under probation (the "cold" pages). 
178 | */ 179 | class slru 180 | { 181 | lru eden_; 182 | lru probationary_; 183 | 184 | public: 185 | using page_position = typename lru::page_position; 186 | using const_page_position = typename lru::const_page_position; 187 | 188 | explicit slru(int capacity) : slru(0.8f * capacity, capacity - 0.8f * capacity) 189 | { 190 | // correct truncation error 191 | if(this->capacity() < capacity) 192 | { 193 | eden_.set_capacity(eden_.capacity() + 1); 194 | } 195 | } 196 | 197 | slru(int eden_capacity, int probationary_capacity) 198 | : eden_(eden_capacity) 199 | , probationary_(probationary_capacity) 200 | {} 201 | 202 | const int size() const noexcept 203 | { 204 | return eden_.size() + probationary_.size(); 205 | } 206 | 207 | const int capacity() const noexcept 208 | { 209 | return eden_.capacity() + probationary_.capacity(); 210 | } 211 | 212 | const bool is_full() const noexcept 213 | { 214 | return size() >= capacity(); 215 | } 216 | 217 | void set_capacity(const int n) 218 | { 219 | eden_.set_capacity(0.8f * n); 220 | probationary_.set_capacity(n - eden_.capacity()); 221 | } 222 | 223 | page_position victim_pos() noexcept 224 | { 225 | return probationary_.lru_pos(); 226 | } 227 | 228 | const_page_position victim_pos() const noexcept 229 | { 230 | return probationary_.lru_pos(); 231 | } 232 | 233 | const K& victim_key() const noexcept 234 | { 235 | return victim_pos()->key; 236 | } 237 | 238 | void evict() 239 | { 240 | probationary_.evict(); 241 | } 242 | 243 | void erase(page_position page) 244 | { 245 | if(page->cache_slot == cache_slot::eden) 246 | eden_.erase(page); 247 | else 248 | probationary_.erase(page); 249 | } 250 | 251 | /** Moves page to the MRU position of the probationary segment. 
*/ 252 | void transfer_page_from(page_position page, lru& source) 253 | { 254 | probationary_.transfer_page_from(page, source); 255 | page->cache_slot = cache_slot::probationary; 256 | } 257 | 258 | /** 259 | * If page is in the probationary segment: 260 | * promotes page to the MRU position of the eden segment, and if eden segment 261 | * capacity is reached, moves the LRU page of the eden segment to the MRU 262 | * position of the probationary segment. 263 | * 264 | * Otherwise, page is in eden: 265 | * promotes page to the MRU position of eden. 266 | */ 267 | void handle_hit(page_position page) 268 | { 269 | if(page->cache_slot == cache_slot::probationary) 270 | { 271 | promote_to_eden(page); 272 | if(eden_.is_full()) { demote_to_probationary(eden_.lru_pos()); } 273 | } 274 | else 275 | { 276 | assert(page->cache_slot == cache_slot::eden); // this shouldn't happen 277 | eden_.handle_hit(page); 278 | } 279 | } 280 | 281 | private: 282 | void promote_to_eden(page_position page) 283 | { 284 | eden_.transfer_page_from(page, probationary_); 285 | page->cache_slot = cache_slot::eden; 286 | } 287 | 288 | void demote_to_probationary(page_position page) 289 | { 290 | probationary_.transfer_page_from(page, eden_); 291 | page->cache_slot = cache_slot::probationary; 292 | } 293 | }; 294 | 295 | frequency_sketch filter_; 296 | 297 | // Maps keys to page positions of the LRU caches pointing to a page. 298 | std::map page_map_; 299 | 300 | // Allocated 1% of the total capacity. Window victims are granted the chance to 301 | // reenter the cache (into $main_). This is to remediate the problem where sparse 302 | // bursts cause repeated misses in the regular TinyLfu architecture. 303 | lru window_; 304 | 305 | // Allocated 99% of the total capacity. 306 | slru main_; 307 | 308 | // Statistics. 
309 | int num_cache_hits_ = 0; 310 | int num_cache_misses_ = 0; 311 | 312 | public: 313 | explicit wtinylfu_cache(int capacity) 314 | : filter_(capacity) 315 | , window_(window_capacity(capacity)) 316 | , main_(capacity - window_.capacity()) 317 | {} 318 | 319 | int size() const noexcept 320 | { 321 | return window_.size() + main_.size(); 322 | } 323 | 324 | int capacity() const noexcept 325 | { 326 | return window_.capacity() + main_.capacity(); 327 | } 328 | 329 | int num_cache_hits() const noexcept { return num_cache_hits_; } 330 | int num_cache_misses() const noexcept { return num_cache_misses_; } 331 | 332 | bool contains(const K& key) const noexcept 333 | { 334 | return page_map_.find(key) != page_map_.cend(); 335 | } 336 | 337 | /** 338 | * NOTE: after this operation the accuracy of the cache will suffer until enough 339 | * historic data is gathered (because the frequency sketch is cleared). 340 | */ 341 | void change_capacity(const int n) 342 | { 343 | if(n <= 0) 344 | { 345 | throw std::invalid_argument("cache capacity must be greater than zero"); 346 | } 347 | 348 | filter_.change_capacity(n); 349 | window_.set_capacity(window_capacity(n)); 350 | main_.set_capacity(n - window_.capacity()); 351 | 352 | while(window_.is_full()) { evict_from_window(); } 353 | while(main_.is_full()) { evict_from_main(); } 354 | } 355 | 356 | std::shared_ptr get(const K& key) 357 | { 358 | filter_.record_access(key); 359 | auto it = page_map_.find(key); 360 | if(it != page_map_.end()) 361 | { 362 | auto& page = it->second; 363 | handle_hit(page); 364 | return page->data; 365 | } 366 | ++num_cache_misses_; 367 | return nullptr; 368 | } 369 | 370 | std::shared_ptr operator[](const K& key) 371 | { 372 | return get(key); 373 | } 374 | 375 | template 376 | std::shared_ptr get_and_insert_if_missing(const K& key, ValueLoader value_loader) 377 | { 378 | std::shared_ptr value = get(key); 379 | if(value == nullptr) 380 | { 381 | value = std::make_shared(value_loader(key)); 382 | 
insert(key, value); 383 | } 384 | return value; 385 | } 386 | 387 | void insert(K key, V value) 388 | { 389 | insert(std::move(key), std::make_shared(std::move(value))); 390 | } 391 | 392 | void erase(const K& key) 393 | { 394 | auto it = page_map_.find(key); 395 | if(it != page_map_.end()) 396 | { 397 | auto& page = it->second; 398 | if(page->cache_slot == cache_slot::window) 399 | window_.erase(page); 400 | else 401 | main_.erase(page); 402 | page_map_.erase(it); 403 | } 404 | } 405 | 406 | private: 407 | static int window_capacity(const int total_capacity) noexcept 408 | { 409 | return std::max(1, int(std::ceil(0.01f * total_capacity))); 410 | } 411 | 412 | void insert(const K& key, std::shared_ptr data) 413 | { 414 | if(window_.is_full()) { evict(); } 415 | 416 | auto it = page_map_.find(key); 417 | if(it != page_map_.end()) 418 | it->second->data = data; 419 | else 420 | page_map_.emplace(key, window_.insert(key, cache_slot::window, data)); 421 | } 422 | 423 | void handle_hit(typename lru::page_position page) 424 | { 425 | if(page->cache_slot == cache_slot::window) 426 | window_.handle_hit(page); 427 | else 428 | main_.handle_hit(page); 429 | ++num_cache_hits_; 430 | } 431 | 432 | /** 433 | * Evicts from the window cache to the main cache's probationary space. 434 | * Called when the window cache is full. 435 | * If the cache's total size exceeds its capacity, the window cache's victim and 436 | * the main cache's eviction candidate are evaluated and the one with the worse 437 | * (estimated) access frequency is evicted. Otherwise, the window cache's victim is 438 | * just transferred to the main cache. 
439 | */ 440 | void evict() 441 | { 442 | if(size() >= capacity()) 443 | evict_from_window_or_main(); 444 | else 445 | main_.transfer_page_from(window_.lru_pos(), window_); 446 | } 447 | 448 | void evict_from_window_or_main() 449 | { 450 | const int window_victim_freq = filter_.frequency(window_.victim_key()); 451 | const int main_victim_freq = filter_.frequency(main_.victim_key()); 452 | if(window_victim_freq > main_victim_freq) 453 | { 454 | evict_from_main(); 455 | main_.transfer_page_from(window_.lru_pos(), window_); 456 | } 457 | else 458 | { 459 | evict_from_window(); 460 | } 461 | } 462 | 463 | void evict_from_main() 464 | { 465 | page_map_.erase(main_.victim_key()); 466 | main_.evict(); 467 | } 468 | 469 | void evict_from_window() 470 | { 471 | page_map_.erase(window_.victim_key()); 472 | window_.evict(); 473 | } 474 | }; 475 | 476 | #endif 477 | --------------------------------------------------------------------------------