├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches └── benches.rs └── src ├── bin └── cache_advisor_bench.rs ├── dll.rs └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | flamegraph.svg 4 | perf.data 5 | perf.data.old 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cache-advisor" 3 | version = "1.0.16" 4 | authors = ["Tyler Neely "] 5 | description = "scan-resistant concurrent cache eviction manager" 6 | license = "MIT OR Apache-2.0" 7 | homepage = "https://github.com/komora-io/cache-advisor" 8 | repository = "https://github.com/komora-io/cache-advisor" 9 | keywords = ["cache", "lfu", "concurrent", "caching", "lock-free"] 10 | categories = ["caching", "database-implementations", "concurrency", "data-structures", "algorithms"] 11 | documentation = "https://docs.rs/cache-advisor/" 12 | readme = "README.md" 13 | edition = "2021" 14 | 15 | [profile.bench] 16 | debug = true 17 | opt-level = 3 18 | 19 | [profile.release] 20 | debug = true 21 | opt-level = 3 22 | 23 | [dependencies] 24 | crossbeam-queue = "0.3.5" 25 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Tyler Neely 190 | Copyright 2023 Tyler Neely 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Tyler Neely 2 | Copyright (c) 2023 Tyler Neely 3 | 4 | Permission is hereby granted, free of charge, to any 5 | person obtaining a copy of this software and associated 6 | documentation files (the "Software"), to deal in the 7 | Software without restriction, including without 8 | limitation the rights to use, copy, modify, merge, 9 | publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software 11 | is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice 15 | shall be included in all copies or substantial portions 16 | of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 19 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 20 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 21 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 22 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 25 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 26 | DEALINGS IN THE SOFTWARE. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cache-advisor 2 | 3 | [docs](https://docs.rs/cache-advisor) 4 | 5 | Tells you when to evict items from a cache. Should be able to sustain 6 | dozens of millions of accesses per second on modern server hardware 7 | without any blocking. 8 | 9 | # features 10 | 11 | * two-segment LRU, protects against cache pollution from single-hit items 12 | * 256 shards accessed via non-blocking flatcombining 13 | * local access buffer that must fill up before accessing shared state 14 | * compresses the costs associated with each item to a `u8` using a compression 15 | technique that will converge to the overall true sum of costs over time, but 16 | allows for much less memory to be used for accounting. 17 | 18 | # api 19 | 20 | ```rust 21 | impl CacheAdvisor { 22 | /// Instantiates a new two-segment `CacheAdvisor` eviction manager. 23 | /// 24 | /// Choose an overall size and the percentage 0..=100 that should 25 | /// be devoted to the entry cache. 20% is a safe default. 26 | pub fn new(capacity: usize, entry_percent: u8) -> CacheAdvisor { .. } 27 | 28 | /// Mark items that are accessed with a certain cost. 29 | /// Returns the items that should be evicted and their associated costs. 30 | /// The returned costs are always a compressed power of two and may not 31 | /// be the exact cost that you set for an item. Over time it converges 32 | /// to a correct value, however. 33 | pub fn accessed(&mut self, id: u64, cost: usize) -> Vec<(u64, usize)> { .. } 34 | 35 | /// Similar to `accessed` except this will reuse an internal vector for storing 36 | /// items to be evicted, which will be passed by reference to callers. If the 37 | /// returned slice is huge and you would like to reclaim underlying memory, call 38 | /// the `reset_internal_access_buffer` method. This can improve throughput by around 39 | /// 10% in some cases compared to the simpler `accessed` method above (which may 40 | /// need to copy items several times as the returned vector is expanded). 
41 | pub fn accessed_reuse_buffer(&mut self, id: u64, cost: usize) -> &[(u64, usize)] { .. } 42 | 43 | /// Resets the internal access buffer, freeing any memory it may have been holding 44 | /// onto. This should only be called in combination with `accessed_reuse_buffer` if 45 | /// you want to release the memory that the internal buffer may be consuming. You 46 | /// probably don't need to call this unless the previous slice returned by 47 | /// `accessed_reuse_buffer` is over a few thousand items long, if not an order of magnitude 48 | /// or two larger than that, which should ideally be rare events in workloads where 49 | /// most items being inserted are somewhat clustered in size. 50 | pub fn reset_internal_access_buffer(&mut self) { .. } 51 | } 52 | ``` 53 | -------------------------------------------------------------------------------- /benches/benches.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use test::Bencher; 6 | 7 | use cache_advisor::CacheAdvisor; 8 | 9 | #[bench] 10 | fn test(b: &mut Bencher) { 11 | let mut cache_advisor = CacheAdvisor::new(1024, 20); 12 | 13 | let mut id = 0; 14 | b.iter(|| { 15 | id += 1; 16 | let _evicted = cache_advisor.accessed(id * 256, 1); 17 | }); 18 | } 19 | -------------------------------------------------------------------------------- /src/bin/cache_advisor_bench.rs: -------------------------------------------------------------------------------- 1 | use cache_advisor::CacheAdvisor; 2 | use std::sync::atomic; 3 | 4 | const OPS: usize = 100_000_000; 5 | const SZ: usize = 9; 6 | const CAP: usize = 1024 * 1024; 7 | 8 | static EVICTED_BYTES: atomic::AtomicUsize = atomic::AtomicUsize::new(0); 9 | 10 | fn main() { 11 | let n_threads: usize = std::thread::available_parallelism() 12 | .unwrap_or(8.try_into().unwrap()) 13 | .get(); 14 | 15 | let ops_per_thread: usize = OPS / n_threads; 16 | let cache_advisor = CacheAdvisor::new(CAP, 80); 17 | 18 | let mut threads = vec![]; 19 | 20 | let before = std::time::Instant::now(); 21 | 22 | for tn in 0..n_threads { 23 | let mut cache_advisor = cache_advisor.clone(); 24 | let base = tn * ops_per_thread; 25 | let thread = std::thread::spawn(move || { 26 | for i in 0..ops_per_thread { 27 | let id = base + i; 28 | let evicted = cache_advisor.accessed_reuse_buffer(id as u64, SZ); 29 | let cost = evicted.iter().map(|(_id, cost)| cost).sum(); 30 | EVICTED_BYTES.fetch_add(cost, atomic::Ordering::Relaxed); 31 | } 32 | }); 33 | threads.push(thread); 34 | } 35 | 36 | for thread in threads.into_iter() { 37 | thread.join().unwrap(); 38 | } 39 | 40 | let evicted = EVICTED_BYTES.load(atomic::Ordering::Acquire); 41 | let added = OPS * SZ; 42 | let present = added.saturating_sub(evicted); 43 | 44 | println!( 45 | "added: {}mb, evicted: {}mb, present: {}kb ({} % above cap). 
{:.2} million accesses/s", 46 | added / 1_000_000, 47 | evicted / 1_000_000, 48 | present / 1_000, 49 | (100 * present.saturating_sub(CAP)) / CAP, 50 | (OPS * 1000) as f64 / before.elapsed().as_millis() as f64 / 1_000_000., 51 | ); 52 | } 53 | -------------------------------------------------------------------------------- /src/dll.rs: -------------------------------------------------------------------------------- 1 | #![allow(unsafe_code)] 2 | 3 | use std::{cell::UnsafeCell, ptr}; 4 | 5 | use super::CacheAccess; 6 | 7 | /// A simple doubly linked list for use in the `Lru` 8 | #[derive(Debug)] 9 | pub(crate) struct Node { 10 | pub inner: UnsafeCell<CacheAccess>, 11 | next: *mut Node, 12 | prev: *mut Node, 13 | } 14 | 15 | impl std::ops::Deref for Node { 16 | type Target = CacheAccess; 17 | 18 | fn deref(&self) -> &CacheAccess { 19 | unsafe { &(*self.inner.get()) } 20 | } 21 | } 22 | 23 | impl Node { 24 | fn unwire(&mut self) { 25 | unsafe { 26 | if !self.prev.is_null() { 27 | (*self.prev).next = self.next; 28 | } 29 | 30 | if !self.next.is_null() { 31 | (*self.next).prev = self.prev; 32 | } 33 | } 34 | 35 | self.next = ptr::null_mut(); 36 | self.prev = ptr::null_mut(); 37 | } 38 | } 39 | 40 | /// A simple non-cyclical doubly linked 41 | /// list where items can be efficiently 42 | /// removed from the middle, for the purposes 43 | /// of backing an LRU cache. 44 | pub struct DoublyLinkedList { 45 | head: *mut Node, 46 | tail: *mut Node, 47 | len: usize, 48 | } 49 | 50 | unsafe impl Send for DoublyLinkedList {} 51 | 52 | impl Drop for DoublyLinkedList { 53 | fn drop(&mut self) { 54 | let mut cursor = self.head; 55 | while !cursor.is_null() { 56 | unsafe { 57 | let node = Box::from_raw(cursor); 58 | 59 | // don't need to check for cycles 60 | // because this Dll is non-cyclical 61 | cursor = node.prev; 62 | 63 | // this happens without the manual drop, 64 | // but we keep it for explicitness 65 | drop(node); 66 | } 67 | } 68 | } 69 | } 70 | 71 | impl Default for DoublyLinkedList { 72 | fn default() -> Self { 73 | Self { 74 | head: ptr::null_mut(), 75 | tail: ptr::null_mut(), 76 | len: 0, 77 | } 78 | } 79 | } 80 | 81 | impl DoublyLinkedList { 82 | pub(crate) const fn len(&self) -> usize { 83 | self.len 84 | } 85 | 86 | pub(crate) fn push_head(&mut self, item: CacheAccess) -> *mut Node { 87 | self.len += 1; 88 | 89 | let node = Node { 90 | inner: UnsafeCell::new(item), 91 | next: ptr::null_mut(), 92 | prev: self.head, 93 | }; 94 | 95 | let ptr = Box::into_raw(Box::new(node)); 96 | 97 | self.push_head_ptr(ptr); 98 | 99 | ptr 100 | } 101 | 102 | fn push_head_ptr(&mut self, ptr: *mut Node) { 103 | if !self.head.is_null() { 104 | unsafe { 105 | (*self.head).next = ptr; 106 | (*ptr).prev = self.head; 107 | } 108 | } 109 | 110 | if self.tail.is_null() { 111 | self.tail = ptr; 112 | } 113 | 114 | self.head = ptr; 115 | } 116 | 117 | pub(crate) fn unwire(&mut self, ptr: *mut Node) { 118 | unsafe { 119 | if self.tail == ptr { 120 | self.tail = (*ptr).next; 121 | } 122 | 123 | if self.head == ptr { 124 | self.head = (*ptr).prev; 125 | } 126 | 127 | (*ptr).unwire(); 128 | } 129 | 130 | self.len -= 1; 131 | } 132 | 133 | pub(crate) fn install(&mut self, ptr: *mut Node) { 134 | self.len += 1; 135 | self.push_head_ptr(ptr); 136 | } 137 | 138 | // NB: returns the raw Node pointer instead of freeing the Node here, 139 | // because the LRU is a map to the Node as well, and if the LRU 140 | // accessed the map via PID, it would cause a use after free if 141 | // we had already freed the Node in this function.
142 | pub(crate) fn pop_tail(&mut self) -> Option<*mut Node> { 143 | if self.tail.is_null() { 144 | return None; 145 | } 146 | 147 | self.len -= 1; 148 | let tail_ptr = self.tail; 149 | if self.head == self.tail { 150 | self.head = ptr::null_mut(); 151 | } 152 | 153 | unsafe { 154 | self.tail = (*tail_ptr).next; 155 | 156 | (*tail_ptr).unwire(); 157 | } 158 | 159 | Some(tail_ptr) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A simple eviction manager with 256 shards 2 | //! and two segments to provide for scan resistance. 3 | //! Tells you when to evict items from a cache. 4 | //! 5 | //! features: 6 | //! 7 | //! * two-segment LRU, protects against cache pollution from single-hit items 8 | //! * 256 shards accessed via non-blocking flatcombining 9 | //! * local access buffer that must fill up before accessing shared state 10 | //! * compresses the costs associated with each item to a `u8` using a compression 11 | //! technique that will converge to the overall true sum of costs over time, but 12 | //! allows for much less memory to be used for accounting. 13 | //! 14 | //! # Examples 15 | //! ``` 16 | //! use cache_advisor::CacheAdvisor; 17 | //! 18 | //! // each shard stores 10 bytes, 10% of that is in the entry cache 19 | //! let mut ca = CacheAdvisor::new(256 * 10, 10); 20 | //! 21 | //! // add item 0 into entry cache 22 | //! let should_evict = ca.accessed_reuse_buffer(0, 1); 23 | //! assert!(should_evict.is_empty()); 24 | //! 25 | //! // promote item 0 into main cache 26 | //! let should_evict = ca.accessed_reuse_buffer(0, 1); 27 | //! assert!(should_evict.is_empty()); 28 | //! 29 | //! // hit other items only once, like a big scan 30 | //! for i in 1..5000 { 31 | //! let id = i * 256; 32 | //! let evicted = ca.accessed_reuse_buffer(id, 1); 33 | //! 34 | //! // assert that 0 is never evicted while scanning 35 | //! assert!(!evicted.contains(&(0, 1))); 36 | //! } 37 | //! 38 | //! let mut zero_evicted = false; 39 | //! 40 | //! // hit other items more than once, assert that zero does get 41 | //! // evicted eventually. 42 | //! for i in 1..5000 { 43 | //! let id = i * 256; 44 | //! zero_evicted |= ca.accessed_reuse_buffer(id, 1).contains(&(0, 1)); 45 | //! zero_evicted |= ca.accessed_reuse_buffer(id, 1).contains(&(0, 1)); 46 | //! zero_evicted |= ca.accessed_reuse_buffer(id, 1).contains(&(0, 1)); 47 | //! } 48 | //! 49 | //! assert!(zero_evicted); 50 | //! ``` 51 | use std::{ 52 | borrow::Borrow, 53 | cell::UnsafeCell, 54 | fmt, 55 | hash::{Hash, Hasher}, 56 | ops::{Deref, DerefMut}, 57 | sync::{ 58 | atomic::{AtomicBool, Ordering}, 59 | Arc, 60 | }, 61 | }; 62 | 63 | use crossbeam_queue::SegQueue; 64 | 65 | mod dll; 66 | //mod dll2; 67 | 68 | use crate::dll::{DoublyLinkedList, Node}; 69 | 70 | const MAX_QUEUE_ITEMS: usize = 32; 71 | // ensures that usize::MAX compresses to less than 128, 72 | // since the max bit of a u8 size is used to represent 73 | // the cache tier tag. 
74 | const RESIZE_CUTOFF: usize = 63; 75 | const RESIZE_CUTOFF_U8: u8 = RESIZE_CUTOFF as u8; 76 | const N_SHARDS: usize = 256; 77 | 78 | // very very simple mutex that reduces instruction cache pollution 79 | struct TryMutex<T> { 80 | inner: UnsafeCell<T>, 81 | mu: AtomicBool, 82 | } 83 | 84 | impl<T> TryMutex<T> { 85 | fn new(inner: T) -> TryMutex<T> { 86 | TryMutex { 87 | inner: inner.into(), 88 | mu: false.into(), 89 | } 90 | } 91 | 92 | #[inline] 93 | fn try_lock(&self) -> Option<TryMutexGuard<'_, T>> { 94 | if self.mu.swap(true, Ordering::Acquire) { 95 | // already locked 96 | None 97 | } else { 98 | Some(TryMutexGuard { tm: self }) 99 | } 100 | } 101 | } 102 | 103 | struct TryMutexGuard<'a, T> { 104 | tm: &'a TryMutex<T>, 105 | } 106 | 107 | unsafe impl<T: Send> Send for TryMutex<T> {} 108 | 109 | unsafe impl<T: Send> Sync for TryMutex<T> {} 110 | 111 | impl<'a, T> Drop for TryMutexGuard<'a, T> { 112 | #[inline] 113 | fn drop(&mut self) { 114 | assert!(self.tm.mu.swap(false, Ordering::Release)); 115 | } 116 | } 117 | 118 | impl<'a, T> Deref for TryMutexGuard<'a, T> { 119 | type Target = T; 120 | 121 | fn deref(&self) -> &T { 122 | unsafe { &*self.tm.inner.get() } 123 | } 124 | } 125 | 126 | impl<'a, T> DerefMut for TryMutexGuard<'a, T> { 127 | #[inline] 128 | fn deref_mut(&mut self) -> &mut T { 129 | unsafe { &mut *self.tm.inner.get() } 130 | } 131 | } 132 | 133 | #[derive(Clone, Default)] 134 | struct Resizer { 135 | actual: u128, 136 | decompressed: u128, 137 | } 138 | 139 | impl Resizer { 140 | /// Returns a compressed size which 141 | /// has been probabilistically chosen. 142 | fn compress(&mut self, raw_input: usize) -> u8 { 143 | if raw_input <= RESIZE_CUTOFF { 144 | return u8::try_from(raw_input).unwrap(); 145 | } 146 | 147 | let upgraded_input = u128::try_from(raw_input).unwrap(); 148 | let po2 = upgraded_input.next_power_of_two(); 149 | let compressed = po2.trailing_zeros() as u8; 150 | let decompressed = decompress(compressed + RESIZE_CUTOFF_U8) as u128; 151 | self.actual += raw_input as u128; 152 | 153 | let ret = if self.decompressed + decompressed > self.actual { 154 | compressed - 1 155 | } else { 156 | compressed 157 | }; 158 | 159 | self.decompressed += decompress(ret + RESIZE_CUTOFF_U8) as u128; 160 | 161 | let sz = ret + RESIZE_CUTOFF_U8; 162 | 163 | assert!(sz < 128); 164 | 165 | sz 166 | } 167 | } 168 | 169 | #[inline] 170 | const fn decompress(input: u8) -> usize { 171 | // zero-out the access bit 172 | let masked = input & 127; 173 | match masked { 174 | 0..=RESIZE_CUTOFF_U8 => masked as usize, 175 | _ => { 176 | if let Some(o) = 1_usize.checked_shl((masked - RESIZE_CUTOFF_U8) as u32) { 177 | o 178 | } else { 179 | usize::MAX 180 | } 181 | } 182 | } 183 | } 184 | 185 | struct Fnv(u64); 186 | 187 | impl Default for Fnv { 188 | #[inline] 189 | fn default() -> Fnv { 190 | Fnv(0xcbf29ce484222325) 191 | } 192 | } 193 | 194 | impl std::hash::Hasher for Fnv { 195 | #[inline] 196 | fn finish(&self) -> u64 { 197 | self.0 198 | } 199 | 200 | #[inline] 201 | fn write(&mut self, bytes: &[u8]) { 202 | let Fnv(mut hash) = *self; 203 | 204 | for byte in bytes.iter() { 205 | hash ^= *byte as u64; 206 | hash = hash.wrapping_mul(0x100000001b3); 207 | } 208 | 209 | *self = Fnv(hash); 210 | } 211 | } 212 | 213 | pub(crate) type FnvSet8<T> = std::collections::HashSet<T, std::hash::BuildHasherDefault<Fnv>>; 214 | 215 | type PageId = u64; 216 | 217 | fn _sz_test() { 218 | let _: [u8; 8] = [0; std::mem::size_of::<CacheAccess>()]; 219 | let _: [u8; 1] = [0; std::mem::align_of::<CacheAccess>()]; 220 | } 221 | 222 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 223 | pub(crate) struct CacheAccess { 224 | size: u8, 
225 | pid_bytes: [u8; 7], 226 | } 227 | 228 | impl CacheAccess { 229 | fn was_promoted(&self) -> bool { 230 | self.size & 128 != 0 231 | } 232 | 233 | fn size(&self) -> usize { 234 | decompress((self.size) as u8) 235 | } 236 | 237 | fn pid(&self, shard: u8) -> PageId { 238 | let mut pid_bytes = [0; 8]; 239 | pid_bytes[1..8].copy_from_slice(&self.pid_bytes); 240 | pid_bytes[0] = shard; 241 | PageId::from_le_bytes(pid_bytes) 242 | } 243 | 244 | fn new(pid: PageId, sz: usize, resizer: &mut Resizer) -> CacheAccess { 245 | let size = resizer.compress(sz); 246 | 247 | let mut pid_bytes = [0; 7]; 248 | pid_bytes.copy_from_slice(&pid.to_le_bytes()[1..8]); 249 | 250 | CacheAccess { size, pid_bytes } 251 | } 252 | } 253 | 254 | /// A simple eviction manager with 256 shards 255 | /// and two segments to provide for scan resistance. 256 | /// Tells you when to evict items from a cache. 257 | /// 258 | /// features: 259 | /// 260 | /// * two-segment LRU, protects against cache pollution from single-hit items 261 | /// * 256 shards accessed via non-blocking flatcombining 262 | /// * local access buffer that must fill up before accessing shared state 263 | /// * compresses the costs associated with each item to a `u8` using a compression 264 | /// technique that will converge to the overall true sum of costs over time, but 265 | /// allows for much less memory to be used for accounting. 266 | /// 267 | /// # Examples 268 | /// ``` 269 | /// use cache_advisor::CacheAdvisor; 270 | /// 271 | /// // each shard stores 10 bytes, 10% of that is in the entry cache 272 | /// let mut ca = CacheAdvisor::new(256 * 10, 10); 273 | /// 274 | /// // add item 0 into entry cache 275 | /// let should_evict = ca.accessed(0, 1); 276 | /// assert!(should_evict.is_empty()); 277 | /// 278 | /// // promote item 0 into main cache 279 | /// let should_evict = ca.accessed(0, 1); 280 | /// assert!(should_evict.is_empty()); 281 | /// 282 | /// // hit other items only once, like a big scan 283 | /// for i in 1..5000 { 284 | /// let id = i * 256; 285 | /// let evicted = ca.accessed(id, 1); 286 | /// 287 | /// // assert that 0 is never evicted while scanning 288 | /// assert!(!evicted.contains(&(0, 1))); 289 | /// } 290 | /// 291 | /// let mut zero_evicted = false; 292 | /// 293 | /// // hit other items more than once, assert that zero does get 294 | /// // evicted eventually. 
295 | /// for i in 1..5000 { 296 | ///     let id = i * 256; 297 | ///     zero_evicted |= ca.accessed(id, 1).contains(&(0, 1)); 298 | ///     zero_evicted |= ca.accessed(id, 1).contains(&(0, 1)); 299 | ///     zero_evicted |= ca.accessed(id, 1).contains(&(0, 1)); 300 | /// } 301 | /// 302 | /// assert!(zero_evicted); 303 | /// ``` 304 | pub struct CacheAdvisor { 305 | shards: Arc<[TryMutex<Shard>]>, 306 | access_queues: Arc<[SegQueue<CacheAccess>]>, 307 | local_queue: Vec<(u64, usize)>, 308 | resizer: Resizer, 309 | access_buffer: Vec<(u64, usize)>, 310 | } 311 | 312 | impl Clone for CacheAdvisor { 313 | fn clone(&self) -> CacheAdvisor { 314 | CacheAdvisor { 315 | shards: self.shards.clone(), 316 | access_queues: self.access_queues.clone(), 317 | local_queue: vec![], 318 | resizer: self.resizer.clone(), 319 | access_buffer: vec![], 320 | } 321 | } 322 | } 323 | 324 | impl fmt::Debug for CacheAdvisor { 325 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 326 | f.debug_struct("CacheAdvisor").finish() 327 | } 328 | } 329 | 330 | impl Default for CacheAdvisor { 331 | /// Returns a `CacheAdvisor` with a default of 1 million capacity, and 20% entry cache 332 | fn default() -> CacheAdvisor { 333 | CacheAdvisor::new(1024 * 1024, 20) 334 | } 335 | } 336 | 337 | const fn _send_sync_ca() { 338 | const fn send_sync<T: Send + Sync>() {} 339 | send_sync::<CacheAdvisor>(); 340 | } 341 | 342 | impl CacheAdvisor { 343 | /// Instantiates a new `CacheAdvisor` eviction manager. 344 | /// 345 | /// `entry_percent` is how much of the cache should be 346 | /// devoted to the "entry" cache. When new items are added 347 | /// to the system, they are inserted into the entry cache 348 | /// first. If they are accessed at some point while still 349 | /// in the entry cache, they will be promoted to the main 350 | /// cache. This provides "scan resistance" where the cache 351 | /// will avoid being destroyed by things like a scan that 352 | /// could otherwise push all of the frequently-accessed 353 | /// items out. A value of `20` is a reasonable default, 354 | /// which will reserve 20% of the cache capacity for the 355 | /// entry cache, and 80% for the main cache. This value 356 | /// must be less than or equal to 100. If the main cache 357 | /// has never been filled to the point where items are 358 | /// evicted, items that are pushed out of the entry cache 359 | /// will flow into the main cache, so you don't need to 360 | /// worry about under-utilizing available memory. This 361 | /// only changes behavior once the cache is full to prevent 362 | /// scans from kicking other items out. 363 | pub fn new(capacity: usize, entry_percent: u8) -> Self { 364 | assert!( 365 | capacity >= N_SHARDS, 366 | "Please configure the cache \ 367 | capacity to be at least 256" 368 | ); 369 | let shard_capacity = capacity / N_SHARDS; 370 | 371 | let mut shards = Vec::with_capacity(N_SHARDS); 372 | for _ in 0..N_SHARDS { 373 | shards.push(TryMutex::new(Shard::new(shard_capacity, entry_percent))) 374 | } 375 | 376 | let mut access_queues = Vec::with_capacity(N_SHARDS); 377 | for _ in 0..N_SHARDS { 378 | access_queues.push(SegQueue::default()); 379 | } 380 | 381 | Self { 382 | shards: shards.into(), 383 | access_queues: access_queues.into(), 384 | local_queue: Vec::with_capacity(MAX_QUEUE_ITEMS), 385 | resizer: Resizer::default(), 386 | access_buffer: vec![], 387 | } 388 | } 389 | 390 | /// Called when an item is accessed. Returns a Vec of items to be 391 | /// evicted. Avoids blocking under contention by using flat-combining 392 | /// on 256 LRU shards. 
393 | pub fn accessed(&mut self, id: u64, cost: usize) -> Vec<(u64, usize)> { 394 | let mut ret = vec![]; 395 | self.accessed_inner(id, cost, &mut ret); 396 | ret 397 | } 398 | 399 | /// Similar to `accessed` except this will reuse an internal vector for storing 400 | /// items to be evicted, which will be passed by reference to callers. If the 401 | /// returned slice is huge and you would like to reclaim underlying memory, call 402 | /// the `reset_internal_access_buffer` method. This can improve throughput by around 403 | /// 10% in some cases compared to the simpler `accessed` method above (which may 404 | /// need to copy items several times as the returned vector is expanded). 405 | pub fn accessed_reuse_buffer(&mut self, id: u64, cost: usize) -> &[(u64, usize)] { 406 | let mut swapped = std::mem::take(&mut self.access_buffer); 407 | swapped.clear(); 408 | self.accessed_inner(id, cost, &mut swapped); 409 | self.access_buffer = swapped; 410 | &self.access_buffer 411 | } 412 | 413 | /// Resets the internal access buffer, freeing any memory it may have been holding 414 | /// onto. This should only be called in combination with `accessed_reuse_buffer` if 415 | /// you want to release the memory that the internal buffer may be consuming. You 416 | /// probably don't need to call this unless the previous slice returned by 417 | /// `accessed_reuse_buffer` is over a few thousand items long, if not an order of magnitude 418 | /// or two larger than that, which should ideally be rare events in workloads where 419 | /// most items being inserted are somewhat clustered in size. 420 | pub fn reset_internal_access_buffer(&mut self) { 421 | self.access_buffer = vec![] 422 | } 423 | 424 | fn accessed_inner(&mut self, id: u64, cost: usize, ret: &mut Vec<(u64, usize)>) { 425 | self.local_queue.push((id, cost)); 426 | 427 | if self.local_queue.len() < MAX_QUEUE_ITEMS { 428 | return; 429 | } 430 | 431 | while let Some((id, cost)) = self.local_queue.pop() { 432 | let shard_idx = (id.to_le_bytes()[0] as u64 % N_SHARDS as u64) as usize; 433 | let shard_mu = &self.shards[shard_idx]; 434 | let access_queue = &self.access_queues[shard_idx]; 435 | let cache_access = CacheAccess::new(id, cost, &mut self.resizer); 436 | 437 | // use flat-combining to avoid lock contention 438 | if let Some(mut shard) = shard_mu.try_lock() { 439 | // we take len here and bound pops to this number 440 | // because we don't want to keep going forever 441 | // if new items are flowing in - we need to get 442 | // back to our own work eventually. 
443 | for _ in 0..access_queue.len() { 444 | if let Some(queued_cache_access) = access_queue.pop() { 445 | shard.accessed(queued_cache_access, shard_idx, ret); 446 | } 447 | } 448 | 449 | shard.accessed(cache_access, shard_idx, ret); 450 | } else { 451 | access_queue.push(cache_access); 452 | } 453 | } 454 | } 455 | } 456 | 457 | #[derive(Eq)] 458 | struct Entry(*mut Node); 459 | 460 | unsafe impl Send for Entry {} 461 | 462 | impl Ord for Entry { 463 | fn cmp(&self, other: &Entry) -> std::cmp::Ordering { 464 | let left_pid: &[u8; 7] = self.borrow(); 465 | let right_pid: &[u8; 7] = other.borrow(); 466 | left_pid.cmp(&right_pid) 467 | } 468 | } 469 | 470 | impl PartialOrd for Entry { 471 | fn partial_cmp(&self, other: &Entry) -> Option<std::cmp::Ordering> { 472 | Some(self.cmp(other)) 473 | } 474 | } 475 | 476 | impl PartialEq for Entry { 477 | fn eq(&self, other: &Entry) -> bool { 478 | unsafe { (*self.0).pid_bytes == (*other.0).pid_bytes } 479 | } 480 | } 481 | 482 | impl Borrow<[u8; 7]> for Entry { 483 | fn borrow(&self) -> &[u8; 7] { 484 | unsafe { &(*self.0).pid_bytes } 485 | } 486 | } 487 | 488 | // we only hash on pid, since we will change 489 | // sz sometimes and we access the item by pid 490 | impl Hash for Entry { 491 | fn hash<H: Hasher>(&self, hasher: &mut H) { 492 | unsafe { (*self.0).pid_bytes.hash(hasher) } 493 | } 494 | } 495 | 496 | struct Shard { 497 | entry_cache: DoublyLinkedList, 498 | main_cache: DoublyLinkedList, 499 | entries: FnvSet8<Entry>, 500 | entry_capacity: usize, 501 | entry_size: usize, 502 | main_capacity: usize, 503 | main_size: usize, 504 | ever_evicted_main: bool, 505 | } 506 | 507 | impl Shard { 508 | fn new(capacity: usize, entry_pct: u8) -> Self { 509 | assert!( 510 | entry_pct <= 100, 511 | "entry cache percent must be less than or equal to 100" 512 | ); 513 | assert!(capacity > 0, "shard capacity must be non-zero"); 514 | 515 | let entry_capacity = (capacity * entry_pct as usize) / 100; 516 | let main_capacity = capacity - entry_capacity; 517 | 518 | Self { 519 | entry_cache: DoublyLinkedList::default(), 520 | main_cache: DoublyLinkedList::default(), 521 | entries: FnvSet8::default(), 522 | entry_capacity, 523 | main_capacity, 524 | entry_size: 0, 525 | main_size: 0, 526 | ever_evicted_main: false, 527 | } 528 | } 529 | 530 | fn accessed( 531 | &mut self, 532 | cache_access: CacheAccess, 533 | shard_idx: usize, 534 | ret: &mut Vec<(u64, usize)>, 535 | ) { 536 | let new_size = cache_access.size(); 537 | 538 | if let Some(entry) = self.entries.get(&cache_access.pid_bytes) { 539 | let (old_size, was_promoted) = unsafe { 540 | let old_size = (*entry.0).size(); 541 | let was_promoted = (*entry.0).was_promoted(); 542 | 543 | // This is a bit hacky but it's done 544 | // this way because HashSet doesn't have 545 | // a get_mut method. 546 | // 547 | // This is safe to do because the hash 548 | // happens based on the PageId of the 549 | // CacheAccess, rather than the size 550 | // that we modify here. 
551 | (*entry.0).inner.get_mut().size = 128 | cache_access.size; 552 | 553 | (old_size, was_promoted) 554 | }; 555 | 556 | if was_promoted { 557 | // item is already in main cache 558 | 559 | self.main_size -= old_size; 560 | 561 | self.main_cache.unwire(entry.0); 562 | self.main_cache.install(entry.0); 563 | } else { 564 | // item is in entry cache 565 | 566 | self.entry_size -= old_size; 567 | 568 | self.entry_cache.unwire(entry.0); 569 | self.main_cache.install(entry.0); 570 | } 571 | 572 | self.main_size += new_size; 573 | } else if !self.ever_evicted_main { 574 | // We can put new writes into the 575 | // main cache directly until it fills 576 | // up, letting us get higher hit rates, 577 | // assuming the entry cache is smaller 578 | // than the main cache. 579 | let mut cache_access = cache_access; 580 | cache_access.size |= 128; 581 | let ptr = self.main_cache.push_head(cache_access); 582 | self.entries.insert(Entry(ptr)); 583 | self.main_size += new_size; 584 | } else { 585 | let ptr = self.entry_cache.push_head(cache_access); 586 | self.entries.insert(Entry(ptr)); 587 | self.entry_size += new_size; 588 | }; 589 | 590 | while self.entry_size > self.entry_capacity && self.entry_cache.len() > 1 { 591 | let node: *mut Node = self.entry_cache.pop_tail().unwrap(); 592 | 593 | let popped_entry: CacheAccess = unsafe { *(*node).inner.get() }; 594 | let node_size = popped_entry.size(); 595 | let item = popped_entry.pid(u8::try_from(shard_idx).unwrap()); 596 | 597 | self.entry_size -= node_size; 598 | 599 | assert!( 600 | !popped_entry.was_promoted(), 601 | "somehow, promoted item was still in entry cache" 602 | ); 603 | 604 | let pid_bytes = popped_entry.pid_bytes; 605 | assert!(self.entries.remove(&pid_bytes)); 606 | 607 | ret.push((item, node_size)); 608 | let node_box: Box<Node> = unsafe { Box::from_raw(node) }; 609 | 610 | // NB: node is stored in our entries map 611 | // via a raw pointer, which points to 612 | // the same allocation used in the DLL. 613 | // We have to be careful to free node 614 | // only after removing it from both 615 | // the DLL and our entries map. 616 | drop(node_box); 617 | } 618 | 619 | while self.main_size > self.main_capacity && self.main_cache.len() > 1 { 620 | self.ever_evicted_main = true; 621 | 622 | let node: *mut Node = self.main_cache.pop_tail().unwrap(); 623 | 624 | let popped_main: CacheAccess = unsafe { *(*node).inner.get() }; 625 | let node_size = popped_main.size(); 626 | let item = popped_main.pid(u8::try_from(shard_idx).unwrap()); 627 | 628 | self.main_size -= node_size; 629 | 630 | let pid_bytes = popped_main.pid_bytes; 631 | assert!(self.entries.remove(&pid_bytes)); 632 | 633 | ret.push((item, node_size)); 634 | 635 | let node_box: Box<Node> = unsafe { Box::from_raw(node) }; 636 | 637 | // NB: node is stored in our entries map 638 | // via a raw pointer, which points to 639 | // the same allocation used in the DLL. 640 | // We have to be careful to free node 641 | // only after removing it from both 642 | // the DLL and our entries map. 
643 | drop(node_box); 644 | } 645 | } 646 | } 647 | 648 | #[test] 649 | fn lru_smoke_test() { 650 | let mut lru = CacheAdvisor::new(256, 50); 651 | let mut evicted = 0; 652 | for i in 0..10_000 { 653 | evicted += lru.accessed(i, 16).len(); 654 | } 655 | assert!(evicted > 9700, "only evicted {} items", evicted); 656 | } 657 | 658 | #[test] 659 | fn probabilistic_sum() { 660 | let mut resizer = Resizer::default(); 661 | let mut resized = 0; 662 | let mut actual = 0; 663 | for i in 0..1000 { 664 | let compressed = resizer.compress(i); 665 | let decompressed = decompress(compressed); 666 | resized += decompressed; 667 | actual += i; 668 | } 669 | 670 | let abs_delta = ((resized as f64 / actual as f64) - 1.).abs(); 671 | 672 | assert!(abs_delta < 0.005, "delta is actually {}", abs_delta); 673 | } 674 | 675 | #[test] 676 | fn probabilistic_ev() { 677 | let mut resizer = Resizer::default(); 678 | 679 | fn assert_rt(i: usize, resizer: &mut Resizer) { 680 | let mut resized = 0_u128; 681 | let mut actual = 0_u128; 682 | for _ in 1..10_000 { 683 | let compressed = resizer.compress(i); 684 | let decompressed = decompress(compressed); 685 | resized += decompressed as u128; 686 | actual += i as u128; 687 | } 688 | 689 | if i == 0 { 690 | assert_eq!(actual, 0); 691 | assert_eq!(resized, 0); 692 | } else { 693 | let abs_delta = ((resized as f64 / actual as f64) - 1.).abs(); 694 | assert!( 695 | abs_delta < 0.0001, 696 | "delta is actually {} for inputs of size {}. actual: {} round-trip: {}", 697 | abs_delta, 698 | i, 699 | actual, 700 | resized 701 | ); 702 | } 703 | } 704 | 705 | for i in 0..1024 { 706 | assert_rt(i, &mut resizer) 707 | } 708 | 709 | assert_rt(usize::MAX, &mut resizer) 710 | } 711 | 712 | #[test] 713 | fn probabilistic_n() { 714 | const N: usize = 9; 715 | 716 | let mut resizer = Resizer::default(); 717 | let mut resized = 0; 718 | let mut actual = 0; 719 | 720 | for _ in 0..1000 { 721 | let compressed = resizer.compress(N); 722 | let decompressed = decompress(compressed); 723 | resized += decompressed; 724 | actual += N; 725 | } 726 | 727 | let abs_delta = ((resized as f64 / actual as f64) - 1.).abs(); 728 | 729 | assert!(abs_delta < 0.005, "delta is actually {}", abs_delta); 730 | } 731 | 732 | #[test] 733 | fn scan_resistance() { 734 | // each shard stores 10 bytes, 10% of that is in the entry cache 735 | let mut ca = CacheAdvisor::new(256 * 10, 10); 736 | 737 | // add 0 into entry cache 738 | ca.accessed(0, 1); 739 | 740 | // promote 0 into main cache 741 | ca.accessed(0, 1); 742 | 743 | // hit other items only once, like a big scan 744 | for i in 1..5000 { 745 | let id = i * 256; 746 | let evicted = ca.accessed(id, 1); 747 | 748 | // assert that 0 is never evicted while scanning 749 | assert!(!evicted.contains(&(0, 1))); 750 | } 751 | 752 | let mut zero_evicted = false; 753 | 754 | // hit other items more than once, assert that zero does get 755 | // evicted eventually. 756 | for i in 1..5000 { 757 | let id = i * 256; 758 | zero_evicted |= ca.accessed(id, 1).contains(&(0, 1)); 759 | zero_evicted |= ca.accessed(id, 1).contains(&(0, 1)); 760 | zero_evicted |= ca.accessed(id, 1).contains(&(0, 1)); 761 | } 762 | 763 | assert!(zero_evicted); 764 | } 765 | --------------------------------------------------------------------------------
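
The crate tells you *what* to evict but leaves the actual value store to the caller. Below is a minimal integration sketch, not part of this repository: the `AdvisedCache` type, its `HashMap` backing, and the length-in-bytes cost model are illustrative assumptions layered on top of the documented `CacheAdvisor::new` and `accessed` API.

```rust
// Sketch only: a toy byte-budgeted cache that asks CacheAdvisor when to evict.
// `AdvisedCache`, its HashMap backing, and the cost model are illustrative.
use std::collections::HashMap;

use cache_advisor::CacheAdvisor;

struct AdvisedCache {
    advisor: CacheAdvisor,
    values: HashMap<u64, Vec<u8>>,
}

impl AdvisedCache {
    fn with_capacity(capacity_bytes: usize) -> AdvisedCache {
        AdvisedCache {
            // 20% entry cache, the default suggested by the README.
            advisor: CacheAdvisor::new(capacity_bytes, 20),
            values: HashMap::new(),
        }
    }

    fn insert(&mut self, id: u64, value: Vec<u8>) {
        let cost = value.len();
        self.values.insert(id, value);
        // Report the access, then drop whatever the advisor says to evict.
        // The returned costs are compressed approximations, so they are ignored here.
        for (evicted_id, _approx_cost) in self.advisor.accessed(id, cost) {
            self.values.remove(&evicted_id);
        }
    }

    fn get(&mut self, id: u64) -> Option<&Vec<u8>> {
        let cost = self.values.get(&id)?.len();
        for (evicted_id, _approx_cost) in self.advisor.accessed(id, cost) {
            self.values.remove(&evicted_id);
        }
        // The advisor is free to evict `id` itself, so look it up again.
        self.values.get(&id)
    }
}

fn main() {
    let mut cache = AdvisedCache::with_capacity(256 * 1024);
    for i in 0..100_000u64 {
        cache.insert(i, vec![0u8; 64]);
    }
    println!("items still resident: {}", cache.values.len());
}
```

Note that `accessed` may advise evicting the id that was just touched, so a wrapper like this should apply the advisor's output first and re-check membership afterwards, as `get` does above.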