├── README.md
├── bench
    ├── conftest.py
    └── test_sieve.py
├── poetry.lock
├── pyproject.toml
├── sieve
    ├── __init__.py
    ├── lru.py
    └── sieve.py
└── tests
    └── __init__.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Sieve implementation in Python
 2 | 
 3 | [SIEVE](https://cachemon.github.io/SIEVE-website/) is an eviction algorithm
 4 | that is simpler and more efficient than LRU. This project implements a cache
 5 | using a Python decorator that uses SIEVE for eviction. The linked-list
 6 | implementation is taken directly from the Python standard library, which makes
 7 | it easy to compare Python's LRU and SIEVE.
 8 | 
 9 | You can use a SIEVE-backed cache in your code with the `@sieve_cache`
10 | decorator.
11 | 
12 | ## Benchmarks
13 | 
14 | Here's a comparison of SIEVE against the pure Python LRU implementation from
15 | the Python standard library.
16 | 
17 | ```
18 | --------------------------------------------------------------------------------- benchmark: 4 tests ---------------------------------------------------------------------------------
19 | Name (time in ms)        Min               Max              Mean            StdDev            Median               IQR            Outliers  OPS (Kops/s)            Rounds  Iterations
20 | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
21 | test_sieve_hit        0.0003 (1.0)      0.0018 (1.0)      0.0003 (1.0)      0.0000 (1.0)      0.0003 (1.0)      0.0000 (1.0)    13072;28199    2,974.5402 (1.0)      986760          16
22 | test_lru_hit          0.0005 (1.72)     0.0034 (1.91)     0.0006 (1.69)     0.0000 (1.33)     0.0006 (1.69)     0.0000 (1.55)   15094;24408    1,763.8459 (0.59)     923173          10
23 | test_lru_miss         0.7248 (>1000.0)  1.0644 (594.54)   0.7414 (>1000.0)  0.0093 (397.57)   0.7402 (>1000.0)  0.0092 (>1000.0)   1056;70        1.3489 (0.00)       6911           1
24 | test_sieve_miss       0.7313 (>1000.0)  1.5177 (847.71)   0.7703 (>1000.0)  0.0262 (>1000.0)  0.7678 (>1000.0)  0.0185 (>1000.0)    347;33        1.2981 (0.00)       6863           1
25 | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
26 | ```
27 | 
28 | Cache hits are roughly 1.7x faster with SIEVE eviction than with LRU in the
29 | run above, mainly because SIEVE doesn't take a lock on the hit path.
30 | 
31 | ## References
32 | 
33 | - [Official SIEVE website](https://cachemon.github.io/SIEVE-website/)
34 | - [NSDI 24 Paper](https://www.usenix.org/conference/nsdi24/presentation/zhang-yazhuo)
35 | - [Why aren't we SIEVE'ing?](https://brooker.co.za/blog/2023/12/15/sieve.html)
36 | 


--------------------------------------------------------------------------------
/bench/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
# Force pytest-benchmark to report timings in milliseconds instead of
# letting it pick a unit per run.
def pytest_benchmark_scale_unit(config, unit, benchmarks, best, worst, sort):
    """Choose the display scale for a pytest-benchmark result column.

    Only ``unit`` is inspected; the remaining parameters are part of the hook
    signature and are ignored here.

    Returns:
        ``("m", 1000)`` (the "milli" prefix and its multiplier) when *unit*
        is ``"seconds"``; ``None`` for any other unit, so this hook
        expresses no preference for it.
    """
    if unit == "seconds":
        return "m", 1000
    return None
6 | 


--------------------------------------------------------------------------------
/bench/test_sieve.py:
--------------------------------------------------------------------------------
 1 | from sieve.sieve import sieve_cache
 2 | from sieve.lru import lru_cache
 3 | 
 4 | @sieve_cache(maxsize=128)
 5 | def func_sieve(obj):
 6 |     return obj
 7 | 
 8 | @lru_cache(maxsize=128)
 9 | def func_lru(obj):
10 |     return obj
11 | 
def sieve_cache_test_hit():
    """Call ``func_sieve`` with the fixed key 1.

    The key never changes, so repeated calls are expected to be answered
    from the cache once the first call has populated it.
    """
    func_sieve(1)
14 | 
def test_sieve_hit(benchmark):
    """Benchmark repeated SIEVE-cache lookups of a single key."""
    benchmark(sieve_cache_test_hit)
17 | 
def lru_cache_test_hit():
    """Call ``func_lru`` with the fixed key 1.

    The key never changes, so repeated calls are expected to be answered
    from the cache once the first call has populated it.
    """
    func_lru(1)
20 | 
def test_lru_hit(benchmark):
    """Benchmark repeated LRU-cache lookups of a single key."""
    benchmark(lru_cache_test_hit)
23 | 
def sieve_cache_test_miss():
    """Request the keys 0..999 in order from the 128-entry SIEVE cache.

    The key range is far larger than the cache, so the sweep keeps forcing
    insertions and evictions.
    """
    for key in range(1000):
        func_sieve(key)
27 | 
def test_sieve_miss(benchmark):
    """Benchmark a 1000-key sweep through the SIEVE cache."""
    benchmark(sieve_cache_test_miss)
30 | 
# NOTE: a second, byte-identical definition of lru_cache_test_hit used to sit
# here; it only shadowed the one defined earlier in this module and has been
# removed.
def lru_cache_test_miss():
    """Request the keys 0..999 in order from the 128-entry LRU cache.

    The key range is far larger than the cache, so the sweep keeps forcing
    insertions and evictions.
    """
    for key in range(1000):
        func_lru(key)
37 | 
def test_lru_miss(benchmark):
    """Benchmark a 1000-key sweep through the LRU cache."""
    benchmark(lru_cache_test_miss)
40 | 


--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
  2 | 
  3 | [[package]]
  4 | name = "colorama"
  5 | version = "0.4.6"
  6 | description = "Cross-platform colored terminal text."
  7 | optional = false
  8 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
  9 | files = [
 10 |     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
 11 |     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 12 | ]
 13 | 
 14 | [[package]]
 15 | name = "exceptiongroup"
 16 | version = "1.2.0"
 17 | description = "Backport of PEP 654 (exception groups)"
 18 | optional = false
 19 | python-versions = ">=3.7"
 20 | files = [
 21 |     {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"},
 22 |     {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"},
 23 | ]
 24 | 
 25 | [package.extras]
 26 | test = ["pytest (>=6)"]
 27 | 
 28 | [[package]]
 29 | name = "iniconfig"
 30 | version = "2.0.0"
 31 | description = "brain-dead simple config-ini parsing"
 32 | optional = false
 33 | python-versions = ">=3.7"
 34 | files = [
 35 |     {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
 36 |     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 37 | ]
 38 | 
 39 | [[package]]
 40 | name = "packaging"
 41 | version = "23.2"
 42 | description = "Core utilities for Python packages"
 43 | optional = false
 44 | python-versions = ">=3.7"
 45 | files = [
 46 |     {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
 47 |     {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
 48 | ]
 49 | 
 50 | [[package]]
 51 | name = "pluggy"
 52 | version = "1.3.0"
 53 | description = "plugin and hook calling mechanisms for python"
 54 | optional = false
 55 | python-versions = ">=3.8"
 56 | files = [
 57 |     {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
 58 |     {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
 59 | ]
 60 | 
 61 | [package.extras]
 62 | dev = ["pre-commit", "tox"]
 63 | testing = ["pytest", "pytest-benchmark"]
 64 | 
 65 | [[package]]
 66 | name = "py-cpuinfo"
 67 | version = "9.0.0"
 68 | description = "Get CPU info with pure Python"
 69 | optional = false
 70 | python-versions = "*"
 71 | files = [
 72 |     {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"},
 73 |     {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"},
 74 | ]
 75 | 
 76 | [[package]]
 77 | name = "pytest"
 78 | version = "7.4.4"
 79 | description = "pytest: simple powerful testing with Python"
 80 | optional = false
 81 | python-versions = ">=3.7"
 82 | files = [
 83 |     {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
 84 |     {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
 85 | ]
 86 | 
 87 | [package.dependencies]
 88 | colorama = {version = "*", markers = "sys_platform == \"win32\""}
 89 | exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
 90 | iniconfig = "*"
 91 | packaging = "*"
 92 | pluggy = ">=0.12,<2.0"
 93 | tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
 94 | 
 95 | [package.extras]
 96 | testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
 97 | 
 98 | [[package]]
 99 | name = "pytest-benchmark"
100 | version = "4.0.0"
101 | description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer."
102 | optional = false
103 | python-versions = ">=3.7"
104 | files = [
105 |     {file = "pytest-benchmark-4.0.0.tar.gz", hash = "sha256:fb0785b83efe599a6a956361c0691ae1dbb5318018561af10f3e915caa0048d1"},
106 |     {file = "pytest_benchmark-4.0.0-py3-none-any.whl", hash = "sha256:fdb7db64e31c8b277dff9850d2a2556d8b60bcb0ea6524e36e28ffd7c87f71d6"},
107 | ]
108 | 
109 | [package.dependencies]
110 | py-cpuinfo = "*"
111 | pytest = ">=3.8"
112 | 
113 | [package.extras]
114 | aspect = ["aspectlib"]
115 | elasticsearch = ["elasticsearch"]
116 | histogram = ["pygal", "pygaljs"]
117 | 
118 | [[package]]
119 | name = "tomli"
120 | version = "2.0.1"
121 | description = "A lil' TOML parser"
122 | optional = false
123 | python-versions = ">=3.7"
124 | files = [
125 |     {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
126 |     {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
127 | ]
128 | 
129 | [metadata]
130 | lock-version = "2.0"
131 | python-versions = "^3.10"
132 | content-hash = "a1c7d9386592eeb6b7424b3deb6169f02847b94c5f1e5fc3c35d2be24a920273"
133 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "sieve"
 3 | version = "0.1.0"
 4 | description = ""
 5 | authors = ["Matt Fleming <matt@nyrk.io>"]
 6 | readme = "README.md"
 7 | 
 8 | [tool.poetry.dependencies]
 9 | python = "^3.10"
10 | pytest = "^7.4.4"
11 | pytest-benchmark = "^4.0.0"
12 | 
13 | 
14 | [build-system]
15 | requires = ["poetry-core"]
16 | build-backend = "poetry.core.masonry.api"
17 | 


--------------------------------------------------------------------------------
/sieve/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mfleming/sieve/fe9d7275e76550d72560b0988830beb305329505/sieve/__init__.py


--------------------------------------------------------------------------------
/sieve/lru.py:
--------------------------------------------------------------------------------
 1 | from functools import _make_key
 2 | from _thread import RLock
 3 | 
 4 | def _my_lru_wrapper(user_func, maxsize):
 5 |     cache = {}
 6 |     root = []
 7 |     full = None
 8 |     root[:] = [
 9 |         root, # PREV
10 |         root, # NEXT
11 |         None, # KEY
12 |         None, # RESULT
13 |     ]
14 |     PREV, NEXT, KEY, RESULT = 0, 1, 2, 3
15 | 
16 |     make_key = _make_key
17 |     cache_get = cache.get
18 |     cache_len = cache.__len__ 
19 |     lock = RLock()
20 | 
21 |     def wrapper(*args, **kwargs):
22 |         nonlocal root, full
23 |         key = make_key(args, kwargs, typed=False)
24 |         with lock:
25 |             link = cache_get(key) 
26 |             if link is not None:
27 |                 # Move link to front of circular doubly linked list
28 |                 link_prev, link_next, key, result = link
29 |                 link_prev[NEXT] = link_next
30 |                 link_next[PREV] = link_prev
31 |                 last = root[PREV]
32 |                 last[NEXT] = root[PREV] = link
33 |                 link[PREV] = last
34 |                 link[NEXT] = root
35 |                 return result
36 | 
37 |         result = user_func(*args, **kwargs)
38 |         with lock:
39 |             # Cache miss
40 |             if key in cache:
41 |                 # another thread might have already computed the value
42 |                 pass
43 |             elif full:
44 |                 # Insert new key at root (which turns from a root node
45 |                 # into a regular one) and convert an existing node into
46 |                 # a root node (and "evict" it).
47 |                 #
48 |                 # All this dance is required to limit updates to just the
49 |                 # KEY and RESULT fields and avoid updating PREV/NEXT links.
50 |                 oldroot = root
51 |                 oldroot[KEY] = key
52 |                 oldroot[RESULT] = result
53 |                 root = oldroot[NEXT]
54 |                 oldkey = root[KEY]
55 |                 oldresult = root[RESULT]
56 |                 root[KEY] = root[RESULT] = None
57 |                 del cache[oldkey]
58 |                 cache[key] = oldroot
59 |             else:
60 |                 # Insert at end head of linked list
61 |                 last = root[PREV]
62 |                 new_last = [last, root, key, result]
63 |                 last[NEXT] = root[PREV] = cache[key] = new_last
64 |                 full = (cache_len() >= maxsize)
65 | 
66 |         return result
67 |     return wrapper
68 | 
69 | def lru_cache(maxsize=128):
70 |     def wrapper(user_func):
71 |         w = _my_lru_wrapper(user_func, maxsize)
72 |         return w
73 |     return wrapper
74 | 


--------------------------------------------------------------------------------
/sieve/sieve.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- encoding: utf-8 -*-
 3 | #
 4 | # An implementation of the SIEVE cache eviction algorithm. SIEVE has two
 5 | # desirable properties:
 6 | #
 7 | # - Lazy promotion
 8 | # - Quick demotion
 9 | #
10 | # One of the really nice attributes of SIEVE is that it doesn't require
11 | # any locking for cache hits because, unlike LRU, objects do not change
12 | # position. This alone contributes to a 2x increase in throughput
13 | # compared with Python's lru_cache().
14 | 
15 | from functools import _make_key
16 | from _thread import RLock
17 | 
def _sieve_wrapper(user_func, maxsize):
    """Return a memoizing wrapper around *user_func* that evicts with SIEVE.

    Entries are kept in a circular doubly linked list threaded through a
    ``tail`` sentinel.  New entries are linked in at ``tail[NEXT]`` (the head
    of the queue), so ``tail[PREV]`` is the oldest entry and following PREV
    pointers walks from oldest to newest.  A ``hand`` remembers where the
    previous eviction sweep stopped.

    A cache hit only sets the entry's VISITED flag and never relinks it, so
    the hit path runs without taking the lock.  Only misses, which insert
    and possibly evict, are serialized.

    Args:
        user_func: The callable whose results are cached.  Its arguments
            must be hashable because they are turned into a dictionary key.
        maxsize: Number of entries after which a miss evicts one existing
            entry before inserting the new one.

    Returns:
        A function accepting the same arguments as *user_func*.
    """
    # Field indexes of a link, which is a plain 5-element list.
    PREV, NEXT, KEY, RESULT, VISITED = 0, 1, 2, 3, 4

    cache = {}
    full = None
    tail = []
    # An empty ring: the sentinel points at itself in both directions.
    tail[:] = [
        tail,  # PREV
        tail,  # NEXT
        None,  # KEY
        None,  # RESULT
        None,  # VISITED
    ]

    make_key = _make_key
    cache_get = cache.get
    cache_len = cache.__len__
    lock = RLock()

    # Starting on the sentinel makes the first sweep begin at the oldest entry.
    hand = tail

    def wrapper(*args, **kwargs):
        nonlocal full, hand
        key = make_key(args, kwargs, typed=False)
        link = cache_get(key)
        if link is not None:
            # Cache hit: just mark the entry; its position does not change.
            link[VISITED] = True
            return link[RESULT]

        # Cache miss: compute outside the lock so a slow user_func does not
        # block other callers.
        result = user_func(*args, **kwargs)
        with lock:
            if key in cache:
                # Another thread might have already computed the value
                # while we were running user_func; keep its entry.
                return result

            if full:
                # Resume the sweep where the previous eviction stopped.  The
                # sentinel is recognised by its KEY being None; landing on it
                # wraps the hand around to the oldest entry.
                victim = hand
                if victim[KEY] is None:
                    victim = tail[PREV]

                # Entries hit since the hand last passed them are retained:
                # clear the flag and keep walking towards the newest entry.
                while victim[VISITED]:
                    victim[VISITED] = False
                    victim = victim[PREV]
                    if victim[KEY] is None:
                        victim = tail[PREV]

                # Unlink the victim and park the hand on its newer neighbour.
                hand = victim[PREV]
                hand[NEXT] = victim[NEXT]
                victim[NEXT][PREV] = hand
                del cache[victim[KEY]]

            # Insert at the head of the list.  SIEVE inserts new entries as
            # *not* visited; only a later hit sets the flag, so an entry that
            # is never requested again is evicted on the hand's first pass.
            head = tail[NEXT]
            new_head = [tail, head, key, result, False]
            head[PREV] = tail[NEXT] = cache[key] = new_head
            full = (cache_len() >= maxsize)

        return result
    return wrapper


def sieve_cache(maxsize=128):
    """Decorator factory: memoize a function with a SIEVE-evicting cache.

    Args:
        maxsize: Number of entries the cache holds before a miss starts
            evicting an existing entry.

    Returns:
        A decorator.  The decorated function keeps the original's
        ``__name__``, ``__doc__`` and other metadata and exposes the
        undecorated callable as ``__wrapped__``.
    """
    from functools import update_wrapper

    def wrapper(user_func):
        return update_wrapper(_sieve_wrapper(user_func, maxsize), user_func)
    return wrapper
90 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mfleming/sieve/fe9d7275e76550d72560b0988830beb305329505/tests/__init__.py


--------------------------------------------------------------------------------