├── README.md ├── bench ├── conftest.py └── test_sieve.py ├── poetry.lock ├── pyproject.toml ├── sieve ├── __init__.py ├── lru.py └── sieve.py └── tests └── __init__.py /README.md: -------------------------------------------------------------------------------- 1 | # Sieve implementation in Python 2 | 3 | [SIEVE](https://cachemon.github.io/SIEVE-website/) is an eviction algorithm 4 | that is simpler and more efficient than LRU. This project implements a cache 5 | using a Python decorator that uses SIEVE for eviction. The linked list 6 | implementation is taken directly from the Python standard library which makes 7 | it easy to compare Python's LRU and SIEVE. 8 | 9 | You can use a SIEVE-backed cache in your code with the `@sieve_cache` 10 | decorator. 11 | 12 | ## Benchmarks 13 | 14 | Here's a comparison of SIEVE against the pure Python LRU implementation from 15 | the Python standard library. 16 | 17 | ``` 18 | --------------------------------------------------------------------------------- benchmark: 4 tests --------------------------------------------------------------------------------- 19 | Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS (Kops/s) Rounds Iterations 20 | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 21 | test_sieve_hit 0.0003 (1.0) 0.0018 (1.0) 0.0003 (1.0) 0.0000 (1.0) 0.0003 (1.0) 0.0000 (1.0) 13072;28199 2,974.5402 (1.0) 986760 16 22 | test_lru_hit 0.0005 (1.72) 0.0034 (1.91) 0.0006 (1.69) 0.0000 (1.33) 0.0006 (1.69) 0.0000 (1.55) 15094;24408 1,763.8459 (0.59) 923173 10 23 | test_lru_miss 0.7248 (>1000.0) 1.0644 (594.54) 0.7414 (>1000.0) 0.0093 (397.57) 0.7402 (>1000.0) 0.0092 (>1000.0) 1056;70 1.3489 (0.00) 6911 1 24 | test_sieve_miss 0.7313 (>1000.0) 1.5177 (847.71) 0.7703 (>1000.0) 0.0262 (>1000.0) 0.7678 (>1000.0) 0.0185 (>1000.0) 347;33 1.2981 (0.00) 6863 1 25 | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 26 | ``` 27 | 28 | Cache hits are around 2x faster with SIEVE eviction than LRU mainly because of 29 | the fact that SIEVE doesn't take a lock on the hit path. 30 | 31 | ## References 32 | 33 | - [Official SIEVE website](https://cachemon.github.io/SIEVE-website/) 34 | - [NSDI 24 Paper](https://cachemon.github.io/SIEVE-website/) 35 | - [Why aren't we SIEVE'ing?](https://brooker.co.za/blog/2023/12/15/sieve.html) 36 | -------------------------------------------------------------------------------- /bench/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | # Force pytest benchmark to use seconds 3 | def pytest_benchmark_scale_unit(config, unit, benchmarks, best, worst, sort): 4 | if unit == "seconds": 5 | return "m", 1000 6 | -------------------------------------------------------------------------------- /bench/test_sieve.py: -------------------------------------------------------------------------------- 1 | from sieve.sieve import sieve_cache 2 | from sieve.lru import lru_cache 3 | 4 | @sieve_cache(maxsize=128) 5 | def func_sieve(obj): 6 | return obj 7 | 8 | @lru_cache(maxsize=128) 9 | def func_lru(obj): 10 | return obj 11 | 12 | def sieve_cache_test_hit(): 13 | func_sieve(1) 14 | 15 | def test_sieve_hit(benchmark): 16 | benchmark(sieve_cache_test_hit) 17 | 18 | def lru_cache_test_hit(): 19 | func_lru(1) 20 | 21 | def test_lru_hit(benchmark): 22 | benchmark(lru_cache_test_hit) 23 | 24 | def sieve_cache_test_miss(): 25 | for i in range(1000): 26 | func_sieve(i) 27 | 28 | def test_sieve_miss(benchmark): 29 | benchmark(sieve_cache_test_miss) 30 | 31 | def lru_cache_test_hit(): 32 | func_lru(1) 33 | 34 | def lru_cache_test_miss(): 35 | for i in range(1000): 36 | func_lru(i) 37 | 38 | def test_lru_miss(benchmark): 39 | benchmark(lru_cache_test_miss) 40 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 2 | 3 | [[package]] 4 | name = "colorama" 5 | version = "0.4.6" 6 | description = "Cross-platform colored terminal text." 7 | optional = false 8 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 9 | files = [ 10 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 11 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 12 | ] 13 | 14 | [[package]] 15 | name = "exceptiongroup" 16 | version = "1.2.0" 17 | description = "Backport of PEP 654 (exception groups)" 18 | optional = false 19 | python-versions = ">=3.7" 20 | files = [ 21 | {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, 22 | {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, 23 | ] 24 | 25 | [package.extras] 26 | test = ["pytest (>=6)"] 27 | 28 | [[package]] 29 | name = "iniconfig" 30 | version = "2.0.0" 31 | description = "brain-dead simple config-ini parsing" 32 | optional = false 33 | python-versions = ">=3.7" 34 | files = [ 35 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 36 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 37 | ] 38 | 39 | [[package]] 40 | name = "packaging" 41 | version = "23.2" 42 | description = "Core utilities for Python packages" 43 | optional = false 44 | python-versions = ">=3.7" 45 | files = [ 46 | {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, 47 | {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, 48 | ] 49 | 50 | [[package]] 51 | name = "pluggy" 52 | version = "1.3.0" 53 | description = "plugin and hook calling mechanisms for python" 54 | optional = false 55 | python-versions = ">=3.8" 56 | files = [ 57 | {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, 58 | {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, 59 | ] 60 | 61 | [package.extras] 62 | dev = ["pre-commit", "tox"] 63 | testing = ["pytest", "pytest-benchmark"] 64 | 65 | [[package]] 66 | name = "py-cpuinfo" 67 | version = "9.0.0" 68 | description = "Get CPU info with pure Python" 69 | optional = false 70 | python-versions = "*" 71 | files = [ 72 | {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"}, 73 | {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"}, 74 | ] 75 | 76 | [[package]] 77 | name = "pytest" 78 | version = "7.4.4" 79 | description = "pytest: simple powerful testing with Python" 80 | optional = false 81 | python-versions = ">=3.7" 82 | files = [ 83 | {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, 84 | {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, 85 | ] 86 | 87 | [package.dependencies] 88 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 89 | exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} 90 | iniconfig = "*" 91 | packaging = "*" 92 | pluggy = ">=0.12,<2.0" 93 | tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} 94 | 95 | [package.extras] 96 | testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] 97 | 98 | [[package]] 99 | name = "pytest-benchmark" 100 | version = "4.0.0" 101 | description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." 102 | optional = false 103 | python-versions = ">=3.7" 104 | files = [ 105 | {file = "pytest-benchmark-4.0.0.tar.gz", hash = "sha256:fb0785b83efe599a6a956361c0691ae1dbb5318018561af10f3e915caa0048d1"}, 106 | {file = "pytest_benchmark-4.0.0-py3-none-any.whl", hash = "sha256:fdb7db64e31c8b277dff9850d2a2556d8b60bcb0ea6524e36e28ffd7c87f71d6"}, 107 | ] 108 | 109 | [package.dependencies] 110 | py-cpuinfo = "*" 111 | pytest = ">=3.8" 112 | 113 | [package.extras] 114 | aspect = ["aspectlib"] 115 | elasticsearch = ["elasticsearch"] 116 | histogram = ["pygal", "pygaljs"] 117 | 118 | [[package]] 119 | name = "tomli" 120 | version = "2.0.1" 121 | description = "A lil' TOML parser" 122 | optional = false 123 | python-versions = ">=3.7" 124 | files = [ 125 | {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, 126 | {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 127 | ] 128 | 129 | [metadata] 130 | lock-version = "2.0" 131 | python-versions = "^3.10" 132 | content-hash = "a1c7d9386592eeb6b7424b3deb6169f02847b94c5f1e5fc3c35d2be24a920273" 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "sieve" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Matt Fleming "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.10" 10 | pytest = "^7.4.4" 11 | pytest-benchmark = "^4.0.0" 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /sieve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mfleming/sieve/fe9d7275e76550d72560b0988830beb305329505/sieve/__init__.py -------------------------------------------------------------------------------- /sieve/lru.py: -------------------------------------------------------------------------------- 1 | from functools import _make_key 2 | from _thread import RLock 3 | 4 | def _my_lru_wrapper(user_func, maxsize): 5 | cache = {} 6 | root = [] 7 | full = None 8 | root[:] = [ 9 | root, # PREV 10 | root, # NEXT 11 | None, # KEY 12 | None, # RESULT 13 | ] 14 | PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 15 | 16 | make_key = _make_key 17 | cache_get = cache.get 18 | cache_len = cache.__len__ 19 | lock = RLock() 20 | 21 | def wrapper(*args, **kwargs): 22 | nonlocal root, full 23 | key = make_key(args, kwargs, typed=False) 24 | with lock: 25 | link = cache_get(key) 26 | if link is not None: 27 | # Move link to front of circular doubly linked list 28 | link_prev, link_next, key, result = link 29 | link_prev[NEXT] = link_next 30 | link_next[PREV] = link_prev 31 | last = root[PREV] 32 | last[NEXT] = root[PREV] = link 33 | link[PREV] = last 34 | link[NEXT] = root 35 | return result 36 | 37 | result = user_func(*args, **kwargs) 38 | with lock: 39 | # Cache miss 40 | if key in cache: 41 | # another thread might have already computed the value 42 | pass 43 | elif full: 44 | # Insert new key at root (which turns from a root node 45 | # into a regular one) and convert an existing node into 46 | # a root node (and "evict" it). 47 | # 48 | # All this dance is required to limit updates to just the 49 | # KEY and RESULT fields and avoid updating PREV/NEXT links. 50 | oldroot = root 51 | oldroot[KEY] = key 52 | oldroot[RESULT] = result 53 | root = oldroot[NEXT] 54 | oldkey = root[KEY] 55 | oldresult = root[RESULT] 56 | root[KEY] = root[RESULT] = None 57 | del cache[oldkey] 58 | cache[key] = oldroot 59 | else: 60 | # Insert at end head of linked list 61 | last = root[PREV] 62 | new_last = [last, root, key, result] 63 | last[NEXT] = root[PREV] = cache[key] = new_last 64 | full = (cache_len() >= maxsize) 65 | 66 | return result 67 | return wrapper 68 | 69 | def lru_cache(maxsize=128): 70 | def wrapper(user_func): 71 | w = _my_lru_wrapper(user_func, maxsize) 72 | return w 73 | return wrapper 74 | -------------------------------------------------------------------------------- /sieve/sieve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # 4 | # An implementation of the SIEVE cache eviction algorithm. SIEVE has two 5 | # desirable properties: 6 | # 7 | # - Lazy promotion 8 | # - Quick demotion 9 | # 10 | # One of the really nice attributes of SIEVE is that it doesn't require 11 | # any locking for cache hits because, unlike LRU, objects do not change 12 | # position. This alone contributes to a 2x increase in throughput 13 | # compared with Python's lru_cache(). 14 | 15 | from functools import _make_key 16 | from _thread import RLock 17 | 18 | def _sieve_wrapper(user_func, maxsize): 19 | cache = {} 20 | tail = [] 21 | full = None 22 | tail[:] = [ 23 | tail, # PREV 24 | tail, # NEXT 25 | None, # KEY 26 | None, # RESULT 27 | None, # VISITED 28 | ] 29 | PREV, NEXT, KEY, RESULT, VISITED = 0, 1, 2, 3, 4 30 | 31 | make_key = _make_key 32 | cache_get = cache.get 33 | cache_len = cache.__len__ 34 | lock = RLock() 35 | 36 | hand = tail 37 | 38 | def wrapper(*args, **kwargs): 39 | nonlocal tail, full, hand 40 | key = make_key(args, kwargs, typed=False) 41 | link = cache_get(key) 42 | if link is not None: 43 | link[VISITED] = True 44 | return link[RESULT] 45 | 46 | result = user_func(*args, **kwargs) 47 | with lock: 48 | # Cache miss 49 | if key in cache: 50 | # another thread might have already computed the value 51 | pass 52 | elif full: 53 | o = hand 54 | if o[KEY] is None: 55 | o = tail[PREV] 56 | 57 | while o[VISITED]: 58 | o[VISITED] = False 59 | o = o[PREV] 60 | if o[KEY] is None: 61 | o = tail[PREV] 62 | 63 | # Evict o 64 | hand = o[PREV] 65 | oldkey = o[KEY] 66 | hand[NEXT] = o[NEXT] 67 | o[NEXT][PREV] = hand 68 | del cache[oldkey] 69 | 70 | # Insert at head of linked list 71 | head = tail[NEXT] 72 | new_head = [tail, head, key, result, True] 73 | head[PREV] = tail[NEXT] = cache[key] = new_head 74 | else: 75 | # Insert at head of linked list 76 | head = tail[NEXT] 77 | new_head = [tail, head, key, result, True] 78 | head[PREV] = tail[NEXT] = cache[key] = new_head 79 | full = (cache_len() >= maxsize) 80 | 81 | 82 | return result 83 | return wrapper 84 | 85 | def sieve_cache(maxsize=128): 86 | def wrapper(user_func): 87 | w = _sieve_wrapper(user_func, maxsize) 88 | return w 89 | return wrapper 90 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mfleming/sieve/fe9d7275e76550d72560b0988830beb305329505/tests/__init__.py --------------------------------------------------------------------------------