├── .github
│   └── workflows
│       └── pythonpackage.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── benchmark.py
├── lmdbm
│   ├── __init__.py
│   ├── lmdbm.py
│   └── py.typed
├── pyproject.toml
├── readme.md
├── tests
│   └── test_lmdbm.py
└── tox.ini

/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
name: Python package

on:
  pull_request:
  push:
    branches:
      - master

jobs:

  lint:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.8
      - name: Install dependencies
        run: |
          python -m pip install -U pip wheel
          python -m pip install -U black isort flake8 bandit[toml]
      - run: python -m black . --check
      - run: python -m isort . --check-only
      - run: python -m flake8 .
      - run: python -m bandit . --recursive -c pyproject.toml

  test:
    needs: lint
    strategy:
      matrix:
        os: [ubuntu-20.04, macos-13, windows-2019]
        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
        exclude:
          - os: windows-2019
            python-version: '3.12'
          - os: windows-2019
            python-version: '3.13'
    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install -U pip wheel
          python -m pip install -e .[test]
      - name: Run tests
        run: |
          python -m unittest discover -s tests

  deploy:
    needs: test
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.8
      - name: Build dists
        run: |
          python -m pip install -U pip wheel build
          python -m build
      - name: Publish a Python distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.pypi_password }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.bak

__pycache__/
*.py[cod]
*.egg-info
build/
dist/

bench-dbs/
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-added-large-files
      - id: check-case-conflict
      - id: check-json
      - id: check-merge-conflict
      - id: check-symlinks
      - id: check-toml
      - id: check-yaml
      - id: debug-statements
      - id: detect-private-key
      - id: end-of-file-fixer
      - id: mixed-line-ending
        args: [--fix=no]
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
  - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
    rev: v2.13.0
    hooks:
      - id: pretty-format-yaml
        args: [--autofix]
  - repo: https://github.com/tox-dev/pyproject-fmt
    rev: 2.2.4
    hooks:
      - id: pyproject-fmt
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.16.0
    hooks:
      - id: pyupgrade
        args: [--py37-plus]
  - repo: https://github.com/psf/black-pre-commit-mirror
    rev: 24.8.0
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
  - repo: https://github.com/PyCQA/bandit
    rev: 1.7.10
    hooks:
      - id: bandit
        args: [-c, pyproject.toml]
        additional_dependencies: ['.[toml]']
  - repo: https://github.com/pycqa/flake8
    rev: 7.1.1
    hooks:
      - id: flake8
        additional_dependencies:
          - flake8-annotations
          - flake8-bugbear
          - flake8-eradicate
          - flake8-mutable
          - flake8-simplify
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.13.0
    hooks:
      - id: mypy
        args: [--ignore-missing-imports, --install-types, --non-interactive]
        additional_dependencies:
          - lmdb==1.4.1
          - typing-extensions>=4.0.0
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.5.0
    hooks:
      - id: detect-secrets
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
ARG PYTHON=3.13
FROM python:$PYTHON-slim-bookworm

ARG LMDBM=0.0.6
RUN apt update && apt -y install build-essential && \
    pip install lmdbm==$LMDBM pytablewriter genutility rich && \
    apt purge --auto-remove --yes build-essential && apt clean && \
    rm --recursive --force /var/lib/apt/lists/* /tmp/* /var/tmp/*

ENV PYTHONUNBUFFERED=1
COPY benchmark.py /
ENTRYPOINT [ "python", "benchmark.py" ]
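
# Hedged usage sketch (the image tag and mount point are assumptions, not part of the repo):
#   docker build --build-arg LMDBM=0.0.6 -t lmdbm-bench .
#   docker run --rm -v "$PWD:/out" lmdbm-bench --outfile /out/benchmarks.md
# Engines whose modules are not installed in the image are skipped by benchmark.py.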
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
ISC License

Copyright (c) 2021 Dobatymo

Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
import dbm.dumb
import json
import os
import os.path
import pathlib
import pickle  # nosec
import shutil
import sys
from abc import ABC, abstractmethod
from collections import defaultdict
from contextlib import closing, suppress
from importlib import import_module
from random import randrange
from typing import Any, Callable, ContextManager, DefaultDict, Dict, Iterable, List, Sequence, TextIO

from genutility.iter import batch
from genutility.time import MeasureTime
from pytablewriter import MarkdownTableWriter

import lmdbm
import lmdbm.lmdbm

ResultsDict = Dict[int, Dict[str, Dict[str, float]]]

# Do not continue the benchmark if the current step takes more than MAX_TIME seconds
MAX_TIME = 10
BATCH_SIZE = 10000


class BaseBenchmark(ABC):
    def __init__(self, db_tpl, db_type, db_module):
        self.available = self.load_module(db_module)
        self.batch_available = True
        self.path = db_tpl.format(db_type)
        self.name = db_type
        self.write = -1
        self.batch = -1
        self.read = -1
        self.combined = -1

    def load_module(self, name):
        """Load module and ignore benchmark if module is unavailable"""

        if name is None or name in sys.modules:
            return True

        try:
            globals()[name.split(".")[-1]] = import_module(name)
            print(f"Loaded module {name}")
        except ImportError:
            return False
        return True

    @abstractmethod
    def open(self) -> ContextManager:
        """Open the database"""

        pass

    def commit(self) -> None:  # noqa: B027
        """Commit the changes, if it is not done automatically"""

        pass

    def purge(self) -> None:
        """Remove the database file(s)"""

        with suppress(FileNotFoundError):
            os.unlink(self.path)

    def encode(self, value: Any) -> Any:
        """Convert Python objects to database-capable ones"""

        return value

    def decode(self, value: Any) -> Any:
        """Convert database values to Python objects"""

        return value

    def measure_writes(self, N: int) -> None:
        with MeasureTime() as t, self.open() as db:
            for key, value in self.generate_data(N):
                if t.get() > MAX_TIME:
                    break
                db[key] = self.encode(value)
            self.commit()
        if t.get() < MAX_TIME:
            self.write = t.get()
        self.print_time("write", N, t)

    def measure_batch(self, N: int) -> None:
        with MeasureTime() as t, self.open() as db:
            for pairs in batch(self.generate_data(N), BATCH_SIZE):
                if t.get() > MAX_TIME:
                    break
                db.update({key: self.encode(value) for key, value in pairs})
            self.commit()
        if t.get() < MAX_TIME:
            self.batch = t.get()
        self.print_time("batch write", N, t)

    def measure_reads(self, N: int) -> None:
        with MeasureTime() as t, self.open() as db:
            for key in self.random_keys(N, N):
                if t.get() > MAX_TIME:
                    break
                self.decode(db[key])
        if t.get() < MAX_TIME:
            self.read = t.get()
        self.print_time("read", N, t)

    def measure_combined(self, write=1, read=10, repeat=100) -> None:
        # per iteration: write `write` items, commit, then read back `read` random keys
        # (parameter names were previously swapped relative to what they controlled)
        with MeasureTime() as t, self.open() as db:
            for _ in range(repeat):
                if t.get() > MAX_TIME:
                    break
                for key, value in self.generate_data(write):
                    db[key] = self.encode(value)
                self.commit()
                for key in self.random_keys(read, read):
                    self.decode(db[key])
        if t.get() < MAX_TIME:
            self.combined = t.get()
        self.print_time("combined", (write + read) * repeat, t)

    def database_is_built(self):
        return self.batch >= 0 or self.write >= 0

    def print_time(self, measure_type, numbers, t):
        print(f"{self.name:<20s} {measure_type:<15s} {str(numbers):<10s} {t.get():10.5f}")

    @staticmethod
    def generate_data(size):
        for i in range(size):
            yield "key_" + str(i), {"some": "object_" + str(i)}

    @staticmethod
    def random_keys(num, size):
        for _ in range(num):
            yield "key_" + str(randrange(0, size))  # nosec
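

# A new engine is benchmarked by subclassing one of the base classes above.
# Hypothetical sketch (wiredtiger is mentioned in the readme but not implemented here):
#
# class WiredTigerBenchmark(JsonEncodedBenchmark):
#     def __init__(self, db_tpl):
#         super().__init__(db_tpl, "wiredtiger", "wiredtiger")
#
# It would also need to be appended to BENCHMARK_CLASSES below.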


class JsonEncodedBenchmark(BaseBenchmark):
    def encode(self, value):
        return json.dumps(value)

    def decode(self, value):
        return json.loads(value)


class DummyPickleBenchmark(BaseBenchmark):
    class MyDict(dict):
        def close(self):
            pass

    def __init__(self, db_tpl):
        self.native_dict = None
        super().__init__(db_tpl, "dummypickle", None)

    def open(self):
        if pathlib.Path(self.path).exists():
            with open(self.path, "rb") as f:
                self.native_dict = self.MyDict(pickle.load(f))  # nosec
        else:
            self.native_dict = self.MyDict()
        return closing(self.native_dict)

    def commit(self):
        tmp_file = self.path + ".tmp"
        with open(tmp_file, "wb") as f:
            pickle.dump(self.native_dict, f)
        shutil.move(tmp_file, self.path)


class DummyJsonBenchmark(BaseBenchmark):
    class MyDict(dict):
        def close(self):
            pass

    def __init__(self, db_tpl):
        self.native_dict = None
        super().__init__(db_tpl, "dummyjson", None)

    def open(self):
        if pathlib.Path(self.path).exists():
            with open(self.path) as f:
                self.native_dict = self.MyDict(json.load(f))
        else:
            self.native_dict = self.MyDict()
        return closing(self.native_dict)

    def commit(self):
        tmp_file = self.path + ".tmp"
        with open(tmp_file, "w") as f:
            json.dump(self.native_dict, f, ensure_ascii=False, check_circular=False, sort_keys=False)
        shutil.move(tmp_file, self.path)


class DumbDbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "dbm.dumb", "dbm.dumb")

    def open(self):
        return dbm.dumb.open(self.path, "c")

    def purge(self):
        with suppress(FileNotFoundError):
            os.unlink(self.path + ".dat")
        with suppress(FileNotFoundError):
            os.unlink(self.path + ".bak")
        with suppress(FileNotFoundError):
            os.unlink(self.path + ".dir")


class SemiDbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "semidbm", "semidbm")
        self.batch_available = False

    def open(self):
        return closing(semidbm.open(self.path, "c"))

    def purge(self):
        with suppress(FileNotFoundError):
            os.unlink(self.path + "/data")
        with suppress(FileNotFoundError):
            os.rmdir(self.path)


class LdbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "lmdbm", "lmdbm")

    def open(self):
        return lmdbm.Lmdb.open(self.path, "c")
    def purge(self):
        lmdbm.lmdbm.remove_lmdbm(self.path)


class PysosBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "pysos", "pysos")
        self.batch_available = False

    def open(self):
        return closing(pysos.Dict(self.path))


class SqliteAutocommitBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "sqlite-autocommit", "sqlitedict")

    def open(self):
        return sqlitedict.SqliteDict(self.path, autocommit=True)


class SqliteWalBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "sqlite-wal", "sqlitedict")

    def open(self):
        return sqlitedict.SqliteDict(self.path, autocommit=True, journal_mode="WAL")


class SqliteBatchBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "sqlite-batch", "sqlitedict")
        self.db = None

    def open(self):
        self.db = sqlitedict.SqliteDict(self.path, autocommit=False)
        return self.db

    def commit(self):
        self.db.commit()


class GnuDbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "dbm.gnu", "dbm.gnu")
        if self.available:
            self.gnu_dbm = dbm.gnu
        self.batch_available = False

    def open(self):
        return self.gnu_dbm.open(self.path, "c")


class ShelveBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "shelve", "shelve")

    def open(self):
        return shelve.open(self.path)  # nosec B301


class VedisBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "vedis", "vedis")

    def open(self):
        return vedis.Vedis(self.path)


class UnqliteBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "unqlite", "unqlite")

    def open(self):
        return unqlite.UnQLite(self.path)


class RocksdictBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "rocksdict", "rocksdict")
        self.batch_available = False

    def open(self):
        return closing(rocksdict.Rdict(self.path))

    def purge(self):
        rocksdict.Rdict.destroy(self.path)


BENCHMARK_CLASSES = [
    LdbmBenchmark,
    VedisBenchmark,
    UnqliteBenchmark,
    RocksdictBenchmark,
    GnuDbmBenchmark,
    ShelveBenchmark,
    SemiDbmBenchmark,
    PysosBenchmark,
    DumbDbmBenchmark,
    SqliteWalBenchmark,
    SqliteAutocommitBenchmark,
    SqliteBatchBenchmark,
    DummyPickleBenchmark,
    DummyJsonBenchmark,
]


def run_bench(N, db_tpl) -> Dict[str, Dict[str, float]]:
    benchmarks = [C(db_tpl) for C in BENCHMARK_CLASSES]

    for benchmark in benchmarks:
        if not benchmark.available:
            continue
        benchmark.purge()
        benchmark.measure_writes(N)
        if benchmark.batch_available:
            benchmark.purge()
            benchmark.measure_batch(N)
        if benchmark.database_is_built():
            benchmark.measure_reads(N)
            benchmark.measure_combined(write=1, read=10, repeat=100)

    ret: DefaultDict[str, Dict[str, float]] = defaultdict(dict)
    for benchmark in benchmarks:
        ret[benchmark.name]["read"] = benchmark.read
        ret[benchmark.name]["write"] = benchmark.write
        ret[benchmark.name]["batch"] = benchmark.batch
        ret[benchmark.name]["combined"] = benchmark.combined

    return ret


def bench(base: str, nums: Iterable[int]) -> ResultsDict:
    with suppress(FileExistsError):
        os.mkdir(base)

    ret = {}
    db_tpl = os.path.join(base, "test_{}.db")

    for num in nums:
        print("")
        ret[num] = run_bench(num, db_tpl)

    return ret


def write_markdown_table(stream: TextIO, results: ResultsDict, method: str):
    for v in results.values():
        headers = list(v.keys())
        break

    value_matrix = []
    for k, v in results.items():
        row = [str(k)]
        for h in headers:
            value = v[h].get(method)
            if value is None or value < 0:
                new_value = "-"
            else:
                new_value = format(value, ".04f")
            row.append(new_value)
        value_matrix.append(row)

    headers = ["items"] + headers

    writer = MarkdownTableWriter(table_name=method, headers=headers, value_matrix=value_matrix)
    writer.dump(stream, close_after_write=False)


def _check_same_keys(dicts: Sequence[dict]):
    assert len(dicts) >= 2

    for d in dicts[1:]:
        assert dicts[0].keys() == d.keys()


def merge_results(results: Sequence[ResultsDict], func: Callable = min) -> ResultsDict:
    out: ResultsDict = {}

    _check_same_keys(results)
    for key1 in results[0].keys():
        _check_same_keys([d[key1] for d in results])
        out.setdefault(key1, {})
        for key2 in results[0][key1].keys():
            _check_same_keys([d[key1][key2] for d in results])
            out[key1].setdefault(key2, {})
            for key3 in results[0][key1][key2].keys():
                out[key1][key2][key3] = func(d[key1][key2][key3] for d in results)

    return out
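

# Illustration with assumed values: with func=min, the fastest run wins per cell, e.g.
#   merge_results([{10: {"lmdbm": {"read": 0.5}}}, {10: {"lmdbm": {"read": 0.4}}}])
#   == {10: {"lmdbm": {"read": 0.4}}}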


if __name__ == "__main__":
    from argparse import ArgumentParser

    from genutility.rich import Progress
    from rich.progress import Progress as RichProgress

    parser = ArgumentParser()
    parser.add_argument("--outpath", default="bench-dbs", help="Directory to store temporary benchmarking databases")
    parser.add_argument("--version", action="version", version=lmdbm.__version__)
    parser.add_argument(
        "--sizes",
        nargs="+",
        type=int,
        metavar="N",
        default=[10, 100, 10**3, 10**4, 10**5, 10**6],
        help="Number of records to read/write",
    )
    parser.add_argument("--bestof", type=int, metavar="N", default=3, help="Run N benchmarks")
    parser.add_argument("--outfile", default="benchmarks.md", help="Benchmark results")
    args = parser.parse_args()

    results: List[ResultsDict] = []

    with RichProgress() as progress:
        p = Progress(progress)
        for _ in p.track(range(args.bestof)):
            results.append(bench(args.outpath, args.sizes))

    if args.bestof == 1:
        best_results = results[0]
    else:
        best_results = merge_results(results)

    with open(args.outfile, "w", encoding="utf-8") as fw:
        write_markdown_table(fw, best_results, "write")
        write_markdown_table(fw, best_results, "batch")
        write_markdown_table(fw, best_results, "read")
        write_markdown_table(fw, best_results, "combined")
--------------------------------------------------------------------------------
/lmdbm/__init__.py:
--------------------------------------------------------------------------------
"""Python DBM style wrapper around LMDB (Lightning Memory-Mapped Database)"""

from .lmdbm import Lmdb, LmdbGzip, error, open

__version__ = "0.0.6"

__all__ = ["Lmdb", "LmdbGzip", "error", "open", "__version__"]
--------------------------------------------------------------------------------
/lmdbm/lmdbm.py:
--------------------------------------------------------------------------------
import logging
from collections.abc import Mapping, MutableMapping
from gzip import compress, decompress
from pathlib import Path
from sys import exit
from typing import Any, Generic, Iterator, List, Optional, Tuple, TypeVar, Union

import lmdb
from typing_extensions import Self

T = TypeVar("T")
KT = TypeVar("KT")
VT = TypeVar("VT")

logger = logging.getLogger(__name__)

_DEFAULT = object()


class error(Exception):
    pass


class MissingOk:
    # for python < 3.8 compatibility

    def __init__(self, ok: bool) -> None:
        self.ok = ok

    def __enter__(self) -> Self:
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if isinstance(exc_value, FileNotFoundError) and self.ok:
            return True


def remove_lmdbm(file: str, missing_ok: bool = True) -> None:
    base = Path(file)
    with MissingOk(missing_ok):
        (base / "data.mdb").unlink()
    with MissingOk(missing_ok):
        (base / "lock.mdb").unlink()
    with MissingOk(missing_ok):
        base.rmdir()


class Lmdb(MutableMapping, Generic[KT, VT]):
    autogrow_error = "Failed to grow LMDB ({}). Is there enough disk space available?"
    autogrow_msg = "Grew database (%s) map size to %s"

    def __init__(self, env: lmdb.Environment, autogrow: bool) -> None:
        self.env = env
        self.autogrow = autogrow

    @classmethod
    def open(
        cls, file: str, flag: str = "r", mode: int = 0o755, map_size: int = 2**20, autogrow: bool = True, **kwargs
    ) -> "Lmdb":
        """
        Opens the database `file`.
        `flag`: r (read only, existing), w (read and write, existing),
            c (read, write, create if not exists), n (read, write, overwrite existing)
        `map_size`: Initial database size. Defaults to 2**20 (1MB).
        `autogrow`: Automatically grow the database size when `map_size` is exceeded.
            WARNING: Set this to `False` for multi-process write access.
        `**kwargs`: All other keyword arguments are passed through to `lmdb.open`.
        """

        if flag == "r":  # Open existing database for reading only (default)
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=True, create=False, mode=mode, **kwargs)
        elif flag == "w":  # Open existing database for reading and writing
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=False, create=False, mode=mode, **kwargs)
        elif flag == "c":  # Open database for reading and writing, creating it if it doesn't exist
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=False, create=True, mode=mode, **kwargs)
        elif flag == "n":  # Always create a new, empty database, open for reading and writing
            remove_lmdbm(file)
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=False, create=True, mode=mode, **kwargs)
        else:
            raise ValueError("Invalid flag")

        return cls(env, autogrow)
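
    # Minimal usage sketch (the path is an assumption, not part of the library):
    #
    #   with Lmdb.open("example.db", "c") as db:
    #       db[b"key"] = b"value"
    #       assert db[b"key"] == b"value"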

    @property
    def map_size(self) -> int:
        return self.env.info()["map_size"]

    @map_size.setter
    def map_size(self, value: int) -> None:
        self.env.set_mapsize(value)

    def _pre_key(self, key: KT) -> bytes:
        if isinstance(key, bytes):
            return key
        elif isinstance(key, str):
            return key.encode("Latin-1")  # Latin-1 maps code points 0-255 one-to-one to bytes

        raise TypeError(key)

    def _post_key(self, key: bytes) -> KT:
        return key

    def _pre_value(self, value: VT) -> bytes:
        if isinstance(value, bytes):
            return value
        elif isinstance(value, str):
            return value.encode("Latin-1")

        raise TypeError(value)

    def _post_value(self, value: bytes) -> VT:
        return value

    def __getitem__(self, key: KT) -> VT:
        with self.env.begin() as txn:
            value = txn.get(self._pre_key(key))
            if value is None:
                raise KeyError(key)
            return self._post_value(value)

    def __setitem__(self, key: KT, value: VT) -> None:
        k = self._pre_key(key)
        v = self._pre_value(value)
        for _i in range(12):
            try:
                with self.env.begin(write=True) as txn:
                    txn.put(k, v)
                    return
            except lmdb.MapFullError:
                if not self.autogrow:
                    raise
                new_map_size = self.map_size * 2
                self.map_size = new_map_size
                logger.info(self.autogrow_msg, self.env.path(), new_map_size)

        exit(self.autogrow_error.format(self.env.path()))

    def __delitem__(self, key: KT) -> None:
        with self.env.begin(write=True) as txn:
            txn.delete(self._pre_key(key))

    def keys(self) -> Iterator[KT]:
        with self.env.begin() as txn:
            for key in txn.cursor().iternext(keys=True, values=False):
                yield self._post_key(key)

    def items(self) -> Iterator[Tuple[KT, VT]]:
        with self.env.begin() as txn:
            for key, value in txn.cursor().iternext(keys=True, values=True):
                yield (self._post_key(key), self._post_value(value))

    def values(self) -> Iterator[VT]:
        with self.env.begin() as txn:
            for value in txn.cursor().iternext(keys=False, values=True):
                yield self._post_value(value)

    def __contains__(self, key: KT) -> bool:
        with self.env.begin() as txn:
            value = txn.get(self._pre_key(key))
            return value is not None

    def __iter__(self) -> Iterator[KT]:
        return self.keys()

    def __len__(self) -> int:
        with self.env.begin() as txn:
            return txn.stat()["entries"]

    def pop(self, key: KT, default: Union[VT, T] = _DEFAULT) -> Union[VT, T]:
        with self.env.begin(write=True) as txn:
            value = txn.pop(self._pre_key(key))
        if value is None:
            if default is _DEFAULT:
                raise KeyError(key)  # match MutableMapping.pop: raise when no default is given
            return default
        return self._post_value(value)

    def update(self, __other: Any = (), **kwds: VT) -> None:  # python3.8 only: update(self, other=(), /, **kwds)
        # fixme: `kwds`

        # note: benchmarking showed that there is no real difference between using lists or iterables
        # as input to `putmulti`.
        # lists: Finished 14412594 in 253496 seconds.
        # iter: Finished 14412594 in 256315 seconds.

        # save generated lists in case the insert fails and needs to be retried
        # for performance reasons, but mostly because `__other` could be an iterable
        # which would already be exhausted on the second try
        pairs_other: Optional[List[Tuple[bytes, bytes]]] = None
        pairs_kwds: Optional[List[Tuple[bytes, bytes]]] = None

        for _i in range(12):
            try:
                with self.env.begin(write=True) as txn:
                    with txn.cursor() as curs:
                        if isinstance(__other, Mapping):
                            pairs_other = pairs_other or [
                                (self._pre_key(key), self._pre_value(__other[key])) for key in __other
                            ]
                            curs.putmulti(pairs_other)
                        elif hasattr(__other, "keys"):
                            pairs_other = pairs_other or [
                                (self._pre_key(key), self._pre_value(__other[key])) for key in __other.keys()
                            ]
                            curs.putmulti(pairs_other)
                        else:
                            pairs_other = pairs_other or [
                                (self._pre_key(key), self._pre_value(value)) for key, value in __other
                            ]
                            curs.putmulti(pairs_other)

                        pairs_kwds = pairs_kwds or [
                            (self._pre_key(key), self._pre_value(value)) for key, value in kwds.items()
                        ]
                        curs.putmulti(pairs_kwds)

                        return
            except lmdb.MapFullError:
                if not self.autogrow:
                    raise
                new_map_size = self.map_size * 2
                self.map_size = new_map_size
                logger.info(self.autogrow_msg, self.env.path(), new_map_size)

        exit(self.autogrow_error.format(self.env.path()))

    def sync(self) -> None:
        self.env.sync()

    def close(self) -> None:
        self.env.close()

    def __enter__(self) -> Self:
        return self

    def __exit__(self, *args):
        self.close()


class LmdbGzip(Lmdb):
    def __init__(self, env, autogrow: bool, compresslevel: int = 9):
        Lmdb.__init__(self, env, autogrow)
        self.compresslevel = compresslevel

    def _pre_value(self, value: VT) -> bytes:
        value = Lmdb._pre_value(self, value)
        return compress(value, self.compresslevel)

    def _post_value(self, value: bytes) -> VT:
        return decompress(value)


def open(file, flag="r", mode=0o755, **kwargs):
    return Lmdb.open(file, flag, mode, **kwargs)
--------------------------------------------------------------------------------
/lmdbm/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dobatymo/lmdb-python-dbm/9436e49c4fe584d446575809ae9e4859deeb5411/lmdbm/py.typed
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
build-backend = "flit_core.buildapi"
requires = [
  "flit-core<4,>=3.2",
]

[project]
name = "lmdbm"
readme = "readme.md"
authors = [ { name = "Dobatymo", email = "Dobatymo@users.noreply.github.com" } ]
requires-python = ">=3.7"
classifiers = [
  "Intended Audience :: Developers",
  "License :: OSI Approved :: ISC License (ISCL)",
  "Operating System :: OS Independent",
  "Programming Language :: Python :: 3 :: Only",
  "Programming Language :: Python :: 3.7",
  "Programming Language :: Python :: 3.8",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Topic :: Database",
]
dynamic = [
  "description",
  "version",
]
dependencies = [
  "lmdb",
  "typing-extensions>=4",
]
optional-dependencies.bench = [
  "genutility[iter,rich,time]>=0.0.103",
  "pysos==1.2.9",
  "pytablewriter==0.63",
  "rocksdict==0.3.5",
  "semidbm==0.5.1",
  "sqlitedict==1.7",
  "unqlite==0.9.2",
  "vedis==0.7.1",
]
optional-dependencies.test = [
  "genutility[test]",
]
urls.Home = "https://github.com/Dobatymo/lmdb-python-dbm"

[tool.black]
line-length = 120

[tool.ruff]
line-length = 120

[tool.isort]
profile = "black"
line_length = 120

[tool.bandit]
skips = [ "B101" ]
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# lmdbm

This is a Python DBM-style interface wrapper around [LMDB](http://www.lmdb.tech/doc/) (Lightning Memory-Mapped Database).
It uses the existing lower-level Python bindings [py-lmdb](https://lmdb.readthedocs.io).
This is especially useful on Windows, where otherwise `dbm.dumb` is the default `dbm` database.

## Install
- `pip install lmdbm`

## Example
```python
from lmdbm import Lmdb
with Lmdb.open("test.db", "c") as db:
    db[b"key"] = b"value"
    db.update({b"key1": b"value1", b"key2": b"value2"})  # batch insert, uses a single transaction
```

### Use inheritance to store Python objects using json serialization

```python
import json
from lmdbm import Lmdb

class JsonLmdb(Lmdb):
    def _pre_key(self, value):
        return value.encode("utf-8")
    def _post_key(self, value):
        return value.decode("utf-8")
    def _pre_value(self, value):
        return json.dumps(value).encode("utf-8")
    def _post_value(self, value):
        return json.loads(value.decode("utf-8"))

with JsonLmdb.open("test.db", "c") as db:
    db["key"] = {"some": "object"}
    obj = db["key"]
    print(obj["some"])  # prints "object"
```
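
### Gzip-compressed values

`LmdbGzip` is also exported: it transparently gzip-compresses values (keys are stored as-is). A minimal sketch, assuming the same file conventions as above:

```python
from lmdbm import LmdbGzip

with LmdbGzip.open("compressed.db", "c") as db:  # compresslevel defaults to 9
    db[b"key"] = b"some highly compressible value " * 100
    assert db[b"key"].startswith(b"some")  # values are decompressed on read
```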

## Warning

As of `lmdb==1.2.1` the docs say that calling `lmdb.Environment.set_mapsize` from multiple processes "may cause catastrophic loss of data". If `lmdbm` is used in write mode from multiple processes, set `autogrow=False` and `map_size` to a large enough value: `Lmdb.open(..., map_size=2**30, autogrow=False)`.
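
A hedged sketch of the multi-process-safe configuration (the path and size are assumptions):

```python
from lmdbm import Lmdb

# every writing process opens with a fixed, generous map size and autogrow disabled
with Lmdb.open("shared.db", "c", map_size=2**30, autogrow=False) as db:
    db[b"key"] = b"value"  # lmdb.MapFullError is raised instead of growing the map
```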

## Benchmarks

Install `lmdbm[bench]` and run `benchmark.py`. Other storage engines which could be tested: `wiredtiger`, `berkeleydb`.
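
A minimal invocation sketch (paths and sizes are illustrative, taken from the defaults in `benchmark.py`):

```python
# roughly equivalent to: python benchmark.py --sizes 10 100 1000 --bestof 3
from benchmark import bench, merge_results, write_markdown_table

runs = [bench("bench-dbs", [10, 100, 1000]) for _ in range(3)]
best = merge_results(runs)  # keeps the fastest time per engine and size
with open("benchmarks.md", "w", encoding="utf-8") as fw:
    for method in ("write", "batch", "read", "combined"):
        write_markdown_table(fw, best, method)
```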

Storage engines not benchmarked:
- `tinydb` (because it doesn't have built-in str/bytes keys)

### continuous writes in seconds (best of 3)
| items | lmdbm |lmdbm-batch|pysos |sqlitedict|sqlitedict-batch|dbm.dumb|semidbm|vedis |vedis-batch|unqlite|unqlite-batch|
|------:|-------:|----------:|-----:|---------:|---------------:|-------:|------:|-----:|----------:|------:|------------:|
| 10| 0.000| 0.015| 0.000| 0.031| 0.000| 0.016| 0.000| 0.000| 0.000| 0.000| 0.000|
| 100| 0.094| 0.000| 0.000| 0.265| 0.016| 0.188| 0.000| 0.000| 0.000| 0.000| 0.000|
| 1000| 1.684| 0.016| 0.015| 3.885| 0.124| 2.387| 0.016| 0.015| 0.015| 0.016| 0.000|
| 10000| 16.895| 0.093| 0.265| 45.334| 1.326| 25.350| 0.156| 0.093| 0.094| 0.094| 0.093|
| 100000| 227.106| 1.030| 2.698| 461.638| 12.964| 238.400| 1.623| 1.388| 1.467| 1.466| 1.357|
|1000000|3482.520| 13.104|27.815| 5851.239| 133.396|2432.945| 16.411|15.693| 15.709| 14.508| 14.103|

### random reads in seconds (best of 3)
| items |lmdbm |lmdbm-batch|pysos |sqlitedict|sqlitedict-batch|dbm.dumb|semidbm| vedis |vedis-batch|unqlite|unqlite-batch|
|------:|-----:|-----------|-----:|---------:|----------------|-------:|------:|------:|-----------|------:|-------------|
| 10| 0.000| | 0.000| 0.000| | 0.000| 0.000| 0.000| | 0.000| |
| 100| 0.000| | 0.000| 0.031| | 0.000| 0.000| 0.000| | 0.000| |
| 1000| 0.016| | 0.015| 0.250| | 0.109| 0.016| 0.015| | 0.000| |
| 10000| 0.109| | 0.156| 2.558| | 1.123| 0.171| 0.109| | 0.109| |
| 100000| 1.014| | 2.137| 27.769| | 11.419| 2.090| 1.170| | 1.170| |
|1000000|10.390| |24.258| 447.613| | 870.580| 22.838|214.486| |211.319| |
--------------------------------------------------------------------------------
/tests/test_lmdbm.py:
--------------------------------------------------------------------------------
from pathlib import Path

from genutility.test import MyTestCase
from lmdb import Error

from lmdbm import Lmdb
from lmdbm.lmdbm import remove_lmdbm


class LmdbmTests(MyTestCase):
    _name = "./test.db"

    _dict = {
        b"a": b"Python:",
        b"b": b"Programming",
        b"c": b"the",
        b"d": b"way",
        b"f": b"Guido",
        b"g": b"intended",
    }

    def _init_db(self):
        with Lmdb.open(self._name, "n") as db:
            for k, v in self._dict.items():
                db[k] = v

    def _delete_db(self):
        remove_lmdbm(self._name, False)

    def test_mem_grow(self):
        with Lmdb.open(self._name, "n", map_size=1024) as db:
            key = b"asd"
            value = b"asd" * 1000

            db[key] = value
            assert db.setdefault(key, b"asd") == value
            assert db[key] == value
            assert db.get(key) == value

        self._delete_db()

    def test_mem_grow_batch(self):
        value = b"asd" * 1000

        def data():
            yield "key_1", value
            yield "key_2", value

        with Lmdb.open(self._name, "n", map_size=1024) as db:
            db.update(data())
            assert db["key_1"] == value
            assert db["key_2"] == value

        self._delete_db()

    def test_missing_read_only(self):
        with self.assertRaises(Error):
            with Lmdb.open(self._name, "r", map_size=1024) as db:
                db["key"] = "value"

        assert not Path(self._name).exists()

    def test_modify(self):
        self._init_db()
        with Lmdb.open(self._name, "c") as f:
            self._dict[b"g"] = f[b"g"] = b"indented"
            self.assertUnorderedMappingEqual(f, self._dict)

            self.assertEqual(f.setdefault(b"xxx", b"foo"), b"foo")
            self.assertEqual(f[b"xxx"], b"foo")

        self._delete_db()


if __name__ == "__main__":
    import unittest

    unittest.main()
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[flake8]
max-line-length = 120
select = B, E7, E9, W2, W3, W6, F
ignore = E704
exclude = .git,.mypy_cache,__pycache__,build,dist
per-file-ignores =
    benchmark.py:F821
--------------------------------------------------------------------------------