├── .github
│   └── workflows
│       └── pythonpackage.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── benchmark.py
├── lmdbm
│   ├── __init__.py
│   ├── lmdbm.py
│   └── py.typed
├── pyproject.toml
├── readme.md
├── tests
│   └── test_lmdbm.py
└── tox.ini

/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
name: Python package

on:
  pull_request:
  push:
    branches:
      - master

jobs:

  lint:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.8
      - name: Install dependencies
        run: |
          python -m pip install -U pip wheel
          python -m pip install -U black isort flake8 bandit[toml]
      - run: python -m black . --check
      - run: python -m isort . --check-only
      - run: python -m flake8 .
      - run: python -m bandit . --recursive -c pyproject.toml

  test:
    needs: lint
    strategy:
      matrix:
        os: [ubuntu-20.04, macos-13, windows-2019]
        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
        exclude:
          - os: windows-2019
            python-version: '3.12'
          - os: windows-2019
            python-version: '3.13'
    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install -U pip wheel
          python -m pip install -e .[test]
      - name: Run tests
        run: |
          python -m unittest discover -s tests

  deploy:
    needs: test
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.8
      - name: Build dists
        run: |
          python -m pip install -U pip wheel build
          python -m build
      - name: Publish a Python distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.pypi_password }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.bak

__pycache__/
*.py[cod]
*.egg-info
build/
dist/

bench-dbs/
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-added-large-files
      - id: check-case-conflict
      - id: check-json
      - id: check-merge-conflict
      - id: check-symlinks
      - id: check-toml
      - id: check-yaml
      - id: debug-statements
      - id: detect-private-key
      - id: end-of-file-fixer
      - id: mixed-line-ending
        args: [--fix=no]
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
  - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
    rev: v2.13.0
    hooks:
      - id: pretty-format-yaml
        args: [--autofix]
  - repo: https://github.com/tox-dev/pyproject-fmt
    rev: 2.2.4
    hooks:
      - id: pyproject-fmt
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.16.0
    hooks:
      - id: pyupgrade
        args: [--py37-plus]
  - repo: https://github.com/psf/black-pre-commit-mirror
    rev: 24.8.0
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
  - repo: https://github.com/PyCQA/bandit
    rev: 1.7.10
    hooks:
      - id: bandit
        args: [-c, pyproject.toml]
        additional_dependencies: ['.[toml]']
  - repo: https://github.com/pycqa/flake8
    rev: 7.1.1
    hooks:
      - id: flake8
        additional_dependencies:
          - flake8-annotations
          - flake8-bugbear
          - flake8-eradicate
          - flake8-mutable
          - flake8-simplify
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.13.0
    hooks:
      - id: mypy
        args: [--ignore-missing-imports, --install-types, --non-interactive]
        additional_dependencies:
          - lmdb==1.4.1
          - typing-extensions>=4.0.0
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.5.0
    hooks:
      - id: detect-secrets
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
ARG PYTHON=3.13
FROM python:$PYTHON-slim-bookworm

ARG LMDBM=0.0.6
RUN apt update && apt -y install build-essential && \
    pip install lmdbm==$LMDBM pytablewriter genutility rich && \
    apt purge --auto-remove --yes build-essential && apt clean && \
    rm --recursive --force /var/lib/apt/lists/* /tmp/* /var/tmp/*

ENV PYTHONUNBUFFERED=1
COPY benchmark.py /
ENTRYPOINT [ "python", "benchmark.py" ]
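
# Hedged usage sketch (the image tag and mount point are assumptions, not part of the repo):
#   docker build --build-arg LMDBM=0.0.6 -t lmdbm-bench .
#   docker run --rm -v "$PWD:/out" lmdbm-bench --outfile /out/benchmarks.md
# Engines whose modules are not installed in the image are skipped by benchmark.py.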
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
ISC License

Copyright (c) 2021 Dobatymo

Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
import dbm.dumb
import json
import os
import os.path
import pathlib
import pickle  # nosec
import shutil
import sys
from abc import ABC, abstractmethod
from collections import defaultdict
from contextlib import closing, suppress
from importlib import import_module
from random import randrange
from typing import Any, Callable, ContextManager, DefaultDict, Dict, Iterable, List, Sequence, TextIO

from genutility.iter import batch
from genutility.time import MeasureTime
from pytablewriter import MarkdownTableWriter

import lmdbm
import lmdbm.lmdbm

ResultsDict = Dict[int, Dict[str, Dict[str, float]]]

# Do not continue the benchmark if the current step takes more than MAX_TIME seconds
MAX_TIME = 10
BATCH_SIZE = 10000


class BaseBenchmark(ABC):
    def __init__(self, db_tpl, db_type, db_module):
        self.available = self.load_module(db_module)
        self.batch_available = True
        self.path = db_tpl.format(db_type)
        self.name = db_type
        self.write = -1
        self.batch = -1
        self.read = -1
        self.combined = -1

    def load_module(self, name):
        """Load module and ignore benchmark if module is unavailable"""

        if name is None or name in sys.modules:
            return True

        try:
            globals()[name.split(".")[-1]] = import_module(name)
            print(f"Loaded module {name}")
        except ImportError:
            return False
        return True

    @abstractmethod
    def open(self) -> ContextManager:
        """Open the database"""

        pass

    def commit(self) -> None:  # noqa: B027
        """Commit the changes, if it is not done automatically"""

        pass

    def purge(self) -> None:
        """Remove the database file(s)"""

        with suppress(FileNotFoundError):
            os.unlink(self.path)

    def encode(self, value: Any) -> Any:
        """Convert Python objects to database-capable ones"""

        return value

    def decode(self, value: Any) -> Any:
        """Convert database values to Python objects"""

        return value

    def measure_writes(self, N: int) -> None:
        with MeasureTime() as t, self.open() as db:
            for key, value in self.generate_data(N):
                if t.get() > MAX_TIME:
                    break
                db[key] = self.encode(value)
            self.commit()
        if t.get() < MAX_TIME:
            self.write = t.get()
        self.print_time("write", N, t)

    def measure_batch(self, N: int) -> None:
        with MeasureTime() as t, self.open() as db:
            for pairs in batch(self.generate_data(N), BATCH_SIZE):
                if t.get() > MAX_TIME:
                    break
                db.update({key: self.encode(value) for key, value in pairs})
            self.commit()
        if t.get() < MAX_TIME:
            self.batch = t.get()
        self.print_time("batch write", N, t)

    def measure_reads(self, N: int) -> None:
        with MeasureTime() as t, self.open() as db:
            for key in self.random_keys(N, N):
                if t.get() > MAX_TIME:
                    break
                self.decode(db[key])
        if t.get() < MAX_TIME:
            self.read = t.get()
        self.print_time("read", N, t)

    def measure_combined(self, write=1, read=10, repeat=100) -> None:
        # per iteration: write `write` items, commit, then read back `read` random keys
        # (parameter names were previously swapped relative to what they controlled)
        with MeasureTime() as t, self.open() as db:
            for _ in range(repeat):
                if t.get() > MAX_TIME:
                    break
                for key, value in self.generate_data(write):
                    db[key] = self.encode(value)
                self.commit()
                for key in self.random_keys(read, read):
                    self.decode(db[key])
        if t.get() < MAX_TIME:
            self.combined = t.get()
        self.print_time("combined", (write + read) * repeat, t)

    def database_is_built(self):
        return self.batch >= 0 or self.write >= 0

    def print_time(self, measure_type, numbers, t):
        print(f"{self.name:<20s} {measure_type:<15s} {str(numbers):<10s} {t.get():10.5f}")

    @staticmethod
    def generate_data(size):
        for i in range(size):
            yield "key_" + str(i), {"some": "object_" + str(i)}

    @staticmethod
    def random_keys(num, size):
        for _ in range(num):
            yield "key_" + str(randrange(0, size))  # nosec
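

# A new engine is benchmarked by subclassing one of the base classes above.
# Hypothetical sketch (wiredtiger is mentioned in the readme but not implemented here):
#
# class WiredTigerBenchmark(JsonEncodedBenchmark):
#     def __init__(self, db_tpl):
#         super().__init__(db_tpl, "wiredtiger", "wiredtiger")
#
# It would also need to be appended to BENCHMARK_CLASSES below.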


class JsonEncodedBenchmark(BaseBenchmark):
    def encode(self, value):
        return json.dumps(value)

    def decode(self, value):
        return json.loads(value)


class DummyPickleBenchmark(BaseBenchmark):
    class MyDict(dict):
        def close(self):
            pass

    def __init__(self, db_tpl):
        self.native_dict = None
        super().__init__(db_tpl, "dummypickle", None)

    def open(self):
        if pathlib.Path(self.path).exists():
            with open(self.path, "rb") as f:
                self.native_dict = self.MyDict(pickle.load(f))  # nosec
        else:
            self.native_dict = self.MyDict()
        return closing(self.native_dict)

    def commit(self):
        tmp_file = self.path + ".tmp"
        with open(tmp_file, "wb") as f:
            pickle.dump(self.native_dict, f)
        shutil.move(tmp_file, self.path)


class DummyJsonBenchmark(BaseBenchmark):
    class MyDict(dict):
        def close(self):
            pass

    def __init__(self, db_tpl):
        self.native_dict = None
        super().__init__(db_tpl, "dummyjson", None)

    def open(self):
        if pathlib.Path(self.path).exists():
            with open(self.path) as f:
                self.native_dict = self.MyDict(json.load(f))
        else:
            self.native_dict = self.MyDict()
        return closing(self.native_dict)

    def commit(self):
        tmp_file = self.path + ".tmp"
        with open(tmp_file, "w") as f:
            json.dump(self.native_dict, f, ensure_ascii=False, check_circular=False, sort_keys=False)
        shutil.move(tmp_file, self.path)


class DumbDbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "dbm.dumb", "dbm.dumb")

    def open(self):
        return dbm.dumb.open(self.path, "c")

    def purge(self):
        with suppress(FileNotFoundError):
            os.unlink(self.path + ".dat")
        with suppress(FileNotFoundError):
            os.unlink(self.path + ".bak")
        with suppress(FileNotFoundError):
            os.unlink(self.path + ".dir")


class SemiDbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "semidbm", "semidbm")
        self.batch_available = False

    def open(self):
        return closing(semidbm.open(self.path, "c"))

    def purge(self):
        with suppress(FileNotFoundError):
            os.unlink(self.path + "/data")
        with suppress(FileNotFoundError):
            os.rmdir(self.path)


class LdbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "lmdbm", "lmdbm")

    def open(self):
        return lmdbm.Lmdb.open(self.path, "c")
    def purge(self):
        lmdbm.lmdbm.remove_lmdbm(self.path)


class PysosBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "pysos", "pysos")
        self.batch_available = False

    def open(self):
        return closing(pysos.Dict(self.path))


class SqliteAutocommitBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "sqlite-autocommit", "sqlitedict")

    def open(self):
        return sqlitedict.SqliteDict(self.path, autocommit=True)


class SqliteWalBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "sqlite-wal", "sqlitedict")

    def open(self):
        return sqlitedict.SqliteDict(self.path, autocommit=True, journal_mode="WAL")


class SqliteBatchBenchmark(BaseBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "sqlite-batch", "sqlitedict")
        self.db = None

    def open(self):
        self.db = sqlitedict.SqliteDict(self.path, autocommit=False)
        return self.db

    def commit(self):
        self.db.commit()


class GnuDbmBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "dbm.gnu", "dbm.gnu")
        if self.available:
            self.gnu_dbm = dbm.gnu
        self.batch_available = False

    def open(self):
        return self.gnu_dbm.open(self.path, "c")


class ShelveBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "shelve", "shelve")

    def open(self):
        return shelve.open(self.path)  # nosec B301


class VedisBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "vedis", "vedis")

    def open(self):
        return vedis.Vedis(self.path)


class UnqliteBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "unqlite", "unqlite")

    def open(self):
        return unqlite.UnQLite(self.path)


class RocksdictBenchmark(JsonEncodedBenchmark):
    def __init__(self, db_tpl):
        super().__init__(db_tpl, "rocksdict", "rocksdict")
        self.batch_available = False

    def open(self):
        return closing(rocksdict.Rdict(self.path))

    def purge(self):
        rocksdict.Rdict.destroy(self.path)


BENCHMARK_CLASSES = [
    LdbmBenchmark,
    VedisBenchmark,
    UnqliteBenchmark,
    RocksdictBenchmark,
    GnuDbmBenchmark,
    ShelveBenchmark,
    SemiDbmBenchmark,
    PysosBenchmark,
    DumbDbmBenchmark,
    SqliteWalBenchmark,
    SqliteAutocommitBenchmark,
    SqliteBatchBenchmark,
    DummyPickleBenchmark,
    DummyJsonBenchmark,
]


def run_bench(N, db_tpl) -> Dict[str, Dict[str, float]]:
    benchmarks = [C(db_tpl) for C in BENCHMARK_CLASSES]

    for benchmark in benchmarks:
        if not benchmark.available:
            continue
        benchmark.purge()
        benchmark.measure_writes(N)
        if benchmark.batch_available:
            benchmark.purge()
            benchmark.measure_batch(N)
        if benchmark.database_is_built():
            benchmark.measure_reads(N)
            benchmark.measure_combined(write=1, read=10, repeat=100)

    ret: DefaultDict[str, Dict[str, float]] = defaultdict(dict)
    for benchmark in benchmarks:
        ret[benchmark.name]["read"] = benchmark.read
        ret[benchmark.name]["write"] = benchmark.write
        ret[benchmark.name]["batch"] = benchmark.batch
        ret[benchmark.name]["combined"] = benchmark.combined

    return ret


def bench(base: str, nums: Iterable[int]) -> ResultsDict:
    with suppress(FileExistsError):
        os.mkdir(base)

    ret = {}
    db_tpl = os.path.join(base, "test_{}.db")

    for num in nums:
        print("")
        ret[num] = run_bench(num, db_tpl)

    return ret


def write_markdown_table(stream: TextIO, results: ResultsDict, method: str):
    for v in results.values():
        headers = list(v.keys())
        break

    value_matrix = []
    for k, v in results.items():
        row = [str(k)]
        for h in headers:
            value = v[h].get(method)
            if value is None or value < 0:
                new_value = "-"
            else:
                new_value = format(value, ".04f")
            row.append(new_value)
        value_matrix.append(row)

    headers = ["items"] + headers

    writer = MarkdownTableWriter(table_name=method, headers=headers, value_matrix=value_matrix)
    writer.dump(stream, close_after_write=False)


def _check_same_keys(dicts: Sequence[dict]):
    assert len(dicts) >= 2

    for d in dicts[1:]:
        assert dicts[0].keys() == d.keys()


def merge_results(results: Sequence[ResultsDict], func: Callable = min) -> ResultsDict:
    out: ResultsDict = {}

    _check_same_keys(results)
    for key1 in results[0].keys():
        _check_same_keys([d[key1] for d in results])
        out.setdefault(key1, {})
        for key2 in results[0][key1].keys():
            _check_same_keys([d[key1][key2] for d in results])
            out[key1].setdefault(key2, {})
            for key3 in results[0][key1][key2].keys():
                out[key1][key2][key3] = func(d[key1][key2][key3] for d in results)

    return out
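

# Illustration with assumed values: with func=min, the fastest run wins per cell, e.g.
#   merge_results([{10: {"lmdbm": {"read": 0.5}}}, {10: {"lmdbm": {"read": 0.4}}}])
#   == {10: {"lmdbm": {"read": 0.4}}}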


if __name__ == "__main__":
    from argparse import ArgumentParser

    from genutility.rich import Progress
    from rich.progress import Progress as RichProgress

    parser = ArgumentParser()
    parser.add_argument("--outpath", default="bench-dbs", help="Directory to store temporary benchmarking databases")
    parser.add_argument("--version", action="version", version=lmdbm.__version__)
    parser.add_argument(
        "--sizes",
        nargs="+",
        type=int,
        metavar="N",
        default=[10, 100, 10**3, 10**4, 10**5, 10**6],
        help="Number of records to read/write",
    )
    parser.add_argument("--bestof", type=int, metavar="N", default=3, help="Run N benchmarks")
    parser.add_argument("--outfile", default="benchmarks.md", help="Benchmark results")
    args = parser.parse_args()

    results: List[ResultsDict] = []

    with RichProgress() as progress:
        p = Progress(progress)
        for _ in p.track(range(args.bestof)):
            results.append(bench(args.outpath, args.sizes))

    if args.bestof == 1:
        best_results = results[0]
    else:
        best_results = merge_results(results)

    with open(args.outfile, "w", encoding="utf-8") as fw:
        write_markdown_table(fw, best_results, "write")
        write_markdown_table(fw, best_results, "batch")
        write_markdown_table(fw, best_results, "read")
        write_markdown_table(fw, best_results, "combined")
--------------------------------------------------------------------------------
/lmdbm/__init__.py:
--------------------------------------------------------------------------------
"""Python DBM style wrapper around LMDB (Lightning Memory-Mapped Database)"""

from .lmdbm import Lmdb, LmdbGzip, error, open

__version__ = "0.0.6"

__all__ = ["Lmdb", "LmdbGzip", "error", "open", "__version__"]
--------------------------------------------------------------------------------
/lmdbm/lmdbm.py:
--------------------------------------------------------------------------------
import logging
from collections.abc import Mapping, MutableMapping
from gzip import compress, decompress
from pathlib import Path
from sys import exit
from typing import Any, Generic, Iterator, List, Optional, Tuple, TypeVar, Union

import lmdb
from typing_extensions import Self

T = TypeVar("T")
KT = TypeVar("KT")
VT = TypeVar("VT")

logger = logging.getLogger(__name__)

_DEFAULT = object()


class error(Exception):
    pass


class MissingOk:
    # for python < 3.8 compatibility

    def __init__(self, ok: bool) -> None:
        self.ok = ok

    def __enter__(self) -> Self:
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if isinstance(exc_value, FileNotFoundError) and self.ok:
            return True


def remove_lmdbm(file: str, missing_ok: bool = True) -> None:
    base = Path(file)
    with MissingOk(missing_ok):
        (base / "data.mdb").unlink()
    with MissingOk(missing_ok):
        (base / "lock.mdb").unlink()
    with MissingOk(missing_ok):
        base.rmdir()


class Lmdb(MutableMapping, Generic[KT, VT]):
    autogrow_error = "Failed to grow LMDB ({}). Is there enough disk space available?"
    autogrow_msg = "Grew database (%s) map size to %s"

    def __init__(self, env: lmdb.Environment, autogrow: bool) -> None:
        self.env = env
        self.autogrow = autogrow

    @classmethod
    def open(
        cls, file: str, flag: str = "r", mode: int = 0o755, map_size: int = 2**20, autogrow: bool = True, **kwargs
    ) -> "Lmdb":
        """
        Opens the database `file`.
        `flag`: r (read only, existing), w (read and write, existing),
            c (read, write, create if not exists), n (read, write, overwrite existing)
        `map_size`: Initial database size. Defaults to 2**20 (1MB).
        `autogrow`: Automatically grow the database size when `map_size` is exceeded.
            WARNING: Set this to `False` for multi-process write access.
        `**kwargs`: All other keyword arguments are passed through to `lmdb.open`.
        """

        if flag == "r":  # Open existing database for reading only (default)
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=True, create=False, mode=mode, **kwargs)
        elif flag == "w":  # Open existing database for reading and writing
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=False, create=False, mode=mode, **kwargs)
        elif flag == "c":  # Open database for reading and writing, creating it if it doesn't exist
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=False, create=True, mode=mode, **kwargs)
        elif flag == "n":  # Always create a new, empty database, open for reading and writing
            remove_lmdbm(file)
            env = lmdb.open(file, map_size=map_size, max_dbs=1, readonly=False, create=True, mode=mode, **kwargs)
        else:
            raise ValueError("Invalid flag")

        return cls(env, autogrow)
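
    # Minimal usage sketch (the path is an assumption, not part of the library):
    #
    #   with Lmdb.open("example.db", "c") as db:
    #       db[b"key"] = b"value"
    #       assert db[b"key"] == b"value"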

    @property
    def map_size(self) -> int:
        return self.env.info()["map_size"]

    @map_size.setter
    def map_size(self, value: int) -> None:
        self.env.set_mapsize(value)

    def _pre_key(self, key: KT) -> bytes:
        if isinstance(key, bytes):
            return key
        elif isinstance(key, str):
            return key.encode("Latin-1")  # Latin-1 maps code points 0-255 one-to-one to bytes

        raise TypeError(key)

    def _post_key(self, key: bytes) -> KT:
        return key

    def _pre_value(self, value: VT) -> bytes:
        if isinstance(value, bytes):
            return value
        elif isinstance(value, str):
            return value.encode("Latin-1")

        raise TypeError(value)

    def _post_value(self, value: bytes) -> VT:
        return value

    def __getitem__(self, key: KT) -> VT:
        with self.env.begin() as txn:
            value = txn.get(self._pre_key(key))
            if value is None:
                raise KeyError(key)
            return self._post_value(value)

    def __setitem__(self, key: KT, value: VT) -> None:
        k = self._pre_key(key)
        v = self._pre_value(value)
        for _i in range(12):
            try:
                with self.env.begin(write=True) as txn:
                    txn.put(k, v)
                    return
            except lmdb.MapFullError:
                if not self.autogrow:
                    raise
                new_map_size = self.map_size * 2
                self.map_size = new_map_size
                logger.info(self.autogrow_msg, self.env.path(), new_map_size)

        exit(self.autogrow_error.format(self.env.path()))

    def __delitem__(self, key: KT) -> None:
        with self.env.begin(write=True) as txn:
            txn.delete(self._pre_key(key))

    def keys(self) -> Iterator[KT]:
        with self.env.begin() as txn:
            for key in txn.cursor().iternext(keys=True, values=False):
                yield self._post_key(key)

    def items(self) -> Iterator[Tuple[KT, VT]]:
        with self.env.begin() as txn:
            for key, value in txn.cursor().iternext(keys=True, values=True):
                yield (self._post_key(key), self._post_value(value))

    def values(self) -> Iterator[VT]:
        with self.env.begin() as txn:
            for value in txn.cursor().iternext(keys=False, values=True):
                yield self._post_value(value)

    def __contains__(self, key: KT) -> bool:
        with self.env.begin() as txn:
            value = txn.get(self._pre_key(key))
            return value is not None

    def __iter__(self) -> Iterator[KT]:
        return self.keys()

    def __len__(self) -> int:
        with self.env.begin() as txn:
            return txn.stat()["entries"]

    def pop(self, key: KT, default: Union[VT, T] = _DEFAULT) -> Union[VT, T]:
        with self.env.begin(write=True) as txn:
            value = txn.pop(self._pre_key(key))
        if value is None:
            if default is _DEFAULT:
                raise KeyError(key)  # match MutableMapping.pop: raise when no default is given
            return default
        return self._post_value(value)

    def update(self, __other: Any = (), **kwds: VT) -> None:  # python3.8 only: update(self, other=(), /, **kwds)
        # fixme: `kwds`

        # note: benchmarking showed that there is no real difference between using lists or iterables
        # as input to `putmulti`.
        # lists: Finished 14412594 in 253496 seconds.
        # iter: Finished 14412594 in 256315 seconds.

        # save generated lists in case the insert fails and needs to be retried
        # for performance reasons, but mostly because `__other` could be an iterable
        # which would already be exhausted on the second try
        pairs_other: Optional[List[Tuple[bytes, bytes]]] = None
        pairs_kwds: Optional[List[Tuple[bytes, bytes]]] = None

        for _i in range(12):
            try:
                with self.env.begin(write=True) as txn:
                    with txn.cursor() as curs:
                        if isinstance(__other, Mapping):
                            pairs_other = pairs_other or [
                                (self._pre_key(key), self._pre_value(__other[key])) for key in __other
                            ]
                            curs.putmulti(pairs_other)
                        elif hasattr(__other, "keys"):
                            pairs_other = pairs_other or [
                                (self._pre_key(key), self._pre_value(__other[key])) for key in __other.keys()
                            ]
                            curs.putmulti(pairs_other)
                        else:
                            pairs_other = pairs_other or [
                                (self._pre_key(key), self._pre_value(value)) for key, value in __other
                            ]
                            curs.putmulti(pairs_other)

                        pairs_kwds = pairs_kwds or [
                            (self._pre_key(key), self._pre_value(value)) for key, value in kwds.items()
                        ]
                        curs.putmulti(pairs_kwds)

                        return
            except lmdb.MapFullError:
                if not self.autogrow:
                    raise
                new_map_size = self.map_size * 2
                self.map_size = new_map_size
                logger.info(self.autogrow_msg, self.env.path(), new_map_size)

        exit(self.autogrow_error.format(self.env.path()))

    def sync(self) -> None:
        self.env.sync()

    def close(self) -> None:
        self.env.close()

    def __enter__(self) -> Self:
        return self

    def __exit__(self, *args):
        self.close()


class LmdbGzip(Lmdb):
    def __init__(self, env, autogrow: bool, compresslevel: int = 9):
        Lmdb.__init__(self, env, autogrow)
        self.compresslevel = compresslevel

    def _pre_value(self, value: VT) -> bytes:
        value = Lmdb._pre_value(self, value)
        return compress(value, self.compresslevel)

    def _post_value(self, value: bytes) -> VT:
        return decompress(value)


def open(file, flag="r", mode=0o755, **kwargs):
    return Lmdb.open(file, flag, mode, **kwargs)
--------------------------------------------------------------------------------
/lmdbm/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dobatymo/lmdb-python-dbm/9436e49c4fe584d446575809ae9e4859deeb5411/lmdbm/py.typed
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
build-backend = "flit_core.buildapi"
requires = [
  "flit-core<4,>=3.2",
]

[project]
name = "lmdbm"
readme = "readme.md"
authors = [ { name = "Dobatymo", email = "Dobatymo@users.noreply.github.com" } ]
requires-python = ">=3.7"
classifiers = [
  "Intended Audience :: Developers",
  "License :: OSI Approved :: ISC License (ISCL)",
  "Operating System :: OS Independent",
  "Programming Language :: Python :: 3 :: Only",
  "Programming Language :: Python :: 3.7",
  "Programming Language :: Python :: 3.8",
  "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Topic :: Database",
]
dynamic = [
  "description",
  "version",
]
dependencies = [
  "lmdb",
  "typing-extensions>=4",
]
optional-dependencies.bench = [
  "genutility[iter,rich,time]>=0.0.103",
  "pysos==1.2.9",
  "pytablewriter==0.63",
  "rocksdict==0.3.5",
  "semidbm==0.5.1",
  "sqlitedict==1.7",
  "unqlite==0.9.2",
  "vedis==0.7.1",
]
optional-dependencies.test = [
  "genutility[test]",
]
urls.Home = "https://github.com/Dobatymo/lmdb-python-dbm"

[tool.black]
line-length = 120

[tool.ruff]
line-length = 120

[tool.isort]
profile = "black"
line_length = 120

[tool.bandit]
skips = [ "B101" ]
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# lmdbm

This is a Python DBM-style interface wrapper around [LMDB](http://www.lmdb.tech/doc/) (Lightning Memory-Mapped Database).
It uses the existing lower-level Python bindings [py-lmdb](https://lmdb.readthedocs.io).
This is especially useful on Windows, where otherwise `dbm.dumb` is the default `dbm` database.

## Install
- `pip install lmdbm`

## Example
```python
from lmdbm import Lmdb
with Lmdb.open("test.db", "c") as db:
    db[b"key"] = b"value"
    db.update({b"key1": b"value1", b"key2": b"value2"})  # batch insert, uses a single transaction
```

### Use inheritance to store Python objects using json serialization

```python
import json
from lmdbm import Lmdb

class JsonLmdb(Lmdb):
    def _pre_key(self, value):
        return value.encode("utf-8")
    def _post_key(self, value):
        return value.decode("utf-8")
    def _pre_value(self, value):
        return json.dumps(value).encode("utf-8")
    def _post_value(self, value):
        return json.loads(value.decode("utf-8"))

with JsonLmdb.open("test.db", "c") as db:
    db["key"] = {"some": "object"}
    obj = db["key"]
    print(obj["some"])  # prints "object"
```
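
### Gzip-compressed values

`LmdbGzip` is also exported: it transparently gzip-compresses values (keys are stored as-is). A minimal sketch, assuming the same file conventions as above:

```python
from lmdbm import LmdbGzip

with LmdbGzip.open("compressed.db", "c") as db:  # compresslevel defaults to 9
    db[b"key"] = b"some highly compressible value " * 100
    assert db[b"key"].startswith(b"some")  # values are decompressed on read
```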

## Warning

As of `lmdb==1.2.1` the docs say that calling `lmdb.Environment.set_mapsize` from multiple processes "may cause catastrophic loss of data". If `lmdbm` is used in write mode from multiple processes, set `autogrow=False` and `map_size` to a large enough value: `Lmdb.open(..., map_size=2**30, autogrow=False)`.
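
A hedged sketch of the multi-process-safe configuration (the path and size are assumptions):

```python
from lmdbm import Lmdb

# every writing process opens with a fixed, generous map size and autogrow disabled
with Lmdb.open("shared.db", "c", map_size=2**30, autogrow=False) as db:
    db[b"key"] = b"value"  # lmdb.MapFullError is raised instead of growing the map
```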

## Benchmarks

Install `lmdbm[bench]` and run `benchmark.py`. Other storage engines which could be tested: `wiredtiger`, `berkeleydb`.
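
A minimal invocation sketch (paths and sizes are illustrative, taken from the defaults in `benchmark.py`):

```python
# roughly equivalent to: python benchmark.py --sizes 10 100 1000 --bestof 3
from benchmark import bench, merge_results, write_markdown_table

runs = [bench("bench-dbs", [10, 100, 1000]) for _ in range(3)]
best = merge_results(runs)  # keeps the fastest time per engine and size
with open("benchmarks.md", "w", encoding="utf-8") as fw:
    for method in ("write", "batch", "read", "combined"):
        write_markdown_table(fw, best, method)
```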

Storage engines not benchmarked:
- `tinydb` (because it doesn't have built-in str/bytes keys)

### continuous writes in seconds (best of 3)
| items | lmdbm |lmdbm-batch|pysos |sqlitedict|sqlitedict-batch|dbm.dumb|semidbm|vedis |vedis-batch|unqlite|unqlite-batch|
|------:|-------:|----------:|-----:|---------:|---------------:|-------:|------:|-----:|----------:|------:|------------:|
| 10| 0.000| 0.015| 0.000| 0.031| 0.000| 0.016| 0.000| 0.000| 0.000| 0.000| 0.000|
| 100| 0.094| 0.000| 0.000| 0.265| 0.016| 0.188| 0.000| 0.000| 0.000| 0.000| 0.000|
| 1000| 1.684| 0.016| 0.015| 3.885| 0.124| 2.387| 0.016| 0.015| 0.015| 0.016| 0.000|
| 10000| 16.895| 0.093| 0.265| 45.334| 1.326| 25.350| 0.156| 0.093| 0.094| 0.094| 0.093|
| 100000| 227.106| 1.030| 2.698| 461.638| 12.964| 238.400| 1.623| 1.388| 1.467| 1.466| 1.357|
|1000000|3482.520| 13.104|27.815| 5851.239| 133.396|2432.945| 16.411|15.693| 15.709| 14.508| 14.103|

### random reads in seconds (best of 3)
| items |lmdbm |lmdbm-batch|pysos |sqlitedict|sqlitedict-batch|dbm.dumb|semidbm| vedis |vedis-batch|unqlite|unqlite-batch|
|------:|-----:|-----------|-----:|---------:|----------------|-------:|------:|------:|-----------|------:|-------------|
| 10| 0.000| | 0.000| 0.000| | 0.000| 0.000| 0.000| | 0.000| |
| 100| 0.000| | 0.000| 0.031| | 0.000| 0.000| 0.000| | 0.000| |
| 1000| 0.016| | 0.015| 0.250| | 0.109| 0.016| 0.015| | 0.000| |
| 10000| 0.109| | 0.156| 2.558| | 1.123| 0.171| 0.109| | 0.109| |
| 100000| 1.014| | 2.137| 27.769| | 11.419| 2.090| 1.170| | 1.170| |
|1000000|10.390| |24.258| 447.613| | 870.580| 22.838|214.486| |211.319| |
--------------------------------------------------------------------------------
/tests/test_lmdbm.py:
--------------------------------------------------------------------------------
from pathlib import Path

from genutility.test import MyTestCase
from lmdb import Error

from lmdbm import Lmdb
from lmdbm.lmdbm import remove_lmdbm


class LmdbmTests(MyTestCase):
    _name = "./test.db"

    _dict = {
        b"a": b"Python:",
        b"b": b"Programming",
        b"c": b"the",
        b"d": b"way",
        b"f": b"Guido",
        b"g": b"intended",
    }

    def _init_db(self):
        with Lmdb.open(self._name, "n") as db:
            for k, v in self._dict.items():
                db[k] = v

    def _delete_db(self):
        remove_lmdbm(self._name, False)

    def test_mem_grow(self):
        with Lmdb.open(self._name, "n", map_size=1024) as db:
            key = b"asd"
            value = b"asd" * 1000

            db[key] = value
            assert db.setdefault(key, b"asd") == value
            assert db[key] == value
            assert db.get(key) == value

        self._delete_db()

    def test_mem_grow_batch(self):
        value = b"asd" * 1000

        def data():
            yield "key_1", value
            yield "key_2", value

        with Lmdb.open(self._name, "n", map_size=1024) as db:
            db.update(data())
            assert db["key_1"] == value
            assert db["key_2"] == value

        self._delete_db()

    def test_missing_read_only(self):
        with self.assertRaises(Error):
            with Lmdb.open(self._name, "r", map_size=1024) as db:
                db["key"] = "value"

        assert not Path(self._name).exists()

    def test_modify(self):
        self._init_db()
        with Lmdb.open(self._name, "c") as f:
            self._dict[b"g"] = f[b"g"] = b"indented"
            self.assertUnorderedMappingEqual(f, self._dict)

            self.assertEqual(f.setdefault(b"xxx", b"foo"), b"foo")
            self.assertEqual(f[b"xxx"], b"foo")

        self._delete_db()


if __name__ == "__main__":
    import unittest

    unittest.main()
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[flake8]
max-line-length = 120
select = B, E7, E9, W2, W3, W6, F
ignore = E704
exclude = .git,.mypy_cache,__pycache__,build,dist
per-file-ignores =
    benchmark.py:F821
--------------------------------------------------------------------------------