├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── aiodiskdb ├── __init__.py ├── abstracts.py ├── aiodiskdb.py ├── exceptions.py ├── internals.py ├── local_types.py └── transaction.py ├── docs ├── aiodiskdb.gif └── logo128.png ├── runtests ├── setup.py └── test ├── __init__.py ├── test_checkpoints.py ├── test_concurrency.py ├── test_concurrency_small_files.py ├── test_drop_index.py ├── test_errors.py ├── test_events.py ├── test_ltrim.py ├── test_read_write.py ├── test_rtrim.py ├── test_signals.py └── test_transaction.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .coveralls.yml 131 | .idea/* 132 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | os: 3 | - linux 4 | dist: 5 | - focal 6 | python: 7 | - 3.8.5 8 | - 3.9.1 9 | script: 10 | - pip install coverage==4.5.4 11 | - coverage run --source=aiodiskdb -m unittest discover 12 | after_success: 13 | - coverage report 14 | - if [[ $(python3 -V 2>&1) == *"Python 3.9"* ]]; then 15 | pip install python-coveralls; 16 | coveralls; 17 | fi 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Guido Dassori // mempoolco. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![aiodiskdb logo](./docs/logo128.png "aiodiskdb") 2 | ### Minimal, embeddable on-disk DB, tailored for asyncio. 
3 | 
4 | ---
5 | [![Coverage Status](https://coveralls.io/repos/github/mempoolco/aiodiskdb/badge.svg?branch=main)](https://coveralls.io/github/mempoolco/aiodiskdb?branch=main)
6 | [![PyPI version](https://badge.fury.io/py/aiodiskdb.svg)](https://badge.fury.io/py/aiodiskdb)
7 | [![PyPI license](https://img.shields.io/pypi/l/aiodiskdb.svg)](https://pypi.python.org/pypi/aiodiskdb/)
8 | [![PyPI pyversions](https://img.shields.io/pypi/pyversions/aiodiskdb.svg)](https://pypi.python.org/pypi/aiodiskdb/)
9 | [![Build Status](https://travis-ci.com/mempoolco/aiodiskdb.svg?branch=main)](https://travis-ci.com/mempoolco/aiodiskdb)
10 | [![Chat on Telegram](https://img.shields.io/badge/Chat%20on-Telegram-brightgreen.svg)](https://t.me/mempoolco)
11 | [![Donate with Bitcoin](https://en.cryptobadges.io/badge/micro/3FVGopUDc6tyAP6t4P8f3GkYTJ5JD5tPwV)](https://en.cryptobadges.io/donate/3FVGopUDc6tyAP6t4P8f3GkYTJ5JD5tPwV)
12 | 
13 | 
14 | **aiodiskdb** is a lightweight, fast, simple **append-only** database.
15 | 
16 | It is designed to be used inside the `asyncio` event loop.
17 | 
18 | ### Install
19 | 
20 | ```bash
21 | pip install aiodiskdb
22 | ```
23 | 
24 | ### Usage
25 | 
26 | Start the DB in fire-and-forget mode:
27 | ```python
28 | from aiodiskdb import AioDiskDB, ItemLocation
29 | 
30 | db = AioDiskDB('/tmp/aiodiskdb')
31 | 
32 | loop.create_task(db.run())
33 | 
34 | ```
35 | 
36 | Use the DB API to write and read data from a coroutine:
37 | 
38 | ```python
39 | async def read_and_write():
40 |     new_data_location: ItemLocation = await db.add(b'data')
41 |     data: bytes = await db.read(new_data_location)
42 |     assert data == b'data'
43 | 
44 |     noted_location = ItemLocation(
45 |         index=0,
46 |         position=80,
47 |         size=1024333
48 |     )
49 |     prev_saved_data: bytes = await db.read(noted_location)
50 |     assert len(prev_saved_data) == 1024333
51 | ```
52 | 
53 | Stop the DB before closing the application:
54 | ```python
55 | await db.stop()
56 | ```
57 | 
58 | Be alerted when data is actually persisted to disk:
59 | 
60 | ```python
61 | async def callback(timestamp: int, event: WriteEvent):
62 |     human_time = datetime.fromtimestamp(timestamp).isoformat()
63 |     log(f'{human_time} - {event} persisted to disk.')
64 |     await do_something(event)
65 | 
66 | db.events.on_write = callback
67 | ```
68 | 
69 | Or hook into other events:
70 | ```python
71 | db.events.on_start = ...
72 | db.events.on_stop = ...
73 | db.events.on_failure = ...
74 | db.events.on_index_drop = ...
75 | ```
76 | 
77 | ### Asynchronous non-blocking
78 | 
79 | File writes are handled without locks.
80 | Data is appended in RAM and persisted asynchronously, according to customizable settings.
81 | 
82 | ### Transactional
83 | 
84 | "All or nothing" commits.
85 | All DB write operations are locked during a commit, while reads are still allowed.
86 | This ensures an arbitrary sequence of data is persisted to disk as a whole.
87 | 
88 | Transactions are scoped: data added inside a transaction is not available outside of it until committed.
89 | ```python
90 | transaction = await db.transaction()
91 | 
92 | transaction.add(b'cafe')
93 | transaction.add(b'babe')
94 | transaction.add(b'deadbeef')
95 | 
96 | locations: typing.Sequence[ItemLocation] = await transaction.commit()
97 | ```
98 | 
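The `ItemLocation` objects returned by `commit()` behave like any other location and can be read back through the regular API. A minimal sketch (it assumes the `db` instance from the examples above is still running):

```python
# `locations` comes from the commit() call above
for location in locations:
    data: bytes = await db.read(location)
    assert data in (b'cafe', b'babe', b'deadbeef')
```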
99 | 
100 | ### Not-so-append-only
101 | 
102 | **Aiodiskdb** is an append-only database. This means you'll never see methods that *delete* or *remove* single entries.
103 | 
104 | However, data pruning is supported, with the following methods:
105 | 
106 | ```python
107 | db.enable_overwrite()
108 | db.rtrim(0, 400)
109 | db.ltrim(8, 900)
110 | db.drop_index(3)
111 | db.disable_overwrite()
112 | ```
113 | 
114 | Pruning must be explicitly enabled with `enable_overwrite()`. The three pruning methods, respectively:
115 | - prune data from the right, at index `0`, from location `400` to the end of the index (`rtrim`)
116 | - prune data from the left, at index `8`, from the beginning up to location `900` (`ltrim`)
117 | - drop the whole index `3`, resulting in a file deletion (`drop_index`)
118 | 
119 | All item locations not involved in a trim operation remain valid, even after an `ltrim`.
120 | 
121 | ### Highly customizable
122 | 
123 | The default parameters:
124 | ```python
125 | _FILE_SIZE = 128
126 | _FILE_PREFIX = 'data'
127 | _FILE_ZEROS_PADDING = 5
128 | _BUFFER_SIZE = 16
129 | _BUFFER_ITEMS = 100
130 | _FLUSH_INTERVAL = 30
131 | _TIMEOUT = 30
132 | _CONCURRENCY = 32
133 | ```
134 | can be easily customized. In the following example the max file size is 16 MB,
135 | and data is persisted to disk every 1 MB **OR** every 100 new items **OR** every minute.
136 | 
137 | ```python
138 | db = AioDiskDB(
139 |     max_file_size=16,
140 |     max_buffer_size=1,
141 |     max_buffer_items=100,
142 |     flush_interval=60
143 | )
144 | ```
145 | The max DB size is `max_file_size * max_files`.
146 | With `file_padding=5` the max number of files is 10,000.
147 | 
148 | A DB created with `file_padding=5` and `max_file_size=16` can store up to 160 GB, or up to 167,772,160,000 items,
149 | and will allocate 10,000 files at its maximum capacity.
150 | 
151 | ### Tries to do its best
152 | 
153 | Hook the blocking `on_stop_signal` method to signal handlers, to avoid data loss on exit.
154 | ```python
155 | import signal
156 | from aiodiskdb import AioDiskDB
157 | 
158 | db = AioDiskDB(...)
159 | 
160 | signal.signal(signal.SIGINT, db.on_stop_signal)
161 | signal.signal(signal.SIGTERM, db.on_stop_signal)
162 | # SIGKILL cannot be caught, so it cannot be handled this way
163 | ```
164 | 
165 | ### Fast enough for some use cases
166 | 
167 | ![aiodiskdb files](./docs/aiodiskdb.gif)
168 | 
169 | The concurrency tests, part of the unit test suite, can be replicated as a system benchmark.
170 | The following runs were performed on a common consumer SSD:
171 | ```
172 | Duration: 14.12s,
173 | Reads: 2271 (~162/s),
174 | Writes: 2014 (~143/s),
175 | Bandwidth: 1000MB (71MB/s),
176 | Avg file size: 508.0kB
177 | ```
178 | 
179 | ```
180 | Duration: 18.97s,
181 | Reads: 10244 (~540/s),
182 | Writes: 10245 (~540/s),
183 | Bandwidth: 20MB (1.05MB/s),
184 | Avg file size: 1.0kB
185 | ```
186 | 
187 | ### Limitations
188 | 
189 | ```python
190 | assert len(data) <= max_buffer_size
191 | assert max_transaction_size < RAM
192 | assert max_file_size < 4096
193 | ```
194 | 
195 | If `rtrim` is applied to the **current** index, the space is reused; otherwise it is not.
196 | With `ltrim`, once the space is freed, it is not allocated again.
197 | With `drop_index`, the discarded index is not reused.
198 | 
199 | With a lot of data turn-over (pruning by trimming), it may be necessary to set an unusually high `file_padding` to
200 | increase the database's potential size.
201 | 
202 | ---
203 | 
204 | ### Credits
205 | 
206 | Inspired by the raw block data storage of the [bitcoincore blocks database](https://en.bitcoin.it/wiki/Bitcoin_Core_0.11_(ch_2):_Data_Storage).
207 | 
208 | Logo by mepheesto.
209 | 
210 | ### Notes
211 | 
212 | **Alpha stage.
Still under development, use with care and expect data losses.** 213 | 214 | Donate :heart: **Bitcoin** to: 3FVGopUDc6tyAP6t4P8f3GkYTJ5JD5tPwV or [paypal](https://paypal.me/gdax) 215 | -------------------------------------------------------------------------------- /aiodiskdb/__init__.py: -------------------------------------------------------------------------------- 1 | from aiodiskdb.aiodiskdb import AioDiskDB 2 | from aiodiskdb.local_types import ItemLocation 3 | -------------------------------------------------------------------------------- /aiodiskdb/abstracts.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import asyncio 3 | import time 4 | 5 | import typing 6 | 7 | from aiodiskdb import exceptions 8 | from aiodiskdb.internals import ensure_running, GracefulExit, logger 9 | from aiodiskdb.local_types import EventsHandlers, ItemLocation 10 | 11 | 12 | class AsyncLockable(metaclass=abc.ABCMeta): 13 | def __init__(self, *_, **__): 14 | super().__init__() 15 | self._write_lock = asyncio.Lock() 16 | self._transaction_lock = asyncio.Lock() 17 | self._read_lock = asyncio.Lock() 18 | self._reads_count = 0 19 | 20 | def _incr_read(self): 21 | self._reads_count += 1 22 | 23 | def _decr_read(self): 24 | self._reads_count -= 1 25 | 26 | 27 | class AsyncObservable(metaclass=abc.ABCMeta): 28 | def __init__(self, *_, **__): 29 | super().__init__(self, *_, **__) 30 | self._events = EventsHandlers() 31 | 32 | @property 33 | def events(self): 34 | return self._events 35 | 36 | 37 | class AsyncRunnable(AsyncObservable, AsyncLockable, metaclass=abc.ABCMeta): 38 | def __init__(self, *_, timeout=0, **__): 39 | super().__init__(*_, **__) 40 | self._running = False 41 | self._error = False 42 | self._set_stop = False 43 | self._timeout = timeout 44 | self._blocking_stop = False 45 | self._stopped = False 46 | 47 | @abc.abstractmethod 48 | def _pre_stop_signal(self): 49 | pass # pragma: no cover 50 | 51 | def on_stop_signal(self, *args): 52 | """ 53 | Non async method. Handle stop signals. 54 | """ 55 | logger.warning('Requested stop signal: %s', args) 56 | if self._pre_stop_signal(): 57 | raise GracefulExit() 58 | 59 | @abc.abstractmethod 60 | async def _pre_loop(self): 61 | pass # pragma: no cover 62 | 63 | @abc.abstractmethod 64 | async def _run_loop(self): 65 | pass # pragma: no cover 66 | 67 | @abc.abstractmethod 68 | def _teardown(self): 69 | pass # pragma: no cover 70 | 71 | @property 72 | def running(self): 73 | return self._running 74 | 75 | @property 76 | def stopped(self): 77 | return self._stopped 78 | 79 | @ensure_running(False) 80 | async def run(self): 81 | """ 82 | Must be launched before using the Database as a non blocking task. 
83 | example: 84 | loop.create_task(instance.run()) 85 | loop.run_until_complete() 86 | """ 87 | if self._stopped or self._set_stop or self._error: 88 | raise exceptions.InvalidDBStateException('DB instance not clean.') 89 | 90 | loop = asyncio.get_event_loop() 91 | try: 92 | await self._pre_loop() 93 | except Exception as e: 94 | self._running = False 95 | self._error = e 96 | self._stopped = True 97 | raise 98 | start_fired = False 99 | logger.info('Starting aiodiskdb') 100 | while 1: 101 | if not start_fired and self.running and self.events.on_start: 102 | loop.create_task(self.events.on_start(time.time())) 103 | start_fired = True 104 | if self._blocking_stop: 105 | break 106 | try: 107 | if self._set_stop: 108 | await self._teardown() 109 | self._stopped = True 110 | break 111 | await self._run_loop() 112 | await asyncio.sleep(0.005) 113 | self._running = True 114 | except Exception as e: 115 | if self.events.on_failure: 116 | loop.create_task(self.events.on_failure(time.time())) 117 | self._running = False 118 | self._error = e 119 | self.events.on_stop and \ 120 | loop.create_task(self.events.on_stop(time.time())) 121 | self._stopped = True 122 | raise 123 | self.events.on_stop and loop.create_task(self.events.on_stop(time.time())) 124 | logger.warning('Aiodiskdb is stopped') 125 | self._running = False 126 | self._stopped = True 127 | if self._error and isinstance(self._error, Exception): 128 | raise 129 | 130 | @ensure_running(True) 131 | async def stop(self): 132 | logger.debug('Requested stop') 133 | stop_requested_at = time.time() 134 | self._set_stop = True 135 | while time.time() - stop_requested_at < self._timeout: 136 | if not self._running: 137 | self._stopped = True 138 | return True 139 | await asyncio.sleep(0.1) 140 | raise exceptions.TimeoutException(f'Loop is still running after {self._timeout} seconds') 141 | 142 | 143 | class AioDiskDBTransactionAbstract(metaclass=abc.ABCMeta): 144 | @abc.abstractmethod 145 | def add(self, data: bytes) -> None: 146 | pass # pragma: no cover 147 | 148 | @abc.abstractmethod 149 | async def commit(self) -> typing.Iterable[ItemLocation]: 150 | pass # pragma: no cover 151 | -------------------------------------------------------------------------------- /aiodiskdb/aiodiskdb.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | import os 4 | import shutil 5 | from collections import OrderedDict 6 | from concurrent.futures.thread import ThreadPoolExecutor 7 | from pathlib import Path 8 | import time 9 | import typing 10 | 11 | from aiodiskdb import exceptions 12 | from aiodiskdb.internals import ensure_running, ensure_async_lock, logger 13 | from aiodiskdb.abstracts import AsyncRunnable, AioDiskDBTransactionAbstract 14 | from aiodiskdb.local_types import ItemLocation, LockType, Buffer, TempBufferData, WriteEvent, FileHeader 15 | 16 | _FILE_SIZE = 128 17 | _FILE_PREFIX = 'data' 18 | _FILE_ZEROS_PADDING = 5 19 | _BUFFER_SIZE = 16 20 | _BUFFER_ITEMS = 100 21 | _FLUSH_INTERVAL = 30 22 | _GENESIS_BYTES = b'\r\xce\x8f7' 23 | _TIMEOUT = 30 24 | _CONCURRENCY = 32 25 | 26 | 27 | class AioDiskDB(AsyncRunnable): 28 | GENESIS_BYTES_LENGTH = 4 29 | HEADER_TRIM_OFFSET = 4 30 | RESERVED_HEADER_LENGTH = 16 # for future usage 31 | 32 | """ 33 | Minimal on-disk DB, with buffers and timeouts. 34 | Made with love for Asyncio. 
35 | """ 36 | def __init__( 37 | self, 38 | path: str, 39 | create_if_not_exists: bool = False, 40 | overwrite: bool = False, 41 | clean_stale_data: bool = True, 42 | file_padding: int = _FILE_ZEROS_PADDING, 43 | file_prefix: str = _FILE_PREFIX, 44 | max_file_size: int = _FILE_SIZE, 45 | max_buffer_items: int = _BUFFER_ITEMS, 46 | max_buffer_size: int = _BUFFER_SIZE, 47 | flush_interval: int = _FLUSH_INTERVAL, 48 | genesis_bytes: bytes = _GENESIS_BYTES, 49 | timeout: int = _TIMEOUT, 50 | concurrency: int = _CONCURRENCY 51 | ): 52 | super().__init__() 53 | if not file_prefix.isalpha(): 54 | raise exceptions.InvalidConfigurationException('Wrong file prefix (must be alphabetic string)') 55 | self.path = Path(path) 56 | if create_if_not_exists: 57 | self.path.mkdir(parents=True, exist_ok=True) 58 | self._file_prefix = file_prefix 59 | if max_file_size <= 0 or max_buffer_size <= 0: 60 | raise exceptions.InvalidConfigurationException('max file size and max buffer size must be > 0') 61 | if max_file_size < max_buffer_size: 62 | raise exceptions.InvalidConfigurationException('max_file_size must be >= max_buffer_size') 63 | self._file_padding = int(file_padding) 64 | self._max_file_size = int(float(max_file_size) * 1024 ** 2) 65 | self._max_buffer_items = int(max_buffer_items) 66 | self._max_buffer_size = int(float(max_buffer_size) * 1024 ** 2) 67 | self._flush_interval = int(flush_interval) 68 | if len(genesis_bytes) != self.GENESIS_BYTES_LENGTH: 69 | raise exceptions.InvalidConfigurationException('Genesis bytes length must be 4') 70 | self._genesis_bytes = genesis_bytes 71 | self._timeout = int(timeout) 72 | self._buffer_index = OrderedDict() 73 | self._buffers: typing.List[Buffer] = list() 74 | self._last_flush = None 75 | self._concurrency = concurrency 76 | self._tmp_idx_and_buffer = TempBufferData(idx=dict(), buffer=None) 77 | self._executor = ThreadPoolExecutor(max_workers=concurrency) 78 | self._overwrite = overwrite 79 | self._clean_stale_data = clean_stale_data 80 | self._init_db() 81 | 82 | def _init_db(self): 83 | self._file_header_size = self.GENESIS_BYTES_LENGTH + \ 84 | self.HEADER_TRIM_OFFSET + \ 85 | self.RESERVED_HEADER_LENGTH 86 | if self._clean_stale_data: 87 | self._drop_existing_temp_files() 88 | self._apply_checkpoint() 89 | else: 90 | self._ensure_no_pending_checkpoint() 91 | _max_accepted_file_size = (2 ** 32 * 1024 ** 2) - 1 - self._file_header_size 92 | if self._max_file_size > _max_accepted_file_size: 93 | raise exceptions.InvalidConfigurationException(f'max file size is {_max_accepted_file_size}b') 94 | 95 | def reset(self): 96 | logger.debug('Requested DB reset to init state') 97 | assert self._stopped 98 | self._buffer_index = OrderedDict() 99 | self._buffers: typing.List[Buffer] = list() 100 | self._last_flush = None 101 | self._executor = ThreadPoolExecutor(max_workers=self._concurrency) 102 | self._tmp_idx_and_buffer = TempBufferData(idx=dict(), buffer=None) 103 | self._running = False 104 | self._error = False 105 | self._set_stop = False 106 | self._blocking_stop = False 107 | self._stopped = False 108 | self._init_db() 109 | 110 | def _hash_file(self, f: typing.IO) -> typing.Optional[bytes]: 111 | """ 112 | Hash files chunk by chunk, avoid to load the whole file in RAM. 
113 | """ 114 | i = 0 115 | chunk_size = 1024 ** 2 116 | _hash = None 117 | while 1: 118 | f.seek(i) 119 | c = f.read(chunk_size) 120 | if not c: 121 | break 122 | i += chunk_size 123 | _hash = hashlib.sha256().digest() 124 | f.seek(0) 125 | 126 | return _hash 127 | 128 | def _bake_new_file_header(self) -> bytes: 129 | """ 130 | Bake a fresh file header for a new database file. 131 | """ 132 | return FileHeader( 133 | genesis_bytes=self._genesis_bytes, 134 | trim_offset=0, 135 | ).serialize() 136 | 137 | def _read_file_header(self, f: typing.IO): 138 | """ 139 | Read the first bytes of a file, and return the FileHeader 140 | """ 141 | f.seek(0) 142 | header = f.read(self._file_header_size) 143 | if not self._is_file_header(header): 144 | raise exceptions.InvalidDataFileException 145 | return FileHeader( 146 | genesis_bytes=self._genesis_bytes, 147 | trim_offset=int.from_bytes( 148 | header[self.GENESIS_BYTES_LENGTH:self.GENESIS_BYTES_LENGTH+self.HEADER_TRIM_OFFSET], 149 | 'little' 150 | ) 151 | ) 152 | 153 | def _bake_new_buffer(self, index: int): 154 | """ 155 | Bake a fresh buffer, for a new database file. 156 | """ 157 | return Buffer( 158 | index=index, 159 | data=b'', 160 | size=0, 161 | items=0, 162 | file_size=0, 163 | offset=0, 164 | head=True 165 | ) 166 | 167 | async def _refresh_current_buffer(self): 168 | """ 169 | Reload the current session buffer from the system state. 170 | To be used after a transaction or an index change (trim\drop). 171 | """ 172 | self._buffer_index.pop(self._buffers[-1].index) 173 | self._buffers = [] 174 | await self._setup_current_buffer() 175 | 176 | def _pre_stop_signal(self) -> bool: 177 | """ 178 | Handle graceful stop signals. Flush buffer to disk. 179 | """ 180 | if self._blocking_stop: 181 | return False 182 | self._blocking_stop = True 183 | assert not self._tmp_idx_and_buffer.idx 184 | self._tmp_idx_and_buffer = None 185 | while self._buffer_index: 186 | buffer = self._buffers.pop(0) 187 | v = self._buffer_index.pop(buffer.index) 188 | self._save_buffer_to_disk(TempBufferData(idx={buffer.index: v}, buffer=buffer)) 189 | return True 190 | 191 | @ensure_async_lock(LockType.WRITE) 192 | async def _clean_temp_buffer(self): 193 | """ 194 | Cleanup the current temp buffer. Unload the RAM, to be triggered after a disk flush. 195 | """ 196 | self._tmp_idx_and_buffer = TempBufferData(idx=dict(), buffer=None) 197 | 198 | def _read_data_from_buffer(self, location: ItemLocation): 199 | """ 200 | Read data from the main buffer. 201 | Data is here until the flush_buffer task triggers a disk flush. 202 | """ 203 | try: 204 | idx = self._buffer_index[location.index][location.position] 205 | buffer = self._buffers[idx] 206 | except KeyError: 207 | return None 208 | 209 | buffer_shift = 0 if buffer.size == buffer.file_size else buffer.file_size - buffer.size 210 | relative_position = location.position - buffer_shift 211 | data = buffer.data[relative_position: relative_position + location.size] 212 | assert len(data) == location.size, f'{len(data)} != {location.size}' 213 | return data 214 | 215 | def _read_data_from_temp_buffer(self, location: ItemLocation): 216 | """ 217 | Data is placed into the temp buffer while saving it on disk, 218 | so that writes are non blocking. 219 | """ 220 | try: 221 | idx = self._tmp_idx_and_buffer.idx[location.index][location.position] 222 | except KeyError: 223 | return None 224 | 225 | buffer = self._tmp_idx_and_buffer.buffer 226 | # just ensures the idx was previously saved, the temp buffer is flat. 
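# buffer.file_size - buffer.size is the on-disk size of the file before this buffer was filled;
# subtracting it converts the absolute location.position into an offset inside buffer.data.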
227 | buffer_shift = 0 if buffer.size == buffer.file_size else buffer.file_size - buffer.size 228 | relative_position = location.position - buffer_shift 229 | data = idx is not None and buffer.data[relative_position:relative_position + location.size] or None 230 | assert data is not None and len(data) == location.size or data is None 231 | return data 232 | 233 | def _get_filename_by_idx(self, idx: int, temp=False) -> str: 234 | t = temp and '.tmp.' or '' 235 | return f'{self.path}/{t}{self._file_prefix}' + f'{idx}'.zfill(self._file_padding) + '.dat' 236 | 237 | async def _setup_current_buffer(self): 238 | """ 239 | Setup the current buffer, starting from the disk files. 240 | If no files are found, setup a fresh buffer, otherwise check the genesis bytes. 241 | """ 242 | logger.debug('Setting up current buffer') 243 | files = sorted(os.listdir(self.path)) 244 | last_file = files and list( 245 | filter(lambda x: x.startswith(self._file_prefix) and x.endswith('.dat'), files) 246 | )[-1] 247 | if not last_file: 248 | # No previous files found for the current setup. Starting a new database. 249 | buffer = self._bake_new_buffer(0) 250 | offset, curr_idx = 0, 0 251 | else: 252 | curr_idx = int(last_file.replace(self._file_prefix, '').replace('.dat', '')) 253 | filename = self._get_filename_by_idx(curr_idx) 254 | curr_size = os.path.getsize(filename) - self._file_header_size 255 | if curr_size < 0: 256 | raise exceptions.InvalidDataFileException('Invalid file size') 257 | with open(filename, 'rb') as f: 258 | header = self._read_file_header(f) 259 | offset = header.trim_offset 260 | data = b'' 261 | buffer = Buffer( 262 | index=curr_idx, data=data, size=len(data), 263 | items=0, file_size=curr_size, offset=offset, 264 | head=False 265 | ) 266 | self._buffers.append(buffer) 267 | self._buffer_index[curr_idx] = OrderedDict() 268 | logger.debug('Current buffer setup done') 269 | 270 | @ensure_async_lock(LockType.TRANSACTION) 271 | async def _teardown(self): 272 | self._buffers[-1].size and await self._flush_buffer_no_transaction_lock() 273 | self._executor.shutdown(wait=True) 274 | 275 | def enable_overwrite(self): 276 | self._overwrite = True 277 | 278 | def disable_overwrite(self): 279 | self._overwrite = False 280 | 281 | @ensure_async_lock(LockType.WRITE) 282 | async def _pop_buffer_data(self) -> TempBufferData: 283 | """ 284 | Remove the buffer from the data queue. 285 | Put it into the temp storage for disk writing. 286 | Allocate a new buffer for the data queue. 287 | """ 288 | assert not self._tmp_idx_and_buffer.buffer, 'wrong state, cannot recover. buffer lost.' 289 | buffer = self._buffers.pop(0) 290 | v = self._buffer_index.pop(buffer.index) 291 | self._tmp_idx_and_buffer = TempBufferData(idx={buffer.index: v}, buffer=buffer) 292 | if buffer.file_size > self._max_file_size: 293 | new_buffer = self._bake_new_buffer(buffer.index + 1) 294 | else: 295 | new_buffer = Buffer( 296 | index=buffer.index, data=b'', 297 | size=0, items=0, file_size=buffer.file_size, 298 | offset=buffer.offset, head=False 299 | ) 300 | self._buffers.append(new_buffer) 301 | self._buffer_index[new_buffer.index] = OrderedDict() 302 | return self._tmp_idx_and_buffer 303 | 304 | def _save_buffer_to_disk(self, buffer_data: TempBufferData): 305 | """ 306 | Actually saves data from a temp buffer to the target file and position. 
307 | """ 308 | logger.debug('Saving buffer to disk') 309 | assert buffer_data.buffer and buffer_data.idx, (buffer_data.buffer, buffer_data.idx) 310 | buffer = buffer_data.buffer 311 | filename = self._get_filename_by_idx(buffer.index) 312 | try: 313 | if buffer.head: 314 | if Path(filename).exists(): 315 | self._stop = True 316 | raise exceptions.FilesInconsistencyException(f'File {filename} should not exists.') 317 | 318 | elif os.path.getsize(filename) != buffer.file_size - buffer.size + self._file_header_size: 319 | self._stop = True 320 | raise exceptions.InvalidDataFileException(f'File {filename} has unexpected size.') 321 | 322 | with open(filename, 'ab') as file: 323 | if buffer.head: 324 | file.write(self._bake_new_file_header()) 325 | file.write(buffer.data) 326 | except FileNotFoundError: 327 | self._stop = True 328 | raise exceptions.FilesInconsistencyException(f'Missing file {filename}') 329 | 330 | def _is_file_header(self, data: bytes) -> bool: 331 | """ 332 | Verify the header correctness 333 | """ 334 | if len(data) != self._file_header_size: 335 | return False 336 | p2 = self.GENESIS_BYTES_LENGTH + self.HEADER_TRIM_OFFSET 337 | if not data[:self.GENESIS_BYTES_LENGTH] == self._genesis_bytes: 338 | return False 339 | if not int.from_bytes( 340 | data[self.GENESIS_BYTES_LENGTH:p2], 'little' 341 | ) < self._max_file_size - self._file_header_size: 342 | return False 343 | if not data[p2:p2+self.RESERVED_HEADER_LENGTH] == int(0).to_bytes(self.RESERVED_HEADER_LENGTH, 'little'): 344 | return False 345 | return True 346 | 347 | @ensure_async_lock(LockType.TRANSACTION) 348 | async def flush(self): 349 | logger.debug('Requested explicit flush') 350 | if not self._buffers[-1].data: 351 | return 352 | return await self._flush_buffer_no_transaction_lock() 353 | 354 | @ensure_async_lock(LockType.TRANSACTION) 355 | async def _flush_buffer(self): 356 | """ 357 | Trigger blocking\non-blocking operations. 358 | - pop_buffer: coroutine, locked for writing 359 | - save_buffer_to_disk: blocking threaded task, non locked 360 | - clean_temp_buffer: coroutine, locked for writing 361 | """ 362 | await self._flush_buffer_no_transaction_lock() 363 | 364 | async def _flush_buffer_no_transaction_lock(self): 365 | timestamp = int(time.time()*1000) 366 | temp_buffer_data = await self._pop_buffer_data() 367 | await self._write_db_checkpoint(timestamp, temp_buffer_data.buffer.index) 368 | await asyncio.get_event_loop().run_in_executor( 369 | self._executor, 370 | self._save_buffer_to_disk, 371 | temp_buffer_data 372 | ) 373 | flush_time = time.time() 374 | if self.events.on_write: 375 | offset = temp_buffer_data.buffer.offset 376 | asyncio.get_event_loop().create_task( 377 | self.events.on_write( 378 | flush_time, 379 | WriteEvent( 380 | index=temp_buffer_data.buffer.index, 381 | position=temp_buffer_data.buffer.file_size - temp_buffer_data.buffer.size + offset, 382 | size=temp_buffer_data.buffer.size 383 | ) 384 | ) 385 | ) 386 | 387 | await self._clean_temp_buffer() 388 | self._last_flush = flush_time 389 | await self._clean_db_checkpoint(timestamp) 390 | 391 | def _process_location_read(self, location: ItemLocation): 392 | """ 393 | Actually read data from files. 
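Positions below the file's trim_offset were pruned by an ltrim and yield None;
otherwise the on-disk offset is the logical position minus trim_offset, past the file header.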
394 | """ 395 | try: 396 | filename = self._get_filename_by_idx(location.index) 397 | with open(filename, 'rb') as file: 398 | header = self._read_file_header(file) 399 | if location.position < header.trim_offset: 400 | return None 401 | file.seek(self._file_header_size + location.position - header.trim_offset) 402 | read = file.read(location.size) 403 | return read or None 404 | except FileNotFoundError: 405 | return None 406 | 407 | async def _pre_loop(self): 408 | if self._last_flush is None: 409 | self._last_flush = time.time() 410 | await self._setup_current_buffer() 411 | 412 | async def _run_loop(self): 413 | buffer = self._buffers[0] 414 | if buffer.file_size >= self._max_file_size: 415 | await self._flush_buffer() 416 | elif buffer.size >= self._max_buffer_size: 417 | await self._flush_buffer() 418 | elif time.time() - self._last_flush >= self._flush_interval: 419 | await self._flush_buffer() 420 | elif buffer.items >= self._max_buffer_items: 421 | await self._flush_buffer() 422 | 423 | @ensure_running(True) 424 | async def add(self, data: bytes) -> ItemLocation: 425 | """ 426 | Enqueue data into a buffer, update the indexes for reads from RAM. 427 | 428 | :param data: bytes 429 | :return: ItemLocation(int, int, int) 430 | """ 431 | logger.debug('Adding item to db, size: %s', len(data)) 432 | if not data: 433 | raise exceptions.EmptyPayloadException 434 | s = time.time() 435 | while self._buffers[-1].file_size + self._buffers[-1].offset >= self._max_file_size \ 436 | or self._buffers[-1].size >= self._max_buffer_size: 437 | # wait for the LRT to shift the buffers 438 | await asyncio.sleep(0.01) 439 | if time.time() - s > self._timeout: 440 | raise exceptions.TimeoutException 441 | return await self._add(data) 442 | 443 | @ensure_async_lock(LockType.WRITE) 444 | async def _add(self, data: bytes) -> ItemLocation: 445 | """ 446 | Add data into the current buffer. 447 | """ 448 | buffer = self._buffers[-1] 449 | data_size = len(data) 450 | if data_size > self._max_buffer_size: 451 | raise exceptions.WriteFailedException( 452 | f'File too big: {data_size} > {self._max_buffer_size}' 453 | ) 454 | location = ItemLocation( 455 | index=buffer.index, 456 | position=buffer.file_size, 457 | size=data_size 458 | ) 459 | self._buffer_index[location.index][location.position] = len(self._buffers) - 1 460 | buffer.data += data 461 | buffer.items += 1 462 | buffer.size += data_size 463 | buffer.file_size += data_size 464 | return location 465 | 466 | @ensure_running(True) 467 | @ensure_async_lock(LockType.READ) 468 | async def read(self, location: ItemLocation): 469 | """ 470 | Reads data from the DB. 471 | If there's no data in RAM for the given location, try with a disk read. 472 | 473 | :param location: ItemLocation(int, int, int) 474 | :return: bytes 475 | """ 476 | logger.debug('Reading location from db: %s', location) 477 | return self._read_data_from_buffer(location) or \ 478 | self._read_data_from_temp_buffer(location) or \ 479 | await asyncio.get_event_loop().run_in_executor( 480 | self._executor, 481 | self._process_location_read, 482 | location 483 | ) 484 | 485 | @ensure_running(True) 486 | async def transaction(self) -> AioDiskDBTransactionAbstract: 487 | from aiodiskdb.transaction import AioDiskDBTransaction 488 | return AioDiskDBTransaction(self) 489 | 490 | def destroy_db(self): 491 | """ 492 | Destroy the DB, clean the disk. 
493 | """ 494 | logger.warning('Requested DB destroy') 495 | if self.running: 496 | raise exceptions.RunningException('Database must be stopped before destroying it') 497 | if not self._overwrite: 498 | raise exceptions.ReadOnlyDatabaseException 499 | shutil.rmtree(self.path) 500 | return True 501 | 502 | @ensure_running(True) 503 | @ensure_async_lock(LockType.TRANSACTION) 504 | async def drop_index(self, index: int) -> int: 505 | """ 506 | Drop a single index. 507 | Ensures a flush first. 508 | If the deleted index is the current one, setup it again from scratch. 509 | """ 510 | logger.info('Requested index drop: %s', index) 511 | if not self._overwrite: 512 | raise exceptions.ReadOnlyDatabaseException 513 | assert not self._tmp_idx_and_buffer.buffer 514 | return await self._drop_index_non_locked(index) 515 | 516 | async def _drop_index_non_locked(self, index: int): 517 | filename = self._get_filename_by_idx(index) 518 | if not Path(filename).exists(): 519 | raise exceptions.IndexDoesNotExist 520 | dropped_index_size = os.path.getsize(filename) - self._file_header_size 521 | os.remove(filename) 522 | if self._buffers[-1].index == index: 523 | await self._refresh_current_buffer() 524 | 525 | if self.events.on_index_drop: 526 | asyncio.get_event_loop().create_task( 527 | self.events.on_index_drop(time.time(), index, dropped_index_size) 528 | ) 529 | return dropped_index_size 530 | 531 | @ensure_running(True, allow_stop_state=True) 532 | async def _write_db_checkpoint(self, timestamp: int, *files_indexes: int): 533 | """ 534 | Persist the current status of files before appending data. 535 | It will be reverted in case of a commit failure. 536 | """ 537 | checkpoints = self._get_checkpoints() 538 | if checkpoints: 539 | raise exceptions.InvalidDBStateException('Requested a checkpoint, but a checkpoint already exist') 540 | 541 | filenames = map(self._get_filename_by_idx, files_indexes) 542 | 543 | checkpoint = [] 544 | for i, file_name in enumerate(filenames): 545 | try: 546 | file_size = os.path.getsize(file_name) 547 | except FileNotFoundError: 548 | continue 549 | with open(file_name, 'rb') as f: 550 | file_hash = self._hash_file(f) 551 | f.seek(max(0, file_size - 8)) 552 | last_file_bytes = f.read(8) 553 | checkpoint.append( 554 | [ 555 | files_indexes[i].to_bytes(6, 'little'), 556 | file_size.to_bytes(6, 'little'), 557 | last_file_bytes, 558 | file_hash 559 | ] 560 | ) 561 | with open(os.path.join(self.path, f'.checkpoint-{timestamp}'), 'wb') as f: 562 | for s in checkpoint: 563 | f.write(b''.join(s)) 564 | 565 | @ensure_running(True, allow_stop_state=True) 566 | async def _clean_db_checkpoint(self, timestamp: int): 567 | """ 568 | The transaction is successfully committed. 569 | The checkpoint file must be deleted. 570 | """ 571 | try: 572 | os.remove(os.path.join(self.path, f'.checkpoint-{timestamp}')) 573 | except FileNotFoundError as e: 574 | raise exceptions.InvalidDBStateException( 575 | 'Requested to delete a checkpoint that does not exist' 576 | ) from e 577 | 578 | def _get_checkpoints(self) -> typing.List[str]: 579 | checkpoints = list( 580 | filter( 581 | lambda x: x.startswith('.checkpoint-'), 582 | os.listdir(self.path) 583 | ) 584 | ) 585 | if len(checkpoints) > 1: 586 | # This should NEVER happen. 587 | raise exceptions.InvalidDBStateException('Multiple checkpoints, unrecoverable error.') 588 | return checkpoints 589 | 590 | def _apply_checkpoint(self): 591 | """ 592 | Apply a database checkpoint to recover a previous state. 
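Each checkpoint record is 52 bytes: a 6-byte little-endian file index, a 6-byte little-endian
file size, the last 8 bytes of the file at checkpoint time, and a 32-byte sha256 file hash.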
593 | A checkpoint is created before a transaction commit is made. 594 | A checkpoint is created before a transaction commit is made. 595 | Having a checkpoint file during the AioDiskDB boot mean that a transaction commit is failed, 596 | and stale data may exists in the files. 597 | Applying the checkpoint discard all the data added to the files after taking it. 598 | Stale data is deleted. 599 | """ 600 | assert not self.running 601 | checkpoints = self._get_checkpoints() 602 | if not checkpoints: 603 | return 604 | checkpoint_file = checkpoints[0] 605 | checkpoint_filename = os.path.join(str(self.path), checkpoint_file) 606 | with open(checkpoint_filename, 'rb') as f: 607 | checkpoint = f.read() 608 | assert checkpoint 609 | pos = 0 610 | files = [] 611 | while pos < len(checkpoint): 612 | files.append( 613 | [ 614 | int.from_bytes(checkpoint[pos:pos+6], 'little'), 615 | int.from_bytes(checkpoint[pos+6:pos+12], 'little'), 616 | checkpoint[pos+12:pos+20], 617 | checkpoint[pos+20:pos+52] 618 | ] 619 | ) 620 | pos += 52 621 | for file_data in files: 622 | self._recover_files_from_checkpoint(file_data) 623 | os.remove(checkpoint_filename) 624 | 625 | def _recover_files_from_checkpoint(self, file_data: typing.List): 626 | """ 627 | Actually apply checkpoint rules to existing files. 628 | """ 629 | file_id = file_data[0] 630 | checkpoint_file_size = file_data[1] 631 | checkpoint_file_last_bytes = file_data[2] 632 | expected_hash = file_data[3] 633 | origin_file_name = self._get_filename_by_idx(file_id) 634 | bkp_file_name = self._get_filename_by_idx(file_id, temp=True) 635 | shutil.copy(origin_file_name, bkp_file_name) 636 | exc = None 637 | with open(bkp_file_name, 'r+b') as f: 638 | header = f.read(self._file_header_size) 639 | if not self._is_file_header(header): 640 | exc = exceptions.InvalidDataFileException('Invalid file header') 641 | else: 642 | f.seek(checkpoint_file_size - len(checkpoint_file_last_bytes)) 643 | data = f.read(len(checkpoint_file_last_bytes)) 644 | f.seek(0) 645 | f.truncate(checkpoint_file_size) 646 | if data != checkpoint_file_last_bytes: 647 | exc = exceptions.InvalidDataFileException('File corrupted. Unrecoverable error') 648 | final_hash = self._hash_file(f) 649 | if final_hash != expected_hash: 650 | exc = exceptions.InvalidDataFileException('Invalid file hash recovered') 651 | if not exc: 652 | shutil.copy(bkp_file_name, origin_file_name) 653 | os.remove(bkp_file_name) 654 | if exc: 655 | raise exc 656 | 657 | def _drop_existing_temp_files(self): 658 | for file in filter( 659 | lambda x: x.startswith(f'.tmp.{self._file_prefix}') and x.endswith('.dat'), 660 | os.listdir(self.path) 661 | ): 662 | os.remove(os.path.join(str(self.path), file)) 663 | 664 | def _ensure_no_pending_checkpoint(self): 665 | assert not self.running 666 | if any(map( 667 | lambda x: x.startswith('.checkpoint-'), 668 | os.listdir(self.path) 669 | )): 670 | raise exceptions.InvalidDBStateException('Pending checkpoint. DB must be cleaned.') 671 | 672 | @ensure_running(True) 673 | @ensure_async_lock(LockType.TRANSACTION) 674 | async def rtrim(self, index: int, trim_from: int, safety_check: bytes = b'') -> int: 675 | """ 676 | Trim an index, from the right. 677 | 678 | trim_from: the index location from which to delete data. 679 | safety_check: optional, must match the first bytes of the trimmed slice. 680 | 681 | return the size of the trimmed slice. 
682 | """ 683 | logger.debug('Requested rtrim: %s, %s, %s', index, trim_from, safety_check) 684 | if not self._overwrite: 685 | raise exceptions.ReadOnlyDatabaseException 686 | assert not self._tmp_idx_and_buffer.buffer, self._tmp_idx_and_buffer.buffer 687 | try: 688 | await self._flush_buffer_no_transaction_lock() 689 | return await self._do_rtrim(index, trim_from, safety_check) 690 | except FileNotFoundError: 691 | raise exceptions.IndexDoesNotExist 692 | 693 | async def _do_rtrim(self, index: int, trim_from: int, safety_check: bytes) -> int: 694 | if index < 0: 695 | raise exceptions.IndexDoesNotExist('Index must be > 0') 696 | filename = self._get_filename_by_idx(index) 697 | assert isinstance(trim_from, int) 698 | if not trim_from and not safety_check: 699 | return await self._drop_index_non_locked(index) 700 | 701 | elif not trim_from: 702 | with open(filename, 'r+b') as f: 703 | if safety_check != f.read(len(safety_check)): 704 | raise exceptions.InvalidTrimCommandException('safety check failed') 705 | return await self._drop_index_non_locked(index) 706 | 707 | pre_trim_file_size = os.path.getsize(filename) 708 | with open(filename, 'r+b') as f: 709 | f.seek(trim_from + self._file_header_size) 710 | if safety_check: 711 | if safety_check != f.read(len(safety_check)): 712 | raise exceptions.InvalidTrimCommandException('safety check failed') 713 | f.seek(trim_from + self._file_header_size) 714 | f.truncate() 715 | file_size = os.path.getsize(filename) 716 | if self._buffers[-1].index == index: 717 | await self._refresh_current_buffer() 718 | return pre_trim_file_size - file_size 719 | 720 | @ensure_running(True) 721 | @ensure_async_lock(LockType.TRANSACTION) 722 | async def ltrim(self, index: int, trim_to: int, safety_check: bytes = b''): 723 | """ 724 | Trim an index, from the left. 725 | 726 | trim_to: the index location of data that are going to be kept, 727 | anything before this point is trimmed out. 728 | safety_check: optional, must match the last bytes of the trimmed slice. 
729 | """ 730 | logger.debug('Requested ltrim: %s, %s, %s', index, trim_to, safety_check) 731 | if not self._overwrite: 732 | raise exceptions.ReadOnlyDatabaseException 733 | 734 | if index < 0: 735 | raise exceptions.InvalidDataFileException( 736 | 'Index must be >= 0' 737 | ) 738 | assert not self._tmp_idx_and_buffer.buffer, self._tmp_idx_and_buffer.buffer 739 | filename = self._get_filename_by_idx(index) 740 | temp_filename = self._get_filename_by_idx(index, temp=True) 741 | try: 742 | os.path.getsize(temp_filename) 743 | exc = exceptions.InvalidDBStateException('trim file already exists') 744 | self._error = exc 745 | self._stop = True 746 | except FileNotFoundError: 747 | pass 748 | try: 749 | await self._flush_buffer_no_transaction_lock() 750 | with open(filename, 'rb') as origin: 751 | header = self._read_file_header(origin) 752 | if trim_to <= header.trim_offset: 753 | raise exceptions.InvalidTrimCommandException( 754 | f'trim_to must be > current_offset ({header.trim_offset})' 755 | ) 756 | 757 | await self._do_ltrim(index, trim_to, safety_check, header) 758 | if self._buffers[-1].index == index: 759 | await self._refresh_current_buffer() 760 | except FileNotFoundError: 761 | raise exceptions.IndexDoesNotExist 762 | 763 | async def _do_ltrim(self, index: int, trim_to: int, safety_check: bytes, header: FileHeader) -> int: 764 | filename = self._get_filename_by_idx(index) 765 | index_size = os.path.getsize(filename) - self._file_header_size + header.trim_offset 766 | if index_size < trim_to: 767 | raise exceptions.InvalidTrimCommandException('trim_to must be <= index_size') 768 | elif index_size == trim_to: 769 | return await self._drop_index_non_locked(index) 770 | 771 | with open(filename, 'rb') as origin: 772 | safety_check_length = len(safety_check) 773 | seek_at = self._file_header_size + trim_to - header.trim_offset 774 | if safety_check_length: 775 | origin.seek(seek_at - safety_check_length) 776 | check = origin.read(safety_check_length) 777 | if check != safety_check: 778 | raise exceptions.InvalidTrimCommandException('safety check failed') 779 | else: 780 | origin.seek(seek_at) 781 | temp_filename = self._get_filename_by_idx(index, temp=True) 782 | with open(temp_filename, 'wb') as target: 783 | header.trim_offset = trim_to 784 | target.write(header.serialize()) 785 | data = origin.read(1024**2) 786 | while data: 787 | target.write(data) 788 | data = origin.read(1024 ** 2) 789 | shutil.move(temp_filename, filename) 790 | return trim_to 791 | -------------------------------------------------------------------------------- /aiodiskdb/exceptions.py: -------------------------------------------------------------------------------- 1 | class AioDiskDBException(Exception): 2 | pass 3 | 4 | 5 | class RunningException(AioDiskDBException): 6 | pass 7 | 8 | 9 | class NotRunningException(AioDiskDBException): 10 | pass 11 | 12 | 13 | class TimeoutException(AioDiskDBException): 14 | pass 15 | 16 | 17 | class ReadTimeoutException(AioDiskDBException): 18 | pass 19 | 20 | 21 | class DBNotInitializedException(AioDiskDBException): 22 | pass 23 | 24 | 25 | class InvalidDataFileException(AioDiskDBException): 26 | pass 27 | 28 | 29 | class ReadOnlyDatabaseException(AioDiskDBException): 30 | pass 31 | 32 | 33 | class FilesInconsistencyException(AioDiskDBException): 34 | pass 35 | 36 | 37 | class WriteFailedException(AioDiskDBException): 38 | pass 39 | 40 | 41 | class InvalidConfigurationException(AioDiskDBException): 42 | pass 43 | 44 | 45 | class EmptyTransactionException(AioDiskDBException): 
46 | pass 47 | 48 | 49 | class TransactionCommitOnGoingException(AioDiskDBException): 50 | pass 51 | 52 | 53 | class TransactionAlreadyCommittedException(AioDiskDBException): 54 | pass 55 | 56 | 57 | class InvalidDBStateException(AioDiskDBException): 58 | pass 59 | 60 | 61 | class IndexDoesNotExist(AioDiskDBException): 62 | pass 63 | 64 | 65 | class EmptyPayloadException(AioDiskDBException): 66 | pass 67 | 68 | 69 | class InvalidTrimCommandException(AioDiskDBException): 70 | pass 71 | -------------------------------------------------------------------------------- /aiodiskdb/internals.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from aiodiskdb import exceptions 4 | from aiodiskdb.local_types import LockType 5 | 6 | 7 | def ensure_running(expected_state: bool, allow_stop_state=False): 8 | def _decorator(f): 9 | async def _ensure(self, *a, **kw): 10 | if self._error: 11 | raise exceptions.NotRunningException('Database is in ERROR state') 12 | if expected_state and self._stopped and not allow_stop_state: 13 | raise exceptions.NotRunningException('Database is in STOP state') 14 | if self.running != expected_state: 15 | raise exceptions.NotRunningException('Database it not running') 16 | return await f(self, *a, **kw) 17 | return _ensure 18 | return _decorator 19 | 20 | 21 | def ensure_async_lock(lock_type: LockType): 22 | def _decorator(f): 23 | async def _ensure(self, *a, **kw): 24 | if lock_type == LockType.READ: 25 | await self._read_lock.acquire() 26 | self._incr_read() 27 | if self._reads_count == 1: 28 | await self._write_lock.acquire() 29 | self._read_lock.release() 30 | try: 31 | return await f(self, *a, **kw) 32 | finally: 33 | await self._read_lock.acquire() 34 | self._decr_read() 35 | if not self._reads_count: 36 | self._write_lock.release() 37 | self._read_lock.release() 38 | elif lock_type == LockType.WRITE: 39 | in_transaction = self._transaction_lock.locked() 40 | if not in_transaction: 41 | await self._transaction_lock.acquire() 42 | await self._write_lock.acquire() 43 | try: 44 | return await f(self, *a, **kw) 45 | finally: 46 | if not in_transaction: 47 | self._transaction_lock.release() 48 | self._write_lock.release() 49 | elif lock_type == LockType.TRANSACTION: 50 | await self._transaction_lock.acquire() 51 | try: 52 | return await f(self, *a, **kw) 53 | finally: 54 | self._transaction_lock.release() 55 | else: 56 | raise ValueError('Value must be LockType.READ or WRITE') 57 | return _ensure 58 | return _decorator 59 | 60 | 61 | class GracefulExit(SystemExit): 62 | code = 1 63 | 64 | 65 | logger = logging.getLogger('aiodiskdb') 66 | -------------------------------------------------------------------------------- /aiodiskdb/local_types.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from dataclasses import dataclass 3 | 4 | import typing 5 | from enum import Enum 6 | 7 | 8 | @dataclass 9 | class ItemLocation: 10 | index: int 11 | position: int 12 | size: int 13 | 14 | def serialize(self): 15 | return self.index.to_bytes(4, 'little') + \ 16 | self.position.to_bytes(4, 'little') + \ 17 | self.size.to_bytes(4, 'little') 18 | 19 | @classmethod 20 | def deserialize(cls, location: bytes): 21 | return cls( 22 | index=int.from_bytes(location[:4], 'little'), 23 | position=int.from_bytes(location[4:8], 'little'), 24 | size=int.from_bytes(location[8:12], 'little'), 25 | ) 26 | 27 | 28 | class LockType(Enum): 29 | READ = 0 30 | WRITE = 1 31 | TRANSACTION = 2 32 
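# How internals.ensure_async_lock uses these lock types:
# - READ: shared; the first concurrent reader acquires the write lock and the last one releases it,
#   so reads run in parallel while buffer writes wait.
# - WRITE: exclusive among writers; it also holds the transaction lock unless a transaction already does.
# - TRANSACTION: exclusive DB-wide lock used for flushes, commits, trims and index drops.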
| 33 | 34 | @dataclass 35 | class Buffer: 36 | index: int 37 | data: bytes 38 | size: int 39 | items: int 40 | file_size: int 41 | offset: int 42 | head: bool 43 | 44 | 45 | @dataclass 46 | class TempBufferData: 47 | buffer: typing.Optional[Buffer] 48 | idx: typing.Dict 49 | 50 | 51 | @dataclass 52 | class EventsHandlers: 53 | """ 54 | Callback signature, first argument is always the execution timestamp (time.time()). 55 | 56 | async def callback(fired_at: int, *callback_data): 57 | pass 58 | """ 59 | on_start: typing.Optional[callable] = None 60 | on_stop: typing.Optional[callable] = None 61 | on_failure: typing.Optional[callable] = None 62 | on_index_drop: typing.Optional[callable] = None 63 | on_write: typing.Optional[callable] = None 64 | 65 | def __setattr__(self, key, value): 66 | if value is not None and not asyncio.iscoroutinefunction(value): 67 | raise TypeError(f'{key} must be a coroutine') 68 | self.__dict__[key] = value 69 | 70 | 71 | class TransactionStatus(Enum): 72 | INITIALIZED = 1 73 | ONGOING = 2 74 | DONE = 3 75 | 76 | 77 | @dataclass 78 | class WriteEvent: 79 | index: int 80 | position: int 81 | size: int 82 | 83 | 84 | @dataclass 85 | class FileHeader: 86 | genesis_bytes: bytes 87 | trim_offset: int 88 | 89 | def serialize(self) -> bytes: 90 | return self.genesis_bytes + \ 91 | int(self.trim_offset).to_bytes(4, 'little') + \ 92 | int(0).to_bytes(16, 'little') # reserved 93 | -------------------------------------------------------------------------------- /aiodiskdb/transaction.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import time 3 | 4 | import typing 5 | from collections import OrderedDict, deque 6 | 7 | from aiodiskdb import AioDiskDB, exceptions 8 | from aiodiskdb.abstracts import AioDiskDBTransactionAbstract 9 | from aiodiskdb.internals import ensure_async_lock, logger 10 | from aiodiskdb.local_types import TempBufferData, TransactionStatus, Buffer, ItemLocation, LockType 11 | 12 | 13 | class AioDiskDBTransaction(AioDiskDBTransactionAbstract): 14 | def __init__(self, session: AioDiskDB): 15 | self.session = session 16 | self._stack = deque() 17 | self._status = TransactionStatus.INITIALIZED 18 | self._lock = asyncio.Lock() 19 | self._locations = list() 20 | logger.debug('Initialized a new transaction') 21 | 22 | @property 23 | def _transaction_lock(self): 24 | return self.session._transaction_lock 25 | 26 | def _bake_new_temp_buffer_data(self, res: typing.List[TempBufferData]) -> None: 27 | """ 28 | The current buffer must be full if this method is called. 29 | Bake a new buffer contiguous to the current. 30 | """ 31 | new_idx = res[-1].buffer.index + 1 32 | res.append( 33 | TempBufferData( 34 | self.session._bake_new_buffer(new_idx), 35 | idx={new_idx: dict()} 36 | ) 37 | ) 38 | 39 | def _bake_temp_buffer_data(self) -> typing.List[TempBufferData]: 40 | """ 41 | Bake buffer data so that 42 | function could handle it with no changes. 43 | There is no buffer size limit while in a Transaction, 44 | only the file size limit is respected. 
45 | """ 46 | assert self.session._buffers[-1].size == 0 47 | current_buffer = self.session._buffers[-1] 48 | res = [ 49 | TempBufferData( 50 | buffer=current_buffer, 51 | idx={current_buffer.index: self.session._buffer_index} 52 | ) 53 | ] 54 | max_file_size = self.session._max_file_size 55 | while len(self._stack): 56 | data = self._stack.popleft() 57 | data_size = len(data) 58 | buffer: Buffer = res[-1].buffer 59 | if data_size + buffer.file_size > max_file_size: 60 | self._bake_new_temp_buffer_data(res) 61 | self._stack.insert(0, data) 62 | continue 63 | self._locations.append( 64 | ItemLocation(buffer.index, buffer.file_size, data_size) 65 | ) 66 | buffer.data += data 67 | buffer.size += data_size 68 | buffer.file_size += data_size 69 | return res 70 | 71 | async def _update_session_buffer(self, temp_buffer_data: TempBufferData): 72 | """ 73 | This is fired after a Transaction is successfully saved to disk. 74 | Set the session buffer to the latest baked by the Transaction. 75 | """ 76 | temp_buffer_data.buffer.data = b'' 77 | temp_buffer_data.buffer.size = 0 78 | temp_buffer_data.buffer.head = not temp_buffer_data.buffer.file_size 79 | self.session._buffers[-1] = temp_buffer_data.buffer 80 | self.session._buffer_index = OrderedDict({temp_buffer_data.buffer.index: dict()}) 81 | 82 | def add(self, data: bytes): 83 | """ 84 | Add some data to a transaction. 85 | Data added into this scope is not available into the session 86 | until the transaction is committed. 87 | """ 88 | logger.debug('Adding item to transaction: %s', len(data)) 89 | if len(data) > self.session._max_file_size: 90 | raise exceptions.WriteFailedException( 91 | f'File too big: {len(data)} > {self.session._max_file_size}' 92 | ) 93 | if self._status == TransactionStatus.DONE: 94 | raise exceptions.TransactionAlreadyCommittedException 95 | elif self._status == TransactionStatus.ONGOING: 96 | raise exceptions.TransactionCommitOnGoingException 97 | 98 | self._stack.append(data) 99 | 100 | async def commit(self) -> typing.Iterable[ItemLocation]: 101 | """ 102 | Commit a transaction, save to data the <_stack> content, using TempBufferData objects. 103 | """ 104 | logger.debug('Requested transaction commit, tx size: %s', len(self._stack)) 105 | await self._lock.acquire() 106 | try: 107 | if self._status == TransactionStatus.DONE: 108 | raise exceptions.TransactionAlreadyCommittedException 109 | elif not self._stack: 110 | raise exceptions.EmptyTransactionException 111 | now = int(time.time()*1000) 112 | 113 | await self.session._flush_buffer() 114 | await self._do_commit(now) 115 | locations = self._locations 116 | self._locations = list() 117 | logger.debug('Transaction commit done') 118 | return locations 119 | finally: 120 | self._lock.release() 121 | 122 | @ensure_async_lock(LockType.TRANSACTION) 123 | async def _do_commit(self, timestamp: int): 124 | """ 125 | Part of the method. 126 | Actually saves data to disk. 
127 | """ 128 | self._status = TransactionStatus.ONGOING 129 | assert not self.session._buffers[-1].size 130 | idx_involved_in_batch = list() 131 | temp_buffers_data = self._bake_temp_buffer_data() 132 | for buff in temp_buffers_data: 133 | idx_involved_in_batch.extend(list(buff.idx)) 134 | await self.session._write_db_checkpoint( 135 | timestamp, *set(idx_involved_in_batch) 136 | ) 137 | assert temp_buffers_data 138 | temp_buffer_data = None 139 | for temp_buffer_data in temp_buffers_data: 140 | await asyncio.get_event_loop().run_in_executor( 141 | None, 142 | self.session._save_buffer_to_disk, 143 | temp_buffer_data 144 | ) 145 | assert temp_buffer_data 146 | await self._update_session_buffer(temp_buffer_data) 147 | await self.session._clean_db_checkpoint(timestamp) 148 | self._status = TransactionStatus.DONE 149 | -------------------------------------------------------------------------------- /docs/aiodiskdb.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mempoolco/aiodiskdb/8d162b637e7059d3d105716e1eba60851258101a/docs/aiodiskdb.gif -------------------------------------------------------------------------------- /docs/logo128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mempoolco/aiodiskdb/8d162b637e7059d3d105716e1eba60851258101a/docs/logo128.png -------------------------------------------------------------------------------- /runtests: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm .coverage 2>/dev/null 3 | rm -rf htmlcov 2>/dev/null 4 | coverage run --source=aiodiskdb -m unittest discover 5 | coverage report 6 | coverage html 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name='aiodiskdb', 8 | version='0.2.4a1', 9 | long_description=long_description, 10 | long_description_content_type="text/markdown", 11 | url='https://github.com/mempoolco/aiodiskdb/', 12 | license='MIT', 13 | author='Guido Dassori', 14 | author_email='guido.dassori@gmail.com', 15 | python_requires='>=3.8', 16 | description='Embeddable minimal asynchronous on disk DB', 17 | classifiers=[ 18 | 'Development Status :: 3 - Alpha', 19 | 'License :: OSI Approved :: MIT License', 20 | 'Programming Language :: Python :: 3.8', 21 | 'Programming Language :: Python :: 3.9', 22 | ], 23 | include_package_data=True, 24 | packages=['aiodiskdb'], 25 | package_dir={'aiodiskdb': 'aiodiskdb'}, 26 | ) 27 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import random 4 | import shutil 5 | import time 6 | from unittest import IsolatedAsyncioTestCase 7 | 8 | import typing 9 | 10 | from aiodiskdb import exceptions 11 | from aiodiskdb.aiodiskdb import AioDiskDB, _TIMEOUT, _FLUSH_INTERVAL 12 | from aiodiskdb.local_types import WriteEvent 13 | 14 | 15 | class AioDiskDBTestCase(IsolatedAsyncioTestCase): 16 | _path = '/tmp/aiodiskdb_test' 17 | 18 | def setUp( 19 | self, 20 | max_file_size=128, 21 | max_buffer_size=16, 22 | overwrite=True, 23 | genesis_bytes=b'test', 24 | timeout=_TIMEOUT, 25 | 
file_prefix='data', 26 | flush_interval=_FLUSH_INTERVAL 27 | ): 28 | self._flush_interval = flush_interval 29 | self.loop = asyncio.get_event_loop() 30 | self._timeout = timeout 31 | self._overwrite = overwrite 32 | self._max_file_size = max_file_size 33 | self._max_buffer_size = max_buffer_size 34 | self._genesis_bytes = genesis_bytes 35 | self._file_prefix = file_prefix 36 | self._writes = [] 37 | self._starts = [] 38 | self._stops = [] 39 | self._index_drops = [] 40 | self._failures = [] 41 | try: 42 | shutil.rmtree(self._path) 43 | except FileNotFoundError: 44 | pass 45 | self._setup_sut() 46 | 47 | def _hook_events(self): 48 | self.sut.events.on_start = self._on_start 49 | self.sut.events.on_stop = self._on_stop 50 | self.sut.events.on_write = self._on_write 51 | self.sut.events.on_index_drop = self._on_index_drop 52 | self.sut.events.on_failure = self._on_failure 53 | 54 | async def _on_write(self, timestamp, event: WriteEvent): 55 | self._writes.append([timestamp, event]) 56 | 57 | async def _on_start(self, timestamp): 58 | self._starts.append([timestamp]) 59 | 60 | async def _on_stop(self, timestamp): 61 | self._stops.append([timestamp]) 62 | 63 | async def _on_index_drop(self, timestamp, index: int, size: int): 64 | self._index_drops.append([timestamp, index, size]) 65 | 66 | async def _on_failure(self, timestamp, exception: typing.Optional[Exception] = None): 67 | self._failures.append([timestamp, exception]) 68 | 69 | def tearDown(self) -> None: 70 | self.sut.destroy_db() 71 | 72 | def _setup_sut(self, clean_stale_data=True): 73 | self.sut = AioDiskDB( 74 | self._path, 75 | create_if_not_exists=True, 76 | timeout=self._timeout, 77 | max_file_size=self._max_file_size, 78 | max_buffer_size=self._max_buffer_size, 79 | overwrite=self._overwrite, 80 | genesis_bytes=self._genesis_bytes, 81 | clean_stale_data=clean_stale_data, 82 | file_prefix=self._file_prefix, 83 | flush_interval=self._flush_interval 84 | ) 85 | self._hook_events() 86 | 87 | async def _run(self, expect_failure: str = ''): 88 | async def _handle_run(): 89 | try: 90 | await self.sut.run() 91 | except Exception as e: 92 | if expect_failure: 93 | self.assertTrue(expect_failure in str(e)) 94 | return 95 | raise 96 | 97 | self.loop.create_task(_handle_run(), name='aiodiskdb_main_loop') 98 | s = time.time() 99 | while not self.sut.running: 100 | await asyncio.sleep(0.01) 101 | self.assertLess(time.time() - s, 3, msg='timeout') 102 | 103 | async def _stop(self): 104 | await self.sut.stop() 105 | 106 | 107 | def run_test_db(f): 108 | async def _decorator(self, *a, **kw): 109 | try: 110 | await self._run() 111 | return await f(self, *a, **kw) 112 | finally: 113 | try: 114 | await self._stop() 115 | except exceptions.NotRunningException: 116 | print('run_test_db requested to shutdown a not running database') 117 | return _decorator 118 | 119 | 120 | class AioDiskDBConcurrencyTest(AioDiskDBTestCase): 121 | def setUp(self, *a, **kw): 122 | super().setUp() 123 | self._data = list() 124 | self._ongoing_reads = False 125 | self._reads_count = 0 126 | self._writes_count = 0 127 | self._transactions = 0 128 | self._pause_reads = False 129 | self._stop_reads = False 130 | 131 | async def _random_reads(self): 132 | self._ongoing_reads = True 133 | while 1: 134 | if self._pause_reads: 135 | self._ongoing_reads = False 136 | await asyncio.sleep(0.1) 137 | continue 138 | else: 139 | self._ongoing_reads = self._ongoing_reads or True 140 | if self._stop_reads: 141 | self._ongoing_reads = False 142 | break 143 | try: 144 | if not self._data: 
145 | await asyncio.sleep(0.01) 146 | continue 147 | p = random.randint(0, len(self._data)) 148 | location_and_data = self._data[p - 1] 149 | location, expected_data = location_and_data 150 | self.assertEqual( 151 | expected_data, 152 | await self.sut.read(location), 153 | msg=location 154 | ) 155 | self._reads_count += 1 156 | await asyncio.sleep(0.0001) 157 | except: 158 | self._ongoing_reads = False 159 | raise 160 | self._ongoing_reads = False 161 | -------------------------------------------------------------------------------- /test/test_checkpoints.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | from aiodiskdb import exceptions 5 | from test import AioDiskDBTestCase 6 | 7 | 8 | class TestCheckpoints(AioDiskDBTestCase): 9 | def setUp( 10 | self, *a, 11 | max_file_size=16, 12 | max_buffer_size=1 13 | ): 14 | super().setUp(*a, max_file_size=max_file_size, max_buffer_size=max_buffer_size) 15 | 16 | async def test(self): 17 | await self._run() 18 | 19 | with self.assertRaises(exceptions.InvalidDBStateException): 20 | await self.sut._clean_db_checkpoint(333) 21 | 22 | checkpoint_id = 333 23 | data = os.urandom(1024**2) 24 | data2 = b'daf' 25 | item_location = await self.sut.add(data) 26 | await self.sut._flush_buffer() 27 | await self.sut._write_db_checkpoint(checkpoint_id, 0) 28 | shutil.move(self._path + '/.checkpoint-333', self._path + '/_checkpoint') 29 | second_location_add = await self.sut.add(data2) 30 | assert await self.sut.read(second_location_add) == data2 31 | await self._stop() 32 | shutil.move(self._path + '/_checkpoint', self._path + '/.checkpoint-333') 33 | 34 | with self.assertRaises(exceptions.InvalidDBStateException): 35 | self._setup_sut(clean_stale_data=False) 36 | self._setup_sut() 37 | 38 | with self.assertRaises(FileNotFoundError): 39 | os.path.getsize(self._path + '/.checkpoint-333') 40 | 41 | await self._run() 42 | self.assertIsNone(await self.sut.read(second_location_add)) 43 | self.assertEqual(data, await self.sut.read(item_location)) 44 | await self._stop() 45 | with open(self._path + '/data00000.dat', 'rb') as f: 46 | x = f.read() 47 | self.assertEqual(x, self.sut._bake_new_file_header() + data) 48 | 49 | self._setup_sut() 50 | await self._run(expect_failure='Multiple checkpoint') 51 | await self.sut._write_db_checkpoint(checkpoint_id, 0) 52 | with self.assertRaises(exceptions.InvalidDBStateException): 53 | await self.sut._write_db_checkpoint(checkpoint_id, 0) 54 | self.assertTrue(self.sut.running) 55 | with self.assertRaises(exceptions.InvalidDBStateException): 56 | await self.sut._write_db_checkpoint(checkpoint_id + 1, 0) 57 | self.assertTrue(self.sut.running) 58 | shutil.copy(self._path + '/.checkpoint-333', self._path + '/.checkpoint-334') 59 | self.assertTrue(self.sut.running) 60 | with self.assertRaises(exceptions.NotRunningException) as e: 61 | for x in range(0, 20): 62 | await self.sut.add(os.urandom(1024 ** 2)) 63 | self.assertFalse(self.sut.running) 64 | 65 | 66 | class TestCleanStaleData(AioDiskDBTestCase): 67 | def test(self): 68 | with open(self._path + '/.tmp.data00000.dat', 'wb') as f: 69 | f.write(os.urandom(5)) 70 | 71 | self._setup_sut() 72 | -------------------------------------------------------------------------------- /test/test_concurrency.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import time 4 | from random import randint 5 | 6 | from test import run_test_db, 
AioDiskDBConcurrencyTest 7 | 8 | 9 | class TestAioDiskDBConcurrentReadWrite(AioDiskDBConcurrencyTest): 10 | 11 | @run_test_db 12 | async def test(self): 13 | self._running_test = True 14 | self.loop.create_task(self._random_reads()) 15 | data_stored = dict() 16 | total_size = 0 17 | s = time.time() 18 | sizes = [] 19 | while sum(data_stored.values()) < 1000 * 1024 ** 2: 20 | size = randint(1024, 1024**2) 21 | sizes.append(size) 22 | data = os.urandom(size) 23 | location = await self.sut.add(data) 24 | self._writes_count += 1 25 | data_stored.setdefault(location.index, 0) 26 | self._data.append([location, data]) 27 | data_stored[location.index] += size 28 | total_size += size 29 | await asyncio.sleep(0.00001) 30 | self.assertTrue(self._ongoing_reads, msg='reads failed') 31 | self._pause_reads = True 32 | print(f'R/W concurrency test over. Duration: {time.time() - s:.2f}s, ' 33 | f'Reads: {self._reads_count}, Writes: {self._writes_count}, ' 34 | f'Bandwidth: {total_size // 1024 ** 2}MB, ' 35 | f'Avg file size: {sum(sizes) / len(sizes) // 1024}kB' 36 | ) 37 | 38 | current_reads = self._reads_count 39 | # test is over, repeat the random reads with a new DB instance on the same data. 40 | print('Read only test with no-cache instance:') 41 | while self._ongoing_reads: 42 | await asyncio.sleep(1) 43 | await self.sut.stop() 44 | self._setup_sut() 45 | self.loop.create_task(self.sut.run()) 46 | while not self.sut.running: 47 | await asyncio.sleep(0.01) 48 | self._pause_reads = False 49 | s = time.time() 50 | while time.time() - s < 10: 51 | await asyncio.sleep(2) 52 | self.assertTrue(self._ongoing_reads, msg='reads failed') 53 | self._stop_reads = True 54 | print(f'Read only test from disk over. Reads: {self._reads_count - current_reads}') 55 | 56 | -------------------------------------------------------------------------------- /test/test_concurrency_small_files.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import time 4 | from random import randint 5 | 6 | from test import run_test_db, AioDiskDBConcurrencyTest 7 | 8 | 9 | class TestAioDiskDBConcurrentReadWriteSmallFiles(AioDiskDBConcurrencyTest): 10 | @run_test_db 11 | async def test(self): 12 | self._running_test = True 13 | self.loop.create_task(self._random_reads()) 14 | data_stored = dict() 15 | total_size = 0 16 | s = time.time() 17 | sizes = [] 18 | while sum(data_stored.values()) < 20 * 1024 ** 2: 19 | size = randint(1, 4096) 20 | sizes.append(size) 21 | data = os.urandom(size) 22 | location = await self.sut.add(data) 23 | self._writes_count += 1 24 | data_stored.setdefault(location.index, 0) 25 | self._data.append([location, data]) 26 | data_stored[location.index] += size 27 | total_size += size 28 | await asyncio.sleep(0.00001) 29 | self.assertTrue(self._ongoing_reads, msg='reads failed') 30 | self._pause_reads = True 31 | print(f'R/W concurrency test over. Duration: {time.time() - s:.2f}s, ' 32 | f'Reads: {self._reads_count}, Writes: {self._writes_count}, ' 33 | f'Bandwidth: {total_size // 1024 ** 2}MB, ' 34 | f'Avg file size: {sum(sizes) / len(sizes) // 1024}kB' 35 | ) 36 | 37 | current_reads = self._reads_count 38 | # test is over, repeat the random reads with a new DB instance on the same data. 
39 | print('Read only test with no-cache instance:') 40 | while self._ongoing_reads: 41 | await asyncio.sleep(1) 42 | await self.sut.stop() 43 | self._setup_sut() 44 | self.loop.create_task(self.sut.run()) 45 | while not self.sut.running: 46 | await asyncio.sleep(0.01) 47 | self._pause_reads = False 48 | s = time.time() 49 | while time.time() - s < 10: 50 | await asyncio.sleep(2) 51 | self.assertTrue(self._ongoing_reads, msg='reads failed') 52 | self._stop_reads = True 53 | print(f'Read only test from disk over. Reads: {self._reads_count - current_reads}') 54 | -------------------------------------------------------------------------------- /test/test_drop_index.py: -------------------------------------------------------------------------------- 1 | from aiodiskdb import exceptions 2 | from test import AioDiskDBTestCase, run_test_db 3 | 4 | 5 | class AioDBTestDropIndex(AioDiskDBTestCase): 6 | def setUp(self, *a, **kw): 7 | super().setUp(max_file_size=1, max_buffer_size=1, overwrite=False) 8 | 9 | async def test(self): 10 | await self._run() 11 | with self.assertRaises(exceptions.ReadOnlyDatabaseException): 12 | await self.sut.drop_index(99) 13 | with self.assertRaises(exceptions.RunningException): 14 | self.sut.destroy_db() 15 | await self.sut.stop() 16 | with self.assertRaises(exceptions.ReadOnlyDatabaseException): 17 | self.sut.destroy_db() 18 | self._setup_sut() 19 | await self._run() 20 | self.sut.enable_overwrite() 21 | with self.assertRaises(exceptions.IndexDoesNotExist): 22 | await self.sut.drop_index(99) 23 | transaction = await self.sut.transaction() 24 | transaction.add(b'cafe') 25 | t_loc = await transaction.commit() 26 | self.assertTrue(bool(self.sut._buffers)) 27 | loc_0 = await self.sut.add(b'babe') 28 | self.assertEqual(b'babe', await self.sut.read(loc_0)) 29 | self.assertEqual(b'cafe', await self.sut.read(t_loc[0])) 30 | await self.sut.drop_index(0) 31 | self.assertIsNone(await self.sut.read(loc_0)) 32 | self.assertIsNone(await self.sut.read(t_loc[0])) 33 | loc = await self.sut.add(b'test_after') 34 | d = await self.sut.read(loc) 35 | self.assertEqual(d, b'test_after') 36 | await self._stop() 37 | self._setup_sut() 38 | self.sut.enable_overwrite() 39 | await self._run() 40 | await self.sut.drop_index(0) 41 | await self._stop() 42 | 43 | def tearDown(self) -> None: 44 | self.assertEqual(2, len(self._index_drops)) 45 | self.assertIsInstance(self._index_drops[0][0], float) 46 | self.assertEqual(self._index_drops[0][1], 0) # index 47 | self.assertEqual(self._index_drops[0][2], 4) # length of 'cafe' 48 | super().tearDown() 49 | -------------------------------------------------------------------------------- /test/test_errors.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import time 4 | from pathlib import Path 5 | 6 | from aiodiskdb import exceptions 7 | from test import AioDiskDBTestCase 8 | 9 | 10 | class AioDBTestRunTwiceAndReset(AioDiskDBTestCase): 11 | async def test(self): 12 | await self._run() 13 | self.assertTrue(self.sut.running) 14 | await self.sut.stop() 15 | self.assertFalse(self.sut.running) 16 | s = time.time() 17 | while not self.sut._stopped: 18 | if time.time() - s > 3: 19 | raise ValueError('Unusually slow') 20 | await asyncio.sleep(0.1) 21 | self.assertTrue(self.sut._stopped) 22 | with self.assertRaises(exceptions.InvalidDBStateException): 23 | await self.sut.run() 24 | self.assertTrue(self.sut._set_stop) 25 | self.sut.reset() 26 | self.assertFalse(self.sut._stopped) 27 | 
self.assertFalse(self.sut.running) 28 | await self._run() 29 | self.assertTrue(self.sut.running) 30 | await self.sut.stop() 31 | s = time.time() 32 | while not self.sut._stopped: 33 | if time.time() - s > 3: 34 | raise ValueError('Unusually slow') 35 | await asyncio.sleep(0.1) 36 | 37 | 38 | class AioDBTestErrorWrongFiles(AioDiskDBTestCase): 39 | def setUp(self, *a, **kw): 40 | super().setUp(max_file_size=1, max_buffer_size=1) 41 | 42 | async def test(self): 43 | await self._run() 44 | with self.assertRaises(exceptions.EmptyPayloadException): 45 | await self.sut.add(b'') 46 | b = os.urandom(1024 ** 2 + 10) 47 | with self.assertRaises(exceptions.WriteFailedException): 48 | await self.sut.add(b) 49 | await self._stop() 50 | 51 | def tearDown(self) -> None: 52 | self.assertEqual(1, len(self._stops)) 53 | self.assertIsInstance(self._stops[0][0], float) 54 | self.assertEqual(1, len(self._starts)) 55 | self.assertIsInstance(self._starts[0][0], float) 56 | self.assertEqual(0, len(self._failures)) 57 | self.assertEqual(0, len(self._writes)) 58 | super().tearDown() 59 | 60 | 61 | class AioDBTestErrorWrongHeaderShort(AioDiskDBTestCase): 62 | def setUp(self, *a, **kw): 63 | super().setUp() 64 | Path(self._path).mkdir(parents=True, exist_ok=True) 65 | with open(self._path + '/data00000.dat', 'wb') as f: 66 | f.write(b'aa'*8) 67 | 68 | async def test(self): 69 | with self.assertRaises(exceptions.InvalidDataFileException): 70 | await self.sut.run() 71 | 72 | 73 | class AioDBTestErrorWrongHeader(AioDiskDBTestCase): 74 | def setUp(self, *a, **kw): 75 | super().setUp() 76 | Path(self._path).mkdir(parents=True, exist_ok=True) 77 | with open(self._path + '/data00000.dat', 'wb') as f: 78 | f.write(b'aa'*16) 79 | 80 | async def test(self): 81 | with self.assertRaises(exceptions.InvalidDataFileException): 82 | await self.sut.run() 83 | 84 | 85 | class AioDBTestErrorWrongGenesisFileShouldNotExists(AioDiskDBTestCase): 86 | def setUp(self, *a, **kw): 87 | super().setUp(max_file_size=0.1, max_buffer_size=0.1) 88 | 89 | def _corrupt_file(self): 90 | Path(self._path).mkdir(parents=True, exist_ok=True) 91 | with open(self._path + '/data00001.dat', 'wb') as f: 92 | f.write(b'aa'*8) 93 | 94 | async def test(self): 95 | await self._run(expect_failure='data00001.dat should not exists') 96 | self._corrupt_file() 97 | with self.assertRaises(exceptions.NotRunningException): 98 | for _ in range(0, 100): 99 | await self.sut.add(os.urandom(10240)) 100 | with self.assertRaises(exceptions.NotRunningException) as e: 101 | await self.sut.run() 102 | self.assertTrue('ERROR state' in str(e.exception)) 103 | 104 | def tearDown(self): 105 | self.assertEqual(1, len(self._stops)) 106 | self.assertIsInstance(self._stops[0][0], float) 107 | self.assertEqual(1, len(self._starts)) 108 | self.assertIsInstance(self._starts[0][0], float) 109 | self.assertEqual(1, len(self._failures)) 110 | self.assertIsInstance(self._failures[0][0], float) 111 | self.assertTrue(self._failures[0][0] - time.time() < 2) 112 | super().tearDown() 113 | 114 | 115 | class AioDBTestErrorZeroDBSizeError(AioDiskDBTestCase): 116 | async def test(self): 117 | with self.assertRaises(exceptions.InvalidConfigurationException): 118 | super().setUp(max_file_size=0, max_buffer_size=0) 119 | 120 | 121 | class AioDBTestErrorInvalidDBSizeError(AioDiskDBTestCase): 122 | async def test(self): 123 | with self.assertRaises(exceptions.InvalidConfigurationException): 124 | super().setUp(max_file_size=1, max_buffer_size=2) 125 | with 
self.assertRaises(exceptions.InvalidConfigurationException): 126 | super().setUp(max_file_size=2**32, max_buffer_size=2) 127 | 128 | 129 | class AioDBTestErrorInvalidGenesisBytes(AioDiskDBTestCase): 130 | async def test(self): 131 | with self.assertRaises(exceptions.InvalidConfigurationException): 132 | super().setUp(max_file_size=1, max_buffer_size=1, genesis_bytes=b'testtest') 133 | 134 | 135 | class AioDBTestErrorDBRestartedAfterError(AioDiskDBTestCase): 136 | async def test(self): 137 | with self.assertRaises(exceptions.InvalidConfigurationException): 138 | super().setUp(max_file_size=1, max_buffer_size=1, genesis_bytes=b'testtest') 139 | 140 | 141 | class AioDBTestErrorWrongFilesPrefix(AioDiskDBTestCase): 142 | async def test(self): 143 | with self.assertRaises(exceptions.InvalidConfigurationException): 144 | super().setUp(file_prefix='1222') 145 | with self.assertRaises(exceptions.InvalidConfigurationException): 146 | super().setUp(file_prefix='__aaa') 147 | with self.assertRaises(exceptions.InvalidConfigurationException): 148 | super().setUp(file_prefix='') 149 | 150 | def tearDown(self) -> None: 151 | pass 152 | 153 | 154 | class AioDBTestStopTimeoutError(AioDiskDBTestCase): 155 | def setUp(self, *a, **kw): 156 | super().setUp(max_file_size=1, max_buffer_size=1, timeout=1) 157 | 158 | async def _fail_flush(self): 159 | try: 160 | await self.sut._flush_buffer() 161 | except exceptions.NotRunningException as e: 162 | print('Expected exception raised:', str(e)) 163 | 164 | async def test(self): 165 | await self._run(expect_failure='wrong state, cannot recover. buffer lost.') 166 | await self.sut.add(b'a') 167 | await self.sut._write_lock.acquire() 168 | self.loop.create_task(self._fail_flush()) 169 | with self.assertRaises(exceptions.TimeoutException): 170 | await self.sut.stop() 171 | self.sut._write_lock.release() 172 | 173 | def tearDown(self) -> None: 174 | pass 175 | 176 | 177 | class AioDBTestFileSizeChangedError(AioDiskDBTestCase): 178 | def setUp(self, *a, **kw): 179 | super().setUp(max_file_size=10, max_buffer_size=2, timeout=1) 180 | 181 | async def _fail_flush(self): 182 | try: 183 | await self.sut._flush_buffer() 184 | except exceptions.InvalidDataFileException as e: 185 | print('Expected exception raised:', str(e)) 186 | 187 | async def test(self): 188 | await self._run(expect_failure='wrong state, cannot recover. buffer lost.') 189 | await self.sut.add(os.urandom(2 * 1024**2)) 190 | await self.sut._flush_buffer() 191 | filename = str(self.sut.path) + '/data00000.dat' 192 | 193 | with open(filename, 'r+b') as f: 194 | f.seek(os.path.getsize(filename)-10) 195 | f.truncate() 196 | await self.sut.add(os.urandom(2 * 1024 ** 2)) 197 | await self._fail_flush() 198 | 199 | def tearDown(self) -> None: 200 | pass 201 | 202 | 203 | class AioDBTestMissingFileException(AioDiskDBTestCase): 204 | def setUp(self, *a, **kw): 205 | super().setUp(max_file_size=10, max_buffer_size=2, timeout=1) 206 | 207 | async def _fail_flush(self): 208 | try: 209 | await self.sut._flush_buffer() 210 | except exceptions.FilesInconsistencyException as e: 211 | print('Expected exception raised:', str(e)) 212 | 213 | async def test(self): 214 | await self._run(expect_failure='wrong state, cannot recover. 
buffer lost.') 215 | await self.sut.add(os.urandom(2 * 1024**2)) 216 | await self.sut._flush_buffer() 217 | filename = str(self.sut.path) + '/data00000.dat' 218 | os.remove(filename) 219 | await self.sut.add(os.urandom(2 * 1024 ** 2)) 220 | await self._fail_flush() 221 | 222 | def tearDown(self) -> None: 223 | pass 224 | -------------------------------------------------------------------------------- /test/test_events.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from unittest import IsolatedAsyncioTestCase 3 | 4 | from aiodiskdb import exceptions 5 | from aiodiskdb.aiodiskdb import AioDiskDB 6 | from aiodiskdb.local_types import EventsHandlers 7 | 8 | 9 | class TestEventsHandlerStrictTyping(IsolatedAsyncioTestCase): 10 | def setUp(self) -> None: 11 | self.sut = EventsHandlers() 12 | self._dummy_ex = 0 13 | 14 | async def _dummy(self, *a, **kw): 15 | self._dummy_ex += 1 16 | 17 | async def test(self): 18 | with self.assertRaises(TypeError): 19 | self.sut.on_start = lambda w: '' 20 | 21 | self.sut.on_start = self._dummy 22 | await self.sut.on_start() 23 | self.sut.on_start = None 24 | self.assertEqual(1, self._dummy_ex) 25 | 26 | 27 | class AioDiskDBTestCase(IsolatedAsyncioTestCase): 28 | _path = '/tmp/aiodiskdb_test' 29 | 30 | def setUp(self, max_file_size=128, max_buffer_size=16, overwrite=True): 31 | self.loop = asyncio.get_event_loop() 32 | self._overwrite = True 33 | self._max_file_size = max_file_size 34 | self._max_buffer_size = max_buffer_size 35 | self._setup_sut() 36 | self.sut.destroy_db() 37 | self._overwrite = overwrite 38 | self._setup_sut() 39 | 40 | def tearDown(self) -> None: 41 | self.sut.destroy_db() 42 | 43 | def _setup_sut(self): 44 | self.sut = AioDiskDB( 45 | self._path, 46 | create_if_not_exists=True, 47 | read_timeout=5, 48 | max_file_size=self._max_file_size, 49 | max_buffer_size=self._max_buffer_size, 50 | overwrite=self._overwrite 51 | ) 52 | 53 | 54 | def run_test_db(f): 55 | async def _decorator(self, *a, **kw): 56 | try: 57 | self.loop.create_task(self.sut.run(), name='aiodiskdb_main_loop') 58 | while not self.sut.running: 59 | await asyncio.sleep(0.01) 60 | return await f(self, *a, **kw) 61 | finally: 62 | try: 63 | await self.sut.stop() 64 | except exceptions.NotRunningException: 65 | print('run_test_db requested to shutdown a not running database') 66 | return _decorator 67 | -------------------------------------------------------------------------------- /test/test_ltrim.py: -------------------------------------------------------------------------------- 1 | import os 2 | from aiodiskdb import exceptions 3 | from aiodiskdb.local_types import ItemLocation, WriteEvent 4 | from test import AioDiskDBTestCase, run_test_db 5 | 6 | 7 | class TestLTRIM(AioDiskDBTestCase): 8 | @run_test_db 9 | async def test(self): 10 | item_location = await self.sut.add(b'aaaa_1') 11 | self.assertEqual( 12 | ItemLocation(0, 0, 6), 13 | item_location 14 | ) 15 | item_location_2 = await self.sut.add(b'bbbb_2') 16 | self.assertEqual( 17 | ItemLocation(0, 6, 6), 18 | item_location_2 19 | ) 20 | item_location_3 = await self.sut.add(b'cccc_3') 21 | self.assertEqual( 22 | ItemLocation(0, 12, 6), 23 | item_location_3 24 | ) 25 | await self.sut.ltrim(0, 6, safety_check=b'_1') 26 | item_2 = await self.sut.read(item_location_2) 27 | self.assertEqual(item_2, b'bbbb_2') 28 | item_3 = await self.sut.read(item_location_3) 29 | self.assertEqual(item_3, b'cccc_3') 30 | with self.assertRaises(exceptions.InvalidTrimCommandException): 31 | 
await self.sut.ltrim(0, 6, safety_check=b'_1') 32 | await self.sut.ltrim(0, 12, safety_check=b'_2') 33 | item_2 = await self.sut.read(item_location_2) 34 | self.assertIsNone(item_2) 35 | item_3 = await self.sut.read(item_location_3) 36 | self.assertEqual(item_3, b'cccc_3') 37 | -------------------------------------------------------------------------------- /test/test_read_write.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from aiodiskdb.local_types import ItemLocation, WriteEvent 3 | from test import AioDiskDBTestCase, run_test_db 4 | 5 | 6 | class TestFlush(AioDiskDBTestCase): 7 | @run_test_db 8 | async def test(self): 9 | item_location = await self.sut.add(b'test_1') 10 | self.assertEqual( 11 | ItemLocation(0, 0, 6), 12 | item_location 13 | ) 14 | await self.sut.flush() 15 | self.assertEqual(self.sut._buffers[-1].data, b'') 16 | read1 = await self.sut.read(item_location) 17 | self.assertEqual(b'test_1', read1) 18 | 19 | 20 | class TestReadWriteCached(AioDiskDBTestCase): 21 | @run_test_db 22 | async def test(self): 23 | item_location = await self.sut.add(b'test_1') 24 | self.assertEqual( 25 | ItemLocation(0, 0, 6), 26 | item_location 27 | ) 28 | item_location_2 = await self.sut.add(b'test_2') 29 | self.assertEqual( 30 | ItemLocation(0, 6, 6), 31 | item_location_2 32 | ) 33 | read1 = await self.sut.read(item_location) 34 | self.assertEqual(b'test_1', read1) 35 | read2 = await self.sut.read(item_location_2) 36 | self.assertEqual(b'test_2', read2) 37 | self.assertEqual(self._writes, []) 38 | 39 | def tearDown(self): 40 | self.assertEqual(1, len(self._stops)) 41 | self.assertIsInstance(self._stops[0][0], float) 42 | self.assertEqual(1, len(self._starts)) 43 | self.assertIsInstance(self._starts[0][0], float) 44 | self.assertEqual(1, len(self._writes)) 45 | self.assertIsInstance(self._writes[0][0], float) 46 | self.assertEqual( 47 | WriteEvent(index=0, position=0, size=12), 48 | self._writes[0][1] 49 | ) 50 | super().tearDown() 51 | 52 | 53 | class TestReadWriteNonCached(AioDiskDBTestCase): 54 | @run_test_db 55 | async def test(self): 56 | item_location = await self.sut.add(b'test_1') 57 | self.assertEqual( 58 | ItemLocation(0, 0, 6), 59 | item_location 60 | ) 61 | item_location_2 = await self.sut.add(b'test_2') 62 | self.assertEqual( 63 | ItemLocation(0, 6, 6), 64 | item_location_2 65 | ) 66 | await self.sut.stop() # stop the sut, ensures the data write 67 | self.assertEqual( 68 | WriteEvent(index=0, position=0, size=12), 69 | self._writes[0][1] 70 | ) 71 | self._setup_sut() # re-instance the sut from scratch. 
72 | self.loop.create_task(self.sut.run()) 73 | while not self.sut.running: 74 | await asyncio.sleep(0.01) 75 | new_loc_2 = ItemLocation.deserialize(item_location_2.serialize()) 76 | read1 = await self.sut.read(new_loc_2) 77 | self.assertEqual(b'test_2', read1) 78 | read2 = await self.sut.read(item_location_2) 79 | self.assertEqual(b'test_2', read2) 80 | 81 | item_location_3 = item_location_2 82 | item_location_3.index = 99 83 | self.assertEqual(None, await self.sut.read(item_location_3)) 84 | 85 | 86 | class TestFlushInterval(AioDiskDBTestCase): 87 | def setUp(self, *a): 88 | super().setUp(*a, flush_interval=3) 89 | 90 | @run_test_db 91 | async def test(self): 92 | await self.sut.add(b'test_2') 93 | self.assertEqual(6, self.sut._buffers[-1].size) 94 | await asyncio.sleep(self.sut._flush_interval + 1) 95 | self.assertEqual(0, self.sut._buffers[-1].size) 96 | -------------------------------------------------------------------------------- /test/test_rtrim.py: -------------------------------------------------------------------------------- 1 | import os 2 | from aiodiskdb import exceptions 3 | from aiodiskdb.local_types import ItemLocation, WriteEvent 4 | from test import AioDiskDBTestCase, run_test_db 5 | 6 | 7 | class TestRTRIM(AioDiskDBTestCase): 8 | @run_test_db 9 | async def test(self): 10 | item_location = await self.sut.add(b'test_1') 11 | self.assertEqual( 12 | ItemLocation(0, 0, 6), 13 | item_location 14 | ) 15 | item_location_2 = await self.sut.add(b'test_2') 16 | self.assertEqual( 17 | ItemLocation(0, 6, 6), 18 | item_location_2 19 | ) 20 | await self.sut.stop() # stop the sut, ensures the data write 21 | self.assertEqual( 22 | WriteEvent(index=0, position=0, size=12), 23 | self._writes[0][1] 24 | ) 25 | self._setup_sut() # re-instance the sut from scratch. 
26 | await self._run() 27 | read1 = await self.sut.read(item_location_2) 28 | self.assertEqual(b'test_2', read1) 29 | read2 = await self.sut.read(item_location_2) 30 | self.assertEqual(b'test_2', read2) 31 | slice_size = await self.sut.rtrim(0, 9, safety_check=b't_2') 32 | slice_size_2 = await self.sut.rtrim(0, 6) 33 | self.assertEqual(slice_size + slice_size_2, len(read2)) 34 | self.assertIsNone(await self.sut.read(item_location_2)) 35 | item_location_2 = await self.sut.add(b'test_3') 36 | self.assertEqual( 37 | ItemLocation(0, 6, 6), 38 | item_location_2 39 | ) 40 | read3 = await self.sut.read(item_location_2) 41 | self.assertEqual(b'test_3', read3) 42 | self.sut.disable_overwrite() 43 | with self.assertRaises(exceptions.ReadOnlyDatabaseException): 44 | await self.sut.rtrim(0, 6) 45 | self.sut.enable_overwrite() 46 | 47 | 48 | class TestTrimIndexDoesNotExist(AioDiskDBTestCase): 49 | @run_test_db 50 | async def test(self): 51 | with self.assertRaises(exceptions.IndexDoesNotExist): 52 | await self.sut.rtrim(99, 111) 53 | 54 | with self.assertRaises(exceptions.IndexDoesNotExist): 55 | await self.sut.rtrim(-1, 111) 56 | 57 | 58 | class TestTrimWholeFile(AioDiskDBTestCase): 59 | @run_test_db 60 | async def test(self): 61 | for _ in range(0, 20): 62 | await self.sut.add(os.urandom(1024**2)) 63 | await self.sut.rtrim(0, 0) 64 | with self.assertRaises(FileNotFoundError): 65 | os.path.getsize(self._path + '/data00000.dat') 66 | for _ in range(0, 20): 67 | await self.sut.add(os.urandom(1024**2)) 68 | with self.assertRaises(exceptions.InvalidTrimCommandException): 69 | await self.sut.rtrim(0, 0, safety_check=b'wrong_bytes') 70 | with self.assertRaises(exceptions.InvalidTrimCommandException): 71 | await self.sut.rtrim(0, 1, safety_check=b'wrong_bytes') 72 | -------------------------------------------------------------------------------- /test/test_signals.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from test import AioDiskDBTestCase, run_test_db 5 | 6 | 7 | class AioDBTestExitSignals(AioDiskDBTestCase): 8 | def setUp(self, *a, **kw): 9 | super().setUp(max_file_size=1, max_buffer_size=1) 10 | self._added_location = None 11 | 12 | async def _persist_data(self): 13 | while not self.sut.running: 14 | await asyncio.sleep(1) 15 | self._added_location = await self.sut.add(b'data') 16 | 17 | @run_test_db 18 | async def test(self): 19 | """ 20 | test that the stop signals hook writes data before exiting 21 | """ 22 | await self._persist_data() 23 | self.assertIsNotNone(self._added_location) 24 | with self.assertRaises(FileNotFoundError): 25 | os.path.getsize(self._path + '/data00000.dat') 26 | with self.assertRaises(SystemExit): 27 | self.sut.on_stop_signal() 28 | self.sut.on_stop_signal() # fire it twice, it must be executed once 29 | self.assertEqual( 30 | self.sut._file_header_size + 4, 31 | os.path.getsize(self._path + '/data00000.dat') 32 | ) 33 | self._setup_sut() 34 | asyncio.get_event_loop().create_task(self.sut.run()) 35 | while not self.sut.running: 36 | await asyncio.sleep(1) 37 | data = await self.sut.read(self._added_location) 38 | self.assertEqual(b'data', data) 39 | -------------------------------------------------------------------------------- /test/test_transaction.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import random 4 | import time 5 | 6 | from aiodiskdb import exceptions 7 | from test import AioDiskDBTestCase, run_test_db, 
AioDiskDBConcurrencyTest 8 | 9 | 10 | class TestAioDiskDBTransaction(AioDiskDBTestCase): 11 | 12 | @run_test_db 13 | async def test(self): 14 | location1 = await self.sut.add(b'data1') 15 | transaction = await self.sut.transaction() 16 | transaction.add(b'data2') 17 | transaction.add(b'data3') 18 | await transaction.commit() 19 | with open(self._path + '/data00000.dat', 'rb') as f: 20 | x = f.read() 21 | self.assertEqual(x, self.sut._bake_new_file_header() + b'data1data2data3') 22 | location1.size += 10 # increase the location size to read contiguous data 23 | self.assertEqual( 24 | b'data1data2data3', 25 | await self.sut.read(location1) 26 | ) 27 | 28 | 29 | class TestAioDiskDBConcurrentTransactions(AioDiskDBConcurrencyTest): 30 | @run_test_db 31 | async def test(self): 32 | self._running_test = True 33 | self.loop.create_task(self._random_reads()) 34 | data_stored = dict() 35 | total_size = 0 36 | s = time.time() 37 | sizes = [] 38 | while sum(data_stored.values()) < 1000 * 1024 ** 2: 39 | random_transaction = random.randint(0, 10) 40 | if random_transaction < 3: 41 | """ 42 | Add some data by transactions 43 | """ 44 | transaction = await self.sut.transaction() 45 | transactions_data = [] 46 | for x in range(0, random.randint(1, 10)): 47 | size = random.randint(1024, 1024 ** 2) 48 | sizes.append(size) 49 | data = os.urandom(size) 50 | transactions_data.append(data) 51 | transaction.add(data) 52 | await asyncio.sleep(0.001) 53 | locations = await transaction.commit() 54 | self._writes_count += 1 55 | self._transactions += 1 56 | for x in zip(locations, transactions_data): 57 | self._data.append(x) # append [location, data] so the random reads can request it 58 | tx_chunk_size = len(x[1]) 59 | data_stored.setdefault(x[0].index, 0) 60 | data_stored[x[0].index] += tx_chunk_size 61 | total_size += tx_chunk_size 62 | else: 63 | """ 64 | Mix normal adds 65 | """ 66 | size = random.randint(1024, 1024 ** 2) 67 | sizes.append(size) 68 | data = os.urandom(size) 69 | location = await self.sut.add(data) 70 | self._writes_count += 1 71 | data_stored.setdefault(location.index, 0) 72 | self._data.append([location, data]) 73 | data_stored[location.index] += size 74 | total_size += size 75 | await asyncio.sleep(0.00001) 76 | self.assertTrue(self._ongoing_reads, msg='reads failed') 77 | self._pause_reads = True 78 | print(f'R/W concurrency test over. Duration: {time.time() - s:.2f}s, ' 79 | f'Reads: {self._reads_count}, Writes: {self._writes_count}, Transactions: {self._transactions}' 80 | f'Bandwidth: {total_size // 1024 ** 2}MB, ' 81 | f'Avg file size: {sum(sizes) / len(sizes) // 1024}kB' 82 | ) 83 | current_reads = self._reads_count 84 | 85 | # test is over, repeat the random reads with a new DB instance on the same data. 86 | print('Read only test with no-cache instance:') 87 | while self._ongoing_reads: 88 | await asyncio.sleep(0.2) 89 | await self.sut.stop() 90 | self._setup_sut() 91 | await self._run() 92 | while not self.sut.running: 93 | await asyncio.sleep(0.01) 94 | self._pause_reads = False 95 | s = time.time() 96 | while time.time() - s < 10: 97 | await asyncio.sleep(2) 98 | self.assertTrue(self._ongoing_reads, msg='reads failed') 99 | self._stop_reads = True 100 | print(f'Read only test from disk over. 
Reads: {self._reads_count - current_reads}') 101 | 102 | 103 | class TransactionErrors(AioDiskDBTestCase): 104 | def setUp(self, *a): 105 | super().setUp(*a, max_file_size=2, max_buffer_size=1) 106 | 107 | @run_test_db 108 | async def test(self): 109 | transaction = await self.sut.transaction() 110 | with self.assertRaises(exceptions.WriteFailedException): 111 | transaction.add(os.urandom(3*1024**2)) 112 | with self.assertRaises(exceptions.EmptyTransactionException): 113 | await transaction.commit() 114 | transaction.add(b'test') 115 | await transaction.commit() 116 | with self.assertRaises(exceptions.TransactionAlreadyCommittedException): 117 | await transaction.commit() 118 | with self.assertRaises(exceptions.TransactionAlreadyCommittedException): 119 | await transaction.add(b'bbb') 120 | transaction = await self.sut.transaction() 121 | for x in range(0, 100): 122 | transaction.add(os.urandom(1024**2)) 123 | self.assertTrue(self.sut.running) 124 | self.loop.create_task(transaction.commit()) 125 | await asyncio.sleep(0.01) 126 | self.assertTrue(self.sut.running) 127 | with self.assertRaises(exceptions.TransactionCommitOnGoingException): 128 | transaction.add(b'aaaa') 129 | --------------------------------------------------------------------------------