├── .github └── workflows │ └── test.yml ├── .gitignore ├── DictDataBase.code-workspace ├── LICENSE ├── README.md ├── assets ├── coverage.svg ├── logo.afdesign └── logo.png ├── dictdatabase ├── __init__.py ├── byte_codes.py ├── configuration.py ├── indexing.py ├── io_bytes.py ├── io_safe.py ├── io_unsafe.py ├── locking.py ├── models.py ├── sessions.py └── utils.py ├── justfile ├── profiler.py ├── pyproject.toml ├── scenario_comparison.py ├── scene_random_writes.py ├── test_key_finder.py ├── tests ├── __init__.py ├── benchmark │ ├── locking.py │ ├── parallel_appends.py │ ├── run_async.py │ ├── run_big_file.py │ ├── run_parallel.py │ ├── run_parallel_multi.py │ ├── run_threaded.py │ ├── sequential_appends.py │ ├── sqlite │ │ ├── run.sh │ │ ├── test.py │ │ └── test_parallel_runner.py │ └── utils.py ├── conftest.py ├── system_checks │ ├── test_clocks.py │ ├── test_monotonic_over_threads.py │ └── test_tick_rate.py ├── test_at.py ├── test_create.py ├── test_delete.py ├── test_excepts.py ├── test_exists.py ├── test_indentation.py ├── test_indexer.py ├── test_io_bytes.py ├── test_io_safe.py ├── test_locking.py ├── test_parallel_crud.py ├── test_parallel_sessions.py ├── test_partial.py ├── test_read.py ├── test_threaded_sessions.py ├── test_utils.py ├── test_where.py ├── test_write.py └── utils.py └── uv.lock /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | 6 | # JOB: Tests 7 | tests-job: 8 | runs-on: ubuntu-latest 9 | 10 | strategy: 11 | matrix: 12 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 13 | 14 | steps: 15 | #---------------------------------------------- 16 | #---- Checkout and install uv and python 17 | #---------------------------------------------- 18 | 19 | - uses: actions/checkout@v4 20 | - name: Install uv 21 | uses: astral-sh/setup-uv@v4 22 | with: 23 | enable-cache: true 24 | - name: Set up Python ${{ matrix.python-version }} 25 | run: uv python install ${{ matrix.python-version }} 26 | 27 | #---------------------------------------------- 28 | #---- Install dependencies 29 | #---------------------------------------------- 30 | 31 | - name: uv install 32 | run: uv sync --all-extras --dev 33 | 34 | #---------------------------------------------- 35 | #---- Show installation details 36 | #---------------------------------------------- 37 | 38 | - name: uv --version 39 | run: uv --version 40 | - name: uv run python --version 41 | run: uv run python --version 42 | - name: ls -lah 43 | run: ls -lah 44 | - name: uv tree 45 | run: uv tree 46 | 47 | #---------------------------------------------- 48 | #---- Pre-Checks 49 | #---------------------------------------------- 50 | 51 | - name: Show clock resolution 52 | run: uv run python tests/system_checks/test_tick_rate.py 53 | - name: Test clocks 54 | run: uv run python tests/system_checks/test_clocks.py 55 | - name: Test monotonicity 56 | run: uv run python tests/system_checks/test_monotonic_over_threads.py 57 | 58 | #---------------------------------------------- 59 | #---- Run tests with coverage report 60 | #---------------------------------------------- 61 | 62 | - name: 🚀 Run tests with code coverage report 63 | run: uv run pytest --cov=dictdatabase --cov-report term-missing 64 | 65 | #---------------------------------------------- 66 | #---- Save coverage artifact 67 | #---------------------------------------------- 68 | 69 | - name: Debug coverage file 70 | run: ls -lah 71 | - uses: 
actions/upload-artifact@v4 72 | with: 73 | name: coverage-${{ matrix.python-version }} 74 | include-hidden-files: true 75 | if-no-files-found: error 76 | path: ".coverage" 77 | 78 | # JOB: Coverage Badge 79 | cov-badge-job: 80 | # Only run this job on push events to the main branch, after tests succeed 81 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.tests-job.result == 'success' 82 | needs: tests-job 83 | runs-on: ubuntu-latest 84 | steps: 85 | - uses: actions/checkout@v4 86 | 87 | #---------------------------------------------- 88 | #---- Download and debug artifact 89 | #---------------------------------------------- 90 | 91 | - name: Debug workspace 92 | run: ls -lah 93 | 94 | - uses: actions/download-artifact@v4 95 | with: 96 | name: coverage-3.12 97 | path: . 98 | 99 | - name: Debug downloaded artifact 100 | run: ls -lah 101 | 102 | #---------------------------------------------- 103 | #---- Generate coverage badge 104 | #---------------------------------------------- 105 | 106 | - name: Generate Coverage Badge 107 | uses: tj-actions/coverage-badge-py@v2 108 | with: 109 | output: assets/coverage.svg 110 | 111 | #---------------------------------------------- 112 | #---- Verify and commit changes 113 | #---------------------------------------------- 114 | 115 | - name: Verify Changed Files 116 | uses: tj-actions/verify-changed-files@v17 117 | id: changed_files 118 | with: 119 | files: assets/coverage.svg 120 | 121 | - name: Commit Files 122 | if: steps.changed_files.outputs.files_changed == 'true' 123 | run: | 124 | git config --local user.email "github-actions[bot]@users.noreply.github.com" 125 | git config --local user.name "github-actions[bot]" 126 | git add assets/coverage.svg 127 | git commit -m "Updated assets/coverage.svg" 128 | 129 | - name: Push Changes 130 | if: steps.changed_files.outputs.files_changed == 'true' 131 | uses: ad-m/github-push-action@master 132 | with: 133 | github_token: ${{ secrets.github_token }} 134 | branch: ${{ github.ref }} 135 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv/ 2 | .ddb_storage_testing/ 3 | .ddb_pytest_storage 4 | .ddb* 5 | .coverage* 6 | ddb_storage 7 | test_db/ 8 | *.prof 9 | dist/ 10 | __pycache__ 11 | -------------------------------------------------------------------------------- /DictDataBase.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 
5 | } 6 | ], 7 | "settings": { 8 | "[python]": { 9 | "editor.formatOnSave": true, 10 | "editor.defaultFormatter": "charliermarsh.ruff" 11 | }, 12 | "editor.codeActionsOnSave": { 13 | "source.organizeImports": "explicit" 14 | }, 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 mkrd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Logo](https://github.com/mkrd/DictDataBase/blob/main/assets/logo.png?raw=true) 2 | 3 | [![Downloads](https://static.pepy.tech/badge/dictdatabase)](https://pepy.tech/project/dictdatabase) 4 | ![Tests](https://github.com/mkrd/DictDataBase/actions/workflows/test.yml/badge.svg) 5 | ![Coverage](https://github.com/mkrd/DictDataBase/blob/main/assets/coverage.svg?raw=1) 6 | 7 | DictDataBase is a fast document-based database that uses json files or compressed json files for storage. 8 | - **Multi threading and multi processing safe**. Multiple processes on the same machine 9 | can simultaneously read and write to dicts without losing data. 10 | - **ACID** compliant. Unlike TinyDB, it is suited for concurrent environments. 11 | - **No Conflict resolution** required. Unlike with ZODB, lock-based access control is used, such that conflicts never occur. 12 | - **No database server** required. Simply import DictDataBase in your project and use 13 | it. 14 | - **Compression**. Configure if the files should be stored as raw json or as json 15 | compressed with zlib. 16 | - **Fast**. Key-value pairs inside a json file can be accessed quickly and efficiently because the keys are indexed. 17 | - **Tested** with 98%+ coverage on Python 3.8 to 3.13. 18 | 19 | ### Why use DictDataBase 20 | - Your application concurrently reads and writes data from multiple processes or threads. 21 | - Using database server is a bit too much for your application. 22 | - But you need [ACID](https://en.wikipedia.org/wiki/ACID) guarantees. 23 | - Your use case requires reading key-value pairs from very large json files repeatedly. (For example, DictDataBase can handle about 2000 reads per second when reading single key-value pairs from a 2.5GB json file with 20000 key-value pairs.) 
- You need to repeatedly read and write many smaller json files.
- Your use case is suited for working with json data, or you have to work with a lot of
  json data.

### Why not DictDataBase
- If your storage is slow.
- If your use case requires repeatedly modifying or writing data in a single very large json file.
- If a relational database is better suited for your use case.
- If you need to read files that are larger than your system's RAM.

Install
========================================================================================

```sh
pip install dictdatabase
```

Configuration
========================================================================================
The following configuration parameters can be modified using `DDB.config`:

### Storage directory
Set `storage_directory` to the path of the directory that will contain your json files:
```python
DDB.config.storage_directory = "./ddb_storage"  # Default value
```

### Compression
If you want to use compressed files, set `use_compression` to `True`.
This will make the db files significantly smaller and might improve performance if your
disk is slow. However, the files will not be human readable.
```python
DDB.config.use_compression = False  # Default value
```

### Indentation
Set how written json files should be indented. Behaves exactly like
`json.dumps(indent=...)`: the value can be an `int` for the number of spaces, the tab
character, or `None` if you don't want the files to be indented.
```python
DDB.config.indent = "\t"  # Default value
```
Notice: If `DDB.config.use_orjson = True`, then the value can only be 2 (spaces) or
0/None for no indentation.

### Use orjson
You can use the orjson encoder and decoder if you need to.
The standard library json module is sufficient most of the time.
However, orjson is a lot more performant in virtually all cases.
```python
DDB.config.use_orjson = True  # Default value
```

Usage
========================================================================================

Import
----------------------------------------------------------------------------------------

```python
import dictdatabase as DDB
```

Create a file
----------------------------------------------------------------------------------------
This library is called DictDataBase, but you can actually use any json serializable object.
```python
users_dict = {
    "u1": {"name": "Ben", "age": 30, "job": "Software Engineer"},
    "u2": {"name": "Sue", "age": 21, "job": "Architect"},
    "u3": {"name": "Joe", "age": 50, "job": "Manager"},
}
DDB.at("users").create(users_dict)
```
There is now a file called `users.json` or `users.ddb` in your specified storage
directory, depending on whether you use compression.
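
As a quick illustration of the compression setting, the following sketch shows which file appears for each value. This is a minimal sketch: the `os.path.exists` checks and the `./ddb_storage` path are assumptions for this example, and `users_dict` is reused from above.

```python
import os

import dictdatabase as DDB

DDB.config.storage_directory = "./ddb_storage"

DDB.config.use_compression = False
DDB.at("users").create(users_dict, force_overwrite=True)
print(os.path.exists("./ddb_storage/users.json"))  # True: plain json file

DDB.config.use_compression = True
DDB.at("users").create(users_dict, force_overwrite=True)
print(os.path.exists("./ddb_storage/users.ddb"))   # True: compressed file replaces users.json
```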

Check if file or sub-key exists
----------------------------------------------------------------------------------------
```python
DDB.at("users").exists()
>>> True  # File exists
DDB.at("users", key="u10").exists()
>>> False  # Key "u10" not in users
DDB.at("users", key="u2").exists()
>>> True
```

Read dicts
----------------------------------------------------------------------------------------

```python
d = DDB.at("users").read()
d == users_dict  # True

# Only partially read Joe
joe = DDB.at("users", key="u3").read()
joe == users_dict["u3"]  # True
```

> Note: A partial read like `DDB.at("users", key="u3").read()` will only
> return the value of the key if the key is at the root indentation level.
> Example: You can get "a" from {"a": 3}, but not from {"b": {"a": 3}}.

It is also possible to only read a subset of keys based on a filter callback:

```python
DDB.at("numbers").create({"a": 1, "b": 2, "c": 3})

above_1 = DDB.at("numbers", where=lambda k, v: v > 1).read()
>>> above_1 == {"b": 2, "c": 3}
```
> The `where` callback is a function that takes two parameters, the key and the value.


Write dicts
----------------------------------------------------------------------------------------

```python
with DDB.at("users").session() as (session, users):
    users["u3"]["age"] = 99
    session.write()
print(DDB.at("users", key="u3").read()["age"])
>>> 99
```
> If you do not call session.write(), changes will not be written to disk!

Partial writing
----------------------------------------------------------------------------------------
Imagine you have a huge json file with many purchases.
The json file looks like this: `{<id>: <purchase>, <id>: <purchase>, ...}`.
Normally, you would have to read and parse the entire file to get a specific key.
After modifying the purchase, you would also have to serialize and write the
entire file again. With DDB, you can do it more efficiently:
```python
with DDB.at("purchases", key="3244").session() as (session, purchase):
    purchase["status"] = "cancelled"
    session.write()
```
Afterwards, the status is updated in the json file.
However, DDB only gathered the one purchase with id 3244, parsed its value, and
serialized that value alone before writing again. This is several orders of
magnitude faster than the naive approach when working with big files.


Folders
----------------------------------------------------------------------------------------

You can also read and write to folders of files. Consider the same example as
before, but now we have a folder called `purchases` that contains many files
`<id>.json`. If you want to open a session or read a specific one, you can do:

```python
DDB.at("purchases/<id>").read()
# Or equivalently:
DDB.at("purchases", "<id>").read()
```

To open a session or read all files, do the following:
```python
DDB.at("purchases/*").read()
# Or equivalently:
DDB.at("purchases", "*").read()
```

### Select from folder

If you have a folder containing many json files, you can read them selectively
based on a filter function.
The file is included if the provided function returns `True` when it gets the
file dict as input:

```python
for i in range(10):
    DDB.at("folder", i).create({"a": i})
# Now in the directory "folder", 10 files exist
res = DDB.at("folder/*", where=lambda k, v: v["a"] > 7).read()  # .session() also possible
assert res == {"8": {"a": 8}, "9": {"a": 9}}
```



Performance
========================================================================================

In preliminary testing, DictDataBase showed promising performance.

### SQLite vs DictDataBase
In each case, `16` parallel processes were spawned to perform `128` increments
of a counter in `4` tables/files. SQLite achieved `2435 operations/s`, while
DictDataBase achieved `3143 operations/s`.

### More tests
It remains to be tested how DictDataBase performs in different scenarios, for
example when multiple processes want to perform full writes to one big file.


Advanced
========================================================================================

Sleep Timeout
----------------------------------------------------------------------------------------
DictDataBase uses a file locking protocol to coordinate concurrent file accesses.
While waiting for a file where another thread or process currently has exclusive
access rights, the status of the file lock is periodically checked. You can set
the timeout between the checks:

```python
DDB.locking.SLEEP_TIMEOUT = 0.001  # 1ms, default value
```

The default of 1 millisecond works well and generally does not need to be changed,
but you can tune it to optimize performance for your use case.


Lock acquisition timeout
----------------------------------------------------------------------------------------
`AQUIRE_LOCK_TIMEOUT` specifies the maximum duration to wait when acquiring a lock
before giving up and raising a timeout error.

```python
DDB.locking.AQUIRE_LOCK_TIMEOUT = 60.0  # 60s, default value
```


API Reference
========================================================================================

### `at(path) -> DDBMethodChooser:`
Select a file or folder to perform an operation on.
If you want to select a specific key in a file, use the `key` parameter,
e.g. `DDB.at("file", key="subkey")`. The key value is only returned if the key
is at the root level of the json object.

If you want to select an entire folder, use the `*` wildcard,
e.g. `DDB.at("folder", "*")`, or `DDB.at("folder/*")`. You can also use
the `where` callback to select a subset of the file or folder.

If the callback returns `True`, the item will be selected. The callback
needs to accept a key and value as arguments.

Args:
- `path`: The path to the file or folder. Can be a string, a
  comma-separated list of strings, or a list.
- `key`: The key to select from the file.
- `where`: A function that takes a key and value and returns `True` if the
  key should be selected.

Beware: If you select a folder with the `*` wildcard, you can't use the `key`
parameter, and you cannot use the `key` and `where` parameters at the same time.
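
For illustration, here is a minimal sketch of legal and illegal selections; the file and folder names are placeholders taken from the examples above:

```python
import dictdatabase as DDB

# Legal selections
DDB.at("users")                                    # a whole file
DDB.at("users", key="u2")                          # a single key in a file
DDB.at("users", where=lambda k, v: v["age"] > 30)  # a filtered subset of a file
DDB.at("purchases", "*")                           # a whole folder
DDB.at("purchases/*", where=lambda k, v: v["status"] == "cancelled")  # a filtered folder

# Illegal combinations raise a TypeError
# DDB.at("users", key="u2", where=lambda k, v: True)  # key and where together
# DDB.at("purchases", "*", key="u2")                  # key together with a folder wildcard
```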

DDBMethodChooser
----------------------------------------------------------------------------------------

### `exists() -> bool:`
Efficiently checks if a database file exists. If the selected path contains
the `*` wildcard, it returns `True` if at least one file exists in the folder.

If a key was specified, check if it exists in the database. The key can be
anywhere in the database, even deeply nested. As long as it exists as a key
in any dict, it will be found.

### `create(data=None, force_overwrite: bool = False):`
Create a new file with the given data as the content. If the file
already exists, a FileExistsError will be raised unless
`force_overwrite` is set to True.

Args:
- `data`: The data to write to the file. If not specified, an empty dict
  will be written.
- `force_overwrite`: If `True`, will overwrite the file if it already
  exists, defaults to False (optional).

### `delete()`
Delete the file at the selected path.

### `read(self, as_type: T = None) -> dict | T | None:`
Reads a file or folder depending on the previous `.at(...)` selection.

Args:
- `as_type`: If provided, return the value as the given type.
  E.g. `as_type=str` will return `str(value)`.

### `session(self, as_type: T = None) -> DDBSession[T]:`
Opens a session to the selected file(s) or folder, depending on the previous
`.at(...)` selection. Inside the with block, you have exclusive access
to the file(s) or folder.
Call `session.write()` to write the data to the file(s) or folder.

Args:
- `as_type`: If provided, cast the value to the given type.
  E.g. `as_type=str` will return `str(value)`.
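
To illustrate the `as_type` parameter, here is a small sketch; `UserCollection` is a hypothetical wrapper class written for this example, not part of DictDataBase:

```python
import dictdatabase as DDB

class UserCollection(dict):
    # Hypothetical convenience wrapper, used only for this example
    def names(self) -> list:
        return [user["name"] for user in self.values()]

users = DDB.at("users").read(as_type=UserCollection)
print(users.names())  # ['Ben', 'Sue', 'Joe']
```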
318 | -------------------------------------------------------------------------------- /assets/coverage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | coverage 17 | coverage 18 | 98% 19 | 98% 20 | 21 | 22 | -------------------------------------------------------------------------------- /assets/logo.afdesign: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkrd/DictDataBase/12e650460c9284f8cd1249d26b16c18c04445691/assets/logo.afdesign -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkrd/DictDataBase/12e650460c9284f8cd1249d26b16c18c04445691/assets/logo.png -------------------------------------------------------------------------------- /dictdatabase/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration import config # noqa: F401 2 | from .models import at # noqa: F401 3 | -------------------------------------------------------------------------------- /dictdatabase/byte_codes.py: -------------------------------------------------------------------------------- 1 | # See: https://www.charset.org/utf-8 2 | BACKSLASH = 92 3 | QUOTE = 34 4 | OPEN_SQUARE = 91 5 | CLOSE_SQUARE = 93 6 | OPEN_CURLY = 123 7 | CLOSE_CURLY = 125 8 | SPACE = 32 9 | TAB = 9 10 | NEWLINE = 10 11 | COMMA = 44 12 | -------------------------------------------------------------------------------- /dictdatabase/configuration.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | class Confuguration: 5 | __slots__ = ("storage_directory", "indent", "use_compression", "use_orjson") 6 | 7 | storage_directory: str 8 | indent: int | str | None # eg. "\t" or 4 or None 9 | use_compression: bool 10 | use_orjson: bool 11 | 12 | def __init__( 13 | self, 14 | storage_directory: str = "ddb_storage", 15 | indent: str | int | None = "\t", 16 | use_compression: bool = False, 17 | use_orjson: bool = True, 18 | ) -> None: 19 | self.storage_directory = storage_directory 20 | self.indent = indent 21 | self.use_compression = use_compression 22 | self.use_orjson = use_orjson 23 | 24 | 25 | config = Confuguration() 26 | -------------------------------------------------------------------------------- /dictdatabase/indexing.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Union 3 | 4 | import orjson 5 | 6 | from . import config 7 | 8 | # Problem: Multiple read processes will concurrently read and write the same file 9 | # In some cases this will result in a empty read error, thats why the try-except exists 10 | 11 | 12 | # Idea 1: 13 | # - Never write to the index when reading 14 | # - When writing, the lock is exclusive on the index aswell, so no other process can read or write 15 | # Problem: If a file is only ever reed, it will never be indexed 16 | 17 | # Idea 2: 18 | # - Write a new index_record to a new unique file 19 | # - Reading index happens from all related files 20 | # - When writing, the new index_record is collected and written into the main file 21 | # Problem: If a file is only ever reed, lots of index record files will accumulate 22 | 23 | # Idea 3: 24 | # - Leave everything as is. 
While not ideal, it works. When empty read error occurs, don't use the index for that read 25 | 26 | 27 | class Indexer: 28 | """ 29 | The Indexer takes the name of a database file, and tries to load the .index file 30 | of the corresponding database file. 31 | 32 | The name of the index file is the name of the database file, with the extension 33 | .index and all "/" replaced with "___" 34 | 35 | The content of the index file is a json object, where the keys are keys inside 36 | the database json file, and the values are lists of 5 elements: 37 | - start_index: The index of the first byte of the value of the key in the database file 38 | - end_index: The index of the last byte of the value of the key in the database file 39 | - indent_level: The indent level of the key in the database file 40 | - indent_with: The indent string used. 41 | - value_hash: The hash of the value bytes 42 | """ 43 | 44 | __slots__ = ("data", "path") 45 | 46 | def __init__(self, db_name: str) -> None: 47 | # Make path of index file 48 | db_name = db_name.replace("/", "___") 49 | self.path = os.path.join(config.storage_directory, ".ddb", f"{db_name}.index") 50 | 51 | os.makedirs(os.path.dirname(self.path), exist_ok=True) 52 | if not os.path.exists(self.path): 53 | self.data = {} 54 | return 55 | 56 | try: 57 | with open(self.path, "rb") as f: 58 | self.data = orjson.loads(f.read()) 59 | except orjson.JSONDecodeError: 60 | self.data = {} 61 | 62 | def get(self, key: str) -> Union[list, None]: 63 | """ 64 | Returns a list of 5 elements for a key if it exists, otherwise None 65 | Elements:[start_index, end_index, indent_level, indent_with, value_hash] 66 | """ 67 | return self.data.get(key, None) 68 | 69 | def write( 70 | self, 71 | key: str, 72 | start_index: int, 73 | end_index: int, 74 | indent_level: int, 75 | indent_with: str, 76 | value_hash: int, 77 | old_value_end: int, 78 | ) -> None: 79 | """ 80 | Write index information for a key to the index file 81 | """ 82 | 83 | if self.data.get(key, None) is not None: 84 | delta = end_index - old_value_end 85 | for entry in self.data.values(): 86 | if entry[0] > old_value_end: 87 | entry[0] += delta 88 | entry[1] += delta 89 | 90 | self.data[key] = [start_index, end_index, indent_level, indent_with, value_hash] 91 | with open(self.path, "wb") as f: 92 | f.write(orjson.dumps(self.data)) 93 | -------------------------------------------------------------------------------- /dictdatabase/io_bytes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zlib 3 | 4 | from . import config, utils 5 | 6 | 7 | def read(db_name: str, *, start: int = None, end: int = None) -> bytes: 8 | """ 9 | Read the content of a file as bytes. Reading works even when the config 10 | changes, so a compressed ddb file can also be read if compression is 11 | disabled, and vice versa. 12 | 13 | If no compression is used, efficient reading can be done by specifying a start 14 | and end byte index, such that only the bytes in that range are read from the 15 | file. 16 | 17 | If compression is used, specifying a start and end byte index is still possible, 18 | but the entire file has to be read and decompressed first, and then the bytes 19 | in the range are returned. This is because the compressed file is not seekable. 20 | 21 | Args: 22 | - `db_name`: The name of the database file to read from. 23 | - `start`: The start byte index to read from. 24 | - `end`: The end byte index to read up to (not included). 
25 | 26 | Raises: 27 | - `FileNotFoundError`: If the file does not exist as .json nor .ddb. 28 | - `OSError`: If no compression is used and `start` is negative. 29 | - `FileExistsError`: If the file exists as .json and .ddb. 30 | """ 31 | 32 | json_path, json_exists, ddb_path, ddb_exists = utils.file_info(db_name) 33 | 34 | if json_exists: 35 | if ddb_exists: 36 | raise FileExistsError(f'Inconsistent: "{db_name}" exists as .json and .ddb.' "Please remove one of them.") 37 | with open(json_path, "rb") as f: 38 | if start is None and end is None: 39 | return f.read() 40 | start = start or 0 41 | f.seek(start) 42 | if end is None: 43 | return f.read() 44 | return f.read(end - start) 45 | if not ddb_exists: 46 | raise FileNotFoundError(f'No database file exists for "{db_name}"') 47 | with open(ddb_path, "rb") as f: 48 | json_bytes = zlib.decompress(f.read()) 49 | if start is None and end is None: 50 | return json_bytes 51 | start = start or 0 52 | end = end or len(json_bytes) 53 | return json_bytes[start:end] 54 | 55 | 56 | def write(db_name: str, dump: bytes, *, start: int = None) -> None: 57 | """ 58 | Write the bytes to the file of the db_path. If the db was compressed but no 59 | compression is enabled, remove the compressed file, and vice versa. 60 | 61 | Args: 62 | - `db_name`: The name of the database to write to. 63 | - `dump`: The bytes to write to the file, representing correct JSON when 64 | decoded. 65 | - `start`: The start byte index to write to. If None, the whole file is overwritten. 66 | If the original content was longer, the rest truncated. 67 | """ 68 | 69 | json_path, json_exists, ddb_path, ddb_exists = utils.file_info(db_name) 70 | 71 | # Write bytes or string to file 72 | remove_file = None 73 | if config.use_compression: 74 | if start is not None: 75 | raise RuntimeError("Cannot write to compressed file at a specific index") 76 | write_file = ddb_path 77 | if json_exists: 78 | remove_file = json_path 79 | dump = zlib.compress(dump, 1) 80 | else: 81 | write_file = json_path 82 | if ddb_exists: 83 | remove_file = ddb_path 84 | 85 | # Write bytes or string to file 86 | if start is None: 87 | with open(write_file, "wb") as f: 88 | f.write(dump) 89 | else: 90 | with open(write_file, "ab") as f: 91 | f.seek(start) 92 | f.truncate() 93 | f.write(dump) 94 | 95 | # Remove the other file if it exists 96 | # This is done after writing to avoid data loss 97 | if remove_file is not None: 98 | os.remove(remove_file) 99 | -------------------------------------------------------------------------------- /dictdatabase/io_safe.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from . import config, io_unsafe, locking, utils 4 | 5 | 6 | def read(file_name: str) -> dict: 7 | """ 8 | Read the content of a file as a dict. 9 | 10 | Args: 11 | - `file_name`: The name of the file to read from. 12 | """ 13 | 14 | _, json_exists, _, ddb_exists = utils.file_info(file_name) 15 | 16 | if not json_exists and not ddb_exists: 17 | return None 18 | 19 | with locking.ReadLock(file_name): 20 | return io_unsafe.read(file_name) 21 | 22 | 23 | def partial_read(file_name: str, key: str) -> dict: 24 | """ 25 | Read only the value of a key-value pair from a file. 26 | 27 | Args: 28 | - `file_name`: The name of the file to read from. 29 | - `key`: The key to read the value of. 
30 | """ 31 | 32 | _, json_exists, _, ddb_exists = utils.file_info(file_name) 33 | 34 | if not json_exists and not ddb_exists: 35 | return None 36 | 37 | with locking.ReadLock(file_name): 38 | return io_unsafe.partial_read(file_name, key) 39 | 40 | 41 | def write(file_name: str, data: dict) -> None: 42 | """ 43 | Ensures that writing only starts if there is no reading or writing in progress. 44 | 45 | Args: 46 | - `file_name`: The name of the file to write to. 47 | - `data`: The data to write to the file. 48 | """ 49 | 50 | dirname = os.path.dirname(f"{config.storage_directory}/{file_name}.any") 51 | os.makedirs(dirname, exist_ok=True) 52 | 53 | with locking.WriteLock(file_name): 54 | io_unsafe.write(file_name, data) 55 | 56 | 57 | def delete(file_name: str) -> None: 58 | """ 59 | Ensures that deleting only starts if there is no reading or writing in progress. 60 | 61 | Args: 62 | - `file_name`: The name of the file to delete. 63 | """ 64 | 65 | json_path, json_exists, ddb_path, ddb_exists = utils.file_info(file_name) 66 | 67 | if not json_exists and not ddb_exists: 68 | return 69 | 70 | with locking.WriteLock(file_name): 71 | if json_exists: 72 | os.remove(json_path) 73 | if ddb_exists: 74 | os.remove(ddb_path) 75 | -------------------------------------------------------------------------------- /dictdatabase/io_unsafe.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import hashlib 4 | import json 5 | from dataclasses import dataclass 6 | 7 | import orjson 8 | 9 | from . import byte_codes, config, indexing, io_bytes, utils 10 | 11 | 12 | @dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9 13 | class PartialDict: 14 | prefix: bytes 15 | key: str 16 | value: dict 17 | value_start: int 18 | value_end: int 19 | suffix: bytes 20 | 21 | 22 | @dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9 23 | class PartialFileHandle: 24 | db_name: str 25 | partial_dict: PartialDict 26 | indent_level: int 27 | indent_with: str 28 | indexer: indexing.Indexer 29 | 30 | 31 | ######################################################################################## 32 | #### Full Reading 33 | ######################################################################################## 34 | 35 | 36 | def read(db_name: str) -> dict: 37 | """ 38 | Read the file at db_path from the configured storage directory. 39 | Make sure the file exists. If it does not a FileNotFoundError is 40 | raised. 41 | """ 42 | # Always use orjson to read the file, because it is faster 43 | return orjson.loads(io_bytes.read(db_name)) 44 | 45 | 46 | ######################################################################################## 47 | #### Partial Reading 48 | ######################################################################################## 49 | 50 | 51 | def try_read_bytes_using_indexer(indexer: indexing.Indexer, db_name: str, key: str) -> bytes | None: 52 | """ 53 | Check if the key info is saved in the file's index file. 54 | If it is and the value has not changed, return the value bytes. 55 | Otherwise return None. 
56 | """ 57 | 58 | if (index := indexer.get(key)) is None: 59 | return None 60 | start, end, _, _, value_hash = index 61 | partial_bytes = io_bytes.read(db_name, start=start, end=end) 62 | if value_hash != hashlib.sha256(partial_bytes).hexdigest(): 63 | return None 64 | return partial_bytes 65 | 66 | 67 | def partial_read(db_name: str, key: str) -> dict | None: 68 | """ 69 | Partially read a key from a db. 70 | The key MUST be unique in the entire db, otherwise the behavior is undefined. 71 | This is a lot faster than reading the entire db, because it does not parse 72 | the entire file, but only the part part of the : pair. 73 | 74 | If the key is not found, a `KeyError` is raised. 75 | """ 76 | 77 | # Search for key in the index file 78 | indexer = indexing.Indexer(db_name) 79 | if (value_bytes := try_read_bytes_using_indexer(indexer, db_name, key)) is not None: 80 | return orjson.loads(value_bytes) 81 | 82 | # Not found in index file, search for key in the entire file 83 | all_file_bytes = io_bytes.read(db_name) 84 | key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key) 85 | 86 | if key_end == -1: 87 | return None 88 | 89 | # Key found, now determine the bounding byte indices of the value 90 | start = key_end + (1 if all_file_bytes[key_end] == byte_codes.SPACE else 0) 91 | end = utils.seek_index_through_value_bytes(all_file_bytes, start) 92 | 93 | indent_level, indent_with = utils.detect_indentation_in_json_bytes(all_file_bytes, key_start) 94 | value_bytes = all_file_bytes[start:end] 95 | value_hash = hashlib.sha256(value_bytes).hexdigest() 96 | 97 | # Write key info to index file 98 | indexer.write(key, start, end, indent_level, indent_with, value_hash, end) 99 | return orjson.loads(value_bytes) 100 | 101 | 102 | ################################################################################ 103 | #### Writing 104 | ################################################################################ 105 | 106 | 107 | def serialize_data_to_json_bytes(data: dict) -> bytes: 108 | """ 109 | Serialize the data as json bytes. Depending on the config, 110 | this can be done with orjson or the standard json module. 111 | Additionally config.indent is respected. 112 | """ 113 | if config.use_orjson: 114 | option = (orjson.OPT_INDENT_2 if config.indent else 0) | orjson.OPT_SORT_KEYS 115 | return orjson.dumps(data, option=option) 116 | else: 117 | db_dump = json.dumps(data, indent=config.indent, sort_keys=True) 118 | return db_dump.encode() 119 | 120 | 121 | def write(db_name: str, data: dict) -> None: 122 | """ 123 | Write the dict db dumped as a json string 124 | to the file of the db_path. 125 | """ 126 | data_bytes = serialize_data_to_json_bytes(data) 127 | io_bytes.write(db_name, data_bytes) 128 | 129 | 130 | ################################################################################ 131 | #### Partial Writing 132 | ################################################################################ 133 | 134 | 135 | def try_get_partial_file_handle_by_index( 136 | indexer: indexing.Indexer, 137 | db_name: str, 138 | key: str, 139 | ) -> tuple[PartialFileHandle | None, bytes | None]: 140 | """ 141 | Try to get a partial file handle by using the key entry in the index file. 142 | 143 | If the data could be read from the index file, a tuple of the partial file 144 | handle and None is returned. 145 | If the data could not be read from the index file, a tuple of None and the file 146 | bytes is returned, so that the file bytes can be searched for the key. 
147 | """ 148 | 149 | if (index := indexer.get(key)) is None: 150 | return None, io_bytes.read(db_name) 151 | start, end, indent_level, indent_with, value_hash = index 152 | 153 | # If compression is enabled, all data has to be read from the file 154 | if config.use_compression: 155 | all_file_bytes = io_bytes.read(db_name) 156 | value_bytes = all_file_bytes[start:end] 157 | if value_hash != hashlib.sha256(value_bytes).hexdigest(): 158 | return None, all_file_bytes 159 | value_data = orjson.loads(value_bytes) 160 | partial_dict = PartialDict(all_file_bytes[:start], key, value_data, start, end, all_file_bytes[end:]) 161 | 162 | # If compression is disabled, only the value and suffix have to be read 163 | else: 164 | value_and_suffix_bytes = io_bytes.read(db_name, start=start) 165 | value_length = end - start 166 | value_bytes = value_and_suffix_bytes[:value_length] 167 | if value_hash != hashlib.sha256(value_bytes).hexdigest(): 168 | # If the hashes don't match, read the prefix to concat the full file bytes 169 | prefix_bytes = io_bytes.read(db_name, end=start) 170 | return None, prefix_bytes + value_and_suffix_bytes 171 | value_data = orjson.loads(value_bytes) 172 | partial_dict = PartialDict(None, key, value_data, start, end, value_and_suffix_bytes[value_length:]) 173 | 174 | return PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer), None 175 | 176 | 177 | def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle: 178 | """ 179 | Partially read a key from a db. 180 | The key MUST be unique in the entire db, otherwise the behavior is undefined. 181 | This is a lot faster than reading the entire db, because it does not parse 182 | the entire file, but only the part part of the : pair. 183 | 184 | If the key is not found, a `KeyError` is raised. 185 | """ 186 | 187 | # Search for key in the index file 188 | indexer = indexing.Indexer(db_name) 189 | partial_handle, all_file_bytes = try_get_partial_file_handle_by_index(indexer, db_name, key) 190 | if partial_handle is not None: 191 | return partial_handle 192 | 193 | # Not found in index file, search for key in the entire file 194 | key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key) 195 | 196 | if key_end == -1: 197 | raise KeyError(f'Key "{key}" not found in db "{db_name}"') 198 | 199 | # Key found, now determine the bounding byte indices of the value 200 | start = key_end + (1 if all_file_bytes[key_end] == byte_codes.SPACE else 0) 201 | end = utils.seek_index_through_value_bytes(all_file_bytes, start) 202 | 203 | indent_level, indent_with = utils.detect_indentation_in_json_bytes(all_file_bytes, key_start) 204 | 205 | partial_value = orjson.loads(all_file_bytes[start:end]) 206 | prefix_bytes = all_file_bytes[:start] if config.use_compression else None 207 | partial_dict = PartialDict(prefix_bytes, key, partial_value, start, end, all_file_bytes[end:]) 208 | return PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer) 209 | 210 | 211 | def partial_write(pf: PartialFileHandle) -> None: 212 | """ 213 | Write a partial file handle to the db. 
214 | """ 215 | 216 | partial_bytes = serialize_data_to_json_bytes(pf.partial_dict.value) 217 | 218 | # Add indentation 219 | if pf.indent_level > 0 and pf.indent_with: 220 | replace_this = b"\n" 221 | replace_with = ("\n" + (pf.indent_level * pf.indent_with)).encode() 222 | partial_bytes = partial_bytes.replace(replace_this, replace_with) 223 | 224 | # Write key info to index file 225 | pf.indexer.write( 226 | key=pf.partial_dict.key, 227 | start_index=pf.partial_dict.value_start, 228 | end_index=pf.partial_dict.value_start + len(partial_bytes), 229 | indent_level=pf.indent_level, 230 | indent_with=pf.indent_with, 231 | value_hash=hashlib.sha256(partial_bytes).hexdigest(), 232 | old_value_end=pf.partial_dict.value_end, 233 | ) 234 | 235 | if pf.partial_dict.prefix is None: 236 | # Prefix could not be determined due to compression, so write the entire file 237 | io_bytes.write(pf.db_name, partial_bytes + pf.partial_dict.suffix, start=pf.partial_dict.value_start) 238 | else: 239 | # Prefix was determined, so only write the changed part and the suffix 240 | io_bytes.write(pf.db_name, pf.partial_dict.prefix + partial_bytes + pf.partial_dict.suffix) 241 | -------------------------------------------------------------------------------- /dictdatabase/locking.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import contextlib 4 | import os 5 | import threading 6 | import time 7 | 8 | from . import config 9 | 10 | # Design decisions: 11 | # - Do not use pathlib, because it is slower than os 12 | 13 | # Constants 14 | SLEEP_TIMEOUT = 0.001 * 1 # (ms) 15 | LOCK_KEEP_ALIVE_TIMEOUT = 0.001 * 0.08 # (ms) 16 | 17 | # Duration to wait updating the timestamp of the lock file 18 | ALIVE_LOCK_REFRESH_INTERVAL_NS = 1_000_000_000 * 10 # (s) 19 | 20 | # Duration to wait before considering a lock as orphaned 21 | REMOVE_ORPHAN_LOCK_TIMEOUT = 20.0 22 | 23 | # Duration to wait before giving up on acquiring a lock 24 | AQUIRE_LOCK_TIMEOUT = 60.0 25 | 26 | 27 | def os_touch(path: str) -> None: 28 | """ 29 | Create an empty file at the given path. This mimics the UNIX touch command 30 | and is compatible with both Windows and UNIX systems. 31 | """ 32 | mode = 0o666 33 | flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL 34 | fd = os.open(path, flags, mode) 35 | os.close(fd) 36 | 37 | 38 | class LockFileMeta: 39 | """ 40 | Metadata representation for a lock file. 41 | """ 42 | 43 | __slots__ = ("ddb_dir", "name", "id", "time_ns", "stage", "mode", "path") 44 | 45 | ddb_dir: str 46 | name: str 47 | id: str 48 | time_ns: str 49 | stage: str 50 | mode: str 51 | path: str 52 | 53 | def __init__(self, ddb_dir: str, name: str, id: str, time_ns: str, stage: str, mode: str) -> None: 54 | self.ddb_dir = ddb_dir 55 | self.name = name 56 | self.id = id 57 | self.time_ns = time_ns 58 | self.stage = stage 59 | self.mode = mode 60 | lock_file = f"{name}.{id}.{time_ns}.{stage}.{mode}.lock" 61 | self.path = os.path.join(ddb_dir, lock_file) 62 | 63 | def __repr__(self) -> str: 64 | return f"LockFileMeta({self.ddb_dir=}, {self.name=}, {self.id=}, {self.time_ns=}, {self.stage=}, {self.mode=})" 65 | 66 | def new_with_updated_time(self) -> LockFileMeta: 67 | """ 68 | Create a new instance with an updated timestamp. 
69 | """ 70 | time_ns = f"{time.time_ns()}" 71 | return LockFileMeta(self.ddb_dir, self.name, self.id, time_ns, self.stage, self.mode) 72 | 73 | 74 | class FileLocksSnapshot: 75 | """ 76 | Represents a snapshot of the current state of file locks in the directory. 77 | This snapshot assists in deciding which lock should be acquired or released next. 78 | 79 | On init, orphaned locks are removed. 80 | """ 81 | 82 | __slots__ = ("any_has_locks", "any_write_locks", "any_has_write_locks", "locks") 83 | 84 | locks: list[LockFileMeta] 85 | any_has_locks: bool 86 | any_write_locks: bool 87 | any_has_write_locks: bool 88 | 89 | def __init__(self, need_lock: LockFileMeta) -> None: 90 | self.locks = [] 91 | self.any_has_locks = False 92 | self.any_write_locks = False 93 | self.any_has_write_locks = False 94 | 95 | for file_name in os.listdir(need_lock.ddb_dir): 96 | if not file_name.endswith(".lock"): 97 | continue 98 | name, id, time_ns, stage, mode, _ = file_name.split(".") 99 | if name != need_lock.name: 100 | continue 101 | 102 | lock_meta = LockFileMeta(need_lock.ddb_dir, name, id, time_ns, stage, mode) 103 | 104 | # Remove orphaned locks 105 | if lock_meta.path != need_lock.path: 106 | lock_age = time.time_ns() - int(lock_meta.time_ns) 107 | if lock_age > REMOVE_ORPHAN_LOCK_TIMEOUT * 1_000_000_000: 108 | os.unlink(lock_meta.path) 109 | print(f"Removed orphaned lock ({lock_meta.path})") 110 | continue 111 | 112 | self.locks.append(lock_meta) 113 | 114 | # Update lock state flags 115 | if lock_meta.stage == "has": 116 | self.any_has_locks = True 117 | if lock_meta.mode == "write": 118 | self.any_has_write_locks = True 119 | if lock_meta.mode == "write": 120 | self.any_write_locks = True 121 | 122 | def exists(self, l: LockFileMeta) -> bool: 123 | """ 124 | Check if a lock with the same ID, stage, and mode exists in the current snapshot. 125 | """ 126 | return any(x.id == l.id and x.stage == l.stage and x.mode == l.mode for x in self.locks) 127 | 128 | def oldest_need(self, need_lock: LockFileMeta) -> bool: 129 | """ 130 | Determine if the provided 'need_lock' is the oldest among all 'need' locks in the snapshot. 131 | """ 132 | # len(need_locks) is at least 1 since this function is only called if there is a need_lock 133 | need_locks = [l for l in self.locks if l.stage == "need"] 134 | # Sort by time_ns. If multiple, the the one with the smaller id is first 135 | need_locks = sorted(need_locks, key=lambda l: (int(l.time_ns), int(l.id))) 136 | return need_locks[0].id == need_lock.id 137 | 138 | 139 | class AbstractLock: 140 | """ 141 | Abstract base class for file locks. This class doesn't lock/unlock by itself but 142 | provides a blueprint for derived classes to implement. 143 | """ 144 | 145 | __slots__ = ("db_name", "need_lock", "has_lock", "snapshot", "mode", "is_alive" "keep_alive_thread") 146 | 147 | db_name: str 148 | need_lock: LockFileMeta 149 | has_lock: LockFileMeta 150 | snapshot: FileLocksSnapshot 151 | mode: str 152 | is_alive: bool 153 | keep_alive_thread: threading.Thread 154 | 155 | def __init__(self, db_name: str) -> None: 156 | # Normalize db_name to avoid file naming conflicts 157 | self.db_name = db_name.replace("/", "___").replace(".", "____") 158 | time_ns = time.time_ns() 159 | t_id = f"{threading.get_native_id()}" # ID that's unique across processes and threads. 
160 | dir = os.path.join(config.storage_directory, ".ddb") 161 | 162 | self.need_lock = LockFileMeta(dir, self.db_name, t_id, time_ns, "need", self.mode) 163 | self.has_lock = LockFileMeta(dir, self.db_name, t_id, time_ns, "has", self.mode) 164 | 165 | self.is_alive = False 166 | self.keep_alive_thread = None 167 | 168 | # Ensure lock directory exists 169 | if not os.path.isdir(dir): 170 | os.makedirs(dir, exist_ok=True) 171 | 172 | def _keep_alive_thread(self) -> None: 173 | """ 174 | Keep the lock alive by updating the timestamp of the lock file. 175 | """ 176 | 177 | current_has_lock_time_ns: int = int(self.has_lock.time_ns) 178 | 179 | while self.is_alive: 180 | time.sleep(LOCK_KEEP_ALIVE_TIMEOUT) 181 | if time.time_ns() - current_has_lock_time_ns < ALIVE_LOCK_REFRESH_INTERVAL_NS: 182 | continue 183 | 184 | # Assert: The lock is older than ALIVE_LOCK_REFRESH_INTERVAL_NS ns 185 | # This means the has_lock must be refreshed 186 | 187 | new_has_lock = self.has_lock.new_with_updated_time() 188 | os_touch(new_has_lock.path) 189 | with contextlib.suppress(FileNotFoundError): 190 | os.unlink(self.has_lock.path) # Remove old lock file 191 | self.has_lock = new_has_lock 192 | current_has_lock_time_ns = int(new_has_lock.time_ns) 193 | 194 | def _start_keep_alive_thread(self) -> None: 195 | """ 196 | Start a thread that keeps the lock alive by updating the timestamp of the lock file. 197 | """ 198 | 199 | if self.keep_alive_thread is not None: 200 | raise RuntimeError("Keep alive thread already exists.") 201 | 202 | self.is_alive = True 203 | self.keep_alive_thread = threading.Thread(target=self._keep_alive_thread, daemon=False) 204 | self.keep_alive_thread.start() 205 | 206 | def _lock(self) -> None: 207 | """Override this method to implement locking mechanism.""" 208 | raise NotImplementedError 209 | 210 | def _unlock(self) -> None: 211 | """Remove the lock files associated with this lock.""" 212 | 213 | if self.keep_alive_thread is not None: 214 | self.is_alive = False 215 | self.keep_alive_thread.join() 216 | self.keep_alive_thread = None 217 | 218 | for p in ("need_lock", "has_lock"): 219 | try: 220 | if lock := getattr(self, p, None): 221 | os.unlink(lock.path) 222 | except FileNotFoundError: 223 | pass 224 | finally: 225 | setattr(self, p, None) 226 | 227 | def __enter__(self) -> None: 228 | self._lock() 229 | 230 | def __exit__(self, exc_type, exc_val, exc_tb) -> None: # noqa: ANN001 231 | self._unlock() 232 | 233 | 234 | class ReadLock(AbstractLock): 235 | """ 236 | A file-based read lock. 237 | Multiple threads/processes can simultaneously hold a read lock unless there's a write lock. 238 | """ 239 | 240 | mode = "read" 241 | 242 | def _lock(self) -> None: 243 | # Express intention to acquire read lock 244 | os.makedirs(os.path.dirname(self.need_lock.path), exist_ok=True) 245 | os_touch(self.need_lock.path) 246 | self.snapshot = FileLocksSnapshot(self.need_lock) 247 | 248 | # If this thread already holds a read lock, raise an exception. 249 | if self.snapshot.exists(self.has_lock): 250 | os.unlink(self.need_lock.path) 251 | raise RuntimeError("Thread already has a read lock. 
Do not try to obtain a read lock twice.") 252 | 253 | start_time = time.time() 254 | 255 | # Try to acquire lock until conditions are met or a timeout occurs 256 | while True: 257 | if not self.snapshot.any_write_locks or ( 258 | not self.snapshot.any_has_write_locks and self.snapshot.oldest_need(self.need_lock) 259 | ): 260 | self.has_lock = self.has_lock.new_with_updated_time() 261 | os_touch(self.has_lock.path) 262 | os.unlink(self.need_lock.path) 263 | self._start_keep_alive_thread() 264 | return 265 | time.sleep(SLEEP_TIMEOUT) 266 | if time.time() - start_time > AQUIRE_LOCK_TIMEOUT: 267 | raise RuntimeError("Timeout while waiting for read lock.") 268 | self.snapshot = FileLocksSnapshot(self.need_lock) 269 | 270 | 271 | class WriteLock(AbstractLock): 272 | """ 273 | A file-based write lock. 274 | Only one thread/process can hold a write lock, blocking others from acquiring either read or write locks. 275 | """ 276 | 277 | mode = "write" 278 | 279 | def _lock(self) -> None: 280 | # Express intention to acquire write lock 281 | os.makedirs(os.path.dirname(self.need_lock.path), exist_ok=True) 282 | os_touch(self.need_lock.path) 283 | self.snapshot = FileLocksSnapshot(self.need_lock) 284 | 285 | # If this thread already holds a write lock, raise an exception. 286 | if self.snapshot.exists(self.has_lock): 287 | os.unlink(self.need_lock.path) 288 | raise RuntimeError("Thread already has a write lock. Do not try to obtain a write lock twice.") 289 | 290 | start_time = time.time() 291 | 292 | # Try to acquire lock until conditions are met or a timeout occurs 293 | while True: 294 | if not self.snapshot.any_has_locks and self.snapshot.oldest_need(self.need_lock): 295 | self.has_lock = self.has_lock.new_with_updated_time() 296 | os_touch(self.has_lock.path) 297 | os.unlink(self.need_lock.path) 298 | self._start_keep_alive_thread() 299 | return 300 | time.sleep(SLEEP_TIMEOUT) 301 | if time.time() - start_time > AQUIRE_LOCK_TIMEOUT: 302 | raise RuntimeError("Timeout while waiting for write lock.") 303 | self.snapshot = FileLocksSnapshot(self.need_lock) 304 | -------------------------------------------------------------------------------- /dictdatabase/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Callable, Type, TypeVar 4 | 5 | from . import config, io_safe, utils 6 | from .sessions import ( 7 | SessionDirFull, 8 | SessionDirWhere, 9 | SessionFileFull, 10 | SessionFileKey, 11 | SessionFileWhere, 12 | ) 13 | 14 | T = TypeVar("T") 15 | 16 | 17 | class OperationType: 18 | """ 19 | Legal: 20 | - DDB.at("file") 21 | - DDB.at("file", key="subkey") 22 | - DDB.at("file", where=lambda k, v: ...) 23 | - DDB.at("dir", "*") 24 | - DDB.at("dir", "*", where=lambda k, v: ...) 25 | 26 | Illegal: 27 | - DDB.at("file", key="subkey", where=lambda k, v: ...) 28 | - DDB.at("dir", key="subkey", where=lambda k, v: ...) 29 | - DDB.at("dir", key="subkey") 30 | """ 31 | 32 | def __init__(self, path: str, key: str, where: Callable) -> None: 33 | self.dir = "*" in path 34 | self.file = not self.dir 35 | self.where = where is not None 36 | self.key = key is not None 37 | 38 | if self.key and self.where: 39 | raise TypeError("Cannot specify both key and where") 40 | if self.key and self.dir: 41 | raise TypeError("Cannot specify sub-key when selecting a folder. 
Specify the key in the path instead.") 42 | 43 | @property 44 | def file_normal(self) -> bool: 45 | return self.file and not self.where and not self.key 46 | 47 | @property 48 | def file_key(self) -> bool: 49 | return self.file and not self.where and self.key 50 | 51 | @property 52 | def file_where(self) -> bool: 53 | return self.file and self.where and not self.key 54 | 55 | @property 56 | def dir_normal(self) -> bool: 57 | return self.dir and not self.where and not self.key 58 | 59 | @property 60 | def dir_where(self) -> bool: 61 | return self.dir and self.where and not self.key 62 | 63 | 64 | def at(*path, key: str = None, where: Callable[[Any, Any], bool] = None) -> DDBMethodChooser: 65 | """ 66 | Select a file or folder to perform an operation on. 67 | If you want to select a specific key in a file, use the `key` parameter, 68 | e.g. `DDB.at("file", key="subkey")`. 69 | 70 | If you want to select an entire folder, use the `*` wildcard, 71 | eg. `DDB.at("folder", "*")`, or `DDB.at("folder/*")`. You can also use 72 | the `where` callback to select a subset of the file or folder. 73 | 74 | If the callback returns `True`, the item will be selected. The callback 75 | needs to accept a key and value as arguments. 76 | 77 | Args: 78 | - `path`: The path to the file or folder. Can be a string, a 79 | comma-separated list of strings, or a list. 80 | - `key`: The key to select from the file. 81 | - `where`: A function that takes a key and value and returns `True` if the 82 | key should be selected. 83 | 84 | Beware: If you select a folder with the `*` wildcard, you can't use the `key` parameter. 85 | Also, you cannot use the `key` and `where` parameters at the same time. 86 | """ 87 | return DDBMethodChooser(path, key, where) 88 | 89 | 90 | class DDBMethodChooser: 91 | __slots__ = ("path", "key", "where", "op_type") 92 | 93 | path: str 94 | key: str 95 | where: Callable[[Any, Any], bool] 96 | op_type: OperationType 97 | 98 | def __init__( 99 | self, 100 | path: tuple, 101 | key: str = None, 102 | where: Callable[[Any, Any], bool] = None, 103 | ) -> None: 104 | # Convert path to a list of strings 105 | pc = [] 106 | for p in path: 107 | pc += p if isinstance(p, list) else [p] 108 | self.path = "/".join([str(p) for p in pc]) 109 | self.key = key 110 | self.where = where 111 | self.op_type = OperationType(self.path, self.key, self.where) 112 | # Invariants: 113 | # - Both key and where cannot be not None at the same time 114 | # - If key is not None, then there is no wildcard in the path. 115 | 116 | def exists(self) -> bool: 117 | """ 118 | Efficiently checks if a database exists. If the selected path contains 119 | a wildcard, it will return True if at least one file exists in the folder. 120 | 121 | 122 | If a key was specified, check if it exists in a database. 123 | The key can be anywhere in the database, even deeply nested. 124 | As long it exists as a key in any dict, it will be found. 125 | """ 126 | if self.where is not None: 127 | raise RuntimeError("DDB.at(where=...).exists() cannot be used with the where parameter") 128 | 129 | if not utils.file_exists(self.path): 130 | return False 131 | if self.key is None: 132 | return True 133 | # Key is passed and occurs is True 134 | return io_safe.partial_read(self.path, key=self.key) is not None 135 | 136 | def create(self, data: dict | None = None, force_overwrite: bool = False) -> None: 137 | """ 138 | Create a new file with the given data as the content. 
If the file 139 | already exists, a FileExistsError will be raised unless 140 | `force_overwrite` is set to True. 141 | 142 | Args: 143 | - `data`: The data to write to the file. If not specified, an empty dict `{}` 144 | will be written. 145 | - `force_overwrite`: If `True`, will overwrite the file if it already 146 | exists, defaults to False (optional). 147 | """ 148 | if self.where is not None or self.key is not None: 149 | raise RuntimeError("DDB.at().create() cannot be used with the where or key parameters") 150 | 151 | # Raise if the db already exists and force_overwrite is False 152 | if not force_overwrite and self.exists(): 153 | raise FileExistsError( 154 | f"Database {self.path} already exists in {config.storage_directory}. Pass force_overwrite=True to overwrite." 155 | ) 156 | # Write db to file 157 | if data is None: 158 | data = {} 159 | io_safe.write(self.path, data) 160 | 161 | def delete(self) -> None: 162 | """ 163 | Delete the file at the selected path. 164 | """ 165 | if self.where is not None or self.key is not None: 166 | raise RuntimeError("DDB.at().delete() cannot be used with the where or key parameters") 167 | io_safe.delete(self.path) 168 | 169 | def read(self, as_type: Type[T] = None) -> dict | T | None: 170 | """ 171 | Reads a file or folder depending on previous `.at(...)` selection. 172 | 173 | Args: 174 | - `as_type`: If provided, return the value as the given type. 175 | E.g. as_type=str will return str(value). 176 | """ 177 | 178 | def type_cast(value): 179 | if as_type is None: 180 | return value 181 | return as_type(value) 182 | 183 | data = {} 184 | 185 | if self.op_type.file_normal: 186 | data = io_safe.read(self.path) 187 | 188 | elif self.op_type.file_key: 189 | data = io_safe.partial_read(self.path, self.key) 190 | 191 | elif self.op_type.file_where: 192 | file_content = io_safe.read(self.path) 193 | if file_content is None: 194 | return None 195 | for k, v in file_content.items(): 196 | if self.where(k, type_cast(v)): 197 | data[k] = v 198 | 199 | elif self.op_type.dir_normal: 200 | pattern_paths = utils.find_all(self.path) 201 | data = {n.split("/")[-1]: io_safe.read(n) for n in pattern_paths} 202 | 203 | elif self.op_type.dir_where: 204 | for db_name in utils.find_all(self.path): 205 | k, v = db_name.split("/")[-1], io_safe.read(db_name) 206 | if self.where(k, type_cast(v)): 207 | data[k] = v 208 | 209 | return type_cast(data) 210 | 211 | def session( 212 | self, as_type: Type[T] = None 213 | ) -> SessionFileFull[T] | SessionFileKey[T] | SessionFileWhere[T] | SessionDirFull[T] | SessionDirWhere[T]: 214 | """ 215 | Opens a session to the selected file(s) or folder, depending on previous 216 | `.at(...)` selection. Inside the with block, you have exclusive access 217 | to the file(s) or folder. 218 | Call `session.write()` to write the data to the file(s) or folder. 219 | 220 | Args: 221 | - `as_type`: If provided, cast the value to the given type. 222 | E.g. as_type=str will return str(value). 223 | 224 | Raises: 225 | - `FileNotFoundError`: If the file does not exist. 226 | - `KeyError`: If a key is specified and it does not exist.
227 | 228 | Returns: 229 | - Tuple of (session_object, data) 230 | """ 231 | if self.op_type.file_normal: 232 | return SessionFileFull(self.path, as_type) 233 | if self.op_type.file_key: 234 | return SessionFileKey(self.path, self.key, as_type) 235 | if self.op_type.file_where: 236 | return SessionFileWhere(self.path, self.where, as_type) 237 | if self.op_type.dir_normal: 238 | return SessionDirFull(self.path, as_type) 239 | if self.op_type.dir_where: 240 | return SessionDirWhere(self.path, self.where, as_type) 241 | -------------------------------------------------------------------------------- /dictdatabase/sessions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from contextlib import contextmanager 4 | from typing import Any, Callable, Generic, Tuple, TypeVar 5 | 6 | from . import io_unsafe, locking, utils 7 | 8 | T = TypeVar("T") 9 | JSONSerializable = TypeVar("JSONSerializable", str, int, float, bool, None, list, dict) 10 | 11 | 12 | def type_cast(obj, as_type): 13 | return obj if as_type is None else as_type(obj) 14 | 15 | 16 | class SessionBase: 17 | in_session: bool 18 | db_name: str 19 | as_type: T 20 | 21 | def __init__(self, db_name: str, as_type): 22 | self.in_session = False 23 | self.db_name = db_name 24 | self.as_type = as_type 25 | 26 | def __enter__(self): 27 | self.in_session = True 28 | self.data_handle = {} 29 | 30 | def __exit__(self, type, value, tb): 31 | write_lock = getattr(self, "write_lock", None) 32 | if write_lock is not None: 33 | if isinstance(write_lock, list): 34 | for lock in write_lock: 35 | lock._unlock() 36 | else: 37 | write_lock._unlock() 38 | self.write_lock, self.in_session = None, False 39 | 40 | def write(self): 41 | if not self.in_session: 42 | raise PermissionError("Only call write() inside a with statement.") 43 | 44 | 45 | @contextmanager 46 | def safe_context(super, self, *, db_names_to_lock=None): 47 | """ 48 | If an exception happens in the context, the __exit__ method of the passed super 49 | class will be called. 50 | """ 51 | super.__enter__() 52 | try: 53 | if isinstance(db_names_to_lock, str): 54 | self.write_lock = locking.WriteLock(self.db_name) 55 | self.write_lock._lock() 56 | elif isinstance(db_names_to_lock, list): 57 | self.write_lock = [locking.WriteLock(x) for x in self.db_name] 58 | for lock in self.write_lock: 59 | lock._lock() 60 | yield 61 | except BaseException as e: 62 | super.__exit__(type(e), e, e.__traceback__) 63 | raise e 64 | 65 | 66 | ######################################################################################## 67 | #### File sessions 68 | ######################################################################################## 69 | 70 | 71 | class SessionFileFull(SessionBase, Generic[T]): 72 | """ 73 | Context manager for read-write access to a full file. 74 | 75 | Efficiency: 76 | Reads and writes the entire file. 77 | """ 78 | 79 | def __enter__(self) -> Tuple[SessionFileFull, JSONSerializable | T]: 80 | with safe_context(super(), self, db_names_to_lock=self.db_name): 81 | self.data_handle = io_unsafe.read(self.db_name) 82 | return self, type_cast(self.data_handle, self.as_type) 83 | 84 | def write(self): 85 | super().write() 86 | io_unsafe.write(self.db_name, self.data_handle) 87 | 88 | 89 | class SessionFileKey(SessionBase, Generic[T]): 90 | """ 91 | Context manager for read-write access to a single key-value item in a file. 
92 | 93 | Efficiency: 94 | Uses partial reading, which allows only reading the bytes of the key-value item. 95 | When writing, only the bytes of the key-value and the bytes of the file after 96 | the key-value are written. 97 | """ 98 | 99 | def __init__(self, db_name: str, key: str, as_type: T): 100 | super().__init__(db_name, as_type) 101 | self.key = key 102 | 103 | def __enter__(self) -> Tuple[SessionFileKey, JSONSerializable | T]: 104 | with safe_context(super(), self, db_names_to_lock=self.db_name): 105 | self.partial_handle = io_unsafe.get_partial_file_handle(self.db_name, self.key) 106 | self.data_handle = self.partial_handle.partial_dict.value 107 | return self, type_cast(self.data_handle, self.as_type) 108 | 109 | def write(self): 110 | super().write() 111 | io_unsafe.partial_write(self.partial_handle) 112 | 113 | 114 | class SessionFileWhere(SessionBase, Generic[T]): 115 | """ 116 | Context manager for read-write access to a selection of key-value items in a file. 117 | The where callable is called with the key and value of each item in the file. 118 | 119 | Efficiency: 120 | Reads and writes the entire file, so it is not more efficient than 121 | SessionFileFull. 122 | """ 123 | 124 | def __init__(self, db_name: str, where: Callable[[Any, Any], bool], as_type: T): 125 | super().__init__(db_name, as_type) 126 | self.where = where 127 | 128 | def __enter__(self) -> Tuple[SessionFileWhere, JSONSerializable | T]: 129 | with safe_context(super(), self, db_names_to_lock=self.db_name): 130 | self.original_data = io_unsafe.read(self.db_name) 131 | for k, v in self.original_data.items(): 132 | if self.where(k, v): 133 | self.data_handle[k] = v 134 | return self, type_cast(self.data_handle, self.as_type) 135 | 136 | def write(self): 137 | super().write() 138 | self.original_data.update(self.data_handle) 139 | io_unsafe.write(self.db_name, self.original_data) 140 | 141 | 142 | ######################################################################################## 143 | #### Directory sessions 144 | ######################################################################################## 145 | 146 | 147 | class SessionDirFull(SessionBase, Generic[T]): 148 | """ 149 | Context manager for read-write access to all files in a directory. 150 | They are provided as a dict of {str(file_name): dict(file_content)}, where the 151 | file name does not contain the directory name nor the file extension. 152 | 153 | Efficiency: 154 | Fully reads and writes all files. 155 | """ 156 | 157 | def __init__(self, db_name: str, as_type: T): 158 | super().__init__(utils.find_all(db_name), as_type) 159 | 160 | def __enter__(self) -> Tuple[SessionDirFull, JSONSerializable | T]: 161 | with safe_context(super(), self, db_names_to_lock=self.db_name): 162 | self.data_handle = {n.split("/")[-1]: io_unsafe.read(n) for n in self.db_name} 163 | return self, type_cast(self.data_handle, self.as_type) 164 | 165 | def write(self): 166 | super().write() 167 | for name in self.db_name: 168 | io_unsafe.write(name, self.data_handle[name.split("/")[-1]]) 169 | 170 | 171 | class SessionDirWhere(SessionBase, Generic[T]): 172 | """ 173 | Context manager for read-write access to a selection of files in a directory. 174 | The where callable is called with the file name and parsed content of each file. 175 | 176 | Efficiency: 177 | Fully reads all files, but only writes the selected files.
178 | """ 179 | 180 | def __init__(self, db_name: str, where: Callable[[Any, Any], bool], as_type: T): 181 | super().__init__(utils.find_all(db_name), as_type) 182 | self.where = where 183 | 184 | def __enter__(self) -> Tuple[SessionDirWhere, JSONSerializable | T]: 185 | with safe_context(super(), self): 186 | selected_db_names, write_lock = [], [] 187 | for db_name in self.db_name: 188 | lock = locking.WriteLock(db_name) 189 | lock._lock() 190 | k, v = db_name.split("/")[-1], io_unsafe.read(db_name) 191 | if self.where(k, v): 192 | self.data_handle[k] = v 193 | write_lock.append(lock) 194 | selected_db_names.append(db_name) 195 | else: 196 | lock._unlock() 197 | self.write_lock = write_lock 198 | self.db_name = selected_db_names 199 | return self, type_cast(self.data_handle, self.as_type) 200 | 201 | def write(self): 202 | super().write() 203 | for name in self.db_name: 204 | io_unsafe.write(name, self.data_handle[name.split("/")[-1]]) 205 | -------------------------------------------------------------------------------- /dictdatabase/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import glob 4 | import os 5 | from typing import Tuple 6 | 7 | from . import byte_codes, config 8 | 9 | 10 | def file_info(db_name: str) -> Tuple[str, bool, str, bool]: 11 | """ 12 | Returns a tuple of four elements, the first and third being the paths to the 13 | JSON and DDB files, and the second and fourth being booleans indicating whether 14 | those files exist: 15 | 16 | >>> (json_path, json_exists, ddb_path, ddb_exists) 17 | 18 | Args: 19 | - `db_name`: The name of the database 20 | """ 21 | base = f"{config.storage_directory}/{db_name}" 22 | j, d = f"{base}.json", f"{base}.ddb" 23 | return j, os.path.exists(j), d, os.path.exists(d) 24 | 25 | 26 | def file_exists(db_name: str) -> bool: 27 | """ 28 | Returns True if the given database exists, either as a JSON or DDB file. 29 | 30 | Args: 31 | - `db_name`: The name of the database 32 | """ 33 | base = f"{config.storage_directory}/{db_name}" 34 | j, d = f"{base}.json", f"{base}.ddb" 35 | return os.path.exists(j) or os.path.exists(d) 36 | 37 | 38 | def find_all(file_name: str) -> list[str]: 39 | """ 40 | Returns a list of all the database names that match the given glob file_name. 41 | 42 | Args: 43 | - `file_name`: The glob file_name to search for 44 | """ 45 | 46 | files_all = glob.glob(f"{config.storage_directory}/{file_name}.ddb") 47 | files_all += glob.glob(f"{config.storage_directory}/{file_name}.json") 48 | 49 | for trim in [f"{config.storage_directory}/", ".ddb", ".json"]: 50 | files_all = [d.replace(trim, "") for d in files_all] 51 | return files_all 52 | 53 | 54 | def seek_index_through_value_bytes(json_bytes: bytes, index: int) -> int: 55 | """ 56 | Finds the index of the next comma or closing bracket/brace after the value 57 | of a key-value pair in a bytes object containing valid JSON when decoded. 58 | 59 | Args: 60 | - `json_bytes`: A bytes object containing valid JSON when decoded 61 | - `index`: The start index in json_bytes 62 | 63 | Returns: 64 | - The end index of the value.
65 | """ 66 | 67 | # TODO: Try to implement this using bytes.find() instead of a loop 68 | # This make count_nesting a lot faster 69 | 70 | # See https://www.json.org/json-en.html for the JSON syntax 71 | 72 | list_depth, dict_depth, i, len_json_bytes = 0, 0, index, len(json_bytes) 73 | 74 | while i < len_json_bytes: 75 | current = json_bytes[i] 76 | # If backslash, skip the next character 77 | if current == byte_codes.BACKSLASH: 78 | i += 1 79 | 80 | # Assert: the current character is not escaped with a backslash 81 | 82 | elif current == byte_codes.QUOTE: 83 | while True: 84 | i = json_bytes.find(byte_codes.QUOTE, i + 1) 85 | if i == -1: 86 | raise TypeError("Invalid JSON") 87 | 88 | j = i - 1 89 | backslash_count = 0 90 | while j >= 0 and json_bytes[j] == byte_codes.BACKSLASH: 91 | backslash_count += 1 92 | j -= 1 93 | if backslash_count % 2 == 0: 94 | # If the number of backslashes is even, the quote is not escaped 95 | break 96 | # Else, the quote is escaped, and the loop continues 97 | 98 | # Exit point where string ends and nesting is zero 99 | if list_depth == 0 and dict_depth == 0: 100 | return i + 1 101 | 102 | # Invariant: Not in_str, not escaped 103 | 104 | # Handle opening brackets 105 | elif current == byte_codes.OPEN_SQUARE: 106 | list_depth += 1 107 | elif current == byte_codes.OPEN_CURLY: 108 | dict_depth += 1 109 | # Handle closing brackets 110 | elif current == byte_codes.CLOSE_SQUARE: 111 | list_depth -= 1 112 | if list_depth == 0 and dict_depth <= 0: 113 | return i + 1 + dict_depth # dict_depth is -1 in case: {"a": {}} 114 | elif current == byte_codes.CLOSE_CURLY: 115 | dict_depth -= 1 116 | if dict_depth <= 0 and list_depth == 0: 117 | return i + 1 + dict_depth # dict_depth is -1 in case: {"a": {}} 118 | elif list_depth == 0: 119 | if dict_depth == -1: 120 | return i 121 | if dict_depth == 0 and current in [byte_codes.COMMA, byte_codes.NEWLINE]: 122 | # Handle commas and newline as exit points 123 | return i 124 | i += 1 125 | 126 | raise TypeError("Invalid JSON") 127 | 128 | 129 | def count_nesting_in_bytes(json_bytes: bytes, start: int, end: int) -> int: 130 | """ 131 | Returns the number of nesting levels. 132 | Considered bytes are from `start` inclusive to `end` exclusive. 133 | 134 | The nesting is counted by the number of opening and closing brackets/braces 135 | that are not in a string or escaped with a backslash. 136 | 137 | Args: 138 | - `json_bytes`: A bytes object containing valid JSON when decoded 139 | """ 140 | i, nesting = start, 0 141 | # Find the number of opening curly braces 142 | while (i := json_bytes.find(byte_codes.OPEN_CURLY, i, end)) != -1: 143 | if i == 0 or json_bytes[i - 1] != byte_codes.BACKSLASH: 144 | nesting += 1 145 | i += 1 146 | i = start 147 | # Find the number of closing curly braces 148 | while (i := json_bytes.find(byte_codes.CLOSE_CURLY, i, end)) != -1: 149 | if i == 0 or json_bytes[i - 1] != byte_codes.BACKSLASH: 150 | nesting -= 1 151 | i += 1 152 | return nesting 153 | 154 | 155 | def find_outermost_key_in_json_bytes(json_bytes: bytes, key: str) -> Tuple[int, int]: 156 | """ 157 | Returns the index of the key that is at the outermost nesting level. If the 158 | key is not found, return -1. If the key you are looking for is `some_key`, 159 | the function will search for `"some_key":` and return the start and end 160 | index of that string that is at the outermost nesting level, or -1 if the 161 | it is not found. 
162 | 163 | Args: 164 | - `json_bytes`: A bytes object containing valid JSON when decoded 165 | - `key`: The key of a key-value pair in `json_bytes` to search for, 166 | given as a string (it is encoded to bytes internally). 167 | 168 | Returns: 169 | - A tuple of the key start (inclusive) and end (exclusive) index, 170 | or `(-1, -1)` if the key is not found. 171 | """ 172 | 173 | # TODO: Very strict. The key must have a colon directly after it 174 | # For example {"a": 1} will work, but {"a" : 1} will not work! 175 | 176 | key = f'"{key}":'.encode() 177 | 178 | if (curr_i := json_bytes.find(key, 0)) == -1: 179 | return (-1, -1) 180 | 181 | # Assert: Key was found and curr_i is the index of the first character of the key 182 | 183 | # Keep track of all found keys and their nesting level 184 | key_nest = [(curr_i, count_nesting_in_bytes(json_bytes, 0, curr_i))] 185 | 186 | # As long as more keys are found, keep track of them and their nesting level 187 | while (next_i := json_bytes.find(key, curr_i + len(key))) != -1: 188 | nesting = count_nesting_in_bytes(json_bytes, curr_i + len(key), next_i) 189 | key_nest.append((next_i, nesting)) 190 | curr_i = next_i 191 | 192 | # Assert: all keys have been found, and their nesting relative to each other is 193 | # stored in key_nest, whose length is at least 1. 194 | 195 | # Early exit if there is only one key 196 | if len(key_nest) == 1: 197 | index, level = key_nest[0] 198 | return (index, index + len(key)) if level == 1 else (-1, -1) 199 | 200 | # Accumulate so each key's nesting level is relative to the start of json_bytes, not to the previous key 201 | for i in range(1, len(key_nest)): 202 | key_nest[i] = (key_nest[i][0], key_nest[i - 1][1] + key_nest[i][1]) 203 | 204 | # Filter out all keys that are not at the outermost nesting level 205 | indices_at_index_one = [i for i, level in key_nest if level == 1] 206 | if len(indices_at_index_one) != 1: 207 | return (-1, -1) 208 | return (indices_at_index_one[0], indices_at_index_one[0] + len(key)) 209 | 210 | 211 | def detect_indentation_in_json_bytes(json_bytes: bytes, index: int) -> Tuple[int, str]: 212 | """ 213 | Count the amount of whitespace before the index to determine the indentation 214 | level and whitespace used.
215 | 216 | Args: 217 | - `json_bytes`: A bytes object containing valid JSON when decoded 218 | - `index`: The index behind which the indentation is to be determined 219 | 220 | Returns: 221 | - A tuple of the indentation level and the whitespace used 222 | """ 223 | 224 | indentation_bytes, contains_tab = bytes(), False 225 | for i in range(index - 1, -1, -1): 226 | if json_bytes[i] not in [byte_codes.SPACE, byte_codes.TAB]: 227 | break 228 | if json_bytes[i] == byte_codes.TAB: 229 | contains_tab = True 230 | indentation_bytes = indentation_bytes + bytes([json_bytes[i]]) 231 | 232 | if contains_tab: 233 | return len(indentation_bytes), "\t" 234 | if isinstance(config.indent, int) and config.indent > 0: 235 | return len(indentation_bytes) // config.indent, " " * config.indent 236 | if isinstance(config.indent, str): 237 | return len(indentation_bytes) // 2, " " 238 | return 0, "" 239 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | default: 2 | @just --list 3 | 4 | alias t := test 5 | test: 6 | poetry run pytest --cov=dictdatabase --cov-report term-missing 7 | rm ./.coverage 8 | 9 | alias p := profiler 10 | profiler: 11 | poetry run python profiler.py 12 | 13 | alias bp := benchmark_parallel 14 | benchmark_parallel: 15 | poetry run python tests/benchmark/run_parallel.py 16 | 17 | alias bt := benchmark_threaded 18 | benchmark_threaded: 19 | poetry run python tests/benchmark/run_threaded.py 20 | 21 | alias ba := benchmark_async 22 | benchmark_async: 23 | poetry run python tests/benchmark/run_async.py 24 | 25 | publish: 26 | uv build 27 | uv publish 28 | rm -rf dist 29 | rm -rf dictdatabase.egg-info 30 | -------------------------------------------------------------------------------- /profiler.py: -------------------------------------------------------------------------------- 1 | from distutils.command.config import config 2 | 3 | from path_dict import PathDict 4 | from pyinstrument import profiler 5 | 6 | import dictdatabase as DDB 7 | from dictdatabase import io_unsafe 8 | 9 | DDB.config.storage_directory = "./test_db/production_database" 10 | DDB.config.use_orjson = True 11 | DDB.config.indent = 2 12 | 13 | 14 | p = profiler.Profiler(interval=0.0001) 15 | with p: 16 | # fM44 is small 17 | # a2lU has many annotations 18 | # DDB.at("tasks", key="fM44").read(key="fM44", as_type=PathDict) 19 | for _ in range(10): 20 | with DDB.at("tasks", key="a2lU").session(as_type=PathDict) as (session, task): 21 | task["jay"] = lambda x: (x or 0) + 1 22 | session.write() 23 | # DDB.at("tasks_as_dir/*").read() 24 | 25 | 26 | p.open_in_browser(timeline=False) 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "dictdatabase" 3 | version = "2.5.1" 4 | description = "Easy-to-use database using dicts" 5 | readme = "README.md" 6 | authors = [{ name = "Marcel Kröker", email = "kroeker.marcel@gmail.com" }] 7 | license = { file = "LICENSE" } 8 | classifiers=[ 9 | "Programming Language :: Python :: 3", 10 | "License :: OSI Approved :: MIT License", 11 | "Operating System :: OS Independent", 12 | "Intended Audience :: Developers", 13 | "Programming Language :: Python", 14 | "Topic :: Software Development :: Libraries :: Python Modules" 15 | ] 16 | requires-python = ">=3.8,<3.14" 17 | dependencies = [ 18 | "orjson >= 3.9, <4.0", 19 | ] 
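# The only runtime dependency above is orjson, and whether it is actually used for
# (de)serialization is toggled at runtime. A minimal sketch, assuming only the
# configuration flags and calls already shown in profiler.py and tests/conftest.py;
# the "users" file name and the sample data are illustrative, not taken from the repo:
#
#   import dictdatabase as DDB
#   DDB.config.use_orjson = True   # serialize with orjson instead of the stdlib json
#   DDB.config.indent = 2
#   DDB.at("users").create({"u1": {"name": "Ada"}}, force_overwrite=True)
#   assert DDB.at("users", key="u1").read() == {"name": "Ada"}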
20 | 21 | 22 | [dependency-groups] 23 | dev = [ 24 | "super-py ~= 0.4.2", 25 | "pyinstrument ~= 4.4.0", 26 | "pytest-cov ~= 4.0.0", 27 | "path-dict ~= 3.0.4", 28 | "ruff>=0.11.6", 29 | ] 30 | 31 | 32 | [tool.setuptools] 33 | packages = ["dictdatabase"] 34 | license-files = [] # Workaround for https://github.com/astral-sh/uv/issues/9513 35 | 36 | 37 | [tool.uv] 38 | package = true 39 | 40 | 41 | [tool.ruff] 42 | show-fixes = true 43 | line-length = 120 44 | select = [ 45 | "ANN", # annotations 46 | "B", # bugbear 47 | "C", # comprehensions 48 | "E", # style errors 49 | "F", # flakes 50 | "I", # import sorting 51 | "M", # meta 52 | "N", # naming 53 | "U", # upgrade 54 | "W", # style warnings 55 | "YTT", # sys.version 56 | ] 57 | ignore = [ 58 | "E501", # line length 59 | "UP007", # use X | Y for union (not possible in python 3.8) 60 | "UP006", # Use typing.Tuple for python 3.8 support 61 | "W191", # indentation contains tabs 62 | "E741", # ambiguous variable name 63 | ] 64 | 65 | 66 | [tool.ruff.format] 67 | indent-style = "tab" 68 | quote-style = "double" 69 | -------------------------------------------------------------------------------- /scenario_comparison.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | from pathlib import Path 4 | 5 | from pyinstrument import profiler 6 | 7 | import dictdatabase as DDB 8 | 9 | DDB.config.storage_directory = ".ddb_scenario_comparison" 10 | Path(DDB.config.storage_directory).mkdir(exist_ok=True) 11 | 12 | 13 | # Create a database with 10_000 entries 14 | all_users = {} 15 | for i in range(10_000): 16 | print(i) 17 | user = { 18 | "id": "".join(random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8)), 19 | "name": "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=5)), 20 | "surname": "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=20)), 21 | "description": "".join(random.choices('abcdefghij"klmnopqrst😁uvwxyz\\ ', k=5000)), 22 | "age": random.randint(0, 100), 23 | } 24 | all_users[user["id"]] = user 25 | DDB.at("users_dir", user["id"]).create(user) 26 | DDB.at("users").create(all_users) 27 | 28 | 29 | ################################################################################ 30 | #### Test read from directory 31 | 32 | 33 | # 06.11.22: 2695ms 34 | t1 = time.monotonic() 35 | with profiler.Profiler() as p: 36 | DDB.at("users_dir/*").read() 37 | p.open_in_browser() 38 | print("Read all users from directory:", time.monotonic() - t1) 39 | 40 | 41 | ################################################################################ 42 | #### Test read from single file 43 | 44 | 45 | # 06.11.22: 181ms 46 | t1 = time.monotonic() 47 | DDB.at("users").read() 48 | print("Read all users from single file:", time.monotonic() - t1) 49 | -------------------------------------------------------------------------------- /scene_random_writes.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from pyinstrument.profiler import Profiler 4 | 5 | import dictdatabase as DDB 6 | 7 | user_count = 100_000 8 | 9 | # all_users = {} 10 | # for i in range(user_count): 11 | 12 | # user = { 13 | # "id": str(i), 14 | # "pref": "".join(random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8)), 15 | # "name": "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=5)), 16 | # "surname": "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=20)), 17 | # "description": "".join(random.choices("abcdefghij\"klmnopqrst😁uvwxyz\\ ", k=5000)), 18 | 
# "age": random.randint(0, 100), 19 | # } 20 | # all_users[str(i)] = user 21 | # DDB.at("users").create(all_users, force_overwrite=True) 22 | 23 | print("Users created") 24 | 25 | p = Profiler(interval=0.0001) 26 | p.start() 27 | for it in range(500): 28 | print(it) 29 | user_id = str(random.randint(user_count - 100, user_count - 1)) 30 | with DDB.at("users", key=user_id).session() as (session, user): 31 | user["age"] += 1 32 | session.write() 33 | p.stop() 34 | p.open_in_browser(timeline=False) 35 | -------------------------------------------------------------------------------- /test_key_finder.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from dictdatabase import utils 4 | 5 | test_dict = { 6 | "b": 2, 7 | "c": { 8 | "a": 1, 9 | "b": 2, 10 | }, 11 | "d": { 12 | "a": 1, 13 | "b": 2, 14 | }, 15 | "a": 1, 16 | } 17 | 18 | json_str = json.dumps(test_dict, indent=2, sort_keys=False) 19 | json_bytes = json_str.encode() 20 | 21 | index = utils.find_outermost_key_in_json_bytes(json_bytes, "a") 22 | 23 | print("lel") 24 | print(index) 25 | print(json_bytes[index[0] : index[1]]) 26 | 27 | 28 | print(b"00111000".find(b"111", 0, 20)) 29 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkrd/DictDataBase/12e650460c9284f8cd1249d26b16c18c04445691/tests/__init__.py -------------------------------------------------------------------------------- /tests/benchmark/locking.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | from pyinstrument import profiler 5 | 6 | import dictdatabase as DDB 7 | from dictdatabase import locking 8 | 9 | DDB.config.storage_directory = "./.benchmark_locking" 10 | path = Path(DDB.config.storage_directory) 11 | path.mkdir(exist_ok=True, parents=True) 12 | 13 | 14 | # 05.11.22: 4520ms 15 | # 25.11.22: 4156ms 16 | with profiler.Profiler() as p: 17 | for _ in range(25_000): 18 | l = locking.ReadLock("db") 19 | l._lock() 20 | l._unlock() 21 | p.open_in_browser() 22 | 23 | 24 | # 05.11.22: 4884ms 25 | # 25.11.22: 4159ms 26 | with profiler.Profiler() as p: 27 | for _ in range(25_000): 28 | l = locking.WriteLock("db") 29 | l._lock() 30 | l._unlock() 31 | p.open_in_browser() 32 | 33 | 34 | l = locking.WriteLock("db/test.some") 35 | l._lock() 36 | 37 | 38 | shutil.rmtree(DDB.config.storage_directory) 39 | -------------------------------------------------------------------------------- /tests/benchmark/parallel_appends.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import time 5 | from calendar import c 6 | from multiprocessing import Pool 7 | 8 | from pyinstrument import Profiler 9 | from utils import db_job, make_table, print_and_assert_results 10 | 11 | import dictdatabase as DDB 12 | 13 | 14 | def proc_job(id, n): 15 | DDB.config.storage_directory = "./.ddb_bench_parallel" 16 | DDB.locking.SLEEP_TIMEOUT = 0.001 17 | for _ in range(n): 18 | t1 = time.monotonic_ns() 19 | with DDB.at("append_here").session() as (session, db): 20 | if len(db) == 0: 21 | db += [ 22 | { 23 | "counter": 0, 24 | "firstname": "John", 25 | "lastname": "Doe", 26 | "age": 42, 27 | "address": "1234 Main St", 28 | "city": "Anytown", 29 | "state": "CA", 30 | "zip": "12345", 31 | "phone": "123-456-7890", 32 | 
"interests": ["Python", "Databases", "DDB", "DDB-CLI", "DDB-Web", "Google"], 33 | } 34 | ] * 50000 35 | else: 36 | db.append({**db[-1], "counter": db[-1]["counter"] + 1}) 37 | session.write() 38 | time.sleep(0.5) 39 | 40 | vis = "🔴" * (id + 1) 41 | print(f"{(time.monotonic_ns() - t1) / 1e6:.2f} ms {vis}") 42 | 43 | 44 | def proc_read_job(id, n): 45 | DDB.config.storage_directory = "./.ddb_bench_parallel" 46 | DDB.locking.SLEEP_TIMEOUT = 0.001 47 | for _ in range(n): 48 | t1 = time.monotonic_ns() 49 | DDB.at("append_here").read() 50 | vis = "🟢" * (id + 1) 51 | print(f"{(time.monotonic_ns() - t1) / 1e6:.2f} ms {vis}") 52 | 53 | 54 | if __name__ == "__main__": 55 | proc_count = 2 56 | per_proc = 100 57 | DDB.config.storage_directory = "./.ddb_bench_parallel" 58 | # Create Tables 59 | DDB.at("append_here").create([], force_overwrite=True) 60 | # Execute process pool running incrementor as the target task 61 | t1 = time.monotonic() 62 | pool = Pool(processes=proc_count * 2) 63 | for i in range(proc_count): 64 | pool.apply_async( 65 | proc_job, 66 | args=( 67 | i, 68 | per_proc, 69 | ), 70 | ) 71 | pool.apply_async( 72 | proc_read_job, 73 | args=( 74 | i, 75 | per_proc, 76 | ), 77 | ) 78 | pool.close() 79 | pool.join() 80 | print(f"⏱️ {time.monotonic() - t1} seconds") 81 | -------------------------------------------------------------------------------- /tests/benchmark/run_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import shutil 4 | import time 5 | 6 | from utils import incrementor, print_and_assert_results 7 | 8 | import dictdatabase as DDB 9 | 10 | 11 | async def thread_job(i, n, file_count): 12 | DDB.locking.SLEEP_TIMEOUT = 0.001 13 | incrementor(i, n, file_count) 14 | 15 | 16 | async def threaded_stress(file_count=2, thread_count=10, per_thread=500): 17 | # Create file_count json files 18 | for t in range(file_count): 19 | DDB.at(f"incr{t}").create({"counter": 0}, force_overwrite=True) 20 | 21 | # Create tasks for concurrent execution 22 | tasks = [(incrementor, (i, per_thread, file_count)) for i in range(thread_count)] 23 | 24 | # Execute process pool running incrementor as the target task 25 | t1 = time.monotonic() 26 | await asyncio.gather(*[thread_job(i, per_thread, file_count) for i in range(thread_count)]) 27 | t2 = time.monotonic() 28 | 29 | print_and_assert_results(thread_count, per_thread, file_count, t1, t2) 30 | 31 | 32 | if __name__ == "__main__": 33 | DDB.config.storage_directory = ".ddb_bench_async" 34 | try: 35 | shutil.rmtree(".ddb_bench_async", ignore_errors=True) 36 | os.mkdir(".ddb_bench_async") 37 | loop = asyncio.get_event_loop() 38 | loop.run_until_complete(threaded_stress()) 39 | loop.close() 40 | finally: 41 | shutil.rmtree(".ddb_bench_async", ignore_errors=True) 42 | -------------------------------------------------------------------------------- /tests/benchmark/run_big_file.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | import dictdatabase as DDB 5 | 6 | 7 | def make_random_posts(count): 8 | posts = {} 9 | for _ in range(count): 10 | id = str(random.randint(0, 999_999_999)) 11 | title_length = random.randint(10, 100) 12 | content_length = random.randint(200, 500) 13 | posts[id] = { 14 | "id": id, 15 | "title": "".join(random.choices(" abcdefghijklmnopqrstuvwxyz,.", k=title_length)), 16 | "content": "".join(random.choices(" abcdefghijklmnopqrstuvwxyz,.", k=content_length)), 17 | } 18 | return posts 19 | 
20 | 21 | def make_users(count): 22 | all_users = {} 23 | for i in range(count): 24 | all_users[str(i)] = { 25 | "id": str(i), 26 | "name": "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=5)), 27 | "surname": "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=20)), 28 | "age": random.randint(20, 80), 29 | "posts": make_random_posts(random.randint(200, 300)), 30 | } 31 | return all_users 32 | 33 | 34 | def read_specific_users(): 35 | accessed_users = sorted([str(i * 100) for i in range(100)], key=lambda x: random.random()) 36 | t1 = time.monotonic() 37 | for user_id in accessed_users: 38 | print(f"Accessing user {user_id}") 39 | u = DDB.at("big_users", key=user_id).read() 40 | print(f"User {user_id} has {len(u['posts'])} posts and is {u['age']} years old") 41 | t2 = time.monotonic() 42 | print(f"Time taken: {(t2 - t1) * 1000}ms") 43 | 44 | 45 | def write_specific_users(): 46 | accessed_users = sorted([str(i * 100) for i in range(100)], key=lambda x: random.random()) 47 | t1 = time.monotonic() 48 | for user_id in accessed_users: 49 | print(f"Accessing user {user_id}") 50 | 51 | with DDB.at("big_users", key=user_id).session() as (session, user): 52 | user["surname"] = "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=random.randint(3, 50))) 53 | session.write() 54 | t2 = time.monotonic() 55 | print(f"Time taken: {(t2 - t1) * 1000}ms") 56 | 57 | 58 | def random_access_users(write_read_ratio=0.1, count=500): 59 | accessed_users = [str(i * 100) for i in [random.randint(0, 99) for _ in range(count)]] 60 | t1 = time.monotonic() 61 | for user_id in accessed_users: 62 | if random.random() < write_read_ratio: 63 | with DDB.at("big_users", key=user_id).session() as (session, user): 64 | user["surname"] = "".join(random.choices("abcdefghijklmnopqrstuvwxyz", k=random.randint(3, 50))) 65 | session.write() 66 | print(f"Accessed user {user_id} for writing") 67 | else: 68 | u = DDB.at("big_users", key=user_id).read() 69 | print(f"User {user_id} has {len(u['posts'])} posts and is {u['age']} years old") 70 | 71 | t2 = time.monotonic() 72 | print(f"Time taken: {t2 - t1}s") 73 | 74 | 75 | # DDB.at("big_users").create(make_users(20_000), force_overwrite=True) # 2500MB 76 | 77 | # random_access_users() 78 | # write_specific_users() 79 | read_specific_users() 80 | -------------------------------------------------------------------------------- /tests/benchmark/run_parallel.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | from calendar import c 7 | from dataclasses import dataclass 8 | from multiprocessing import Pool 9 | 10 | from path_dict import PathDict 11 | 12 | import dictdatabase as DDB 13 | 14 | DDB.config.storage_directory = ".ddb_bench_multi" 15 | 16 | 17 | def benchmark(iterations, setup: callable = None): 18 | def decorator(function): 19 | def wrapper(*args, **kwargs): 20 | f_name = function.__name__ 21 | kwargs["name"] = f_name 22 | if setup: 23 | setup(kwargs) 24 | t1 = time.monotonic() 25 | for _ in range(iterations): 26 | function(*args, **kwargs) 27 | t2 = time.monotonic() 28 | print(f"⏱️ {iterations / (t2 - t1):.1f} op/s for {f_name} ({(t2 - t1):.1f} seconds)") 29 | 30 | return wrapper 31 | 32 | return decorator 33 | 34 | 35 | @benchmark(iterations=9000, setup=lambda kw: DDB.at(kw["name"]).create({"data": {"counter": 0}}, force_overwrite=True)) 36 | def sequential_full_read_small_file(name): 37 | DDB.at(name).read() 38 | 39 | 40 | @benchmark(iterations=8000, 
setup=lambda kw: DDB.at(kw["name"]).create({"data": {"counter": 0}}, force_overwrite=True)) 41 | def sequential_partial_read_small_file(name): 42 | DDB.at(name, key="data").read() 43 | 44 | 45 | @benchmark(iterations=8000, setup=lambda kw: DDB.at(kw["name"]).create({"data": {"counter": 0}}, force_overwrite=True)) 46 | def sequential_full_write_small_file(name): 47 | with DDB.at(name).session() as (session, db): 48 | db["data"]["counter"] += 1 49 | session.write() 50 | 51 | 52 | @benchmark(iterations=6000, setup=lambda kw: DDB.at(kw["name"]).create({"data": {"counter": 0}}, force_overwrite=True)) 53 | def sequential_partial_write_small_file(name): 54 | with DDB.at(name, key="data").session() as (session, db): 55 | db["counter"] += 1 56 | session.write() 57 | 58 | 59 | @dataclass 60 | class Scenario: 61 | files: int = 1 62 | readers: int = 0 63 | writers: int = 0 64 | big_file: bool = False 65 | use_compression: bool = False 66 | ops: int = 10 67 | 68 | def print(self): 69 | res = f"✨ Scenario: {'🔹' * self.readers}{'🔻' * self.writers} ({self.readers}r{self.writers}w)" 70 | res += ", 🔸 compression" if self.use_compression else "" 71 | res += ", 💎 big file" if self.big_file else "" 72 | print(res) 73 | 74 | 75 | def print_and_assert_results(scenario: Scenario, t): 76 | ops = (scenario.writers + scenario.readers) * scenario.ops * scenario.files 77 | ops_sec = f"{(ops / t):.0f}" 78 | s = f"⏱️ {ops_sec} op/s ({ops} in {t:.2f}s)" 79 | print(str.ljust(s, 32), end="") 80 | for t in range(scenario.files): 81 | db = DDB.at(f"incr{t}").read() 82 | if db["counter"]["counter"] != scenario.ops * scenario.writers: 83 | print("❌", db["counter"]["counter"], "!=", scenario.ops * scenario.writers) 84 | assert db["counter"]["counter"] == scenario.ops * scenario.writers 85 | 86 | 87 | def process_job(mode, scenario, cfg): 88 | DDB.config = cfg 89 | DDB.locking.SLEEP_TIMEOUT = 0.001 90 | 91 | t1 = time.monotonic() 92 | for _ in range(scenario.ops): 93 | for t in sorted(range(scenario.files), key=lambda _: random.random()): 94 | if mode == "r": 95 | DDB.at(f"incr{t}", key="counter").read() 96 | 97 | elif mode == "w": 98 | with DDB.at(f"incr{t}", key="counter").session(as_type=PathDict) as (session, d): 99 | d.at("counter").set(d.at("counter").get() + 1) 100 | session.write() 101 | t2 = time.monotonic() 102 | return t2 - t1 103 | 104 | 105 | def parallel_stressor(scenario: Scenario): 106 | DDB.config.use_compression = scenario.use_compression 107 | # Create Tables 108 | for t in range(scenario.files): 109 | if scenario.big_file: 110 | with open(os.path.join(os.getcwd(), "test_db/production_database/tasks.json"), "r") as f: 111 | db = json.loads(f.read()) 112 | db["counter"] = {"counter": 0} 113 | else: 114 | db = {"counter": {"counter": 0}} 115 | DDB.at(f"incr{t}").create(db, force_overwrite=True) 116 | 117 | # Execute process pool running incrementor as the target task 118 | res = [] 119 | pool = Pool(processes=scenario.readers + scenario.writers) 120 | for mode in "w" * scenario.writers + "r" * scenario.readers: 121 | res.append(pool.apply_async(process_job, args=(mode, scenario, DDB.config))) 122 | pool.close() 123 | pool.join() 124 | 125 | total_time = sum(r.get() for r in res) / (scenario.readers + scenario.writers) 126 | print_and_assert_results(scenario, total_time) 127 | 128 | 129 | scenarios = [ 130 | Scenario(readers=1, ops=6000), 131 | Scenario(readers=2, ops=6000), 132 | Scenario(readers=4, ops=6000), 133 | Scenario(readers=8, ops=3000), 134 | Scenario(writers=1, ops=6000), 135 | Scenario(writers=2, 
ops=1000), 136 | Scenario(writers=4, ops=800), 137 | Scenario(writers=8, ops=200), 138 | Scenario(readers=20, writers=20, ops=30), 139 | Scenario(readers=8, ops=1500), 140 | Scenario(readers=8, ops=1500, use_compression=True), 141 | Scenario(readers=8, ops=1500, big_file=True), 142 | Scenario(readers=8, writers=1, ops=200), 143 | Scenario(readers=8, writers=1, ops=25, big_file=True), 144 | Scenario(readers=1, writers=8, ops=200), 145 | Scenario(readers=1, writers=8, ops=10, big_file=True), 146 | Scenario(readers=8, writers=8, ops=100), 147 | Scenario(readers=8, writers=8, ops=8, big_file=True), 148 | ] 149 | 150 | if __name__ == "__main__": 151 | # print("✨ Simple sequential benchmarks") 152 | # sequential_full_read_small_file() 153 | # sequential_partial_read_small_file() 154 | # sequential_full_write_small_file() 155 | # sequential_partial_write_small_file() 156 | 157 | # Parallel benchmarks 158 | for scenario in scenarios: 159 | try: 160 | shutil.rmtree(".ddb_bench_multi", ignore_errors=True) 161 | os.mkdir(".ddb_bench_multi") 162 | parallel_stressor(scenario) 163 | scenario.print() 164 | finally: 165 | shutil.rmtree(".ddb_bench_multi", ignore_errors=True) 166 | -------------------------------------------------------------------------------- /tests/benchmark/run_parallel_multi.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import threading 5 | import time 6 | from calendar import c 7 | from multiprocessing import Pool 8 | 9 | from pyinstrument import Profiler 10 | from utils import print_and_assert_results 11 | 12 | import dictdatabase as DDB 13 | from dictdatabase.configuration import Confuguration 14 | 15 | 16 | def proc_job(n, cfg): 17 | DDB.config = cfg 18 | DDB.locking.SLEEP_TIMEOUT = 0.001 19 | 20 | for _ in range(n): 21 | with DDB.at("incr/*").session() as (session, d): 22 | for k, v in d.items(): 23 | v["counter"] += 1 24 | session.write() 25 | 26 | 27 | def parallel_stressor(file_count): 28 | # Create Tables 29 | for t in range(11): 30 | DDB.at("incr", t).create({"counter": 0}, force_overwrite=True) 31 | 32 | # Execute process pool running incrementor as the target task 33 | t1 = time.monotonic() 34 | res = [] 35 | pool = Pool(processes=file_count) 36 | for _ in range(file_count): 37 | r = pool.apply_async(proc_job, args=(1000, DDB.config)) 38 | res.append(r) 39 | pool.close() 40 | pool.join() 41 | t2 = time.monotonic() 42 | for r in res: 43 | print(r.get()) 44 | 45 | 46 | if __name__ == "__main__": 47 | DDB.config.storage_directory = ".ddb_bench_parallel" 48 | try: 49 | shutil.rmtree(".ddb_bench_parallel", ignore_errors=True) 50 | os.mkdir(".ddb_bench_parallel") 51 | parallel_stressor(4) 52 | finally: 53 | pass 54 | # shutil.rmtree(".ddb_bench_parallel", ignore_errors=True) 55 | -------------------------------------------------------------------------------- /tests/benchmark/run_threaded.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import time 5 | 6 | import super_py as sp 7 | from utils import db_job, print_and_assert_results 8 | 9 | import dictdatabase as DDB 10 | 11 | 12 | def threaded_stressor(file_count, readers, writers, operations_per_thread, big_file, compression): 13 | # Create Tables 14 | for t in range(file_count): 15 | if big_file: 16 | with open(os.path.join(os.getcwd(), "test_db/production_database/tasks.json"), "r") as f: 17 | db = json.loads(f.read()) 18 | db["counter"] = 
{"counter": 0} 19 | else: 20 | db = {"counter": {"counter": 0}} 21 | DDB.at(f"incr{t}").create(db, force_overwrite=True) 22 | 23 | tasks = [(db_job, (mode, file_count, operations_per_thread)) for mode in "w" * writers + "r" * readers] 24 | 25 | # Execute process pool running incrementor as the target task 26 | t1 = time.monotonic() 27 | sp.concurrency.run_threaded(tasks, max_threads=writers + readers) 28 | t2 = time.monotonic() 29 | print_and_assert_results(readers, writers, operations_per_process, file_count, big_file, compression, t1, t2) 30 | 31 | 32 | if __name__ == "__main__": 33 | DDB.config.storage_directory = ".ddb_bench_threaded" 34 | operations_per_process = 4 35 | for file_count, readers, writers in [(1, 4, 4), (1, 8, 1), (1, 1, 8), (4, 8, 8)]: 36 | print("") 37 | print( 38 | f"✨ Scenario: {file_count} files, {readers} readers, {writers} writers, {operations_per_process} operations per process" 39 | ) 40 | for big_file, compression in [(False, False), (False, True), (True, False), (True, True)]: 41 | try: 42 | shutil.rmtree(".ddb_bench_threaded", ignore_errors=True) 43 | os.mkdir(".ddb_bench_threaded") 44 | threaded_stressor(file_count, readers, writers, operations_per_process, big_file, compression) 45 | finally: 46 | shutil.rmtree(".ddb_bench_threaded", ignore_errors=True) 47 | -------------------------------------------------------------------------------- /tests/benchmark/sequential_appends.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import time 5 | from calendar import c 6 | from multiprocessing import Pool 7 | 8 | from pyinstrument import Profiler 9 | 10 | import dictdatabase as DDB 11 | 12 | 13 | def seq_job(n): 14 | DDB.at("db").create( 15 | [ 16 | { 17 | "counter": 0, 18 | "firstname": "John", 19 | "lastname": "Doe", 20 | "age": 42, 21 | "address": "1234 Main St", 22 | "city": "Anytown", 23 | "state": "CA", 24 | "zip": "12345", 25 | "phone": "123-456-7890", 26 | "interests": ["Python", "Databases", "DDB", "DDB-CLI", "DDB-Web", "Google"], 27 | } 28 | ] 29 | * 50000, 30 | force_overwrite=True, 31 | ) 32 | for _ in range(n): 33 | t1 = time.monotonic_ns() 34 | with DDB.at("db").session() as (session, db): 35 | db.append({**db[-1], "counter": db[-1]["counter"] + 1}) 36 | session.write() 37 | print(f"{(time.monotonic_ns() - t1) / 1e6:.2f} ms") 38 | 39 | 40 | if __name__ == "__main__": 41 | DDB.config.storage_directory = "./.ddb_bench_sequential" 42 | DDB.locking.SLEEP_TIMEOUT = 0.001 43 | DDB.config.use_orjson = True 44 | DDB.config.indent = 2 45 | 46 | p = Profiler(interval=0.00001) 47 | p.start() 48 | # Execute process pool running incrementor as the target task 49 | seq_job(20) 50 | p.stop() 51 | p.open_in_browser() 52 | -------------------------------------------------------------------------------- /tests/benchmark/sqlite/run.sh: -------------------------------------------------------------------------------- 1 | cd ./benchmarks/sqlite 2 | poetry run python3 test.py 3 | -------------------------------------------------------------------------------- /tests/benchmark/sqlite/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sqlite3 3 | import time 4 | 5 | import super_py as sp 6 | 7 | 8 | def teardown(): 9 | os.remove("test.db") 10 | 11 | 12 | @sp.test(teardown=teardown) 13 | def parallel_stress(tables=4, processes=16, per_process=128): 14 | # Create the database with all tables 15 | con = sqlite3.connect("test.db") 
16 | for t in range(tables): 17 | cur = con.cursor() 18 | cur.execute(f"CREATE TABLE IF NOT EXISTS incr{t} (counter INTEGER)") 19 | cur.execute(f"INSERT INTO incr{t} (counter) VALUES (0)") 20 | con.commit() 21 | con.close() 22 | 23 | # Run the incr_db function in parallel 24 | args = f"{tables} {processes} {per_process}" 25 | t1 = time.time() 26 | os.system(f"python3 test_parallel_runner.py {args}") 27 | t2 = time.time() 28 | 29 | ops = processes * per_process * tables 30 | ops_sec = int(ops / (t2 - t1)) 31 | print(f"{ops = }, {ops_sec = }, {tables = }, {processes = } {per_process = }") 32 | print(f"{t2 - t1 = }") 33 | 34 | for t in range(tables): 35 | con = sqlite3.connect("test.db") 36 | cur = con.cursor() 37 | cur.execute(f"SELECT counter FROM incr{t}") 38 | t_counter = cur.fetchone()[0] 39 | con.close() 40 | print(f"{t_counter = }, should be {processes * per_process}") 41 | assert t_counter == processes * per_process 42 | -------------------------------------------------------------------------------- /tests/benchmark/sqlite/test_parallel_runner.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import sys 3 | from multiprocessing import Pool 4 | 5 | 6 | def incr_db(n, tables): 7 | for _ in range(n): 8 | for t in range(tables): 9 | con = sqlite3.connect("test.db") 10 | cur = con.cursor() 11 | cur.execute(f"UPDATE incr{t} SET counter = counter + 1") 12 | con.commit() 13 | con.close() 14 | return True 15 | 16 | 17 | if __name__ == "__main__": 18 | tables = int(sys.argv[1]) 19 | processes = int(sys.argv[2]) 20 | per_process = int(sys.argv[3]) 21 | 22 | pool = Pool(processes=processes) 23 | for _ in range(processes): 24 | pool.apply_async( 25 | incr_db, 26 | args=( 27 | per_process, 28 | tables, 29 | ), 30 | ) 31 | pool.close() 32 | pool.join() 33 | -------------------------------------------------------------------------------- /tests/benchmark/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | from path_dict import pd 5 | 6 | import dictdatabase as DDB 7 | 8 | 9 | def make_table(recursion_depth=3, keys_per_level=50): 10 | d = {"key1": "val1", "key2": 2, "key3": [1, "2", [3, 3]]} 11 | for i in range(recursion_depth): 12 | d = {f"key{i}{j}": d for j in range(keys_per_level)} 13 | # print(f"Made table of size {len(json.dumps(d)) // 1e6}mb") 14 | return {"counter": {"counter": 0}, "big": d} 15 | 16 | 17 | def print_stats(i, durations): 18 | avg = f"{sum(durations) / len(durations):.0f}" 19 | median = f"{sorted(durations)[len(durations) // 2]:.0f}" 20 | min_t = f"{min(durations):.0f}" 21 | max_t = f"{max(durations):.0f}" 22 | 23 | # print(f"{i}: total: {len(durations)}, avg: {avg}ms (med: {median}), {min_t}-{max_t}ms") 24 | 25 | 26 | def print_and_assert_results(readers, writers, per_proc, tables, big_file, compression, t1, t2): 27 | ops = (writers + readers) * per_proc * tables 28 | ops_sec = f"{(ops / (t2 - t1)):.0f}" 29 | print(f"⏱️ {ops_sec} op/s ({ops} in {t2 - t1:.2f}s), {big_file = }, {compression = }") 30 | for t in range(tables): 31 | db = DDB.at(f"incr{t}").read() 32 | # print(db["counter"]["counter"], "==", per_proc * writers) 33 | assert db["counter"]["counter"] == per_proc * writers 34 | # print(f"✅ counter={db['counter']}") 35 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 
| import pytest 4 | 5 | import dictdatabase as DDB 6 | 7 | 8 | @pytest.fixture(autouse=True) 9 | def isolate_database_files(tmp_path: Path): 10 | DDB.config.storage_directory = str(tmp_path) 11 | 12 | 13 | @pytest.fixture(scope="function") 14 | def name_of_test(request): 15 | return request.function.__name__ 16 | 17 | 18 | @pytest.fixture(params=[True, False]) 19 | def use_compression(request): 20 | DDB.config.use_compression = request.param 21 | return request.param 22 | 23 | 24 | @pytest.fixture(params=[True, False]) 25 | def use_orjson(request): 26 | DDB.config.use_orjson = request.param 27 | return request.param 28 | 29 | 30 | @pytest.fixture(params=[None, 0, 2, "\t"]) 31 | def indent(request): 32 | DDB.config.indent = request.param 33 | return request.param 34 | -------------------------------------------------------------------------------- /tests/system_checks/test_clocks.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import threading 3 | import time 4 | 5 | 6 | def print_clocks(label: str) -> None: 7 | print(f"--- {label} ---") 8 | print("time_ns() :", time.time_ns()) 9 | print("monotonic_ns() :", time.monotonic_ns()) 10 | print("perf_counter_ns():", time.perf_counter_ns()) 11 | print("\n") 12 | 13 | 14 | def thread_function(thread_name: str) -> None: 15 | print_clocks(f"Thread-{thread_name}") 16 | 17 | 18 | def process_function(process_name: str) -> None: 19 | print_clocks(f"Process-{process_name}") 20 | 21 | 22 | if __name__ == "__main__": 23 | print_clocks("Main Thread") 24 | 25 | threads = [] 26 | for i in range(3): 27 | thread = threading.Thread(target=thread_function, args=(i,)) 28 | thread.start() 29 | threads.append(thread) 30 | 31 | for thread in threads: 32 | thread.join() 33 | 34 | processes = [] 35 | for i in range(3): 36 | process = multiprocessing.Process(target=process_function, args=(i,)) 37 | process.start() 38 | processes.append(process) 39 | 40 | for process in processes: 41 | process.join() 42 | -------------------------------------------------------------------------------- /tests/system_checks/test_monotonic_over_threads.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import threading 3 | import time 4 | 5 | # Number of threads 6 | NUM_THREADS = 64 7 | 8 | # Define the clocks to test 9 | clocks = { 10 | "time ": time.time, 11 | "time_ns ": time.time_ns, 12 | "monotonic ": time.monotonic, 13 | "monotonic_ns ": time.monotonic_ns, 14 | "perf_counter ": time.perf_counter, 15 | "perf_counter_ns": time.perf_counter_ns, 16 | } 17 | 18 | # Queue to store timestamps in order 19 | timestamps = queue.Queue() 20 | 21 | 22 | def capture_time(i, clock_func: callable) -> None: 23 | # Capture time using the given clock function and put it in the queue 24 | for _ in range(1000): 25 | # print(f"Thread {i} capturing time") 26 | timestamps.put(clock_func()) 27 | 28 | 29 | def check_monotonicity_for_clock(clock_name: str, clock_func: callable) -> None: 30 | # Clear the queue for the next clock 31 | while not timestamps.empty(): 32 | timestamps.get() 33 | 34 | # Create and start threads 35 | threads = [] 36 | for i in range(NUM_THREADS): 37 | thread = threading.Thread( 38 | target=capture_time, 39 | args=( 40 | i, 41 | clock_func, 42 | ), 43 | ) 44 | thread.start() 45 | threads.append(thread) 46 | 47 | # Wait for all threads to complete 48 | for thread in threads: 49 | thread.join() 50 | 51 | # Extract timestamps from the queue 52 | captured_times = [] 53 | 
while not timestamps.empty(): 54 | captured_times.append(timestamps.get()) 55 | 56 | # Check if the clock is monotonic 57 | is_monotonic = all(captured_times[i] <= captured_times[i + 1] for i in range(len(captured_times) - 1)) 58 | 59 | if is_monotonic: 60 | print(f"Clock: {clock_name} is monotonic over {NUM_THREADS} threads ✅") 61 | else: 62 | print(f"Clock: {clock_name} is not monotonic over {NUM_THREADS} threads ❌") 63 | print("-" * 40) 64 | 65 | 66 | if __name__ == "__main__": 67 | # Check monotonicity for each clock 68 | for clock_name, clock_func in clocks.items(): 69 | check_monotonicity_for_clock(clock_name, clock_func) 70 | -------------------------------------------------------------------------------- /tests/system_checks/test_tick_rate.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def get_tick_rate(clock_func: callable) -> float: 5 | start_time = time.time() 6 | measurements = [clock_func() for _ in range(2_000_000)] 7 | end_time = time.time() 8 | 9 | ticks = 0 10 | prev_value = measurements[0] 11 | for current_value in measurements[1:]: 12 | if current_value < prev_value: 13 | raise RuntimeError("Clock function is not monotonic") 14 | if current_value != prev_value: 15 | ticks += 1 16 | prev_value = current_value 17 | 18 | return ticks / (end_time - start_time) # ticks per second 19 | 20 | 21 | if __name__ == "__main__": 22 | clock_funcs = { 23 | "time ": time.time, 24 | "time_ns ": time.time_ns, 25 | "monotonic ": time.monotonic, 26 | "monotonic_ns ": time.monotonic_ns, 27 | "perf_counter ": time.perf_counter, 28 | "perf_counter_ns": time.perf_counter_ns, 29 | } 30 | 31 | for name, func in clock_funcs.items(): 32 | print(f"Tick rate for {name}: {get_tick_rate(func) / 1_000_000.0:.3f}M ticks/second") 33 | -------------------------------------------------------------------------------- /tests/test_at.py: -------------------------------------------------------------------------------- 1 | from dictdatabase.models import at 2 | 3 | 4 | def test_at(): 5 | assert at("x").path == "x" 6 | assert at("x", "y", "z").path == "x/y/z" 7 | assert at(["x", "y", "z"]).path == "x/y/z" 8 | assert at("x", ["y", "z"]).path == "x/y/z" 9 | assert at(["x", "y"], "z").path == "x/y/z" 10 | assert at(["x"], "y", "z").path == "x/y/z" 11 | assert at("x", ["y"], "z").path == "x/y/z" 12 | assert at("x", "y", ["z"]).path == "x/y/z" 13 | assert at("x", ["y"], ["z"]).path == "x/y/z" 14 | assert at(["x"], "y", ["z"]).path == "x/y/z" 15 | assert at(["x"], ["y"], "z").path == "x/y/z" 16 | assert at(["x"], ["y"], ["z"]).path == "x/y/z" 17 | -------------------------------------------------------------------------------- /tests/test_create.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | from path_dict import pd 5 | 6 | import dictdatabase as DDB 7 | from tests.utils import make_complex_nested_random_dict 8 | 9 | 10 | def test_create(use_compression, use_orjson, indent): 11 | DDB.at("create").create(force_overwrite=True) 12 | db = DDB.at("create").read() 13 | assert db == {} 14 | 15 | with DDB.at("create").session(as_type=pd) as (session, d): 16 | d["a", "b", "c"] = "😁" 17 | session.write() 18 | assert DDB.at("create").read() == {"a": {"b": {"c": "😁"}}} 19 | 20 | with pytest.raises(RuntimeError): 21 | DDB.at("create", where=lambda k, v: True).create(force_overwrite=True) 22 | 23 | with pytest.raises(RuntimeError): 24 | DDB.at("create", 
key="any").create(force_overwrite=True) 25 | 26 | 27 | def test_create_edge_cases(use_compression, use_orjson, indent): 28 | cases = [-2, 0.0, "", "x", [], {}, True] 29 | 30 | for i, c in enumerate(cases): 31 | DDB.at(f"tcec{i}").create(c, force_overwrite=True) 32 | assert DDB.at(f"tcec{i}").read() == c 33 | 34 | with pytest.raises(TypeError): 35 | DDB.at("tcec99").create(object(), force_overwrite=True) 36 | 37 | 38 | def test_nested_file_creation(use_compression, use_orjson, indent): 39 | n = DDB.at("nested/file/nonexistent").read() 40 | assert n is None 41 | db = make_complex_nested_random_dict(12, 6) 42 | DDB.at("nested/file/creation/test").create(db, force_overwrite=True) 43 | assert DDB.at("nested/file/creation/test").read() == db 44 | 45 | 46 | def test_create_same_file_twice(use_compression, use_orjson, indent): 47 | name = "test_create_same_file_twice" 48 | # Check that creating the same file twice must raise an error 49 | with pytest.raises(FileExistsError): 50 | DDB.at(name).create(force_overwrite=True) 51 | DDB.at(name).create() 52 | # Check that creating the same file twice with force_overwrite=True works 53 | DDB.at(f"{name}2").create(force_overwrite=True) 54 | DDB.at(f"{name}2").create(force_overwrite=True) 55 | -------------------------------------------------------------------------------- /tests/test_delete.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import dictdatabase as DDB 4 | 5 | 6 | def test_delete(use_compression, use_orjson, indent): 7 | DDB.at("test_delete").create({"a": 1}, force_overwrite=True) 8 | assert DDB.at("test_delete").read() == {"a": 1} 9 | DDB.at("test_delete").delete() 10 | assert DDB.at("test_delete").read() is None 11 | 12 | with pytest.raises(RuntimeError): 13 | DDB.at("test_delete", where=lambda k, v: True).delete() 14 | 15 | with pytest.raises(RuntimeError): 16 | DDB.at("test_delete", key="any").delete() 17 | 18 | 19 | def test_delete_nonexistent(use_compression, use_orjson, indent): 20 | DDB.at("test_delete_nonexistent").delete() 21 | -------------------------------------------------------------------------------- /tests/test_excepts.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from path_dict import pd 3 | 4 | import dictdatabase as DDB 5 | from dictdatabase import io_bytes, utils 6 | 7 | 8 | def test_except_during_open_session(use_compression, use_orjson, indent): 9 | name = "test_except_during_open_session" 10 | d = {"test": "value"} 11 | DDB.at(name).create(d, force_overwrite=True) 12 | with pytest.raises(RuntimeError): 13 | with DDB.at(name).session() as (session, test): 14 | raise RuntimeError("Any Exception") 15 | 16 | 17 | def test_except_on_save_unserializable(use_compression, use_orjson, indent): 18 | name = "test_except_on_save_unserializable" 19 | with pytest.raises(TypeError): 20 | d = {"test": "value"} 21 | DDB.at(name).create(d, force_overwrite=True) 22 | with DDB.at(name).session(as_type=pd) as (session, test): 23 | test["test"] = {"key": {1, 2}} 24 | session.write() 25 | 26 | 27 | def test_except_on_save_unserializable_in_multisession(use_compression, use_orjson, indent): 28 | name = "test_except_on_save_unserializable_in_multisession" 29 | with pytest.raises(TypeError): 30 | d = {"test": "value"} 31 | DDB.at(name, "1").create(d, force_overwrite=True) 32 | DDB.at(name, "2").create(d, force_overwrite=True) 33 | with DDB.at(name, "*").session(as_type=pd) as (session, test): 34 | test["1"]["test"] = 
{"key": {1, 2}} 35 | session.write() 36 | 37 | 38 | def test_except_on_session_in_session(use_compression, use_orjson, indent): 39 | name = "test_except_on_session_in_session" 40 | d = {"test": "value"} 41 | DDB.at(name).create(d, force_overwrite=True) 42 | with pytest.raises(RuntimeError): 43 | with DDB.at(name).session(as_type=pd) as (session, test): 44 | with DDB.at(name).session(as_type=pd) as (session2, test2): 45 | pass 46 | 47 | 48 | def test_except_on_write_outside_session(use_compression, use_orjson, indent): 49 | with pytest.raises(PermissionError): 50 | s = DDB.at("test_except_on_write_outside_session").session() 51 | s.write() 52 | 53 | 54 | def test_wildcard_and_subkey_except(use_compression, use_orjson, indent): 55 | with pytest.raises(TypeError): 56 | DDB.at("test_wildcard_and_subkey_except/*", key="key").read() 57 | 58 | 59 | def test_utils_invalid_json_except(): 60 | with pytest.raises(TypeError): 61 | utils.seek_index_through_value_bytes(b"{This is not { JSON", 0) 62 | 63 | 64 | def test_bytes_write_except(): 65 | # It is not allowed to specify a start index when compression is used. 66 | with pytest.raises(RuntimeError): 67 | DDB.config.use_compression = True 68 | io_bytes.write("any", b"any", start=1) 69 | -------------------------------------------------------------------------------- /tests/test_exists.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import dictdatabase as DDB 4 | 5 | 6 | def test_exists(use_compression, use_orjson, indent): 7 | DDB.at("test_exists").create({"a": 1}, force_overwrite=True) 8 | assert DDB.at("test_exists").exists() 9 | assert not DDB.at("test_exists/nonexistent").exists() 10 | assert DDB.at("test_exists", key="a").exists() 11 | assert not DDB.at("test_exists", key="b").exists() 12 | with pytest.raises(RuntimeError): 13 | DDB.at("test_exists", where=lambda k, v: True).exists() 14 | -------------------------------------------------------------------------------- /tests/test_indentation.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import orjson 4 | import pytest 5 | 6 | import dictdatabase as DDB 7 | from dictdatabase import config, io_bytes, io_unsafe, utils 8 | 9 | data = { 10 | "a": 1, 11 | "b": { 12 | "c": 2, 13 | "cl": [1, "\\"], 14 | "d": { 15 | "e": 3, 16 | "el": [1, "\\"], 17 | }, 18 | }, 19 | "l": [1, "\\"], 20 | } 21 | 22 | 23 | def string_dump(db: dict): 24 | if not config.use_orjson: 25 | return json.dumps(db, indent=config.indent, sort_keys=True).encode() 26 | option = (orjson.OPT_INDENT_2 if config.indent else 0) | orjson.OPT_SORT_KEYS 27 | return orjson.dumps(db, option=option) 28 | 29 | 30 | def test_indentation(use_compression, use_orjson, indent): 31 | DDB.at("test_indentation").create(data, force_overwrite=True) 32 | 33 | with DDB.at("test_indentation", key="b").session() as (session, db_b): 34 | db_b["c"] = 3 35 | session.write() 36 | data["b"]["c"] = 3 37 | 38 | assert io_bytes.read("test_indentation") == string_dump(data) 39 | 40 | # Accessing a key not at root level should raise an error 41 | with pytest.raises(KeyError): 42 | with DDB.at("test_indentation", key="d").session() as (session, db_d): 43 | session.write() 44 | assert io_bytes.read("test_indentation") == string_dump(data) 45 | -------------------------------------------------------------------------------- /tests/test_indexer.py: -------------------------------------------------------------------------------- 1 | import dictdatabase 
as DDB 2 | 3 | 4 | def test_indexer(use_compression, use_orjson, indent): 5 | DDB.at("test_indexer").create(force_overwrite=True, data={"a": {"e": 4}, "b": 2}) 6 | 7 | # Trigger create index entry for key "a" 8 | assert DDB.at("test_indexer", key="a").read() == {"e": 4} 9 | 10 | # Retrieve the index entry for key "a" by using the indexer 11 | with DDB.at("test_indexer", key="a").session() as (session, d): 12 | d["e"] = 5 13 | session.write() 14 | 15 | # Check that the index entry for key "a" has been updated 16 | assert DDB.at("test_indexer").read() == {"a": {"e": 5}, "b": 2} 17 | -------------------------------------------------------------------------------- /tests/test_io_bytes.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dictdatabase import io_bytes 4 | 5 | 6 | def test_write_bytes(name_of_test, use_compression): 7 | # No partial writing to compressed file allowed 8 | if use_compression: 9 | with pytest.raises(RuntimeError): 10 | io_bytes.write(name_of_test, b"test", start=5) 11 | return 12 | # Write shorter content at index 13 | io_bytes.write(name_of_test, b"0123456789") 14 | io_bytes.write(name_of_test, b"abc", start=2) 15 | assert io_bytes.read(name_of_test) == b"01abc" 16 | # Overwrite with shorter content 17 | io_bytes.write(name_of_test, b"xy") 18 | assert io_bytes.read(name_of_test) == b"xy" 19 | # Overwrite with longer content 20 | io_bytes.write(name_of_test, b"0123456789") 21 | io_bytes.write(name_of_test, b"abcdef", start=8) 22 | assert io_bytes.read(name_of_test) == b"01234567abcdef" 23 | # Write at index out of range 24 | io_bytes.write(name_of_test, b"01") 25 | io_bytes.write(name_of_test, b"ab", start=4) 26 | assert io_bytes.read(name_of_test) == b"01\x00\x00ab" 27 | 28 | 29 | def test_read_bytes(name_of_test, use_compression): 30 | io_bytes.write(name_of_test, b"0123456789") 31 | # In range 32 | assert io_bytes.read(name_of_test, start=2, end=5) == b"234" 33 | # Normal ranges 34 | assert io_bytes.read(name_of_test, start=0, end=10) == b"0123456789" 35 | assert io_bytes.read(name_of_test, start=2) == b"23456789" 36 | assert io_bytes.read(name_of_test, end=2) == b"01" 37 | assert io_bytes.read(name_of_test) == b"0123456789" 38 | # End out of range 39 | assert io_bytes.read(name_of_test, start=9, end=20) == b"9" 40 | # Completely out of range 41 | assert io_bytes.read(name_of_test, start=25, end=30) == b"" 42 | # Start negative 43 | if use_compression: 44 | assert io_bytes.read(name_of_test, start=-5, end=3) == b"" 45 | else: 46 | with pytest.raises(OSError): 47 | io_bytes.read(name_of_test, start=-5, end=3) 48 | -------------------------------------------------------------------------------- /tests/test_io_safe.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | 5 | import dictdatabase as DDB 6 | from dictdatabase import io_safe 7 | 8 | 9 | def test_read(use_compression, use_orjson, indent): 10 | # Elicit read error 11 | DDB.config.use_orjson = True 12 | with pytest.raises(json.decoder.JSONDecodeError): 13 | with open(f"{DDB.config.storage_directory}/corrupted_json.json", "w") as f: 14 | f.write("This is not JSON") 15 | io_safe.read("corrupted_json") 16 | 17 | 18 | def test_partial_read(use_compression, use_orjson, indent): 19 | assert io_safe.partial_read("nonexistent", key="none") is None 20 | 21 | 22 | def test_write(use_compression, use_orjson, indent): 23 | with pytest.raises(TypeError): 24 | io_safe.write("nonexistent", 
lambda x: x) 25 | 26 | 27 | def test_delete(use_compression, use_orjson, indent): 28 | DDB.at("to_be_deleted").create() 29 | DDB.at("to_be_deleted").delete() 30 | assert DDB.at("to_be_deleted").read() is None 31 | -------------------------------------------------------------------------------- /tests/test_locking.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | 4 | import pytest 5 | 6 | from dictdatabase import locking 7 | 8 | 9 | def test_lock_release(): 10 | lock = locking.WriteLock("db_release") 11 | 12 | with lock: 13 | pass # Lock should be released here 14 | 15 | # Now, another lock should be able to be acquired 16 | with locking.WriteLock("db_release"): 17 | pass 18 | 19 | 20 | def test_read_lock_release(): 21 | read_lock = locking.ReadLock("test_db") 22 | write_lock = locking.WriteLock("test_db") 23 | 24 | # Acquire and release a read lock 25 | with read_lock: 26 | pass 27 | 28 | # Now attempt to acquire a write lock 29 | with write_lock: 30 | assert write_lock.has_lock is not None 31 | 32 | read_lock._unlock() 33 | write_lock._unlock() 34 | 35 | 36 | def test_double_lock_exception(use_compression): 37 | name = "test_double_lock_exception" 38 | with pytest.raises(RuntimeError): 39 | with locking.ReadLock(name): 40 | with locking.ReadLock(name): 41 | pass 42 | 43 | ls = locking.FileLocksSnapshot(locking.ReadLock(name).need_lock) 44 | assert len(ls.locks) == 0 45 | 46 | 47 | def test_get_lock_names(use_compression): 48 | lock = locking.ReadLock("db") 49 | lock._lock() 50 | 51 | ls = locking.FileLocksSnapshot(locking.ReadLock("none").need_lock) 52 | assert ls.locks == [] 53 | ls = locking.FileLocksSnapshot(lock.need_lock) 54 | assert len(ls.locks) == 1 55 | 56 | assert ls.locks[0].id == str(threading.get_native_id()) 57 | assert int(ls.locks[0].time_ns) >= int(lock.need_lock.time_ns) 58 | assert ls.locks[0].stage == "has" 59 | assert ls.locks[0].mode == "read" 60 | 61 | assert ls.any_has_locks 62 | assert not ls.any_write_locks 63 | assert not ls.any_has_write_locks 64 | 65 | lock._unlock() 66 | 67 | 68 | def test_lock_must_implement_lock_function(): 69 | class BadLock(locking.AbstractLock): 70 | mode = "read" 71 | 72 | lock = BadLock("db") 73 | with pytest.raises(NotImplementedError): 74 | lock._lock() 75 | 76 | 77 | def test_remove_orphaned_locks(): 78 | # SLEEP_TIMEOUT = 0.001 79 | # LOCK_KEEP_ALIVE_TIMEOUT = 0.001 80 | # REMOVE_ORPHAN_LOCK_TIMEOUT = 20.0 # Duration to wait before considering a lock as orphaned. 
81 | # AQUIRE_LOCK_TIMEOUT = 60.0 82 | 83 | prev = locking.AQUIRE_LOCK_TIMEOUT, locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.REMOVE_ORPHAN_LOCK_TIMEOUT 84 | 85 | locking.AQUIRE_LOCK_TIMEOUT = 10.0 86 | locking.LOCK_KEEP_ALIVE_TIMEOUT = 1.0 87 | locking.REMOVE_ORPHAN_LOCK_TIMEOUT = 0.1 88 | lock = locking.ReadLock("test_remove_orphaned_locks") 89 | lock._lock() 90 | 91 | ls = locking.FileLocksSnapshot(lock.need_lock) 92 | assert len(ls.locks) >= 1 ## The one lock or two if currently in keep alive handover 93 | 94 | time.sleep(0.2) 95 | # Trigger the removal of orphaned locks 96 | ls = locking.FileLocksSnapshot(lock.need_lock) 97 | 98 | assert len(ls.locks) == 0 99 | 100 | lock._unlock() 101 | 102 | locking.AQUIRE_LOCK_TIMEOUT, locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.REMOVE_ORPHAN_LOCK_TIMEOUT = prev 103 | 104 | 105 | def test_lock_keep_alive(): 106 | prev = locking.AQUIRE_LOCK_TIMEOUT, locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.REMOVE_ORPHAN_LOCK_TIMEOUT 107 | 108 | locking.LOCK_KEEP_ALIVE_TIMEOUT = 0.1 109 | locking.ALIVE_LOCK_MAX_AGE = 0.5 110 | 111 | lock = locking.ReadLock("test_lock_keep_alive") 112 | 113 | with lock: 114 | time.sleep(1.0) 115 | 116 | locking.AQUIRE_LOCK_TIMEOUT, locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.REMOVE_ORPHAN_LOCK_TIMEOUT = prev 117 | -------------------------------------------------------------------------------- /tests/test_parallel_crud.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | from multiprocessing import Manager, Process 4 | 5 | import dictdatabase as DDB 6 | 7 | 8 | def do_create(name_of_test: str, return_dict: dict, id_counter: dict, operations: dict) -> None: 9 | with DDB.at(name_of_test).session() as (session, db): 10 | key = f"{id_counter['id']}" 11 | db[key] = {"counter": 0} 12 | id_counter["id"] += 1 13 | operations["create"] += 1 14 | session.write() 15 | return_dict["created_ids"] += [key] 16 | 17 | 18 | def do_update(name_of_test: str, return_dict: dict, operations: dict) -> None: 19 | # increment a random counter 20 | with DDB.at(name_of_test).session() as (session, db): 21 | key = random.choice(return_dict["created_ids"]) 22 | db[key]["counter"] += 1 23 | operations["increment"] += 1 24 | session.write() 25 | 26 | 27 | def do_delete(name_of_test: str, return_dict: dict, operations: dict) -> None: 28 | # Delete a counter 29 | with DDB.at(name_of_test).session() as (session, db): 30 | key = random.choice(return_dict["created_ids"]) 31 | operations["increment"] -= db[key]["counter"] 32 | operations["delete"] += 1 33 | db.pop(key) 34 | return_dict["created_ids"] = [i for i in return_dict["created_ids"] if i != key] 35 | session.write() 36 | 37 | 38 | def do_read(name_of_test: str, return_dict: dict, operations: dict) -> None: 39 | # read a counter 40 | key = random.choice(return_dict["created_ids"]) 41 | DDB.at(name_of_test, key=key).read() 42 | operations["read"] += 1 43 | 44 | 45 | def worker_process(name_of_test: str, i: int, return_dict: dict, id_counter: dict) -> None: 46 | # Random seed to ensure each process gets different random numbers 47 | random.seed(i) 48 | DDB.config.storage_directory = ".ddb_bench_threaded" 49 | operations = { 50 | "create": 0, 51 | "increment": 0, 52 | "read": 0, 53 | "delete": 0, 54 | } 55 | 56 | for _ in range(1000): 57 | choice = random.random() 58 | if choice < 0.05: # 5% chance 59 | do_create(name_of_test, return_dict, id_counter, operations) 60 | elif choice < 0.30: # 25% chance 61 | do_update(name_of_test, return_dict, operations) 62 | 
elif choice < 0.33: # 3% chance 63 | do_delete(name_of_test, return_dict, operations) 64 | else: # 67% chance 65 | do_read(name_of_test, return_dict, operations) 66 | 67 | # Return the operations for this worker 68 | return_dict[i] = operations 69 | 70 | 71 | def test_multiprocessing_crud(name_of_test, use_compression, use_orjson): 72 | pre_fill_count = 500 73 | DDB.at(name_of_test).create({f"{i}": {"counter": 0} for i in range(pre_fill_count)}, force_overwrite=True) 74 | 75 | manager = Manager() 76 | return_dict = manager.dict() 77 | id_counter = manager.dict() 78 | id_counter["id"] = pre_fill_count 79 | return_dict["created_ids"] = [f"{i}" for i in range(pre_fill_count)] 80 | 81 | start_time = time.time() 82 | processes = [] 83 | for i in range(8): # Spawn 8 processes 84 | p = Process(target=worker_process, args=(name_of_test, i, return_dict, id_counter)) 85 | processes.append(p) 86 | p.start() 87 | 88 | for p in processes: 89 | p.join() 90 | 91 | print(return_dict) 92 | print("Duration", time.time() - start_time) 93 | 94 | db_state = DDB.at(name_of_test).read() 95 | 96 | logged_increment_ops = sum(x["increment"] for k, x in return_dict.items() if k != "created_ids") 97 | assert logged_increment_ops == sum(x["counter"] for x in db_state.values()) 98 | 99 | logged_create_ops = sum(x["create"] for k, x in return_dict.items() if k != "created_ids") 100 | logged_delete_ops = sum(x["delete"] for k, x in return_dict.items() if k != "created_ids") 101 | assert pre_fill_count + logged_create_ops - logged_delete_ops == len(db_state.keys()) 102 | -------------------------------------------------------------------------------- /tests/test_parallel_sessions.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.pool import Pool 2 | 3 | from path_dict import pd 4 | 5 | import dictdatabase as DDB 6 | 7 | 8 | def increment_counters(n, tables, cfg): 9 | DDB.config.storage_directory = cfg.storage_directory 10 | DDB.config.use_compression = cfg.use_compression 11 | DDB.config.use_orjson = cfg.use_orjson 12 | 13 | for _ in range(n): 14 | for t in range(tables): 15 | # Perform a counter increment 16 | with DDB.at(f"test_stress_parallel{t}").session(as_type=pd) as (session, d): 17 | d["counter"] = lambda x: (x or 0) + 1 18 | session.write() 19 | return True 20 | 21 | 22 | def read_counters(n, tables, cfg): 23 | DDB.config.storage_directory = cfg.storage_directory 24 | DDB.config.use_compression = cfg.use_compression 25 | DDB.config.use_orjson = cfg.use_orjson 26 | for _ in range(n): 27 | for t in range(tables): 28 | DDB.at(f"test_stress_parallel{t}").read() 29 | return True 30 | 31 | 32 | def test_stress_multiprocessing(use_compression, use_orjson): 33 | per_thread = 15 34 | tables = 1 35 | threads = 3 36 | # Create tables 37 | for t in range(tables): 38 | DDB.at(f"test_stress_parallel{t}").create({}, force_overwrite=True) 39 | 40 | results = [] 41 | pool = Pool(processes=threads) 42 | for _ in range(threads): 43 | r = pool.apply_async(increment_counters, args=(per_thread, tables, DDB.config)) 44 | results.append(r) 45 | r = pool.apply_async(read_counters, args=(per_thread, tables, DDB.config)) 46 | results.append(r) 47 | pool.close() 48 | pool.join() 49 | 50 | # Check correctness of results 51 | assert [r.get() for r in results] == [True] * threads * 2 52 | for t in range(tables): 53 | db = DDB.at(f"test_stress_parallel{t}").read() 54 | assert db["counter"] == threads * per_thread 55 | 56 | 57 | def test_heavy_multiprocessing(): 58 | per_thread = 50 59 
| tables = 1 60 | threads = 20 61 | # Create tables 62 | for t in range(tables): 63 | DDB.at(f"test_stress_parallel{t}").create({}, force_overwrite=True) 64 | 65 | results = [] 66 | pool = Pool(processes=threads) 67 | for _ in range(threads): 68 | r = pool.apply_async(increment_counters, args=(per_thread, tables, DDB.config)) 69 | results.append(r) 70 | r = pool.apply_async(read_counters, args=(per_thread, tables, DDB.config)) 71 | results.append(r) 72 | pool.close() 73 | pool.join() 74 | 75 | # Check correctness of results 76 | assert [r.get() for r in results] == [True] * threads * 2 77 | for t in range(tables): 78 | db = DDB.at(f"test_stress_parallel{t}").read() 79 | assert db["counter"] == threads * per_thread 80 | 81 | 82 | def read_partial(n, cfg): 83 | DDB.locking.SLEEP_TIMEOUT = 0 84 | DDB.config = cfg 85 | for _ in range(n): 86 | DDB.at("test_stress_parallel0", key="key").read() 87 | return True 88 | 89 | 90 | def test_induce_indexer_except(use_compression): 91 | DDB.at("test_stress_parallel0").create({}, force_overwrite=True) 92 | 93 | pool = Pool(processes=2) 94 | for _ in range(2): 95 | pool.apply_async(read_partial, args=(1000, DDB.config)) 96 | pool.close() 97 | pool.join() 98 | -------------------------------------------------------------------------------- /tests/test_partial.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | from path_dict import pd 5 | 6 | import dictdatabase as DDB 7 | 8 | 9 | def test_subread(use_compression, use_orjson, indent): 10 | name = "test_subread" 11 | j = { 12 | "a": "Hello{}", 13 | "b": [0, 1], 14 | "c": {"d": "e"}, 15 | } 16 | 17 | DDB.at(name).create(j, force_overwrite=True) 18 | 19 | assert DDB.at(name, key="a").read() == "Hello{}" 20 | assert DDB.at(name, where=lambda k, v: isinstance(v, list)).read() == {"b": [0, 1]} 21 | 22 | assert DDB.at(name, key="f").read() is None 23 | 24 | assert DDB.at(name, key="b").read() == [0, 1] 25 | assert DDB.at(name, key="c").read() == {"d": "e"} 26 | 27 | j2 = {"a": {"b": "c"}, "b": {"d": "e"}} 28 | DDB.at("test_subread2").create(j2, force_overwrite=True) 29 | assert DDB.at("test_subread2", key="b").read() == {"d": "e"} 30 | 31 | assert DDB.at("none", key="none").read() is None 32 | 33 | j3 = {"a": {"b": {"\\c\\": {"a": "a"}}}} 34 | DDB.at("test_subread3").create(j3, force_overwrite=True) 35 | assert DDB.at("test_subread3", key="a").read() == {"b": {"\\c\\": {"a": "a"}}} 36 | 37 | 38 | def test_subwrite(use_compression, use_orjson, indent): 39 | name = "test_subwrite" 40 | j = { 41 | "b": {"0": 1}, 42 | "c": {"d": "e"}, 43 | } 44 | 45 | DDB.at(name).create(j, force_overwrite=True) 46 | with DDB.at(name, key="c").session(as_type=pd) as (session, task): 47 | task["f"] = lambda x: (x or 0) + 5 48 | session.write() 49 | assert DDB.at(name, key="c").read() == {"d": "e", "f": 5} 50 | 51 | with DDB.at(name, key="b").session(as_type=pd) as (session, task): 52 | task["f"] = lambda x: (x or 0) + 2 53 | session.write() 54 | 55 | assert DDB.at(name, key="f").read() is None 56 | 57 | with pytest.raises(KeyError): 58 | with DDB.at(name, key="none").session() as (session, key): 59 | session.write() 60 | 61 | 62 | def test_write_file_where(use_compression, use_orjson, indent): 63 | name = "test_write_file_where" 64 | j = { 65 | "a": 1, 66 | "b": 20, 67 | "c": 3, 68 | "d": 40, 69 | } 70 | 71 | DDB.at(name).create(j, force_overwrite=True) 72 | 73 | with DDB.at(name, where=lambda k, v: v > 10).session() as (session, vals): 74 | vals.update({"b": 
30, "d": 50, "e": 60}) 75 | session.write() 76 | assert DDB.at(name).read() == { 77 | "a": 1, 78 | "b": 30, 79 | "c": 3, 80 | "d": 50, 81 | "e": 60, 82 | } 83 | 84 | 85 | def test_dir_where(use_compression, use_orjson, indent): 86 | name = "test_dir_where" 87 | for i in range(5): 88 | DDB.at(name, i).create({"k": i}, force_overwrite=True) 89 | 90 | with DDB.at(name, "*", where=lambda k, v: v["k"] > 2).session() as (session, vals): 91 | for k, v in vals.items(): 92 | v["k"] += 1 93 | session.write() 94 | assert DDB.at(name, "*").read() == { 95 | "0": {"k": 0}, 96 | "1": {"k": 1}, 97 | "2": {"k": 2}, 98 | "3": {"k": 4}, 99 | "4": {"k": 5}, 100 | } 101 | -------------------------------------------------------------------------------- /tests/test_read.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | 5 | import dictdatabase as DDB 6 | from tests.utils import make_complex_nested_random_dict 7 | 8 | 9 | def test_non_existent(use_compression, use_orjson, indent): 10 | d = DDB.at("nonexistent").read() 11 | assert d is None 12 | 13 | 14 | def test_file_exists_error(use_compression, use_orjson, indent): 15 | with open(f"{DDB.config.storage_directory}/test_file_exists_error.json", "w") as f: 16 | f.write("") 17 | with open(f"{DDB.config.storage_directory}/test_file_exists_error.ddb", "w") as f: 18 | f.write("") 19 | with pytest.raises(FileExistsError): 20 | DDB.at("test_file_exists_error").read() 21 | 22 | 23 | def test_invalid_params(use_compression, use_orjson, indent): 24 | with pytest.raises(TypeError): 25 | DDB.at("test_invalid_params", key="any", where=lambda k, v: True).read() 26 | 27 | 28 | def test_read_integrity(use_compression, use_orjson, indent): 29 | cases = [ 30 | r'{"a": "\\", "b": 0}', 31 | r'{"a": "\\\\", "b": 1234}', 32 | r'{"a": "\\\\\"", "b": 1234}', 33 | r'{"a": "\\\"\\", "b": 1234}', 34 | r'{"a": "\"\\\\", "b": 1234}', 35 | r'{"a": "\"", "b": 1234}', 36 | r'{"a": "\"\"", "b": 1234}', 37 | r'{"a": "\"\"\\", "b": 1234}', 38 | r'{"a": "\"\\\"", "b": 1234}', 39 | r'{"a": "\\\"\"", "b": 1234}', 40 | ] 41 | 42 | for case in cases: 43 | with open(f"{DDB.config.storage_directory}/test_read_integrity.json", "w") as f: 44 | f.write(case) 45 | key_a = DDB.at("test_read_integrity", key="a").read() 46 | key_b = DDB.at("test_read_integrity", key="b").read() 47 | assert key_a == json.loads(case)["a"] 48 | assert key_b == json.loads(case)["b"] 49 | 50 | 51 | def test_create_and_read(use_compression, use_orjson, indent): 52 | name = "test_create_and_read" 53 | d = make_complex_nested_random_dict(12, 6) 54 | DDB.at(name).create(d, force_overwrite=True) 55 | dd = DDB.at(name).read() 56 | assert d == dd 57 | 58 | 59 | def test_read_compression_switching(use_orjson, indent): 60 | name = "test_read_compression_switching" 61 | DDB.config.use_compression = False 62 | d = make_complex_nested_random_dict(12, 6) 63 | DDB.at(name).create(d, force_overwrite=True) 64 | DDB.config.use_compression = True 65 | dd = DDB.at(name).read() 66 | assert d == dd 67 | DDB.at(name).create(d, force_overwrite=True) 68 | DDB.config.use_compression = False 69 | dd = DDB.at(name).read() 70 | assert d == dd 71 | 72 | 73 | def test_multiread(use_compression, use_orjson, indent): 74 | dl = [] 75 | for i in range(3): 76 | dl += [make_complex_nested_random_dict(12, 6)] 77 | DDB.at(f"test_multiread/d{i}").create(dl[-1], force_overwrite=True) 78 | 79 | mr = DDB.at("test_multiread/*").read() 80 | mr2 = DDB.at("test_multiread", "*").read() 81 | assert mr == 
mr2 82 | mr = {k.replace("test_multiread/", ""): v for k, v in mr.items()} 83 | assert mr == {f"d{i}": dl[i] for i in range(3)} 84 | -------------------------------------------------------------------------------- /tests/test_threaded_sessions.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor, wait 2 | 3 | from path_dict import pd 4 | 5 | import dictdatabase as DDB 6 | 7 | 8 | def increment_counters(n, tables): 9 | for _ in range(n): 10 | for t in range(tables): 11 | # Perform a counter increment 12 | with DDB.at(f"test_stress_threaded{t}").session(as_type=pd) as (session, d): 13 | d["counter"] = lambda x: (x or 0) + 1 14 | session.write() 15 | return True 16 | 17 | 18 | def read_counters(n, tables): 19 | for _ in range(n): 20 | for t in range(tables): 21 | DDB.at(f"test_stress_threaded{t}").read() 22 | return True 23 | 24 | 25 | def test_stress_threaded(use_compression, use_orjson): 26 | per_thread = 15 27 | tables = 1 28 | threads = 3 29 | # Create tables 30 | for t in range(tables): 31 | DDB.at(f"test_stress_threaded{t}").create({}, force_overwrite=True) 32 | 33 | results = [] 34 | with ThreadPoolExecutor(max_workers=threads) as pool: 35 | for _ in range(threads): 36 | future = pool.submit(increment_counters, per_thread, tables) 37 | results.append(future) 38 | future = pool.submit(read_counters, per_thread, tables) 39 | results.append(future) 40 | wait(results) 41 | 42 | # Check correctness of results 43 | assert [r.result() for r in results] == [True] * threads * 2 44 | for t in range(tables): 45 | db = DDB.at(f"test_stress_threaded{t}").read() 46 | assert db["counter"] == threads * per_thread 47 | 48 | 49 | def test_heavy_threading(): 50 | per_thread = 50 51 | tables = 1 52 | threads = 20 53 | # Create tables 54 | for t in range(tables): 55 | DDB.at(f"test_stress_threaded{t}").create({}, force_overwrite=True) 56 | 57 | results = [] 58 | with ThreadPoolExecutor(max_workers=threads) as pool: 59 | for _ in range(threads): 60 | future = pool.submit(increment_counters, per_thread, tables) 61 | results.append(future) 62 | future = pool.submit(read_counters, per_thread, tables) 63 | results.append(future) 64 | wait(results) 65 | 66 | # Check correctness of results 67 | assert [r.result() for r in results] == [True] * threads * 2 68 | for t in range(tables): 69 | db = DDB.at(f"test_stress_threaded{t}").read() 70 | assert db["counter"] == threads * per_thread 71 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import orjson 4 | 5 | from dictdatabase import byte_codes, utils 6 | 7 | 8 | def test_seek_index_through_value_bytes(): 9 | v = b'{"a": 1, "b": {}}' 10 | assert utils.seek_index_through_value_bytes(v, 5) == 7 11 | assert utils.seek_index_through_value_bytes(v, 6) == 7 12 | assert utils.seek_index_through_value_bytes(v, 13) == 16 13 | vc = b'{"a":1,"b":{}}' 14 | assert utils.seek_index_through_value_bytes(vc, 5) == 6 15 | assert utils.seek_index_through_value_bytes(vc, 11) == 13 16 | n = b'{"a": 1234, "b": {"c": 2}}' 17 | assert utils.seek_index_through_value_bytes(n, 5) == 10 18 | assert utils.seek_index_through_value_bytes(n, 6) == 10 19 | 20 | 21 | def test_seek_index_through_value_bytes_2(): 22 | def load_with_orjson(bytes, key): 23 | return orjson.loads(bytes)[key] 24 | 25 | def load_with_seeker(bytes, key): 26 | key_bytes = 
f'"{key}":'.encode() 27 | a_val_start = bytes.find(key_bytes) + len(key_bytes) 28 | if bytes[a_val_start] == byte_codes.SPACE: 29 | a_val_start += 1 30 | a_val_end = utils.seek_index_through_value_bytes(bytes, a_val_start) 31 | return orjson.loads(bytes[a_val_start:a_val_end]) 32 | 33 | values = [ 34 | # Lists 35 | [], 36 | [{}], 37 | [""], 38 | [1], 39 | [1, 2, 3], 40 | ["xs", -123.3, "c"], 41 | [1, "xs", 2, "value", 3, "c"], 42 | [1, "xs", 2, "value", 3, "c", [1, 2, 3], [1, 2, 3], [1, 2, 3]], 43 | [{}, {}, {}], 44 | [{"xs": 1}, {"value": 2}, {"c": 3}], 45 | [{"xs": 1}, {"value": 2}, {"c": 3}, {"xs": 1}, {"value": 2}, {"c": 3}], 46 | [{"xs": 1}, {"value": 2}, {"c": 3}, {"xs": 1}, {"value": 2}, {"c": 3}, [1, 2, 3], [1, 2, 3], [1, 2, 3]], 47 | # Dicts 48 | {}, 49 | {"": ""}, 50 | {"x": []}, 51 | {"xs": 1}, 52 | {"xs": 1, "value": 2}, 53 | {"xs": [], "value": {}}, 54 | {"xs": -3.3, "value": ""}, 55 | # Numbers 56 | 1, 57 | 1234, 58 | 1.3, 59 | 32.3, 60 | 0, 61 | -1.3, 62 | -0, 63 | # Strings 64 | "", 65 | "a", 66 | "hello", 67 | "a\\b", 68 | "\\", 69 | "\\\\", 70 | '\\\\"', 71 | '\\"\\', 72 | '"\\\\', 73 | '"', 74 | '""', 75 | '""\\', 76 | '"\\"', 77 | '\\""', 78 | # Booleans 79 | True, 80 | None, 81 | False, 82 | ] 83 | 84 | for indent, v1, v2 in itertools.product([False, True], values, values): 85 | option = orjson.OPT_SORT_KEYS | (orjson.OPT_INDENT_2 if indent else 0) 86 | json_bytes = orjson.dumps({"a": v1, "b": v2}, option=option) 87 | assert load_with_orjson(json_bytes, "a") == load_with_seeker(json_bytes, "a") 88 | assert load_with_orjson(json_bytes, "b") == load_with_seeker(json_bytes, "b") 89 | -------------------------------------------------------------------------------- /tests/test_where.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from path_dict import PathDict 3 | 4 | import dictdatabase as DDB 5 | 6 | 7 | def test_where(use_compression, use_orjson, indent): 8 | for i in range(10): 9 | DDB.at("test_select", i).create({"a": i}, force_overwrite=True) 10 | 11 | s = DDB.at("test_select/*", where=lambda k, v: v["a"] > 7).read() 12 | 13 | assert s == {"8": {"a": 8}, "9": {"a": 9}} 14 | 15 | with pytest.raises(KeyError): 16 | DDB.at("test_select/*", where=lambda k, v: v["b"] > 5).read() 17 | 18 | assert DDB.at("nonexistent/*", where=lambda k, v: v["a"] > 5).read() == {} 19 | 20 | assert DDB.at("nonexistent", where=lambda k, v: v["a"] > 5).read() is None 21 | 22 | s = DDB.at("test_select/*", where=lambda k, v: v.at("a").get() > 7).read(as_type=PathDict) 23 | assert s.get() == {"8": {"a": 8}, "9": {"a": 9}} 24 | -------------------------------------------------------------------------------- /tests/test_write.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from path_dict import pd 3 | 4 | import dictdatabase as DDB 5 | from tests.utils import make_complex_nested_random_dict 6 | 7 | 8 | def test_non_existent_session(use_compression, use_orjson, indent): 9 | name = "test_non_existent_session" 10 | with pytest.raises(FileNotFoundError): 11 | with DDB.at(name).session() as (session, d): 12 | session.write() 13 | 14 | 15 | def test_write(use_compression, use_orjson, indent): 16 | name = "test_write" 17 | d = make_complex_nested_random_dict(12, 6) 18 | DDB.at(name).create(d, force_overwrite=True) 19 | with DDB.at(name).session() as (session, dd): 20 | assert d == dd 21 | session.write() 22 | 23 | 24 | def test_write_compression_switching(use_orjson, indent): 25 | name = 
"test_write_compression_switching" 26 | DDB.config.use_compression = False 27 | d = make_complex_nested_random_dict(12, 6) 28 | DDB.at(name).create(d, force_overwrite=True) 29 | with DDB.at(name).session() as (session, dd): 30 | assert d == dd 31 | session.write() 32 | assert DDB.at(name).read() == d 33 | DDB.config.use_compression = True 34 | with DDB.at(name).session(as_type=pd) as (session, dd): 35 | assert d == dd.get() 36 | session.write() 37 | assert DDB.at(name).read() == d 38 | DDB.config.use_compression = False 39 | with DDB.at(name).session() as (session, dd): 40 | assert d == dd 41 | session.write() 42 | assert DDB.at(name).read() == d 43 | 44 | 45 | def test_multi_session(use_compression, use_orjson, indent): 46 | a = {"a": 1} 47 | b = {"b": 2} 48 | 49 | DDB.at("test_multi_session/d1").create(a, force_overwrite=True) 50 | DDB.at("test_multi_session/d2").create(b, force_overwrite=True) 51 | 52 | with DDB.at("test_multi_session/*").session() as (session, d): 53 | assert d == {"d1": a, "d2": b} 54 | session.write() 55 | assert DDB.at("test_multi_session/*").read() == {"d1": a, "d2": b} 56 | 57 | 58 | def test_write_wildcard_key_except(use_compression, use_orjson, indent): 59 | with pytest.raises(TypeError): 60 | with DDB.at("test/*", key="any").session() as (session, d): 61 | pass 62 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | import string 5 | 6 | 7 | def get_tasks_json(): 8 | print(os.getcwd()) 9 | with open("test_db/production_database/tasks.json", "rb") as f: 10 | return json.load(f) 11 | 12 | 13 | def make_complex_nested_random_dict(max_width, max_depth): 14 | def random_string(choices, md): 15 | length = random.randint(0, max_width) 16 | letters = string.ascii_letters + "".join(["\\", " ", "🚀", '"']) 17 | return "".join(random.choices(letters, k=length)) 18 | 19 | def random_int(choices, md): 20 | return random.randint(-1000, 1000) 21 | 22 | def random_float(choices, md): 23 | return random.uniform(-1000, 1000) 24 | 25 | def random_bool(choices, md): 26 | return random.choice([True, False]) 27 | 28 | def random_none(choices, md): 29 | return None 30 | 31 | def random_list(choices, md): 32 | if md == 0: 33 | return [] 34 | res = [] 35 | for _ in range(random.randint(0, max_width)): 36 | v = random.choice(choices)(choices, md - 1) 37 | res += [v] 38 | return res 39 | 40 | def random_dict(choices, md): 41 | if md == 0: 42 | return {} 43 | res = {} 44 | for _ in range(random.randint(0, max_width)): 45 | k = random_string(choices, md) 46 | v = random.choice(choices)(choices, md - 1) 47 | res[k] = v 48 | return res 49 | 50 | return random_dict( 51 | [random_string, random_int, random_float, random_bool, random_none, random_list, random_dict], max_depth 52 | ) 53 | --------------------------------------------------------------------------------