├── .gitignore ├── .ideas ├── col.py ├── collections.py └── tables.py ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── NOTICE.txt ├── README.md ├── docs └── index.html ├── howto.txt ├── pyproject.toml ├── requirements ├── dev.in ├── dev.lock ├── dev.unlock ├── manage.in ├── manage.lock ├── manage.unlock ├── pins-validate-pyproject-pep639.in ├── pins-virtualenv-cve-2024-53899.in ├── pip.in ├── pip.lock └── pip.unlock ├── src └── kenobi │ ├── __init__.py │ └── kenobi.py └── tests ├── __init__.py ├── conftest.py └── test_kenobi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution, now with MongoDB-like collection support.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        # check_same_thread=False because executor worker threads may use the
        # connection; every access is serialized through self._lock.
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        self._add_regexp_support(self._connection)
        self._initialize_db()

    def _initialize_db(self):
        """
        Create the table and index if they do not exist, and set
        journal mode to WAL.
        """
        with self._lock:
            self._connection.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL,
                    collection TEXT NOT NULL DEFAULT 'default'
                )
                """
            )
            # Expression index on the commonly queried '$.key' field.
            self._connection.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_key
                ON documents (
                    json_extract(data, '$.key')
                )
                """
            )
            self._connection.execute("PRAGMA journal_mode=WAL;")

    @staticmethod
    def _add_regexp_support(conn):
        """
        Add REGEXP function support to the SQLite connection.
        """

        def regexp(pattern, value):
            """Code sqlite3 runs when REGEXP sql encountered. Takes two params.

            Args:
                pattern (str): regex
                value (Any): column value the regex is matched against

            Returns:
                bool: True match occurred
            """
            # Fix: json_extract() yields SQL NULL for absent keys; sqlite3
            # hands that to this callback as None, and re.search(pattern,
            # None) raises TypeError, which SQLite surfaces as
            # OperationalError. Treat NULL as "no match" and coerce
            # non-string values so numeric JSON fields are matchable.
            if value is None:
                return False
            return re.search(pattern, str(value)) is not None

        conn.create_function("REGEXP", 2, regexp)

    def insert(self, document, collection="default"):
        """
        Insert a single document into a specific collection.

        Args:
            document (dict): The document to insert.
            collection (str): The collection name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If document is not a dict.
            ValueError: If collection is not a non-empty string.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        with self._lock:
            self._connection.execute(
                "INSERT INTO documents (data, collection) VALUES (?, ?)",
                (json.dumps(document), collection),
            )
            self._connection.commit()
        return True

    def insert_many(self, document_list, collection="default"):
        """
        Insert multiple documents into a specific collection.

        Args:
            document_list (list): The list of documents to insert.
            collection (str): The collection name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If document_list is not a list of dicts.
            ValueError: If collection is not a non-empty string.
        """
        if not isinstance(document_list, list) or not all(
            isinstance(doc, dict) for doc in document_list
        ):
            raise TypeError("Must insert a list of dicts")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        with self._lock:
            self._connection.executemany(
                "INSERT INTO documents (data, collection) VALUES (?, ?)",
                [(json.dumps(doc), collection) for doc in document_list],
            )
            self._connection.commit()
        return True

    def remove(self, key, value, collection="default"):
        """
        Remove all documents from a specific collection where key matches value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.
            collection (str): The collection name. Defaults to 'default'.

        Returns:
            int: Number of documents removed.
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")
        if value is None:
            raise ValueError("Value cannot be None")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        # The key is bound as a parameter and spliced into the JSON path,
        # so it is never interpolated into the SQL text itself.
        query = (
            "DELETE FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ?"
        )
        with self._lock:
            result = self._connection.execute(query, (key, value, collection))
            self._connection.commit()
            return result.rowcount

    def search(self, key, value, collection="default", limit=100, offset=0):
        """
        Search documents in a specific collection matching (key == value).

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            collection (str): The collection name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")

        query = (
            "SELECT data FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ? "
            "LIMIT ? OFFSET ?"
        )
        with self._lock:
            cursor = self._connection.execute(
                query, (key, value, collection, limit, offset)
            )
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def all(self, collection="default", limit=100, offset=0):
        """
        Return a paginated list of all documents in a specific collection.

        Args:
            collection (str): The collection name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")

        query = "SELECT data FROM documents WHERE collection = ? LIMIT ? OFFSET ?"
        with self._lock:
            cursor = self._connection.execute(query, (collection, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def list_collections(self):
        """
        List all unique collections in the database.

        Returns:
            list: A list of collection names.
        """
        query = "SELECT DISTINCT collection FROM documents"
        with self._lock:
            cursor = self._connection.execute(query)
            return [row[0] for row in cursor.fetchall()]

    def remove_collection(self, collection):
        """
        Remove all documents in a specific collection.

        Args:
            collection (str): The collection name.

        Returns:
            int: Number of documents removed.
        """
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        query = "DELETE FROM documents WHERE collection = ?"
        with self._lock:
            result = self._connection.execute(query, (collection,))
            self._connection.commit()
            return result.rowcount

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution, now with MongoDB-like collection and table support.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        self._add_regexp_support(self._connection)
        self._initialize_db()

    def _initialize_db(self):
        """
        Create the table and index if they do not exist, and set
        journal mode to WAL.
        """
        with self._lock:
            self._connection.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL,
                    collection TEXT NOT NULL DEFAULT 'default',
                    table_name TEXT NOT NULL DEFAULT 'default'
                )
                """
            )
            self._connection.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_key
                ON documents (
                    json_extract(data, '$.key')
                )
                """
            )
            self._connection.execute("PRAGMA journal_mode=WAL;")

    @staticmethod
    def _add_regexp_support(connection):
        """
        Register a REGEXP implementation on the SQLite connection so SQL
        queries may use the REGEXP operator.
        """

        def regexp(pattern, value):
            """SQLite callback for REGEXP: True when *pattern* matches *value*.

            Args:
                pattern (str): regex
                value (str): text blob the regex parses

            Returns:
                bool: True match occurred
            """
            return re.search(pattern, value) is not None

        connection.create_function("REGEXP", 2, regexp)

    @staticmethod
    def _check_scope(name, what):
        """Validate a collection/table label; raise ValueError otherwise."""
        if not isinstance(name, str) or not name:
            raise ValueError(f"{what} must be a non-empty string")

    @staticmethod
    def _check_key(key):
        """Validate a document field name; raise ValueError otherwise."""
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")

    def insert(self, document, collection="default", table_name="default"):
        """
        Insert a single document into a specific collection and table.

        Args:
            document (dict): The document to insert.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")
        row = (json.dumps(document), collection, table_name)
        with self._lock:
            self._connection.execute(
                "INSERT INTO documents (data, collection, table_name) VALUES (?, ?, ?)",
                row,
            )
            self._connection.commit()
        return True

    def insert_many(self, document_list, collection="default", table_name="default"):
        """
        Insert multiple documents into a specific collection and table.

        Args:
            document_list (list): The list of documents to insert.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.
        """
        if not isinstance(document_list, list) or any(
            not isinstance(doc, dict) for doc in document_list
        ):
            raise TypeError("Must insert a list of dicts")
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")
        rows = [(json.dumps(doc), collection, table_name) for doc in document_list]
        with self._lock:
            self._connection.executemany(
                "INSERT INTO documents (data, collection, table_name) VALUES (?, ?, ?)",
                rows,
            )
            self._connection.commit()
        return True

    def remove(self, key, value, collection="default", table_name="default"):
        """
        Remove all documents from a specific collection and table where
        key matches value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.

        Returns:
            int: Number of documents removed.
        """
        self._check_key(key)
        if value is None:
            raise ValueError("Value cannot be None")
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")
        sql = (
            "DELETE FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ? AND table_name = ?"
        )
        with self._lock:
            cur = self._connection.execute(sql, (key, value, collection, table_name))
            self._connection.commit()
            return cur.rowcount

    def search(
        self,
        key,
        value,
        collection="default",
        table_name="default",
        limit=100,
        offset=0,
    ):
        """
        Search documents in a specific collection and table matching
        (key == value).

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).
        """
        self._check_key(key)
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")

        sql = (
            "SELECT data FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ? AND table_name = ? "
            "LIMIT ? OFFSET ?"
        )
        params = (key, value, collection, table_name, limit, offset)
        with self._lock:
            cur = self._connection.execute(sql, params)
            return [json.loads(blob) for (blob,) in cur.fetchall()]

    def all(self, collection="default", table_name="default", limit=100, offset=0):
        """
        Return a paginated list of all documents in a specific collection
        and table.

        Args:
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")

        sql = "SELECT data FROM documents WHERE collection = ? AND table_name = ? LIMIT ? OFFSET ?"
        with self._lock:
            cur = self._connection.execute(sql, (collection, table_name, limit, offset))
            return [json.loads(blob) for (blob,) in cur.fetchall()]

    def list_collections(self):
        """
        List all unique collections in the database.

        Returns:
            list: A list of collection names.
        """
        with self._lock:
            cur = self._connection.execute("SELECT DISTINCT collection FROM documents")
            return [name for (name,) in cur.fetchall()]

    def list_tables(self):
        """
        List all unique table names in the database.

        Returns:
            list: A list of table names.
        """
        with self._lock:
            cur = self._connection.execute("SELECT DISTINCT table_name FROM documents")
            return [name for (name,) in cur.fetchall()]

    def remove_collection(self, collection):
        """
        Remove all documents in a specific collection.

        Args:
            collection (str): The collection name.

        Returns:
            int: Number of documents removed.
        """
        self._check_scope(collection, "Collection")
        with self._lock:
            cur = self._connection.execute(
                "DELETE FROM documents WHERE collection = ?", (collection,)
            )
            self._connection.commit()
            return cur.rowcount

    def remove_table(self, table_name):
        """
        Remove all documents in a specific table.

        Args:
            table_name (str): The table name.

        Returns:
            int: Number of documents removed.
        """
        self._check_scope(table_name, "Table name")
        with self._lock:
            cur = self._connection.execute(
                "DELETE FROM documents WHERE table_name = ?", (table_name,)
            )
            self._connection.commit()
            return cur.rowcount

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        # NOTE(review): this set is never read or written anywhere in the
        # class; kept only for attribute compatibility with existing users.
        self._regexp_connections = set()
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        # REGEXP support is registered eagerly here, at construction time
        # (the previous "lazily" comment was inaccurate).
        self._add_regexp_support(self._connection)

    def _add_regexp_support(self, conn):
        """
        Add REGEXP function support to the SQLite connection.
        """

        def regexp(pattern, value):
            """Code sqlite3 runs when REGEXP sql encountered. Takes two params.
            inner function is untestable, a module level function is testable

            Args:
                pattern (str): regex
                value (str): text blob the regex parses

            Returns:
                bool: True match occurred
            """
            return re.search(pattern, value) is not None

        conn.create_function("REGEXP", 2, regexp)

    def table(self, name):
        """
        Access or create a specific table.

        Args:
            name (str): The name of the table. Must be a valid SQL
                identifier (letters, digits, underscores; not starting
                with a digit).

        Returns:
            KenobiTable: An object for interacting with the table.

        Raises:
            ValueError: If the name is empty, not a string, or not a
                valid identifier.
        """
        if not name or not isinstance(name, str):
            raise ValueError("Table name must be a non-empty string.")
        # Security fix: the table name is later interpolated into SQL with
        # f-strings (identifiers cannot be bound as parameters), so an
        # arbitrary string here would allow SQL injection. Restrict names
        # to plain identifiers before any SQL is built.
        if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name):
            raise ValueError("Table name must be a valid SQL identifier.")
        return KenobiTable(self, name)

    def execute_async(self, func, *args, **kwargs):
        """
        Execute a function asynchronously using a thread pool.

        Args:
            func (callable): The function to execute.
            *args: Arguments for the function.
            **kwargs: Keyword arguments for the function.

        Returns:
            concurrent.futures.Future: A Future object representing
            the execution.
        """
        return self.executor.submit(func, *args, **kwargs)

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
class KenobiTable:
    """
    A class to represent and interact with a specific table within KenobiDB.
    """

    def __init__(self, db, name):
        """Class constructor.

        Args:
            db (KenobiDB): The owning database; supplies the shared
                connection and lock.
            name (str): The table name. Must be a valid SQL identifier.

        Raises:
            ValueError: If the name is not a valid identifier.
        """
        # Security fix: every query below interpolates the table name with
        # f-strings (SQLite cannot bind identifiers as parameters), so an
        # unchecked name is a SQL injection vector. Allow identifiers only.
        if not isinstance(name, str) or not re.fullmatch(
            r"[A-Za-z_][A-Za-z0-9_]*", name
        ):
            raise ValueError("Table name must be a valid SQL identifier.")
        self.db = db
        self.name = name
        self._lock = db._lock
        self._create_table()

    def _create_table(self):
        """
        Create the table if it does not exist.
        """
        with self._lock:
            self.db._connection.execute(
                f"""
                CREATE TABLE IF NOT EXISTS {self.name} (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL
                )
                """
            )

    def insert(self, document):
        """
        Insert a document into this table.

        Args:
            document (dict): The document to insert.

        Returns:
            bool: True upon successful insertion.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        with self._lock:
            self.db._connection.execute(
                f"INSERT INTO {self.name} (data) VALUES (?)", (json.dumps(document),)
            )
            self.db._connection.commit()
        return True

    def rename(self, new_name):
        """
        Rename the table.

        Args:
            new_name (str): The new name of the table. Must be a valid
                SQL identifier.

        Raises:
            ValueError: If new_name is empty, not a string, or not a
                valid identifier.
        """
        if not new_name or not isinstance(new_name, str):
            raise ValueError("New table name must be a non-empty string.")
        # Same injection guard as __init__: the name goes into SQL verbatim.
        if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", new_name):
            raise ValueError("New table name must be a valid SQL identifier.")
        with self._lock:
            self.db._connection.execute(f"ALTER TABLE {self.name} RENAME TO {new_name}")
            self.name = new_name

    def drop(self):
        """
        Drop the table.
        """
        with self._lock:
            self.db._connection.execute(f"DROP TABLE {self.name}")

    def all(self, limit=100, offset=0):
        """
        Return a paginated list of all documents in the table.

        Args:
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        query = f"SELECT data FROM {self.name} LIMIT ? OFFSET ?"
        with self._lock:
            cursor = self.db._connection.execute(query, (limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def search(self, key, value, limit=100, offset=0):
        """
        Return a list of documents matching (key == value) in the table.

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")

        # The key is bound as a parameter inside the JSON path, so it needs
        # no identifier validation.
        query = (
            f"SELECT data FROM {self.name} "
            "WHERE json_extract(data, '$.' || ?) = ? "
            "LIMIT ? OFFSET ?"
        )
        with self._lock:
            cursor = self.db._connection.execute(query, (key, value, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def remove(self, key, value):
        """
        Remove all documents where the given key matches the specified value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.

        Returns:
            int: Number of documents removed.
        """
        if not key or not isinstance(key, str):
            raise ValueError("key must be a non-empty string")
        if value is None:
            raise ValueError("value cannot be None")
        query = f"DELETE FROM {self.name} " "WHERE json_extract(data, '$.' || ?) = ?"
        with self._lock:
            result = self.db._connection.execute(query, (key, value))
            self.db._connection.commit()
            return result.rowcount

    def update(self, id_key, id_value, new_dict):
        """
        Update documents that match (id_key == id_value) by merging new_dict.

        Args:
            id_key (str): The field name to match.
            id_value (Any): The value to match.
            new_dict (dict): A dictionary of changes to apply.

        Returns:
            bool: True if at least one document was updated, False otherwise.
        """
        if not isinstance(new_dict, dict):
            raise TypeError("new_dict must be a dictionary")
        if not id_key or not isinstance(id_key, str):
            raise ValueError("id_key must be a non-empty string")
        if id_value is None:
            raise ValueError("id_value cannot be None")

        select_query = (
            f"SELECT data FROM {self.name} " "WHERE json_extract(data, '$.' || ?) = ?"
        )
        update_query = (
            f"UPDATE {self.name} "
            "SET data = ? "
            "WHERE json_extract(data, '$.' || ?) = ?"
        )
        with self._lock:
            cursor = self.db._connection.execute(select_query, (id_key, id_value))
            documents = cursor.fetchall()
            if not documents:
                return False
            for row in documents:
                document = json.loads(row[0])
                # Non-dict JSON values (e.g. a bare list) cannot be merged.
                if not isinstance(document, dict):
                    continue
                document.update(new_dict)
                self.db._connection.execute(
                    update_query, (json.dumps(document), id_key, id_value)
                )
            self.db._connection.commit()
            return True
38 | - id: blackdoc 39 | additional_dependencies: ["black==25.1.0"] 40 | - id: blackdoc-autoupdate-black 41 | 42 | - repo: https://github.com/PyCQA/flake8 43 | rev: 7.1.1 44 | hooks: 45 | - id: flake8 46 | args: ["--ignore=E704,E203,W503,W605,W293,W291,E501"] 47 | 48 | - repo: https://github.com/PyCQA/isort 49 | rev: 6.0.0 50 | hooks: 51 | - id: isort 52 | 53 | - repo: https://github.com/econchick/interrogate 54 | rev: 1.7.0 # or master if you're bold 55 | hooks: 56 | - id: interrogate 57 | args: ["-vv", "--fail-under=100", "--omit-covered-files", "--ignore-init-module"] 58 | pass_filenames: false # needed if excluding files with pyproject.toml 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Harrison Erd 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its 14 | contributors may be used to endorse or promote products derived from this 15 | software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 18 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 19 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 21 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 24 | OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 26 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Makefile 2 | 3 | # missing .in and .unlock files 4 | recursive-include requirements *.in 5 | recursive-include requirements *.unlock 6 | recursive-include requirements *.lock 7 | 8 | # missing __init__.py conftest.py 9 | graft tests/ 10 | 11 | # remove pyc cache folders 12 | prune tests/__pycache__/ 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .ONESHELL: 2 | .DEFAULT_GOAL := help 3 | SHELL := /bin/bash 4 | 5 | # underscore separated; aka sdist and whl names 6 | # https://blogs.gentoo.org/mgorny/2023/02/09/the-inconsistencies-around-python-package-naming-and-the-new-policy/ 7 | APP_NAME := kenobi 8 | 9 | define NORMALIZE_APP_NAME 10 | try: 11 | from importlib import metadata 12 | except ImportError: 13 | v = '$(APP_NAME)'.replace('_', "-").replace('.', "-") 14 | print(v) 15 | else: 16 | print(metadata.metadata('$(APP_NAME)')['Name'])) 17 | endef 18 | 19 | #virtual environment. 
If 0 issue warning 20 | #Not activated:0 21 | #activated: 1 22 | ifeq ($(VIRTUAL_ENV),) 23 | $(warning virtualenv not activated) 24 | is_venv = 25 | else 26 | is_venv = 1 27 | VENV_BIN := $(VIRTUAL_ENV)/bin 28 | VENV_BIN_PYTHON := python3 29 | PY_X_Y := $(shell $(VENV_BIN_PYTHON) -c 'import platform; t_ver = platform.python_version_tuple(); print(".".join(t_ver[:2]));') 30 | endif 31 | 32 | ifeq ($(is_venv),1) 33 | # Package name is hyphen delimited 34 | PACKAGE_NAME ?= $(shell $(VENV_BIN_PYTHON) -c "$(NORMALIZE_APP_NAME)") 35 | VENV_PACKAGES ?= $(shell $(VENV_BIN_PYTHON) -m pip list --disable-pip-version-check --no-input | /bin/awk '{print $$1}') 36 | IS_PACKAGE ?= $(findstring $(1),$(VENV_PACKAGES)) 37 | 38 | is_wheel ?= $(call IS_PACKAGE,wheel) 39 | is_piptools ?= $(call IS_PACKAGE,pip-tools) 40 | 41 | find_whl = $(shell [[ -z "$(3)" ]] && extention=".whl" || extention="$(3)"; [[ -z "$(2)" ]] && srcdir="dist" || srcdir="$(2)/dist"; [[ -z "$(1)" ]] && whl=$$(ls $$srcdir/$(APP_NAME)*.whl --format="single-column") || whl=$$(ls $$srcdir/$(1)*.whl --format="single-column"); echo $${whl##*/}) 42 | endif 43 | 44 | ##@ Helpers 45 | 46 | # https://www.thapaliya.com/en/writings/well-documented-makefiles/ 47 | .PHONY: help 48 | help: ## (Default) Display this help -- Always up to date 49 | @awk -F ':.*##' '/^[^: ]+:.*##/{printf " \033[1m%-20s\033[m %s\n",$$1,$$2} /^##@/{printf "\n%s\n",substr($$0,5)}' $(MAKEFILE_LIST) 50 | 51 | 52 | ##@ Testing 53 | 54 | .PHONY: pre-commit 55 | pre-commit: ## Run checks found in .pre-commit-config.yaml 56 | @pre-commit run --all-files 57 | 58 | .PHONY: update-pre-commit 59 | update-pre-commit: ## Bump package to latest version 60 | @pre-commit autoupdate 61 | 62 | 63 | ##@ GNU Make standard targets 64 | 65 | .PHONY: build 66 | build: ## Make the source distribution 67 | @python -m build 68 | 69 | .PHONY: install 70 | install: override usage := make [force=1] 71 | install: override check_web := Install failed. 
Possible cause no web connection 72 | install: private force_text = $(if $(force),"--force-reinstall") 73 | install: ## Installs *as a package*, not *with the ui* -- make [force=1] [debug=1] install 74 | ifeq ($(is_venv),1) 75 | ifeq ($(is_wheel), wheel) 76 | @if [[ "$$?" -eq 0 ]]; then 77 | 78 | whl=$(call find_whl,$(APP_NAME),,) #1: PYPI package name (hyphens). 2 folder/app name (APP_NAME;underscores). 3 file extension 79 | echo $(whl) 80 | $(VENV_BIN_PYTHON) -m pip install --disable-pip-version-check --no-color --log="/tmp/$(APP_NAME)_install_prod.log" $(force_text) "dist/$$whl" 81 | 82 | fi 83 | 84 | endif 85 | endif 86 | 87 | .PHONY: install-force 88 | install-force: force := 1 89 | install-force: install ## Force install even if exact same version 90 | 91 | # --cov-report=xml 92 | # Dependencies: pytest, pytest-cov, pytest-regressions 93 | # make [v=1] check 94 | # $(VENV_BIN)/pytest --showlocals --cov=wreck --cov-report=term-missing --cov-config=pyproject.toml $(verbose_text) tests 95 | .PHONY: check 96 | check: private verbose_text = $(if $(v),"--verbose") 97 | check: ## Run tests, generate coverage reports -- make [v=1] check 98 | ifeq ($(is_venv),1) 99 | -@$(VENV_BIN_PYTHON) -m coverage erase 100 | $(VENV_BIN_PYTHON) -m coverage run --parallel -m pytest --showlocals $(verbose_text) -m "not slow" tests 101 | $(VENV_BIN_PYTHON) -m coverage combine 102 | $(VENV_BIN_PYTHON) -m coverage report --fail-under=88 103 | endif 104 | 105 | .PHONY: distclean 106 | distclean: ## Clean build files 107 | @rm -rf dist/ build/ || :; 108 | 109 | # assumes already installed: pyenv and shims 110 | # .rst2html5/ needs to exist, but need not be an actual venv 111 | # .doc requires py310 cuz Sphinx 112 | # .tox contains all supported pyenv versions 113 | .PHONY: configure-pyenv 114 | configure-pyenv: ## Configure pyenv .python-version files 115 | @which pyenv &>/dev/null 116 | if [[ "$?" 
-eq 0 ]]; then 117 | 118 | mkdir -p .venv || :; 119 | pyenv version-name > .venv/.python-version 120 | # mkdir .doc || :; 121 | # echo "3.10.14\n" > .doc/.python-version 122 | mkdir -p .tox || :; 123 | pyenv versions --bare > .tox/.python-version 124 | # mkdir .rst2html5 || :; 125 | 126 | fi 127 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2024-2025 Harrison Erd 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software 13 | without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI Downloads](https://static.pepy.tech/badge/kenobi)](https://pepy.tech/projects/kenobi) 2 | 3 | KenobiDB is a document-based data store abstraction built on Python’s `sqlite3`, offering a simple and efficient way to manage JSON-like data. Its API is highly similar to MongoDB’s, providing familiar operations for insertion, updates, and searches—without the need for a server connection. By removing the complexity of SQL, KenobiDB delivers a secure, high-performance environment with built-in thread safety, async execution, and basic indexing while leveraging the simplicity of a document-based database. Perfect for small applications and prototypes, KenobiDB combines SQLite’s lightweight, serverless setup with the flexibility of document-based storage. Check out the [website](http://patx.github.io/kenobi/) or view the project on [PyPI](https://pypi.org/project/kenobi/). 4 | 5 | ## Features 6 | 7 | - Lightweight and serverless setup using SQLite. 8 | - MongoDB-like API for familiar operations. 9 | - Supports key-value pair searching instead of complex SQL queries. 10 | - Thread safety with `RLock`. 11 | - Asynchronous execution with `ThreadPoolExecutor`. 12 | - Built-in basic indexing for efficient searches. 13 | - Super easy integration. 
14 | - Solid performance 15 | 16 | ## Installation 17 | 18 | You can install KenobiDB using pip: 19 | 20 | ```bash 21 | pip install kenobi 22 | ``` 23 | 24 | Alternatively, for the latest version, copy and paste the `kenobi.py` file into your working directory. 25 | 26 | ## Quick Start 27 | 28 | ```python 29 | from kenobi import KenobiDB 30 | 31 | db = KenobiDB('example.db') 32 | 33 | db.insert({'name': 'John', 'color': 'blue'}) 34 | # Output: True 35 | 36 | db.search('color', 'blue') 37 | # Output: [{'name': 'John', 'color': 'blue'}] 38 | ``` 39 | 40 | ## Overview/Usage 41 | 42 | ### Initialization and Setup 43 | 44 | Initialize the database with a specified file. If the file does not exist, it will be created. SQLite is used for storage, and the database ensures the necessary table and indices are created. 45 | 46 | ```python 47 | db = KenobiDB('example.db') 48 | ``` 49 | 50 | ### Basic Operations 51 | 52 | #### Insert 53 | 54 | Add a single document or multiple documents to the database. 55 | 56 | ```python 57 | db.insert({'name': 'Oden', 'color': 'blue'}) 58 | 59 | db.insert_many([ 60 | {'name': 'Ryan', 'color': 'red'}, 61 | {'name': 'Tom', 'color': 'green'} 62 | ]) 63 | ``` 64 | 65 | #### Remove 66 | 67 | Remove documents matching a specific key-value pair. 68 | 69 | ```python 70 | db.remove('name', 'Oden') 71 | ``` 72 | 73 | #### Update 74 | 75 | Update documents matching a specific key-value pair with new data. 76 | 77 | ```python 78 | db.update('name', 'Ryan', {'color': 'dark'}) 79 | ``` 80 | 81 | #### Purge 82 | 83 | Remove all documents from the database. 84 | 85 | ```python 86 | db.purge() 87 | ``` 88 | 89 | ### Search Operations 90 | 91 | #### All 92 | 93 | Retrieve all documents with optional pagination. 94 | 95 | ```python 96 | db.all(limit=10, offset=0) # With pagination 97 | 98 | db.all() # No pagination 99 | ``` 100 | 101 | #### Search 102 | 103 | Retrieve documents matching a specific key-value pair with optional pagination. 
104 | 105 | ```python 106 | db.search('color', 'blue') 107 | ``` 108 | 109 | #### Glob Search 110 | 111 | Retrieve documents using regex. 112 | 113 | ```python 114 | db.search_pattern('color', 'b*') 115 | ``` 116 | 117 | #### Find Any 118 | 119 | Retrieve documents where a key matches any value in a list. 120 | 121 | ```python 122 | db.find_any('color', ['blue', 'red']) 123 | ``` 124 | 125 | #### Find All 126 | 127 | Retrieve documents where a key matches all values in a list. 128 | 129 | ```python 130 | db.find_all('color', ['blue', 'red']) 131 | ``` 132 | 133 | ### Concurrency and Asynchronous Execution 134 | 135 | KenobiDB uses `RLock` for thread safety and `ThreadPoolExecutor` with a maximum of 5 workers for concurrent operations. 136 | 137 | #### Asynchronous Execution 138 | 139 | Use the `execute_async` method to run functions asynchronously. 140 | 141 | ```python 142 | def insert_document(db, document): 143 | db.insert(document) 144 | 145 | future = db.execute_async(insert_document, db, {'name': 'Luke', 'color': 'green'}) 146 | ``` 147 | 148 | #### Close 149 | 150 | Shut down the thread pool executor. 151 | 152 | ```python 153 | db.close() 154 | ``` 155 | 156 | ## Testing and Contributions 157 | 158 | Contributions are welcome! To test the library: 159 | 160 | 1. Clone the repository. 161 | 2. Report issues as you encounter them. 162 | 3. Run the unittests. 163 | 164 | Feel free to open issues or submit pull requests on the [GitHub repository](https://github.com/patx/kenobi). 165 | 166 | ## Limitations 167 | 168 | KenobiDB is designed for small-scale applications and prototypes. While it provides excellent performance for most operations, it is not intended to replace full-fledged databases for high-scale or enterprise-level applications for that you should use MongoDB. 
169 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /howto.txt: -------------------------------------------------------------------------------- 1 | * Release checklist 2 | 3 | - Adjust coverage fail under minimal percentage 4 | tox-test.ini 5 | .github/workflows/coverage.yml 6 | pyproject.toml [tool.coverage.report] 7 | Makefile target, check 8 | - Run coverage ensure no failures. Does not run slow tests 9 | $ make check 10 | - Run twice 11 | $ make pre-commit 12 | - Remove old branches 13 | $ git branch 14 | $ git branch -D [branch name] 15 | - commit 16 | $ git add . 17 | $ git commit -S -m "" 18 | - tag a commit to trigger .github/workflows/release.yml 19 | $ git tag -as -m "Version 4.0" 4.0 20 | $ git push --follow-tags 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=75.8.0", 4 | "wheel", 5 | "build", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | [project] 10 | name = "kenobi" 11 | dynamic = [ 12 | "optional-dependencies", 13 | ] 14 | version = "4.0" 15 | requires-python = ">=3.9" 16 | dependencies = [] 17 | description = "document based database using sqlite" 18 | readme = "README.md" 19 | 20 | classifiers = [ 21 | "Environment :: Console", 22 | "Development Status :: 3 - Alpha", 23 | "Programming Language :: Python :: 3", 24 | "License :: OSI Approved :: BSD License", 25 | "Intended Audience :: Developers", 26 | "Topic :: Database", 27 | ] 28 | 29 | # https://peps.python.org/pep-0639/ 30 | # https://clearlydefined.io/?sort=releaseDate&sortDesc=true&name=wreck 31 | # waiting for setuptools maintainers to implement metadata v2.4 support 
32 | # https://github.com/pypa/setuptools/issues/4759 33 | license = {file = "LICENSE"} 34 | # license = "BSD-3-Clause" 35 | # license-files = [ 36 | # "LICEN[CS]E*", 37 | # "NOTICE*", 38 | # ] 39 | 40 | authors = [ 41 | {name = "Harrison Erd", email = "harrisonerd@gmail.com"}, 42 | ] 43 | 44 | [project.urls] 45 | "Source code" = 'http://patx.github.io/kenobi' 46 | "Issue tracker" = 'http://patx.github.io/kenobi/issues' 47 | "PyPI Releases" = 'https://pypi.org/project/kenobi' 48 | 49 | [tool.setuptools.packages.find] 50 | where = ["src"] 51 | include = ["kenobi*"] 52 | 53 | [tool.setuptools.dynamic] 54 | optional-dependencies.pip = { file = ['requirements/pip.lock'] } 55 | optional-dependencies.dev = { file = ['requirements/dev.lock'] } 56 | optional-dependencies.manage = { file = ['requirements/manage.lock'] } 57 | 58 | [tool.pytest.ini_options] 59 | markers = [ 60 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 61 | ] 62 | 63 | [tool.coverage.run] 64 | source_pkgs = ["kenobi"] # **REQUIRED** to be set correctly. 
Your package name 65 | branch = true 66 | 67 | [tool.coverage.report] 68 | exclude_lines = [ 69 | "pragma: no cover", 70 | "@abc.abstractmethod", 71 | "@abc.abstractproperty", 72 | "if TYPE_CHECKING:", 73 | "if typing.TYPE_CHECKING:", 74 | ] 75 | precision = 2 76 | ignore_errors = true 77 | skip_covered = true 78 | skip_empty = true 79 | fail_under = 88 80 | show_missing = true 81 | -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | -c pins-validate-pyproject-pep639.in 2 | 3 | coverage 4 | pytest>=7.1 5 | validate-pyproject 6 | -------------------------------------------------------------------------------- /requirements/dev.lock: -------------------------------------------------------------------------------- 1 | coverage==7.6.10 2 | # via -r requirements/dev.in 3 | exceptiongroup==1.2.2 4 | # via pytest 5 | fastjsonschema==2.21.1 6 | # via validate-pyproject 7 | iniconfig==2.0.0 8 | # via pytest 9 | packaging==24.2 10 | # via pytest 11 | pluggy==1.5.0 12 | # via pytest 13 | pytest==8.3.4 14 | # via -r requirements/dev.in 15 | tomli==2.2.1 16 | # via pytest 17 | validate-pyproject==0.23 18 | # via 19 | # -c requirements/pins-validate-pyproject-pep639.in 20 | # -r requirements/dev.in 21 | -------------------------------------------------------------------------------- /requirements/dev.unlock: -------------------------------------------------------------------------------- 1 | coverage 2 | pytest>=7.1 3 | validate-pyproject>=0.23 4 | -------------------------------------------------------------------------------- /requirements/manage.in: -------------------------------------------------------------------------------- 1 | -c pins-virtualenv-cve-2024-53899.in 2 | 3 | pre-commit 4 | -------------------------------------------------------------------------------- /requirements/manage.lock: 
-------------------------------------------------------------------------------- 1 | cfgv==3.4.0 2 | # via pre-commit 3 | distlib==0.3.9 4 | # via virtualenv 5 | filelock==3.17.0 6 | # via virtualenv 7 | identify==2.6.6 8 | # via pre-commit 9 | nodeenv==1.9.1 10 | # via pre-commit 11 | platformdirs==4.3.6 12 | # via virtualenv 13 | pre-commit==4.1.0 14 | # via -r requirements/manage.in 15 | pyyaml==6.0.2 16 | # via pre-commit 17 | virtualenv==20.29.1 18 | # via 19 | # -c requirements/pins-virtualenv-cve-2024-53899.in 20 | # pre-commit 21 | -------------------------------------------------------------------------------- /requirements/manage.unlock: -------------------------------------------------------------------------------- 1 | pre-commit 2 | virtualenv>=20.26.6 3 | -------------------------------------------------------------------------------- /requirements/pins-validate-pyproject-pep639.in: -------------------------------------------------------------------------------- 1 | # pep639 support added 2 | validate-pyproject>=0.23 3 | -------------------------------------------------------------------------------- /requirements/pins-virtualenv-cve-2024-53899.in: -------------------------------------------------------------------------------- 1 | # command injection through activation scripts 2 | # https://github.com/advisories/GHSA-rqc4-2hc7-8c8v 3 | virtualenv>=20.26.6 4 | -------------------------------------------------------------------------------- /requirements/pip.in: -------------------------------------------------------------------------------- 1 | pip>=24.2 2 | setuptools>=75.8.0 3 | -------------------------------------------------------------------------------- /requirements/pip.lock: -------------------------------------------------------------------------------- 1 | 2 | # The following packages are considered to be unsafe in a requirements file: 3 | pip==25.0 4 | # via -r requirements/pip.in 5 | setuptools==75.8.0 6 | # via -r requirements/pip.in 7 | 
-------------------------------------------------------------------------------- /requirements/pip.unlock: -------------------------------------------------------------------------------- 1 | pip>=24.2 2 | setuptools>=75.8.0 3 | -------------------------------------------------------------------------------- /src/kenobi/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. moduleauthor:: Harrison Erd 3 | 4 | .. py:data:: __all__ 5 | :type: tuple[str] 6 | :value: ("KenobiDB",) 7 | 8 | Package exports 9 | 10 | """ 11 | 12 | from .kenobi import KenobiDB 13 | 14 | __all__ = ("KenobiDB",) 15 | -------------------------------------------------------------------------------- /src/kenobi/kenobi.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. moduleauthor:: Harrison Erd 3 | 4 | KenobiDB is a small document-based DB, supporting simple usage including 5 | insertion, removal, and basic search. 6 | Written by Harrison Erd `Profile `_ 7 | `Home `_ 8 | 9 | Copyright Harrison Erd 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | 1. Redistributions of source code must retain the above copyright notice, 15 | this list of conditions and the following disclaimer. 16 | 2. Redistributions in binary form must reproduce the above copyright notice, 17 | this list of conditions and the following disclaimer in the documentation 18 | and/or other materials provided with the distribution. 19 | 3. Neither the name of the copyright holder nor the names of its contributors 20 | may be used to endorse or promote products derived from this software 21 | without specific prior written permission. 
"""
.. moduleauthor:: Harrison Erd

KenobiDB is a small document-based DB, supporting simple usage including
insertion, removal, and basic search.
Written by Harrison Erd (https://patx.github.io/)

Copyright Harrison Erd.  Distributed under the BSD 3-Clause license;
see the LICENSE file for the full text.
"""

import json
import os
import re
import sqlite3
from concurrent.futures import ThreadPoolExecutor
from threading import RLock


class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        cls = type(self)
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        # Retained for backward compatibility only: REGEXP support is
        # registered eagerly on the single connection below, so nothing
        # reads this set anymore.
        self._regexp_connections = set()
        # check_same_thread=False is safe here because every access to the
        # connection is serialized through self._lock.
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        cls._add_regexp_support(self._connection)
        self._initialize_db()

    def _initialize_db(self):
        """
        Create the table and index if they do not exist, and set
        journal mode to WAL.
        """
        with self._lock:
            self._connection.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL
                )
                """
            )
            # NOTE(review): this expression index only covers the literal
            # top-level field named "key"; searches on other fields scan.
            self._connection.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_key
                ON documents (
                    json_extract(data, '$.key')
                )
                """
            )
            # WAL allows concurrent readers alongside a writer.
            self._connection.execute("PRAGMA journal_mode=WAL;")

    @staticmethod
    def _add_regexp_support(conn):
        """
        Register a REGEXP function on the SQLite connection.

        SQLite defines the REGEXP operator syntax but ships no
        implementation; the application must supply one via
        ``create_function``.
        """

        def regexp(pattern, value):
            """Code sqlite3 runs when REGEXP sql encountered. Takes two params.

            Args:
                pattern (str): regex
                value: the value extracted from the document; may be SQL
                    NULL (None) or a non-string scalar.

            Returns:
                bool: True match occurred
            """
            # json_extract yields NULL for documents that lack the key and
            # may yield non-string scalars (ints, floats). Treat NULL as
            # "no match" and coerce the rest to str so re.search does not
            # raise TypeError and abort the whole query.
            if value is None:
                return False
            return re.search(pattern, str(value)) is not None

        conn.create_function("REGEXP", 2, regexp)

    def _get_connection(self):
        """
        Return the active SQLite connection.
        """
        return self._connection

    def insert(self, document):
        """
        Insert a single document (dict) into the database.

        Args:
            document (dict): The document to insert.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If the provided document is not a dictionary.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        with self._lock:
            self._connection.execute(
                "INSERT INTO documents (data) VALUES (?)", (json.dumps(document),)
            )
            self._connection.commit()
            return True

    def insert_many(self, document_list):
        """
        Insert multiple documents (list of dicts) into the database.

        Args:
            document_list (list): The list of documents to insert.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If the provided object is not a list of dicts.
        """
        if not isinstance(document_list, list) or not all(
            isinstance(doc, dict) for doc in document_list
        ):
            raise TypeError("Must insert a list of dicts")
        with self._lock:
            self._connection.executemany(
                "INSERT INTO documents (data) VALUES (?)",
                [(json.dumps(doc),) for doc in document_list],
            )
            self._connection.commit()
            return True

    def remove(self, key, value):
        """
        Remove all documents where the given key matches the specified value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.

        Returns:
            int: Number of documents removed.

        Raises:
            ValueError: If 'key' is empty or 'value' is None.
        """
        if not key or not isinstance(key, str):
            raise ValueError("key must be a non-empty string")
        if value is None:
            raise ValueError("value cannot be None")
        query = "DELETE FROM documents " "WHERE json_extract(data, '$.' || ?) = ?"
        with self._lock:
            result = self._connection.execute(query, (key, value))
            self._connection.commit()
            return result.rowcount

    def update(self, id_key, id_value, new_dict):
        """
        Update documents that match (id_key == id_value) by merging new_dict.

        Args:
            id_key (str): The field name to match.
            id_value (Any): The value to match.
            new_dict (dict): A dictionary of changes to apply.

        Returns:
            bool: True if at least one document was updated, False otherwise.

        Raises:
            TypeError: If new_dict is not a dict.
            ValueError: If id_key is invalid or id_value is None.
        """
        if not isinstance(new_dict, dict):
            raise TypeError("new_dict must be a dictionary")
        if not id_key or not isinstance(id_key, str):
            raise ValueError("id_key must be a non-empty string")
        if id_value is None:
            raise ValueError("id_value cannot be None")

        select_query = (
            "SELECT data FROM documents " "WHERE json_extract(data, '$.' || ?) = ?"
        )
        update_query = (
            "UPDATE documents "
            "SET data = ? "
            "WHERE json_extract(data, '$.' || ?) = ?"
        )
        with self._lock:
            cursor = self._connection.execute(select_query, (id_key, id_value))
            documents = cursor.fetchall()
            if not documents:
                return False
            for row in documents:
                document = json.loads(row[0])
                # A stored JSON value could be a non-object; only dicts can
                # be merged, so skip anything else.
                if not isinstance(document, dict):
                    continue
                document.update(new_dict)
                self._connection.execute(
                    update_query, (json.dumps(document), id_key, id_value)
                )
            self._connection.commit()
            return True

    def purge(self):
        """
        Remove all documents from the database.

        Returns:
            bool: True upon successful purge.
        """
        with self._lock:
            self._connection.execute("DELETE FROM documents")
            self._connection.commit()
            return True

    def all(self, limit=100, offset=0):
        """
        Return a paginated list of all documents.

        Args:
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        query = "SELECT data FROM documents LIMIT ? OFFSET ?"
        with self._lock:
            cursor = self._connection.execute(query, (limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def search(self, key, value, limit=100, offset=0):
        """
        Return a list of documents matching (key == value).

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).

        Raises:
            ValueError: If 'key' is empty or not a string.
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")

        query = (
            "SELECT data FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? "
            "LIMIT ? OFFSET ?"
        )
        with self._lock:
            cursor = self._connection.execute(query, (key, value, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def search_pattern(self, key, pattern, limit=100, offset=0):
        """
        Search documents matching a regex pattern.

        Documents that lack the key are simply not matched (they no longer
        abort the query).

        Args:
            key (str): The document field to match on.
            pattern (str): The regex pattern to match.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).

        Raises:
            ValueError: If the key or pattern is invalid.
        """
        if not key or not isinstance(key, str):
            raise ValueError("key must be a non-empty string")
        if not pattern or not isinstance(pattern, str):
            raise ValueError("pattern must be a non-empty string")

        query = """
            SELECT data FROM documents
            WHERE json_extract(data, '$.' || ?) REGEXP ?
            LIMIT ? OFFSET ?
        """
        with self._lock:
            cursor = self._connection.execute(query, (key, pattern, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def find_any(self, key, value_list):
        """
        Return documents where key matches any value in value_list.

        Args:
            key (str): The document field to match on.
            value_list (list): A list of possible values.

        Returns:
            list: A list of matching documents.
        """
        # Guard: an empty list would render as "IN ()", which is a SQLite
        # syntax error -- and nothing can match an empty value set anyway.
        if not value_list:
            return []
        placeholders = ", ".join(["?"] * len(value_list))
        query = f"""
            SELECT DISTINCT documents.data
            FROM documents, json_each(documents.data, '$.' || ?)
            WHERE json_each.value IN ({placeholders})
        """
        with self._lock:
            cursor = self._connection.execute(query, [key] + value_list)
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def find_all(self, key, value_list):
        """
        Return documents where the key contains all values in value_list.

        Args:
            key (str): The field to match.
            value_list (list): The required values to match.

        Returns:
            list: A list of matching documents. An empty value_list yields
                an empty result (previously a SQL syntax error).
        """
        # Same "IN ()" guard as find_any.
        if not value_list:
            return []
        placeholders = ", ".join(["?"] * len(value_list))
        query = f"""
            SELECT documents.data
            FROM documents
            WHERE (
                SELECT COUNT(DISTINCT value)
                FROM json_each(documents.data, '$.' || ?)
                WHERE value IN ({placeholders})
            ) = ?
        """
        with self._lock:
            cursor = self._connection.execute(
                query, [key] + value_list + [len(value_list)]
            )
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def execute_async(self, func, *args, **kwargs):
        """
        Execute a function asynchronously using a thread pool.

        Args:
            func (callable): The function to execute.
            *args: Arguments for the function.
            **kwargs: Keyword arguments for the function.

        Returns:
            concurrent.futures.Future: A Future object representing
            the execution.
        """
        return self.executor.submit(func, *args, **kwargs)

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
Creates folders as well 35 | :type seq_rel_paths: 36 | 37 | collections.abc.Sequence[str | pathlib.Path] | collections.abc.MutableSet[str | pathlib.Path] 38 | 39 | :param tmp_path: Start absolute path 40 | :type tmp_path: pathlib.Path 41 | :returns: Set of absolute paths of created files 42 | :rtype: set[pathlib.Path] 43 | """ 44 | set_abs_paths = set() 45 | is_seq = seq_rel_paths is not None and ( 46 | (isinstance(seq_rel_paths, Sequence) and not isinstance(seq_rel_paths, str)) 47 | or isinstance(seq_rel_paths, set) 48 | ) 49 | if is_seq: 50 | for posix in seq_rel_paths: 51 | if isinstance(posix, str): 52 | abs_path = tmp_path.joinpath(*posix.split("/")) 53 | elif issubclass(type(posix), PurePath): 54 | if not posix.is_absolute(): 55 | abs_path = tmp_path / posix 56 | else: # pragma: no cover 57 | # already absolute 58 | abs_path = posix 59 | else: 60 | abs_path = None 61 | 62 | if abs_path is not None: 63 | set_abs_paths.add(abs_path) 64 | set_folders.add(abs_path.parent) 65 | abs_path.parent.mkdir(parents=True, exist_ok=True) 66 | abs_path.touch() 67 | else: 68 | abs_path = None 69 | 70 | return set_abs_paths 71 | 72 | yield _method 73 | 74 | # cleanup 75 | if request.node.test_report.outcome == "passed": 76 | for abspath_folder in set_folders: 77 | shutil.rmtree(abspath_folder, ignore_errors=True) 78 | 79 | 80 | @pytest.fixture() 81 | def db_path(tmp_path): 82 | """ 83 | Returns: 84 | pathlib.Path: path to database within pytest managed temporary folder 85 | """ 86 | path_db = tmp_path.joinpath("test_kenobi.db") 87 | 88 | return path_db 89 | 90 | 91 | @pytest.fixture() 92 | def create_db(db_path, request): 93 | """Per test function create database in pytest managed temporary folder 94 | 95 | Usage 96 | 97 | .. code-block:: text 98 | 99 | import pytest 100 | def test_sometest(create_db): 101 | db = create_db() 102 | 103 | Returns: 104 | KenobiDB: database instance 105 | """ 106 | db = KenobiDB(db_path) 107 | 108 | def cleanup(): 109 | """Pretty way but works. 
110 | 111 | Purposefully refrain from: purge database or delete database file. 112 | 113 | Let pytest manage removing the db file. So can later 114 | debug a test function in a working debug environment. 115 | """ 116 | db.close() 117 | 118 | def _fcn(): 119 | """Initializes database. After test function close database. 120 | 121 | - purposefully induce a failure with :code:`assert False` 122 | 123 | - go to the temp folder 124 | 125 | - activate the venv 126 | 127 | - open a REPR with :command:`python` 128 | 129 | """ 130 | 131 | return db 132 | 133 | request.addfinalizer(cleanup) 134 | 135 | return _fcn 136 | -------------------------------------------------------------------------------- /tests/test_kenobi.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. moduleauthor:: Harrison Erd 3 | 4 | Not slow 5 | 6 | .. code-block:: shell 7 | 8 | python -m coverage run --source='kenobi.kenobi' -m pytest \ 9 | --showlocals -m "not slow" tests/test_kenobi.py && coverage report \ 10 | --data-file=.coverage --include="**/kenobi.py" 11 | 12 | All tests 13 | 14 | .. 
"""
.. moduleauthor:: Harrison Erd

Not slow

.. code-block:: shell

    python -m coverage run --source='kenobi.kenobi' -m pytest \
    --showlocals -m "not slow" tests/test_kenobi.py && coverage report \
    --data-file=.coverage --include="**/kenobi.py"

All tests

.. code-block:: shell

    python -m coverage run --source='kenobi.kenobi' -m pytest \
    --showlocals tests/test_kenobi.py && coverage report \
    --data-file=.coverage --include="**/kenobi.py"

"""

import time
from contextlib import nullcontext as does_not_raise
from functools import partial

import pytest

# (method name, payload, expectation, expected count, expected document(s))
testdata_insert_single_document = (
    ("insert", {"key": "value"}, does_not_raise(), 1, {"key": "value"}),
    (
        "insert_many",
        [{"key": "value1"}, {"key": "value2"}],
        does_not_raise(),
        2,
        [{"key": "value1"}, {"key": "value2"}],
    ),
    ("insert", 0.1234, pytest.raises(TypeError), 0, {}),
    ("insert", None, pytest.raises(TypeError), 0, {}),
    ("insert_many", [0.1234, 0.1234], pytest.raises(TypeError), 0, []),
)
ids_insert_single_document = (
    "Single document",
    "Multiple documents",
    "document invalid unsupported type",
    "document invalid None",
    "Multiple documents unsupported types",
)


@pytest.mark.parametrize(
    "meth, document, expectation, result_count_expected, document_expected",
    testdata_insert_single_document,
    ids=ids_insert_single_document,
)
def test_insert_single_document(
    meth,
    document,
    expectation,
    result_count_expected,
    document_expected,
    create_db,
):
    """Insert one or many documents and verify the stored contents."""
    # pytest -vv --showlocals --log-level INFO -k "test_insert_single_document" tests
    # prepare
    db = create_db()
    fcn = getattr(db, meth, None)
    if fcn is not None:
        # insert document(s); invalid payloads must raise inside expectation
        with expectation:
            fcn(document)

    # act
    results = db.all()
    # verify
    assert len(results) == result_count_expected
    if isinstance(expectation, does_not_raise):
        # on success every inserted document must be retrievable
        if isinstance(document, dict):
            assert document_expected in results
        elif isinstance(document, list):
            for d_document in document:
                assert d_document in results


testdata_remove_document = (
    ({"key": "value"}, "key", "value", does_not_raise(), 0),
    ({"key": "value"}, None, "value", pytest.raises(ValueError), 1),
    ({"key": "value"}, 0.12345, "value", pytest.raises(ValueError), 1),
    ({"key": "value"}, "key", None, pytest.raises(ValueError), 1),
)
ids_remove_document = (
    "remove one document",
    "key None",
    "key unsupported type",
    "value None",
)


@pytest.mark.parametrize(
    "document, query_key, query_val, expectation, results_count_expected",
    testdata_remove_document,
    ids=ids_remove_document,
)
def test_remove_document(
    document, query_key, query_val, expectation, results_count_expected, create_db
):
    """Remove a document by key:value; invalid keys/values raise ValueError."""
    # pytest -vv --showlocals --log-level INFO -k "test_remove_document" tests
    db = create_db()
    db.insert(document)
    with expectation:
        db.remove(query_key, query_val)
    assert len(db.all()) == results_count_expected


testdata_update_document = (
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        "id",
        1,
        "key",
        "new_value",
        does_not_raise(),
        1,
        True,
    ),
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        None,
        1,
        "key",
        "value",
        pytest.raises(ValueError),
        1,
        False,
    ),
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        "id",
        None,
        "key",
        "value",
        pytest.raises(ValueError),
        1,
        False,
    ),
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        "id",
        2,
        "key",
        "value",
        does_not_raise(),
        1,
        False,
    ),
)
ids_update_document = (
    "Update a document",
    "id_field None ValueError",
    "id_val None ValueError",
    "could not update nonexistent document",
)


@pytest.mark.parametrize(
    (
        "document, updated_fields, id_field, id_val, val_key, "
        "val_expected, expectation, results_count_expected, is_success_expected"
    ),
    testdata_update_document,
    ids=ids_update_document,
)
def test_update_document(
    document,
    updated_fields,
    id_field,
    id_val,
    val_key,
    val_expected,
    expectation,
    results_count_expected,
    is_success_expected,
    create_db,
):
    """Update a document by id field; check success flag and final value."""
    # pytest -vv --showlocals --log-level INFO -k "test_update_document" tests
    db = create_db()
    db.insert(document)
    with expectation:
        is_success_actual = db.update(id_field, id_val, updated_fields)
        if isinstance(expectation, does_not_raise):
            assert is_success_actual is is_success_expected
    # the stored document must hold the expected value either way
    results = db.all()
    assert len(results) == results_count_expected
    assert results[0][val_key] == val_expected


def test_purge_database(create_db):
    """Purging removes every stored document."""
    # pytest -vv --showlocals --log-level INFO -k "test_purge_database" tests
    db = create_db()
    db.insert_many([{"key": "value1"}, {"key": "value2"}])
    db.purge()
    assert len(db.all()) == 0
testdata_search_by_key_value = (
    ([{"key": "value1"}, {"key": "value2"}], "key", "value1", does_not_raise(), 1),
    (
        [{"key": "value1"}, {"key": "value2"}],
        None,
        "value1",
        pytest.raises(ValueError),
        1,
    ),
    (
        [{"key": "value1"}, {"key": "value2"}],
        0.2345,
        "value1",
        pytest.raises(ValueError),
        1,
    ),
)
ids_search_by_key_value = (
    "successful query",
    "query_key None",
    "query_key unsupported type",
)


@pytest.mark.parametrize(
    "documents, query_key, query_val, expectation, results_count_expected",
    testdata_search_by_key_value,
    ids=ids_search_by_key_value,
)
def test_search_by_key_value(
    documents,
    query_key,
    query_val,
    expectation,
    results_count_expected,
    create_db,
):
    """Search documents by key:value; invalid keys raise ValueError."""
    # pytest -vv --showlocals --log-level INFO -k "test_search_by_key_value" tests
    db = create_db()
    db.insert_many(documents)
    with expectation:
        results = db.search(query_key, query_val)
    if isinstance(expectation, does_not_raise):
        assert len(results) == results_count_expected
        assert results[0] == documents[0]


# shared scalar-field fixture data for find_any parametrization
_three_docs = [{"key": "value1"}, {"key": "value2"}, {"key": "value3"}]

testdata_find_any = (
    (_three_docs, "key", ["value1", "value3"], does_not_raise(), 2, (0, 2)),
    (_three_docs, None, ["value1", "value3"], does_not_raise(), 0, ()),
    (_three_docs, 0.1234, ["value1", "value3"], does_not_raise(), 0, ()),
    (_three_docs, "key", [None, None], does_not_raise(), 0, ()),
    (_three_docs, "key", [0.1234, 0.1234], does_not_raise(), 0, ()),
    pytest.param(
        _three_docs,
        "key",
        {0.1234, 0.1234},
        pytest.raises(TypeError),
        0,
        (),
        marks=pytest.mark.xfail,
    ),
    pytest.param(
        _three_docs,
        "key",
        (0.1234, 0.1234),
        pytest.raises(TypeError),
        0,
        (),
        marks=pytest.mark.xfail,
    ),
)
ids_find_any = (
    "successful query",
    "key None",
    "key unsupported type",
    "query vals list None None",
    "query vals list both unsupported type",
    "query vals set both unsupported type BUG",
    "query vals tuple both unsupported type BUG",
)


@pytest.mark.parametrize(
    (
        "documents, query_key, query_vals, expectation, "
        "results_count_expected, t_documents_idxs"
    ),
    testdata_find_any,
    ids=ids_find_any,
)
def test_find_any(
    documents,
    query_key,
    query_vals,
    expectation,
    results_count_expected,
    t_documents_idxs,
    create_db,
):
    """Find documents where a key matches any value in a list."""
    # pytest -vv --showlocals --log-level INFO -k "test_find_any" tests
    db = create_db()
    db.insert_many(documents)

    with expectation:
        results = db.find_any(query_key, query_vals)
    if isinstance(expectation, does_not_raise):
        assert len(results) == results_count_expected
        for doc_idx in t_documents_idxs:
            assert documents[doc_idx] in results


# shared array-field fixture data for find_all parametrization
_array_docs = [
    {"key": ["value1", "value2"]},
    {"key": ["value1"]},
    {"key": ["value2", "value3"]},
]

testdata_find_all = (
    (_array_docs, "key", ["value1", "value2"], does_not_raise(), 1, (0,)),
    pytest.param(
        _array_docs,
        "key",
        {"value1", "value2"},
        pytest.raises(TypeError),
        1,
        (),
        marks=pytest.mark.xfail,
    ),
    pytest.param(
        _array_docs,
        "key",
        ("value1", "value2"),
        pytest.raises(TypeError),
        1,
        (),
        marks=pytest.mark.xfail,
    ),
)
ids_find_all = (
    "successful query",
    "query vals set both unsupported type BUG",
    "query vals tuple both unsupported type BUG",
)


@pytest.mark.parametrize(
    (
        "documents, query_key, query_vals, expectation, "
        "results_count_expected, t_documents_idxs"
    ),
    testdata_find_all,
    ids=ids_find_all,
)
def test_find_all(
    documents,
    query_key,
    query_vals,
    expectation,
    results_count_expected,
    t_documents_idxs,
    create_db,
):
    """Find documents where a key matches all values in a list."""
    # pytest -vv --showlocals --log-level INFO -k "test_find_all" tests
    db = create_db()
    db.insert_many(documents)
    with expectation:
        results = db.find_all(query_key, query_vals)
    if isinstance(expectation, does_not_raise):
        assert len(results) == results_count_expected
        for doc_idx in t_documents_idxs:
            assert results[0] == documents[doc_idx]
INFO -k "test_find_all" tests 517 | # prepare 518 | db = create_db() 519 | db.insert_many(documents) 520 | # act 521 | with expectation: 522 | results = db.find_all(query_key, query_vals) 523 | # verify 524 | if isinstance(expectation, does_not_raise): 525 | results_count_actual = len(results) 526 | assert results_count_actual == results_count_expected 527 | for doc_idx in t_documents_idxs: 528 | assert results[0] == documents[doc_idx] 529 | 530 | 531 | def test_pagination_all(create_db): 532 | """Test paginated retrieval of all documents.""" 533 | # pytest -vv --showlocals --log-level INFO -k "test_pagination_all" tests 534 | documents = [{"key": f"value{i}"} for i in range(10)] 535 | results_count_expected = 5 536 | 537 | # prepare 538 | db = create_db() 539 | db.insert_many(documents) 540 | # act 541 | results = db.all(limit=5, offset=0) 542 | # verify 543 | results_count_actual = len(results) 544 | assert results_count_actual == results_count_expected 545 | assert results == documents[:5] 546 | 547 | 548 | def test_pagination_search(create_db): 549 | """Test paginated search by key:value.""" 550 | # pytest -vv --showlocals --log-level INFO -k "test_pagination_search" tests 551 | documents = [{"key": f"value{i}"} for i in range(10)] 552 | results_count_expected = 1 553 | 554 | # prepare 555 | db = create_db() 556 | db.insert_many(documents) 557 | # act 558 | results = db.search("key", "value1", limit=1, offset=0) 559 | # verify 560 | results_count_actual = len(results) 561 | assert results_count_actual == results_count_expected 562 | assert results[0] == {"key": "value1"} 563 | 564 | 565 | def db_task(fcn, doc): 566 | """Function usable by thread pool executor""" 567 | fcn(doc) 568 | 569 | 570 | testdata_concurrent_inserts = ( 571 | ( 572 | [{"key": f"value{i}"} for i in range(50)], 573 | does_not_raise(), 574 | 50, 575 | ), 576 | ) 577 | ids_concurrent_inserts = ("successful concurrent inserts",) 578 | 579 | 580 | @pytest.mark.parametrize( 581 | "documents, 
expectation, results_count_expected", 582 | testdata_concurrent_inserts, 583 | ids=ids_concurrent_inserts, 584 | ) 585 | def test_concurrent_inserts(documents, expectation, results_count_expected, create_db): 586 | """Test concurrent inserts to ensure thread safety.""" 587 | # pytest -vv --showlocals --log-level INFO -k "test_concurrent_inserts" tests 588 | # prepare 589 | db = create_db() 590 | # pytest doesn't support inner functions 591 | insert_task = partial(db_task, db.insert) 592 | 593 | # act 594 | with expectation: 595 | with db.executor as executor: 596 | executor.map(insert_task, documents) 597 | # verify 598 | results = db.all() 599 | results_count_actual = len(results) 600 | assert results_count_actual == results_count_expected 601 | 602 | 603 | def test_performance_bulk_insert(create_db): 604 | """Test the performance of bulk inserting a large number of documents.""" 605 | # pytest -vv --showlocals --log-level INFO -k "test_performance_bulk_insert" tests 606 | documents = [{"key": f"value{i}"} for i in range(1000)] 607 | duration_max_expected = 5 608 | # prepare 609 | db = create_db() 610 | start_time = time.time() 611 | # act 612 | db.insert_many(documents) 613 | end_time = time.time() 614 | duration_actual = end_time - start_time 615 | # verify 616 | assert duration_actual < duration_max_expected, "Bulk insert took too long" 617 | 618 | 619 | def test_safe_query_handling(create_db): 620 | """Test safe handling of potentially harmful input to prevent SQL injection.""" 621 | # pytest -vv --showlocals --log-level INFO -k "test_safe_query_handling" tests 622 | document = {"key": "value"} 623 | results_count_expected = 0 624 | # prepare 625 | db = create_db() 626 | db.insert(document) 627 | # act 628 | results = db.search("key", "value OR 1=1") 629 | # verify 630 | results_count_actual = len(results) 631 | assert ( 632 | results_count_actual == results_count_expected 633 | ), "Unsafe query execution detected" 634 | 635 | 636 | @pytest.mark.slow 637 | def 
test_large_dataset(create_db): 638 | """Stress test: Insert and retrieve a large number of documents.""" 639 | # pytest -vv --showlocals --log-level INFO -k "test_large_dataset" tests 640 | num_docs = 1_000_000 641 | documents = [{"key": f"value{i}"} for i in range(num_docs)] 642 | duration_1M_inserts_max = 300 643 | 644 | # prepare 645 | db = create_db() 646 | 647 | # Measure insertion performance 648 | start_time = time.time() 649 | db.insert_many(documents) 650 | end_time = time.time() 651 | duration_1M_inserts_actual = end_time - start_time 652 | 653 | # Ensure insertion is reasonably fast 654 | assert ( 655 | duration_1M_inserts_actual < duration_1M_inserts_max 656 | ), "Inserting 1,000,000 documents took too long" 657 | msg_info = f"Inserted {num_docs} documents in {duration_1M_inserts_actual} seconds" 658 | print(msg_info) 659 | 660 | # Measure retrieval performance 661 | start_time = time.time() 662 | all_docs = db.all(limit=num_docs) 663 | end_time = time.time() 664 | retrieval_duration_actual = end_time - start_time 665 | docs_count_actual = len(all_docs) 666 | 667 | # Ensure retrieval is correct and performant 668 | assert docs_count_actual == num_docs, "Not all documents were retrieved" 669 | assert ( 670 | retrieval_duration_actual < duration_1M_inserts_max 671 | ), "Retrieving 1,000,000 documents took too long" 672 | msg_info = f"Retrieved {docs_count_actual} documents in {retrieval_duration_actual} seconds" 673 | print(msg_info) 674 | 675 | 676 | testdata_malformed_json_in_update = ( 677 | ( 678 | {"id": 1, "key": "value"}, 679 | pytest.raises(TypeError), 680 | ), 681 | ) 682 | ids_malformed_json_in_update = ("Insert a malformed document",) 683 | 684 | 685 | @pytest.mark.parametrize( 686 | "malformed_document, expectation", 687 | testdata_malformed_json_in_update, 688 | ids=ids_malformed_json_in_update, 689 | ) 690 | def test_malformed_json_in_update(malformed_document, expectation, create_db): 691 | """Test handling malformed JSON in update.""" 692 
| # pytest -vv --showlocals --log-level INFO -k "test_malformed_json_in_update" tests 693 | # prepare 694 | db = create_db() 695 | db.insert(malformed_document) 696 | 697 | # Attempt to update with malformed JSON structure 698 | with expectation: 699 | db.update("id", 1, "not a dict") 700 | --------------------------------------------------------------------------------