├── .gitignore ├── .ideas ├── col.py ├── collections.py └── tables.py ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── NOTICE.txt ├── README.md ├── docs └── index.html ├── howto.txt ├── pyproject.toml ├── requirements ├── dev.in ├── dev.lock ├── dev.unlock ├── manage.in ├── manage.lock ├── manage.unlock ├── pins-validate-pyproject-pep639.in ├── pins-virtualenv-cve-2024-53899.in ├── pip.in ├── pip.lock └── pip.unlock ├── src └── kenobi │ ├── __init__.py │ └── kenobi.py └── tests ├── __init__.py ├── conftest.py └── test_kenobi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution, now with MongoDB-like collection support.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        # check_same_thread=False because executor worker threads may use the
        # connection; every access is serialized through self._lock.
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        self._add_regexp_support(self._connection)
        self._initialize_db()

    def _initialize_db(self):
        """
        Create the table and index if they do not exist, and set
        journal mode to WAL.
        """
        with self._lock:
            self._connection.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL,
                    collection TEXT NOT NULL DEFAULT 'default'
                )
                """
            )
            # Expression index on the commonly queried '$.key' field.
            self._connection.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_key
                ON documents (
                    json_extract(data, '$.key')
                )
                """
            )
            self._connection.execute("PRAGMA journal_mode=WAL;")

    @staticmethod
    def _add_regexp_support(conn):
        """
        Add REGEXP function support to the SQLite connection.
        """

        def regexp(pattern, value):
            """Code sqlite3 runs when REGEXP sql encountered. Takes two params.

            Args:
                pattern (str): regex
                value (Any): column value the regex is matched against

            Returns:
                bool: True match occurred
            """
            # Fix: json_extract() yields SQL NULL for absent keys; sqlite3
            # hands that to this callback as None, and re.search(pattern,
            # None) raises TypeError, which SQLite surfaces as
            # OperationalError. Treat NULL as "no match" and coerce
            # non-string values so numeric JSON fields are matchable.
            if value is None:
                return False
            return re.search(pattern, str(value)) is not None

        conn.create_function("REGEXP", 2, regexp)

    def insert(self, document, collection="default"):
        """
        Insert a single document into a specific collection.

        Args:
            document (dict): The document to insert.
            collection (str): The collection name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If document is not a dict.
            ValueError: If collection is not a non-empty string.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        with self._lock:
            self._connection.execute(
                "INSERT INTO documents (data, collection) VALUES (?, ?)",
                (json.dumps(document), collection),
            )
            self._connection.commit()
        return True

    def insert_many(self, document_list, collection="default"):
        """
        Insert multiple documents into a specific collection.

        Args:
            document_list (list): The list of documents to insert.
            collection (str): The collection name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If document_list is not a list of dicts.
            ValueError: If collection is not a non-empty string.
        """
        if not isinstance(document_list, list) or not all(
            isinstance(doc, dict) for doc in document_list
        ):
            raise TypeError("Must insert a list of dicts")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        with self._lock:
            self._connection.executemany(
                "INSERT INTO documents (data, collection) VALUES (?, ?)",
                [(json.dumps(doc), collection) for doc in document_list],
            )
            self._connection.commit()
        return True

    def remove(self, key, value, collection="default"):
        """
        Remove all documents from a specific collection where key matches value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.
            collection (str): The collection name. Defaults to 'default'.

        Returns:
            int: Number of documents removed.
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")
        if value is None:
            raise ValueError("Value cannot be None")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        # The key is bound as a parameter and spliced into the JSON path,
        # so it is never interpolated into the SQL text itself.
        query = (
            "DELETE FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ?"
        )
        with self._lock:
            result = self._connection.execute(query, (key, value, collection))
            self._connection.commit()
            return result.rowcount

    def search(self, key, value, collection="default", limit=100, offset=0):
        """
        Search documents in a specific collection matching (key == value).

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            collection (str): The collection name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")

        query = (
            "SELECT data FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ? "
            "LIMIT ? OFFSET ?"
        )
        with self._lock:
            cursor = self._connection.execute(
                query, (key, value, collection, limit, offset)
            )
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def all(self, collection="default", limit=100, offset=0):
        """
        Return a paginated list of all documents in a specific collection.

        Args:
            collection (str): The collection name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")

        query = "SELECT data FROM documents WHERE collection = ? LIMIT ? OFFSET ?"
        with self._lock:
            cursor = self._connection.execute(query, (collection, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def list_collections(self):
        """
        List all unique collections in the database.

        Returns:
            list: A list of collection names.
        """
        query = "SELECT DISTINCT collection FROM documents"
        with self._lock:
            cursor = self._connection.execute(query)
            return [row[0] for row in cursor.fetchall()]

    def remove_collection(self, collection):
        """
        Remove all documents in a specific collection.

        Args:
            collection (str): The collection name.

        Returns:
            int: Number of documents removed.
        """
        if not isinstance(collection, str) or not collection:
            raise ValueError("Collection must be a non-empty string")
        query = "DELETE FROM documents WHERE collection = ?"
        with self._lock:
            result = self._connection.execute(query, (collection,))
            self._connection.commit()
            return result.rowcount

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution, now with MongoDB-like collection and table support.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        self._add_regexp_support(self._connection)
        self._initialize_db()

    def _initialize_db(self):
        """
        Create the table and index if they do not exist, and set
        journal mode to WAL.
        """
        with self._lock:
            self._connection.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL,
                    collection TEXT NOT NULL DEFAULT 'default',
                    table_name TEXT NOT NULL DEFAULT 'default'
                )
                """
            )
            self._connection.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_key
                ON documents (
                    json_extract(data, '$.key')
                )
                """
            )
            self._connection.execute("PRAGMA journal_mode=WAL;")

    @staticmethod
    def _add_regexp_support(connection):
        """
        Register a REGEXP implementation on the SQLite connection so SQL
        queries may use the REGEXP operator.
        """

        def regexp(pattern, value):
            """SQLite callback for REGEXP: True when *pattern* matches *value*.

            Args:
                pattern (str): regex
                value (str): text blob the regex parses

            Returns:
                bool: True match occurred
            """
            return re.search(pattern, value) is not None

        connection.create_function("REGEXP", 2, regexp)

    @staticmethod
    def _check_scope(name, what):
        """Validate a collection/table label; raise ValueError otherwise."""
        if not isinstance(name, str) or not name:
            raise ValueError(f"{what} must be a non-empty string")

    @staticmethod
    def _check_key(key):
        """Validate a document field name; raise ValueError otherwise."""
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")

    def insert(self, document, collection="default", table_name="default"):
        """
        Insert a single document into a specific collection and table.

        Args:
            document (dict): The document to insert.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")
        row = (json.dumps(document), collection, table_name)
        with self._lock:
            self._connection.execute(
                "INSERT INTO documents (data, collection, table_name) VALUES (?, ?, ?)",
                row,
            )
            self._connection.commit()
        return True

    def insert_many(self, document_list, collection="default", table_name="default"):
        """
        Insert multiple documents into a specific collection and table.

        Args:
            document_list (list): The list of documents to insert.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.

        Returns:
            bool: True upon successful insertion.
        """
        if not isinstance(document_list, list) or any(
            not isinstance(doc, dict) for doc in document_list
        ):
            raise TypeError("Must insert a list of dicts")
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")
        rows = [(json.dumps(doc), collection, table_name) for doc in document_list]
        with self._lock:
            self._connection.executemany(
                "INSERT INTO documents (data, collection, table_name) VALUES (?, ?, ?)",
                rows,
            )
            self._connection.commit()
        return True

    def remove(self, key, value, collection="default", table_name="default"):
        """
        Remove all documents from a specific collection and table where
        key matches value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.

        Returns:
            int: Number of documents removed.
        """
        self._check_key(key)
        if value is None:
            raise ValueError("Value cannot be None")
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")
        sql = (
            "DELETE FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ? AND table_name = ?"
        )
        with self._lock:
            cur = self._connection.execute(sql, (key, value, collection, table_name))
            self._connection.commit()
            return cur.rowcount

    def search(
        self,
        key,
        value,
        collection="default",
        table_name="default",
        limit=100,
        offset=0,
    ):
        """
        Search documents in a specific collection and table matching
        (key == value).

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).
        """
        self._check_key(key)
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")

        sql = (
            "SELECT data FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? AND collection = ? AND table_name = ? "
            "LIMIT ? OFFSET ?"
        )
        params = (key, value, collection, table_name, limit, offset)
        with self._lock:
            cur = self._connection.execute(sql, params)
            return [json.loads(blob) for (blob,) in cur.fetchall()]

    def all(self, collection="default", table_name="default", limit=100, offset=0):
        """
        Return a paginated list of all documents in a specific collection
        and table.

        Args:
            collection (str): The collection name. Defaults to 'default'.
            table_name (str): The table name. Defaults to 'default'.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        self._check_scope(collection, "Collection")
        self._check_scope(table_name, "Table name")

        sql = "SELECT data FROM documents WHERE collection = ? AND table_name = ? LIMIT ? OFFSET ?"
        with self._lock:
            cur = self._connection.execute(sql, (collection, table_name, limit, offset))
            return [json.loads(blob) for (blob,) in cur.fetchall()]

    def list_collections(self):
        """
        List all unique collections in the database.

        Returns:
            list: A list of collection names.
        """
        with self._lock:
            cur = self._connection.execute("SELECT DISTINCT collection FROM documents")
            return [name for (name,) in cur.fetchall()]

    def list_tables(self):
        """
        List all unique table names in the database.

        Returns:
            list: A list of table names.
        """
        with self._lock:
            cur = self._connection.execute("SELECT DISTINCT table_name FROM documents")
            return [name for (name,) in cur.fetchall()]

    def remove_collection(self, collection):
        """
        Remove all documents in a specific collection.

        Args:
            collection (str): The collection name.

        Returns:
            int: Number of documents removed.
        """
        self._check_scope(collection, "Collection")
        with self._lock:
            cur = self._connection.execute(
                "DELETE FROM documents WHERE collection = ?", (collection,)
            )
            self._connection.commit()
            return cur.rowcount

    def remove_table(self, table_name):
        """
        Remove all documents in a specific table.

        Args:
            table_name (str): The table name.

        Returns:
            int: Number of documents removed.
        """
        self._check_scope(table_name, "Table name")
        with self._lock:
            cur = self._connection.execute(
                "DELETE FROM documents WHERE table_name = ?", (table_name,)
            )
            self._connection.commit()
            return cur.rowcount

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        # NOTE(review): this set is never read or written anywhere in the
        # class; kept only for attribute compatibility with existing users.
        self._regexp_connections = set()
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        # REGEXP support is registered eagerly here, at construction time
        # (the previous "lazily" comment was inaccurate).
        self._add_regexp_support(self._connection)

    def _add_regexp_support(self, conn):
        """
        Add REGEXP function support to the SQLite connection.
        """

        def regexp(pattern, value):
            """Code sqlite3 runs when REGEXP sql encountered. Takes two params.
            inner function is untestable, a module level function is testable

            Args:
                pattern (str): regex
                value (str): text blob the regex parses

            Returns:
                bool: True match occurred
            """
            return re.search(pattern, value) is not None

        conn.create_function("REGEXP", 2, regexp)

    def table(self, name):
        """
        Access or create a specific table.

        Args:
            name (str): The name of the table. Must be a valid SQL
                identifier (letters, digits, underscores; not starting
                with a digit).

        Returns:
            KenobiTable: An object for interacting with the table.

        Raises:
            ValueError: If the name is empty, not a string, or not a
                valid identifier.
        """
        if not name or not isinstance(name, str):
            raise ValueError("Table name must be a non-empty string.")
        # Security fix: the table name is later interpolated into SQL with
        # f-strings (identifiers cannot be bound as parameters), so an
        # arbitrary string here would allow SQL injection. Restrict names
        # to plain identifiers before any SQL is built.
        if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name):
            raise ValueError("Table name must be a valid SQL identifier.")
        return KenobiTable(self, name)

    def execute_async(self, func, *args, **kwargs):
        """
        Execute a function asynchronously using a thread pool.

        Args:
            func (callable): The function to execute.
            *args: Arguments for the function.
            **kwargs: Keyword arguments for the function.

        Returns:
            concurrent.futures.Future: A Future object representing
            the execution.
        """
        return self.executor.submit(func, *args, **kwargs)

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
class KenobiTable:
    """
    A class to represent and interact with a specific table within KenobiDB.
    """

    def __init__(self, db, name):
        """Class constructor.

        Args:
            db (KenobiDB): The owning database; supplies the shared
                connection and lock.
            name (str): The table name. Must be a valid SQL identifier.

        Raises:
            ValueError: If the name is not a valid identifier.
        """
        # Security fix: every query below interpolates the table name with
        # f-strings (SQLite cannot bind identifiers as parameters), so an
        # unchecked name is a SQL injection vector. Allow identifiers only.
        if not isinstance(name, str) or not re.fullmatch(
            r"[A-Za-z_][A-Za-z0-9_]*", name
        ):
            raise ValueError("Table name must be a valid SQL identifier.")
        self.db = db
        self.name = name
        self._lock = db._lock
        self._create_table()

    def _create_table(self):
        """
        Create the table if it does not exist.
        """
        with self._lock:
            self.db._connection.execute(
                f"""
                CREATE TABLE IF NOT EXISTS {self.name} (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL
                )
                """
            )

    def insert(self, document):
        """
        Insert a document into this table.

        Args:
            document (dict): The document to insert.

        Returns:
            bool: True upon successful insertion.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        with self._lock:
            self.db._connection.execute(
                f"INSERT INTO {self.name} (data) VALUES (?)", (json.dumps(document),)
            )
            self.db._connection.commit()
        return True

    def rename(self, new_name):
        """
        Rename the table.

        Args:
            new_name (str): The new name of the table. Must be a valid
                SQL identifier.

        Raises:
            ValueError: If new_name is empty, not a string, or not a
                valid identifier.
        """
        if not new_name or not isinstance(new_name, str):
            raise ValueError("New table name must be a non-empty string.")
        # Same injection guard as __init__: the name goes into SQL verbatim.
        if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", new_name):
            raise ValueError("New table name must be a valid SQL identifier.")
        with self._lock:
            self.db._connection.execute(f"ALTER TABLE {self.name} RENAME TO {new_name}")
            self.name = new_name

    def drop(self):
        """
        Drop the table.
        """
        with self._lock:
            self.db._connection.execute(f"DROP TABLE {self.name}")

    def all(self, limit=100, offset=0):
        """
        Return a paginated list of all documents in the table.

        Args:
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        query = f"SELECT data FROM {self.name} LIMIT ? OFFSET ?"
        with self._lock:
            cursor = self.db._connection.execute(query, (limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def search(self, key, value, limit=100, offset=0):
        """
        Return a list of documents matching (key == value) in the table.

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")

        # The key is bound as a parameter inside the JSON path, so it needs
        # no identifier validation.
        query = (
            f"SELECT data FROM {self.name} "
            "WHERE json_extract(data, '$.' || ?) = ? "
            "LIMIT ? OFFSET ?"
        )
        with self._lock:
            cursor = self.db._connection.execute(query, (key, value, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def remove(self, key, value):
        """
        Remove all documents where the given key matches the specified value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.

        Returns:
            int: Number of documents removed.
        """
        if not key or not isinstance(key, str):
            raise ValueError("key must be a non-empty string")
        if value is None:
            raise ValueError("value cannot be None")
        query = f"DELETE FROM {self.name} " "WHERE json_extract(data, '$.' || ?) = ?"
        with self._lock:
            result = self.db._connection.execute(query, (key, value))
            self.db._connection.commit()
            return result.rowcount

    def update(self, id_key, id_value, new_dict):
        """
        Update documents that match (id_key == id_value) by merging new_dict.

        Args:
            id_key (str): The field name to match.
            id_value (Any): The value to match.
            new_dict (dict): A dictionary of changes to apply.

        Returns:
            bool: True if at least one document was updated, False otherwise.
        """
        if not isinstance(new_dict, dict):
            raise TypeError("new_dict must be a dictionary")
        if not id_key or not isinstance(id_key, str):
            raise ValueError("id_key must be a non-empty string")
        if id_value is None:
            raise ValueError("id_value cannot be None")

        select_query = (
            f"SELECT data FROM {self.name} " "WHERE json_extract(data, '$.' || ?) = ?"
        )
        update_query = (
            f"UPDATE {self.name} "
            "SET data = ? "
            "WHERE json_extract(data, '$.' || ?) = ?"
        )
        with self._lock:
            cursor = self.db._connection.execute(select_query, (id_key, id_value))
            documents = cursor.fetchall()
            if not documents:
                return False
            for row in documents:
                document = json.loads(row[0])
                # Non-dict JSON values (e.g. a bare list) cannot be merged.
                if not isinstance(document, dict):
                    continue
                document.update(new_dict)
                self.db._connection.execute(
                    update_query, (json.dumps(document), id_key, id_value)
                )
            self.db._connection.commit()
            return True
38 | - id: blackdoc 39 | additional_dependencies: ["black==25.1.0"] 40 | - id: blackdoc-autoupdate-black 41 | 42 | - repo: https://github.com/PyCQA/flake8 43 | rev: 7.1.1 44 | hooks: 45 | - id: flake8 46 | args: ["--ignore=E704,E203,W503,W605,W293,W291,E501"] 47 | 48 | - repo: https://github.com/PyCQA/isort 49 | rev: 6.0.0 50 | hooks: 51 | - id: isort 52 | 53 | - repo: https://github.com/econchick/interrogate 54 | rev: 1.7.0 # or master if you're bold 55 | hooks: 56 | - id: interrogate 57 | args: ["-vv", "--fail-under=100", "--omit-covered-files", "--ignore-init-module"] 58 | pass_filenames: false # needed if excluding files with pyproject.toml 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Harrison Erd 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its 14 | contributors may be used to endorse or promote products derived from this 15 | software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 18 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 19 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 21 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 24 | OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 26 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Makefile 2 | 3 | # missing .in and .unlock files 4 | recursive-include requirements *.in 5 | recursive-include requirements *.unlock 6 | recursive-include requirements *.lock 7 | 8 | # missing __init__.py conftest.py 9 | graft tests/ 10 | 11 | # remove pyc cache folders 12 | prune tests/__pycache__/ 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .ONESHELL: 2 | .DEFAULT_GOAL := help 3 | SHELL := /bin/bash 4 | 5 | # underscore separated; aka sdist and whl names 6 | # https://blogs.gentoo.org/mgorny/2023/02/09/the-inconsistencies-around-python-package-naming-and-the-new-policy/ 7 | APP_NAME := kenobi 8 | 9 | define NORMALIZE_APP_NAME 10 | try: 11 | from importlib import metadata 12 | except ImportError: 13 | v = '$(APP_NAME)'.replace('_', "-").replace('.', "-") 14 | print(v) 15 | else: 16 | print(metadata.metadata('$(APP_NAME)')['Name'])) 17 | endef 18 | 19 | #virtual environment. 
If 0 issue warning 20 | #Not activated:0 21 | #activated: 1 22 | ifeq ($(VIRTUAL_ENV),) 23 | $(warning virtualenv not activated) 24 | is_venv = 25 | else 26 | is_venv = 1 27 | VENV_BIN := $(VIRTUAL_ENV)/bin 28 | VENV_BIN_PYTHON := python3 29 | PY_X_Y := $(shell $(VENV_BIN_PYTHON) -c 'import platform; t_ver = platform.python_version_tuple(); print(".".join(t_ver[:2]));') 30 | endif 31 | 32 | ifeq ($(is_venv),1) 33 | # Package name is hyphen delimited 34 | PACKAGE_NAME ?= $(shell $(VENV_BIN_PYTHON) -c "$(NORMALIZE_APP_NAME)") 35 | VENV_PACKAGES ?= $(shell $(VENV_BIN_PYTHON) -m pip list --disable-pip-version-check --no-input | /bin/awk '{print $$1}') 36 | IS_PACKAGE ?= $(findstring $(1),$(VENV_PACKAGES)) 37 | 38 | is_wheel ?= $(call IS_PACKAGE,wheel) 39 | is_piptools ?= $(call IS_PACKAGE,pip-tools) 40 | 41 | find_whl = $(shell [[ -z "$(3)" ]] && extention=".whl" || extention="$(3)"; [[ -z "$(2)" ]] && srcdir="dist" || srcdir="$(2)/dist"; [[ -z "$(1)" ]] && whl=$$(ls $$srcdir/$(APP_NAME)*.whl --format="single-column") || whl=$$(ls $$srcdir/$(1)*.whl --format="single-column"); echo $${whl##*/}) 42 | endif 43 | 44 | ##@ Helpers 45 | 46 | # https://www.thapaliya.com/en/writings/well-documented-makefiles/ 47 | .PHONY: help 48 | help: ## (Default) Display this help -- Always up to date 49 | @awk -F ':.*##' '/^[^: ]+:.*##/{printf " \033[1m%-20s\033[m %s\n",$$1,$$2} /^##@/{printf "\n%s\n",substr($$0,5)}' $(MAKEFILE_LIST) 50 | 51 | 52 | ##@ Testing 53 | 54 | .PHONY: pre-commit 55 | pre-commit: ## Run checks found in .pre-commit-config.yaml 56 | @pre-commit run --all-files 57 | 58 | .PHONY: update-pre-commit 59 | update-pre-commit: ## Bump package to latest version 60 | @pre-commit autoupdate 61 | 62 | 63 | ##@ GNU Make standard targets 64 | 65 | .PHONY: build 66 | build: ## Make the source distribution 67 | @python -m build 68 | 69 | .PHONY: install 70 | install: override usage := make [force=1] 71 | install: override check_web := Install failed. 
Possible cause no web connection 72 | install: private force_text = $(if $(force),"--force-reinstall") 73 | install: ## Installs *as a package*, not *with the ui* -- make [force=1] [debug=1] install 74 | ifeq ($(is_venv),1) 75 | ifeq ($(is_wheel), wheel) 76 | @if [[ "$$?" -eq 0 ]]; then 77 | 78 | whl=$(call find_whl,$(APP_NAME),,) #1: PYPI package name (hyphens). 2 folder/app name (APP_NAME;underscores). 3 file extension 79 | echo $(whl) 80 | $(VENV_BIN_PYTHON) -m pip install --disable-pip-version-check --no-color --log="/tmp/$(APP_NAME)_install_prod.log" $(force_text) "dist/$$whl" 81 | 82 | fi 83 | 84 | endif 85 | endif 86 | 87 | .PHONY: install-force 88 | install-force: force := 1 89 | install-force: install ## Force install even if exact same version 90 | 91 | # --cov-report=xml 92 | # Dependencies: pytest, pytest-cov, pytest-regressions 93 | # make [v=1] check 94 | # $(VENV_BIN)/pytest --showlocals --cov=wreck --cov-report=term-missing --cov-config=pyproject.toml $(verbose_text) tests 95 | .PHONY: check 96 | check: private verbose_text = $(if $(v),"--verbose") 97 | check: ## Run tests, generate coverage reports -- make [v=1] check 98 | ifeq ($(is_venv),1) 99 | -@$(VENV_BIN_PYTHON) -m coverage erase 100 | $(VENV_BIN_PYTHON) -m coverage run --parallel -m pytest --showlocals $(verbose_text) -m "not slow" tests 101 | $(VENV_BIN_PYTHON) -m coverage combine 102 | $(VENV_BIN_PYTHON) -m coverage report --fail-under=88 103 | endif 104 | 105 | .PHONY: distclean 106 | distclean: ## Clean build files 107 | @rm -rf dist/ build/ || :; 108 | 109 | # assumes already installed: pyenv and shims 110 | # .rst2html5/ needs to exist, but need not be an actual venv 111 | # .doc requires py310 cuz Sphinx 112 | # .tox contains all supported pyenv versions 113 | .PHONY: configure-pyenv 114 | configure-pyenv: ## Configure pyenv .python-version files 115 | @which pyenv &>/dev/null 116 | if [[ "$?" 
-eq 0 ]]; then 117 | 118 | mkdir -p .venv || :; 119 | pyenv version-name > .venv/.python-version 120 | # mkdir .doc || :; 121 | # echo "3.10.14\n" > .doc/.python-version 122 | mkdir -p .tox || :; 123 | pyenv versions --bare > .tox/.python-version 124 | # mkdir .rst2html5 || :; 125 | 126 | fi 127 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2024-2025 Harrison Erd 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software 13 | without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI Downloads](https://static.pepy.tech/badge/kenobi)](https://pepy.tech/projects/kenobi) 2 | 3 | KenobiDB is a document-based data store abstraction built on Python’s `sqlite3`, offering a simple and efficient way to manage JSON-like data. Its API is highly similar to MongoDB’s, providing familiar operations for insertion, updates, and searches—without the need for a server connection. By removing the complexity of SQL, KenobiDB delivers a secure, high-performance environment with built-in thread safety, async execution, and basic indexing while leveraging the simplicity of a document-based database. Perfect for small applications and prototypes, KenobiDB combines SQLite’s lightweight, serverless setup with the flexibility of document-based storage. Check out the [website](http://patx.github.io/kenobi/) or view the project on [PyPI](https://pypi.org/project/kenobi/). 4 | 5 | ## Features 6 | 7 | - Lightweight and serverless setup using SQLite. 8 | - MongoDB-like API for familiar operations. 9 | - Supports key-value pair searching instead of complex SQL queries. 10 | - Thread safety with `RLock`. 11 | - Asynchronous execution with `ThreadPoolExecutor`. 12 | - Built-in basic indexing for efficient searches. 13 | - Super easy integration. 
14 | - Solid performance 15 | 16 | ## Installation 17 | 18 | You can install KenobiDB using pip: 19 | 20 | ```bash 21 | pip install kenobi 22 | ``` 23 | 24 | Alternatively, for the latest version, copy and paste the `kenobi.py` file into your working directory. 25 | 26 | ## Quick Start 27 | 28 | ```python 29 | from kenobi import KenobiDB 30 | 31 | db = KenobiDB('example.db') 32 | 33 | db.insert({'name': 'John', 'color': 'blue'}) 34 | # Output: True 35 | 36 | db.search('color', 'blue') 37 | # Output: [{'name': 'John', 'color': 'blue'}] 38 | ``` 39 | 40 | ## Overview/Usage 41 | 42 | ### Initialization and Setup 43 | 44 | Initialize the database with a specified file. If the file does not exist, it will be created. SQLite is used for storage, and the database ensures the necessary table and indices are created. 45 | 46 | ```python 47 | db = KenobiDB('example.db') 48 | ``` 49 | 50 | ### Basic Operations 51 | 52 | #### Insert 53 | 54 | Add a single document or multiple documents to the database. 55 | 56 | ```python 57 | db.insert({'name': 'Oden', 'color': 'blue'}) 58 | 59 | db.insert_many([ 60 | {'name': 'Ryan', 'color': 'red'}, 61 | {'name': 'Tom', 'color': 'green'} 62 | ]) 63 | ``` 64 | 65 | #### Remove 66 | 67 | Remove documents matching a specific key-value pair. 68 | 69 | ```python 70 | db.remove('name', 'Oden') 71 | ``` 72 | 73 | #### Update 74 | 75 | Update documents matching a specific key-value pair with new data. 76 | 77 | ```python 78 | db.update('name', 'Ryan', {'color': 'dark'}) 79 | ``` 80 | 81 | #### Purge 82 | 83 | Remove all documents from the database. 84 | 85 | ```python 86 | db.purge() 87 | ``` 88 | 89 | ### Search Operations 90 | 91 | #### All 92 | 93 | Retrieve all documents with optional pagination. 94 | 95 | ```python 96 | db.all(limit=10, offset=0) # With pagination 97 | 98 | db.all() # No pagination 99 | ``` 100 | 101 | #### Search 102 | 103 | Retrieve documents matching a specific key-value pair with optional pagination. 
104 | 105 | ```python 106 | db.search('color', 'blue') 107 | ``` 108 | 109 | #### Glob Search 110 | 111 | Retrieve documents using regex. 112 | 113 | ```python 114 | db.search_pattern('color', 'b*') 115 | ``` 116 | 117 | #### Find Any 118 | 119 | Retrieve documents where a key matches any value in a list. 120 | 121 | ```python 122 | db.find_any('color', ['blue', 'red']) 123 | ``` 124 | 125 | #### Find All 126 | 127 | Retrieve documents where a key matches all values in a list. 128 | 129 | ```python 130 | db.find_all('color', ['blue', 'red']) 131 | ``` 132 | 133 | ### Concurrency and Asynchronous Execution 134 | 135 | KenobiDB uses `RLock` for thread safety and `ThreadPoolExecutor` with a maximum of 5 workers for concurrent operations. 136 | 137 | #### Asynchronous Execution 138 | 139 | Use the `execute_async` method to run functions asynchronously. 140 | 141 | ```python 142 | def insert_document(db, document): 143 | db.insert(document) 144 | 145 | future = db.execute_async(insert_document, db, {'name': 'Luke', 'color': 'green'}) 146 | ``` 147 | 148 | #### Close 149 | 150 | Shut down the thread pool executor. 151 | 152 | ```python 153 | db.close() 154 | ``` 155 | 156 | ## Testing and Contributions 157 | 158 | Contributions are welcome! To test the library: 159 | 160 | 1. Clone the repository. 161 | 2. Report issues as you encounter them. 162 | 3. Run the unittests. 163 | 164 | Feel free to open issues or submit pull requests on the [GitHub repository](https://github.com/patx/kenobi). 165 | 166 | ## Limitations 167 | 168 | KenobiDB is designed for small-scale applications and prototypes. While it provides excellent performance for most operations, it is not intended to replace full-fledged databases for high-scale or enterprise-level applications for that you should use MongoDB. 
169 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /howto.txt: -------------------------------------------------------------------------------- 1 | * Release checklist 2 | 3 | - Adjust coverage fail under minimal percentage 4 | tox-test.ini 5 | .github/workflows/coverage.yml 6 | pyproject.toml [tool.coverage.report] 7 | Makefile target, check 8 | - Run coverage ensure no failures. Does not run slow tests 9 | $ make check 10 | - Run twice 11 | $ make pre-commit 12 | - Remove old branches 13 | $ git branch 14 | $ git branch -D [branch name] 15 | - commit 16 | $ git add . 17 | $ git commit -S -m "" 18 | - tag a commit to trigger .github/workflows/release.yml 19 | $ git tag -as -m "Version 4.0" 4.0 20 | $ git push --follow-tags 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=75.8.0", 4 | "wheel", 5 | "build", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | [project] 10 | name = "kenobi" 11 | dynamic = [ 12 | "optional-dependencies", 13 | ] 14 | version = "4.0" 15 | requires-python = ">=3.9" 16 | dependencies = [] 17 | description = "document based database using sqlite" 18 | readme = "README.md" 19 | 20 | classifiers = [ 21 | "Environment :: Console", 22 | "Development Status :: 3 - Alpha", 23 | "Programming Language :: Python :: 3", 24 | "License :: OSI Approved :: BSD License", 25 | "Intended Audience :: Developers", 26 | "Topic :: Database", 27 | ] 28 | 29 | # https://peps.python.org/pep-0639/ 30 | # https://clearlydefined.io/?sort=releaseDate&sortDesc=true&name=wreck 31 | # waiting for setuptools maintainers to implement metadata v2.4 support 
32 | # https://github.com/pypa/setuptools/issues/4759 33 | license = {file = "LICENSE"} 34 | # license = "BSD-3-Clause" 35 | # license-files = [ 36 | # "LICEN[CS]E*", 37 | # "NOTICE*", 38 | # ] 39 | 40 | authors = [ 41 | {name = "Harrison Erd", email = "harrisonerd@gmail.com"}, 42 | ] 43 | 44 | [project.urls] 45 | "Source code" = 'http://patx.github.io/kenobi' 46 | "Issue tracker" = 'http://patx.github.io/kenobi/issues' 47 | "PyPI Releases" = 'https://pypi.org/project/kenobi' 48 | 49 | [tool.setuptools.packages.find] 50 | where = ["src"] 51 | include = ["kenobi*"] 52 | 53 | [tool.setuptools.dynamic] 54 | optional-dependencies.pip = { file = ['requirements/pip.lock'] } 55 | optional-dependencies.dev = { file = ['requirements/dev.lock'] } 56 | optional-dependencies.manage = { file = ['requirements/manage.lock'] } 57 | 58 | [tool.pytest.ini_options] 59 | markers = [ 60 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 61 | ] 62 | 63 | [tool.coverage.run] 64 | source_pkgs = ["kenobi"] # **REQUIRED** to be set correctly. 
Your package name 65 | branch = true 66 | 67 | [tool.coverage.report] 68 | exclude_lines = [ 69 | "pragma: no cover", 70 | "@abc.abstractmethod", 71 | "@abc.abstractproperty", 72 | "if TYPE_CHECKING:", 73 | "if typing.TYPE_CHECKING:", 74 | ] 75 | precision = 2 76 | ignore_errors = true 77 | skip_covered = true 78 | skip_empty = true 79 | fail_under = 88 80 | show_missing = true 81 | -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | -c pins-validate-pyproject-pep639.in 2 | 3 | coverage 4 | pytest>=7.1 5 | validate-pyproject 6 | -------------------------------------------------------------------------------- /requirements/dev.lock: -------------------------------------------------------------------------------- 1 | coverage==7.6.10 2 | # via -r requirements/dev.in 3 | exceptiongroup==1.2.2 4 | # via pytest 5 | fastjsonschema==2.21.1 6 | # via validate-pyproject 7 | iniconfig==2.0.0 8 | # via pytest 9 | packaging==24.2 10 | # via pytest 11 | pluggy==1.5.0 12 | # via pytest 13 | pytest==8.3.4 14 | # via -r requirements/dev.in 15 | tomli==2.2.1 16 | # via pytest 17 | validate-pyproject==0.23 18 | # via 19 | # -c requirements/pins-validate-pyproject-pep639.in 20 | # -r requirements/dev.in 21 | -------------------------------------------------------------------------------- /requirements/dev.unlock: -------------------------------------------------------------------------------- 1 | coverage 2 | pytest>=7.1 3 | validate-pyproject>=0.23 4 | -------------------------------------------------------------------------------- /requirements/manage.in: -------------------------------------------------------------------------------- 1 | -c pins-virtualenv-cve-2024-53899.in 2 | 3 | pre-commit 4 | -------------------------------------------------------------------------------- /requirements/manage.lock: 
-------------------------------------------------------------------------------- 1 | cfgv==3.4.0 2 | # via pre-commit 3 | distlib==0.3.9 4 | # via virtualenv 5 | filelock==3.17.0 6 | # via virtualenv 7 | identify==2.6.6 8 | # via pre-commit 9 | nodeenv==1.9.1 10 | # via pre-commit 11 | platformdirs==4.3.6 12 | # via virtualenv 13 | pre-commit==4.1.0 14 | # via -r requirements/manage.in 15 | pyyaml==6.0.2 16 | # via pre-commit 17 | virtualenv==20.29.1 18 | # via 19 | # -c requirements/pins-virtualenv-cve-2024-53899.in 20 | # pre-commit 21 | -------------------------------------------------------------------------------- /requirements/manage.unlock: -------------------------------------------------------------------------------- 1 | pre-commit 2 | virtualenv>=20.26.6 3 | -------------------------------------------------------------------------------- /requirements/pins-validate-pyproject-pep639.in: -------------------------------------------------------------------------------- 1 | # pep639 support added 2 | validate-pyproject>=0.23 3 | -------------------------------------------------------------------------------- /requirements/pins-virtualenv-cve-2024-53899.in: -------------------------------------------------------------------------------- 1 | # command injection through activation scripts 2 | # https://github.com/advisories/GHSA-rqc4-2hc7-8c8v 3 | virtualenv>=20.26.6 4 | -------------------------------------------------------------------------------- /requirements/pip.in: -------------------------------------------------------------------------------- 1 | pip>=24.2 2 | setuptools>=75.8.0 3 | -------------------------------------------------------------------------------- /requirements/pip.lock: -------------------------------------------------------------------------------- 1 | 2 | # The following packages are considered to be unsafe in a requirements file: 3 | pip==25.0 4 | # via -r requirements/pip.in 5 | setuptools==75.8.0 6 | # via -r requirements/pip.in 7 | 
-------------------------------------------------------------------------------- /requirements/pip.unlock: -------------------------------------------------------------------------------- 1 | pip>=24.2 2 | setuptools>=75.8.0 3 | -------------------------------------------------------------------------------- /src/kenobi/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. moduleauthor:: Harrison Erd 3 | 4 | .. py:data:: __all__ 5 | :type: tuple[str] 6 | :value: ("KenobiDB",) 7 | 8 | Package exports 9 | 10 | """ 11 | 12 | from .kenobi import KenobiDB 13 | 14 | __all__ = ("KenobiDB",) 15 | -------------------------------------------------------------------------------- /src/kenobi/kenobi.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. moduleauthor:: Harrison Erd 3 | 4 | KenobiDB is a small document-based DB, supporting simple usage including 5 | insertion, removal, and basic search. 6 | Written by Harrison Erd `Profile `_ 7 | `Home `_ 8 | 9 | Copyright Harrison Erd 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | 1. Redistributions of source code must retain the above copyright notice, 15 | this list of conditions and the following disclaimer. 16 | 2. Redistributions in binary form must reproduce the above copyright notice, 17 | this list of conditions and the following disclaimer in the documentation 18 | and/or other materials provided with the distribution. 19 | 3. Neither the name of the copyright holder nor the names of its contributors 20 | may be used to endorse or promote products derived from this software 21 | without specific prior written permission. 
"""
.. moduleauthor:: Harrison Erd

KenobiDB is a small document-based DB, supporting simple usage including
insertion, removal, and basic search.
Written by Harrison Erd (https://patx.github.io/)

Copyright Harrison Erd.  Distributed under the BSD 3-Clause license;
see the LICENSE file for the full text.
"""

import json
import os
import re
import sqlite3
from concurrent.futures import ThreadPoolExecutor
from threading import RLock


class KenobiDB:
    """
    A lightweight document-based database built on SQLite. Supports basic
    operations such as insert, remove, search, update, and asynchronous
    execution.
    """

    def __init__(self, file):
        """
        Initialize the KenobiDB instance.

        Args:
            file (str): Path to the SQLite file. If it does not exist,
                it will be created.
        """
        cls = type(self)
        self.file = os.path.expanduser(file)
        self._lock = RLock()
        self.executor = ThreadPoolExecutor(max_workers=5)
        # Retained for backward compatibility only: REGEXP support is
        # registered eagerly on the single connection below, so nothing
        # reads this set anymore.
        self._regexp_connections = set()
        # check_same_thread=False is safe here because every access to the
        # connection is serialized through self._lock.
        self._connection = sqlite3.connect(self.file, check_same_thread=False)
        cls._add_regexp_support(self._connection)
        self._initialize_db()

    def _initialize_db(self):
        """
        Create the table and index if they do not exist, and set
        journal mode to WAL.
        """
        with self._lock:
            self._connection.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data TEXT NOT NULL
                )
                """
            )
            # NOTE(review): this expression index only covers the literal
            # top-level field named "key"; searches on other fields scan.
            self._connection.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_key
                ON documents (
                    json_extract(data, '$.key')
                )
                """
            )
            # WAL allows concurrent readers alongside a writer.
            self._connection.execute("PRAGMA journal_mode=WAL;")

    @staticmethod
    def _add_regexp_support(conn):
        """
        Register a REGEXP function on the SQLite connection.

        SQLite defines the REGEXP operator syntax but ships no
        implementation; the application must supply one via
        ``create_function``.
        """

        def regexp(pattern, value):
            """Code sqlite3 runs when REGEXP sql encountered. Takes two params.

            Args:
                pattern (str): regex
                value: the value extracted from the document; may be SQL
                    NULL (None) or a non-string scalar.

            Returns:
                bool: True match occurred
            """
            # json_extract yields NULL for documents that lack the key and
            # may yield non-string scalars (ints, floats). Treat NULL as
            # "no match" and coerce the rest to str so re.search does not
            # raise TypeError and abort the whole query.
            if value is None:
                return False
            return re.search(pattern, str(value)) is not None

        conn.create_function("REGEXP", 2, regexp)

    def _get_connection(self):
        """
        Return the active SQLite connection.
        """
        return self._connection

    def insert(self, document):
        """
        Insert a single document (dict) into the database.

        Args:
            document (dict): The document to insert.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If the provided document is not a dictionary.
        """
        if not isinstance(document, dict):
            raise TypeError("Must insert a dict")
        with self._lock:
            self._connection.execute(
                "INSERT INTO documents (data) VALUES (?)", (json.dumps(document),)
            )
            self._connection.commit()
            return True

    def insert_many(self, document_list):
        """
        Insert multiple documents (list of dicts) into the database.

        Args:
            document_list (list): The list of documents to insert.

        Returns:
            bool: True upon successful insertion.

        Raises:
            TypeError: If the provided object is not a list of dicts.
        """
        if not isinstance(document_list, list) or not all(
            isinstance(doc, dict) for doc in document_list
        ):
            raise TypeError("Must insert a list of dicts")
        with self._lock:
            self._connection.executemany(
                "INSERT INTO documents (data) VALUES (?)",
                [(json.dumps(doc),) for doc in document_list],
            )
            self._connection.commit()
            return True

    def remove(self, key, value):
        """
        Remove all documents where the given key matches the specified value.

        Args:
            key (str): The field name to match.
            value (Any): The value to match.

        Returns:
            int: Number of documents removed.

        Raises:
            ValueError: If 'key' is empty or 'value' is None.
        """
        if not key or not isinstance(key, str):
            raise ValueError("key must be a non-empty string")
        if value is None:
            raise ValueError("value cannot be None")
        query = "DELETE FROM documents " "WHERE json_extract(data, '$.' || ?) = ?"
        with self._lock:
            result = self._connection.execute(query, (key, value))
            self._connection.commit()
            return result.rowcount

    def update(self, id_key, id_value, new_dict):
        """
        Update documents that match (id_key == id_value) by merging new_dict.

        Args:
            id_key (str): The field name to match.
            id_value (Any): The value to match.
            new_dict (dict): A dictionary of changes to apply.

        Returns:
            bool: True if at least one document was updated, False otherwise.

        Raises:
            TypeError: If new_dict is not a dict.
            ValueError: If id_key is invalid or id_value is None.
        """
        if not isinstance(new_dict, dict):
            raise TypeError("new_dict must be a dictionary")
        if not id_key or not isinstance(id_key, str):
            raise ValueError("id_key must be a non-empty string")
        if id_value is None:
            raise ValueError("id_value cannot be None")

        select_query = (
            "SELECT data FROM documents " "WHERE json_extract(data, '$.' || ?) = ?"
        )
        update_query = (
            "UPDATE documents "
            "SET data = ? "
            "WHERE json_extract(data, '$.' || ?) = ?"
        )
        with self._lock:
            cursor = self._connection.execute(select_query, (id_key, id_value))
            documents = cursor.fetchall()
            if not documents:
                return False
            for row in documents:
                document = json.loads(row[0])
                # A stored JSON value could be a non-object; only dicts can
                # be merged, so skip anything else.
                if not isinstance(document, dict):
                    continue
                document.update(new_dict)
                self._connection.execute(
                    update_query, (json.dumps(document), id_key, id_value)
                )
            self._connection.commit()
            return True

    def purge(self):
        """
        Remove all documents from the database.

        Returns:
            bool: True upon successful purge.
        """
        with self._lock:
            self._connection.execute("DELETE FROM documents")
            self._connection.commit()
            return True

    def all(self, limit=100, offset=0):
        """
        Return a paginated list of all documents.

        Args:
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of all documents (dicts).
        """
        query = "SELECT data FROM documents LIMIT ? OFFSET ?"
        with self._lock:
            cursor = self._connection.execute(query, (limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def search(self, key, value, limit=100, offset=0):
        """
        Return a list of documents matching (key == value).

        Args:
            key (str): The document field to match on.
            value (Any): The value for which to search.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).

        Raises:
            ValueError: If 'key' is empty or not a string.
        """
        if not key or not isinstance(key, str):
            raise ValueError("Key must be a non-empty string")

        query = (
            "SELECT data FROM documents "
            "WHERE json_extract(data, '$.' || ?) = ? "
            "LIMIT ? OFFSET ?"
        )
        with self._lock:
            cursor = self._connection.execute(query, (key, value, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def search_pattern(self, key, pattern, limit=100, offset=0):
        """
        Search documents matching a regex pattern.

        Documents that lack the key are simply not matched (they no longer
        abort the query).

        Args:
            key (str): The document field to match on.
            pattern (str): The regex pattern to match.
            limit (int): The maximum number of documents to return.
            offset (int): The starting point for retrieval.

        Returns:
            list: A list of matching documents (dicts).

        Raises:
            ValueError: If the key or pattern is invalid.
        """
        if not key or not isinstance(key, str):
            raise ValueError("key must be a non-empty string")
        if not pattern or not isinstance(pattern, str):
            raise ValueError("pattern must be a non-empty string")

        query = """
            SELECT data FROM documents
            WHERE json_extract(data, '$.' || ?) REGEXP ?
            LIMIT ? OFFSET ?
        """
        with self._lock:
            cursor = self._connection.execute(query, (key, pattern, limit, offset))
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def find_any(self, key, value_list):
        """
        Return documents where key matches any value in value_list.

        Args:
            key (str): The document field to match on.
            value_list (list): A list of possible values.

        Returns:
            list: A list of matching documents.
        """
        # Guard: an empty list would render as "IN ()", which is a SQLite
        # syntax error -- and nothing can match an empty value set anyway.
        if not value_list:
            return []
        placeholders = ", ".join(["?"] * len(value_list))
        query = f"""
            SELECT DISTINCT documents.data
            FROM documents, json_each(documents.data, '$.' || ?)
            WHERE json_each.value IN ({placeholders})
        """
        with self._lock:
            cursor = self._connection.execute(query, [key] + value_list)
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def find_all(self, key, value_list):
        """
        Return documents where the key contains all values in value_list.

        Args:
            key (str): The field to match.
            value_list (list): The required values to match.

        Returns:
            list: A list of matching documents. An empty value_list yields
                an empty result (previously a SQL syntax error).
        """
        # Same "IN ()" guard as find_any.
        if not value_list:
            return []
        placeholders = ", ".join(["?"] * len(value_list))
        query = f"""
            SELECT documents.data
            FROM documents
            WHERE (
                SELECT COUNT(DISTINCT value)
                FROM json_each(documents.data, '$.' || ?)
                WHERE value IN ({placeholders})
            ) = ?
        """
        with self._lock:
            cursor = self._connection.execute(
                query, [key] + value_list + [len(value_list)]
            )
            return [json.loads(row[0]) for row in cursor.fetchall()]

    def execute_async(self, func, *args, **kwargs):
        """
        Execute a function asynchronously using a thread pool.

        Args:
            func (callable): The function to execute.
            *args: Arguments for the function.
            **kwargs: Keyword arguments for the function.

        Returns:
            concurrent.futures.Future: A Future object representing
            the execution.
        """
        return self.executor.submit(func, *args, **kwargs)

    def close(self):
        """
        Shutdown the thread pool executor and close the database connection.
        """
        self.executor.shutdown()
        with self._lock:
            self._connection.close()
Creates folders as well 35 | :type seq_rel_paths: 36 | 37 | collections.abc.Sequence[str | pathlib.Path] | collections.abc.MutableSet[str | pathlib.Path] 38 | 39 | :param tmp_path: Start absolute path 40 | :type tmp_path: pathlib.Path 41 | :returns: Set of absolute paths of created files 42 | :rtype: set[pathlib.Path] 43 | """ 44 | set_abs_paths = set() 45 | is_seq = seq_rel_paths is not None and ( 46 | (isinstance(seq_rel_paths, Sequence) and not isinstance(seq_rel_paths, str)) 47 | or isinstance(seq_rel_paths, set) 48 | ) 49 | if is_seq: 50 | for posix in seq_rel_paths: 51 | if isinstance(posix, str): 52 | abs_path = tmp_path.joinpath(*posix.split("/")) 53 | elif issubclass(type(posix), PurePath): 54 | if not posix.is_absolute(): 55 | abs_path = tmp_path / posix 56 | else: # pragma: no cover 57 | # already absolute 58 | abs_path = posix 59 | else: 60 | abs_path = None 61 | 62 | if abs_path is not None: 63 | set_abs_paths.add(abs_path) 64 | set_folders.add(abs_path.parent) 65 | abs_path.parent.mkdir(parents=True, exist_ok=True) 66 | abs_path.touch() 67 | else: 68 | abs_path = None 69 | 70 | return set_abs_paths 71 | 72 | yield _method 73 | 74 | # cleanup 75 | if request.node.test_report.outcome == "passed": 76 | for abspath_folder in set_folders: 77 | shutil.rmtree(abspath_folder, ignore_errors=True) 78 | 79 | 80 | @pytest.fixture() 81 | def db_path(tmp_path): 82 | """ 83 | Returns: 84 | pathlib.Path: path to database within pytest managed temporary folder 85 | """ 86 | path_db = tmp_path.joinpath("test_kenobi.db") 87 | 88 | return path_db 89 | 90 | 91 | @pytest.fixture() 92 | def create_db(db_path, request): 93 | """Per test function create database in pytest managed temporary folder 94 | 95 | Usage 96 | 97 | .. code-block:: text 98 | 99 | import pytest 100 | def test_sometest(create_db): 101 | db = create_db() 102 | 103 | Returns: 104 | KenobiDB: database instance 105 | """ 106 | db = KenobiDB(db_path) 107 | 108 | def cleanup(): 109 | """Pretty way but works. 
110 | 111 | Purposefully refrain from: purge database or delete database file. 112 | 113 | Let pytest manage removing the db file. So can later 114 | debug a test function in a working debug environment. 115 | """ 116 | db.close() 117 | 118 | def _fcn(): 119 | """Initializes database. After test function close database. 120 | 121 | - purposefully induce a failure with :code:`assert False` 122 | 123 | - go to the temp folder 124 | 125 | - activate the venv 126 | 127 | - open a REPR with :command:`python` 128 | 129 | """ 130 | 131 | return db 132 | 133 | request.addfinalizer(cleanup) 134 | 135 | return _fcn 136 | -------------------------------------------------------------------------------- /tests/test_kenobi.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. moduleauthor:: Harrison Erd 3 | 4 | Not slow 5 | 6 | .. code-block:: shell 7 | 8 | python -m coverage run --source='kenobi.kenobi' -m pytest \ 9 | --showlocals -m "not slow" tests/test_kenobi.py && coverage report \ 10 | --data-file=.coverage --include="**/kenobi.py" 11 | 12 | All tests 13 | 14 | .. 
"""
.. moduleauthor:: Harrison Erd

Not slow

.. code-block:: shell

    python -m coverage run --source='kenobi.kenobi' -m pytest \
    --showlocals -m "not slow" tests/test_kenobi.py && coverage report \
    --data-file=.coverage --include="**/kenobi.py"

All tests

.. code-block:: shell

    python -m coverage run --source='kenobi.kenobi' -m pytest \
    --showlocals tests/test_kenobi.py && coverage report \
    --data-file=.coverage --include="**/kenobi.py"

"""

import time
from contextlib import nullcontext as does_not_raise
from functools import partial

import pytest

# (method name, payload, expectation, expected count, expected document(s))
testdata_insert_single_document = (
    ("insert", {"key": "value"}, does_not_raise(), 1, {"key": "value"}),
    (
        "insert_many",
        [{"key": "value1"}, {"key": "value2"}],
        does_not_raise(),
        2,
        [{"key": "value1"}, {"key": "value2"}],
    ),
    ("insert", 0.1234, pytest.raises(TypeError), 0, {}),
    ("insert", None, pytest.raises(TypeError), 0, {}),
    ("insert_many", [0.1234, 0.1234], pytest.raises(TypeError), 0, []),
)
ids_insert_single_document = (
    "Single document",
    "Multiple documents",
    "document invalid unsupported type",
    "document invalid None",
    "Multiple documents unsupported types",
)


@pytest.mark.parametrize(
    "meth, document, expectation, result_count_expected, document_expected",
    testdata_insert_single_document,
    ids=ids_insert_single_document,
)
def test_insert_single_document(
    meth,
    document,
    expectation,
    result_count_expected,
    document_expected,
    create_db,
):
    """Insert one or many documents and verify the stored contents."""
    # pytest -vv --showlocals --log-level INFO -k "test_insert_single_document" tests
    # prepare
    db = create_db()
    fcn = getattr(db, meth, None)
    if fcn is not None:
        # insert document(s); invalid payloads must raise inside expectation
        with expectation:
            fcn(document)

    # act
    results = db.all()
    # verify
    assert len(results) == result_count_expected
    if isinstance(expectation, does_not_raise):
        # on success every inserted document must be retrievable
        if isinstance(document, dict):
            assert document_expected in results
        elif isinstance(document, list):
            for d_document in document:
                assert d_document in results


testdata_remove_document = (
    ({"key": "value"}, "key", "value", does_not_raise(), 0),
    ({"key": "value"}, None, "value", pytest.raises(ValueError), 1),
    ({"key": "value"}, 0.12345, "value", pytest.raises(ValueError), 1),
    ({"key": "value"}, "key", None, pytest.raises(ValueError), 1),
)
ids_remove_document = (
    "remove one document",
    "key None",
    "key unsupported type",
    "value None",
)


@pytest.mark.parametrize(
    "document, query_key, query_val, expectation, results_count_expected",
    testdata_remove_document,
    ids=ids_remove_document,
)
def test_remove_document(
    document, query_key, query_val, expectation, results_count_expected, create_db
):
    """Remove a document by key:value; invalid keys/values raise ValueError."""
    # pytest -vv --showlocals --log-level INFO -k "test_remove_document" tests
    db = create_db()
    db.insert(document)
    with expectation:
        db.remove(query_key, query_val)
    assert len(db.all()) == results_count_expected


testdata_update_document = (
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        "id",
        1,
        "key",
        "new_value",
        does_not_raise(),
        1,
        True,
    ),
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        None,
        1,
        "key",
        "value",
        pytest.raises(ValueError),
        1,
        False,
    ),
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        "id",
        None,
        "key",
        "value",
        pytest.raises(ValueError),
        1,
        False,
    ),
    (
        {"id": 1, "key": "value"},
        {"key": "new_value"},
        "id",
        2,
        "key",
        "value",
        does_not_raise(),
        1,
        False,
    ),
)
ids_update_document = (
    "Update a document",
    "id_field None ValueError",
    "id_val None ValueError",
    "could not update nonexistent document",
)


@pytest.mark.parametrize(
    (
        "document, updated_fields, id_field, id_val, val_key, "
        "val_expected, expectation, results_count_expected, is_success_expected"
    ),
    testdata_update_document,
    ids=ids_update_document,
)
def test_update_document(
    document,
    updated_fields,
    id_field,
    id_val,
    val_key,
    val_expected,
    expectation,
    results_count_expected,
    is_success_expected,
    create_db,
):
    """Update a document by id field; check success flag and final value."""
    # pytest -vv --showlocals --log-level INFO -k "test_update_document" tests
    db = create_db()
    db.insert(document)
    with expectation:
        is_success_actual = db.update(id_field, id_val, updated_fields)
        if isinstance(expectation, does_not_raise):
            assert is_success_actual is is_success_expected
    # the stored document must hold the expected value either way
    results = db.all()
    assert len(results) == results_count_expected
    assert results[0][val_key] == val_expected


def test_purge_database(create_db):
    """Purging removes every stored document."""
    # pytest -vv --showlocals --log-level INFO -k "test_purge_database" tests
    db = create_db()
    db.insert_many([{"key": "value1"}, {"key": "value2"}])
    db.purge()
    assert len(db.all()) == 0
testdata_search_by_key_value = (
    ([{"key": "value1"}, {"key": "value2"}], "key", "value1", does_not_raise(), 1),
    (
        [{"key": "value1"}, {"key": "value2"}],
        None,
        "value1",
        pytest.raises(ValueError),
        1,
    ),
    (
        [{"key": "value1"}, {"key": "value2"}],
        0.2345,
        "value1",
        pytest.raises(ValueError),
        1,
    ),
)
ids_search_by_key_value = (
    "successful query",
    "query_key None",
    "query_key unsupported type",
)


@pytest.mark.parametrize(
    "documents, query_key, query_val, expectation, results_count_expected",
    testdata_search_by_key_value,
    ids=ids_search_by_key_value,
)
def test_search_by_key_value(
    documents,
    query_key,
    query_val,
    expectation,
    results_count_expected,
    create_db,
):
    """Search documents by key:value; invalid keys raise ValueError."""
    # pytest -vv --showlocals --log-level INFO -k "test_search_by_key_value" tests
    db = create_db()
    db.insert_many(documents)
    with expectation:
        results = db.search(query_key, query_val)
    if isinstance(expectation, does_not_raise):
        assert len(results) == results_count_expected
        assert results[0] == documents[0]


# shared scalar-field fixture data for find_any parametrization
_three_docs = [{"key": "value1"}, {"key": "value2"}, {"key": "value3"}]

testdata_find_any = (
    (_three_docs, "key", ["value1", "value3"], does_not_raise(), 2, (0, 2)),
    (_three_docs, None, ["value1", "value3"], does_not_raise(), 0, ()),
    (_three_docs, 0.1234, ["value1", "value3"], does_not_raise(), 0, ()),
    (_three_docs, "key", [None, None], does_not_raise(), 0, ()),
    (_three_docs, "key", [0.1234, 0.1234], does_not_raise(), 0, ()),
    pytest.param(
        _three_docs,
        "key",
        {0.1234, 0.1234},
        pytest.raises(TypeError),
        0,
        (),
        marks=pytest.mark.xfail,
    ),
    pytest.param(
        _three_docs,
        "key",
        (0.1234, 0.1234),
        pytest.raises(TypeError),
        0,
        (),
        marks=pytest.mark.xfail,
    ),
)
ids_find_any = (
    "successful query",
    "key None",
    "key unsupported type",
    "query vals list None None",
    "query vals list both unsupported type",
    "query vals set both unsupported type BUG",
    "query vals tuple both unsupported type BUG",
)


@pytest.mark.parametrize(
    (
        "documents, query_key, query_vals, expectation, "
        "results_count_expected, t_documents_idxs"
    ),
    testdata_find_any,
    ids=ids_find_any,
)
def test_find_any(
    documents,
    query_key,
    query_vals,
    expectation,
    results_count_expected,
    t_documents_idxs,
    create_db,
):
    """Find documents where a key matches any value in a list."""
    # pytest -vv --showlocals --log-level INFO -k "test_find_any" tests
    db = create_db()
    db.insert_many(documents)

    with expectation:
        results = db.find_any(query_key, query_vals)
    if isinstance(expectation, does_not_raise):
        assert len(results) == results_count_expected
        for doc_idx in t_documents_idxs:
            assert documents[doc_idx] in results


# shared array-field fixture data for find_all parametrization
_array_docs = [
    {"key": ["value1", "value2"]},
    {"key": ["value1"]},
    {"key": ["value2", "value3"]},
]

testdata_find_all = (
    (_array_docs, "key", ["value1", "value2"], does_not_raise(), 1, (0,)),
    pytest.param(
        _array_docs,
        "key",
        {"value1", "value2"},
        pytest.raises(TypeError),
        1,
        (),
        marks=pytest.mark.xfail,
    ),
    pytest.param(
        _array_docs,
        "key",
        ("value1", "value2"),
        pytest.raises(TypeError),
        1,
        (),
        marks=pytest.mark.xfail,
    ),
)
ids_find_all = (
    "successful query",
    "query vals set both unsupported type BUG",
    "query vals tuple both unsupported type BUG",
)


@pytest.mark.parametrize(
    (
        "documents, query_key, query_vals, expectation, "
        "results_count_expected, t_documents_idxs"
    ),
    testdata_find_all,
    ids=ids_find_all,
)
def test_find_all(
    documents,
    query_key,
    query_vals,
    expectation,
    results_count_expected,
    t_documents_idxs,
    create_db,
):
    """Find documents where a key matches all values in a list."""
    # pytest -vv --showlocals --log-level INFO -k "test_find_all" tests
    db = create_db()
    db.insert_many(documents)
    with expectation:
        results = db.find_all(query_key, query_vals)
    if isinstance(expectation, does_not_raise):
        assert len(results) == results_count_expected
        for doc_idx in t_documents_idxs:
            assert results[0] == documents[doc_idx]
INFO -k "test_find_all" tests 517 | # prepare 518 | db = create_db() 519 | db.insert_many(documents) 520 | # act 521 | with expectation: 522 | results = db.find_all(query_key, query_vals) 523 | # verify 524 | if isinstance(expectation, does_not_raise): 525 | results_count_actual = len(results) 526 | assert results_count_actual == results_count_expected 527 | for doc_idx in t_documents_idxs: 528 | assert results[0] == documents[doc_idx] 529 | 530 | 531 | def test_pagination_all(create_db): 532 | """Test paginated retrieval of all documents.""" 533 | # pytest -vv --showlocals --log-level INFO -k "test_pagination_all" tests 534 | documents = [{"key": f"value{i}"} for i in range(10)] 535 | results_count_expected = 5 536 | 537 | # prepare 538 | db = create_db() 539 | db.insert_many(documents) 540 | # act 541 | results = db.all(limit=5, offset=0) 542 | # verify 543 | results_count_actual = len(results) 544 | assert results_count_actual == results_count_expected 545 | assert results == documents[:5] 546 | 547 | 548 | def test_pagination_search(create_db): 549 | """Test paginated search by key:value.""" 550 | # pytest -vv --showlocals --log-level INFO -k "test_pagination_search" tests 551 | documents = [{"key": f"value{i}"} for i in range(10)] 552 | results_count_expected = 1 553 | 554 | # prepare 555 | db = create_db() 556 | db.insert_many(documents) 557 | # act 558 | results = db.search("key", "value1", limit=1, offset=0) 559 | # verify 560 | results_count_actual = len(results) 561 | assert results_count_actual == results_count_expected 562 | assert results[0] == {"key": "value1"} 563 | 564 | 565 | def db_task(fcn, doc): 566 | """Function usable by thread pool executor""" 567 | fcn(doc) 568 | 569 | 570 | testdata_concurrent_inserts = ( 571 | ( 572 | [{"key": f"value{i}"} for i in range(50)], 573 | does_not_raise(), 574 | 50, 575 | ), 576 | ) 577 | ids_concurrent_inserts = ("successful concurrent inserts",) 578 | 579 | 580 | @pytest.mark.parametrize( 581 | "documents, 
expectation, results_count_expected", 582 | testdata_concurrent_inserts, 583 | ids=ids_concurrent_inserts, 584 | ) 585 | def test_concurrent_inserts(documents, expectation, results_count_expected, create_db): 586 | """Test concurrent inserts to ensure thread safety.""" 587 | # pytest -vv --showlocals --log-level INFO -k "test_concurrent_inserts" tests 588 | # prepare 589 | db = create_db() 590 | # pytest doesn't support inner functions 591 | insert_task = partial(db_task, db.insert) 592 | 593 | # act 594 | with expectation: 595 | with db.executor as executor: 596 | executor.map(insert_task, documents) 597 | # verify 598 | results = db.all() 599 | results_count_actual = len(results) 600 | assert results_count_actual == results_count_expected 601 | 602 | 603 | def test_performance_bulk_insert(create_db): 604 | """Test the performance of bulk inserting a large number of documents.""" 605 | # pytest -vv --showlocals --log-level INFO -k "test_performance_bulk_insert" tests 606 | documents = [{"key": f"value{i}"} for i in range(1000)] 607 | duration_max_expected = 5 608 | # prepare 609 | db = create_db() 610 | start_time = time.time() 611 | # act 612 | db.insert_many(documents) 613 | end_time = time.time() 614 | duration_actual = end_time - start_time 615 | # verify 616 | assert duration_actual < duration_max_expected, "Bulk insert took too long" 617 | 618 | 619 | def test_safe_query_handling(create_db): 620 | """Test safe handling of potentially harmful input to prevent SQL injection.""" 621 | # pytest -vv --showlocals --log-level INFO -k "test_safe_query_handling" tests 622 | document = {"key": "value"} 623 | results_count_expected = 0 624 | # prepare 625 | db = create_db() 626 | db.insert(document) 627 | # act 628 | results = db.search("key", "value OR 1=1") 629 | # verify 630 | results_count_actual = len(results) 631 | assert ( 632 | results_count_actual == results_count_expected 633 | ), "Unsafe query execution detected" 634 | 635 | 636 | @pytest.mark.slow 637 | def 
test_large_dataset(create_db): 638 | """Stress test: Insert and retrieve a large number of documents.""" 639 | # pytest -vv --showlocals --log-level INFO -k "test_large_dataset" tests 640 | num_docs = 1_000_000 641 | documents = [{"key": f"value{i}"} for i in range(num_docs)] 642 | duration_1M_inserts_max = 300 643 | 644 | # prepare 645 | db = create_db() 646 | 647 | # Measure insertion performance 648 | start_time = time.time() 649 | db.insert_many(documents) 650 | end_time = time.time() 651 | duration_1M_inserts_actual = end_time - start_time 652 | 653 | # Ensure insertion is reasonably fast 654 | assert ( 655 | duration_1M_inserts_actual < duration_1M_inserts_max 656 | ), "Inserting 1,000,000 documents took too long" 657 | msg_info = f"Inserted {num_docs} documents in {duration_1M_inserts_actual} seconds" 658 | print(msg_info) 659 | 660 | # Measure retrieval performance 661 | start_time = time.time() 662 | all_docs = db.all(limit=num_docs) 663 | end_time = time.time() 664 | retrieval_duration_actual = end_time - start_time 665 | docs_count_actual = len(all_docs) 666 | 667 | # Ensure retrieval is correct and performant 668 | assert docs_count_actual == num_docs, "Not all documents were retrieved" 669 | assert ( 670 | retrieval_duration_actual < duration_1M_inserts_max 671 | ), "Retrieving 1,000,000 documents took too long" 672 | msg_info = f"Retrieved {docs_count_actual} documents in {retrieval_duration_actual} seconds" 673 | print(msg_info) 674 | 675 | 676 | testdata_malformed_json_in_update = ( 677 | ( 678 | {"id": 1, "key": "value"}, 679 | pytest.raises(TypeError), 680 | ), 681 | ) 682 | ids_malformed_json_in_update = ("Insert a malformed document",) 683 | 684 | 685 | @pytest.mark.parametrize( 686 | "malformed_document, expectation", 687 | testdata_malformed_json_in_update, 688 | ids=ids_malformed_json_in_update, 689 | ) 690 | def test_malformed_json_in_update(malformed_document, expectation, create_db): 691 | """Test handling malformed JSON in update.""" 692 
| # pytest -vv --showlocals --log-level INFO -k "test_malformed_json_in_update" tests 693 | # prepare 694 | db = create_db() 695 | db.insert(malformed_document) 696 | 697 | # Attempt to update with malformed JSON structure 698 | with expectation: 699 | db.update("id", 1, "not a dict") 700 | --------------------------------------------------------------------------------