├── .github
└── workflows
│ └── tests.yaml
├── .gitignore
├── .readthedocs.yaml
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
├── Makefile
├── api.rst
├── conf.py
├── index.rst
├── installation.rst
└── quickstart.rst
├── pyproject.toml
├── setup.py
├── sophy.pyx
├── src
├── sophia.c
└── sophia.h
└── tests.py
/.github/workflows/tests.yaml:
--------------------------------------------------------------------------------
1 | name: Tests
2 | on: [push]
3 | jobs:
4 | tests:
5 | name: ${{ matrix.python-version }}
6 | runs-on: ubuntu-latest
7 | strategy:
8 | fail-fast: false
9 | matrix:
10 | python-version: [3.8, "3.10", "3.11", "3.13"]
11 | steps:
12 | - uses: actions/checkout@v2
13 | - uses: actions/setup-python@v2
14 | with:
15 | python-version: ${{ matrix.python-version }}
16 | - name: pip deps
17 | run: |
18 | pip install cython setuptools
19 | python setup.py build_ext -i
20 | - name: runtests
21 | run: python tests.py
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | sophy.c
2 | sophy*.so
3 | MANIFEST
4 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | build:
3 | os: ubuntu-22.04
4 | tools:
5 | python: "3.11"
6 | sphinx:
7 | configuration: docs/conf.py
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) Charles Leifer (coleifer@gmail.com)
2 |
3 | Redistribution and use in source and binary forms, with or
4 | without modification, are permitted provided that the following
5 | conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above
8 | copyright notice, this list of conditions and the
9 | following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials
14 | provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20 | AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
27 | THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 | SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include src/sophia.c
4 | include src/sophia.h
5 | include sophy.c
6 | include sophy.pyx
7 | include tests.py
8 | recursive-include docs *
9 |
10 | global-exclude *.pyc
11 | global-exclude *.o
12 | global-exclude *.so.0.1
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [sophy](http://sophy.readthedocs.io/en/latest/), fast Python bindings for
4 | [Sophia embedded database](http://sophia.systems), v2.2.
5 |
6 |
7 |
8 | #### About sophy
9 |
10 | * Written in Cython for speed and low-overhead
11 | * Clean, memorable APIs
12 | * Extensive support for Sophia's features
13 | * Python 2 **and** Python 3 support
14 | * No 3rd-party dependencies besides Cython
15 | * [Documentation on readthedocs](http://sophy.readthedocs.io/en/latest/)
16 |
17 | #### About Sophia
18 |
19 | * Ordered key/value store
20 | * Keys and values can be composed of multiple fields of varying data-types
21 | * ACID transactions
22 | * MVCC, optimistic, non-blocking concurrency with multiple readers and writers.
23 | * Multiple databases per environment
24 | * Multiple- and single-statement transactions across databases
25 | * Prefix searches
26 | * Automatic garbage collection and key expiration
27 | * Hot backup
28 | * Compression
29 | * Multi-threaded compaction
30 | * `mmap` support, direct I/O support
31 | * APIs for a variety of statistics on storage engine internals
32 | * BSD licensed
33 |
34 | #### Some ideas of where Sophia might be a good fit
35 |
36 | * Running on application servers, low-latency / high-throughput
37 | * Time-series
38 | * Analytics / Events / Logging
39 | * Full-text search
40 | * Secondary-index for external data-store
41 |
42 | #### Limitations
43 |
44 | * Not tested on Windoze.
45 |
46 | If you encounter any bugs in the library, please [open an issue](https://github.com/coleifer/sophy/issues/new), including a description of the bug and any related traceback.
47 |
48 | ## Installation
49 |
50 | The [sophia](http://sophia.systems) sources are bundled with the `sophy` source
51 | code, so the only thing you need to install is [Cython](http://cython.org). You
52 | can install from [GitHub](https://github.com/coleifer/sophy) or from
53 | [PyPI](https://pypi.python.org/pypi/sophy/).
54 |
55 | Pip instructions:
56 |
57 | ```console
58 | $ pip install Cython
59 | $ pip install sophy
60 | ```
61 |
62 | Or to install the latest code from master:
63 |
64 | ```console
65 | $ pip install -e git+https://github.com/coleifer/sophy#egg=sophy
66 | ```
67 |
68 | Git instructions:
69 |
70 | ```console
71 | $ pip install Cython
72 | $ git clone https://github.com/coleifer/sophy
73 | $ cd sophy
74 | $ python setup.py build
75 | $ python setup.py install
76 | ```
77 |
78 | To run the tests:
79 |
80 | ```console
81 | $ python tests.py
82 | ```
83 |
84 | 
85 |
86 | ---------------------------------------------
87 |
88 | ## Overview
89 |
90 | Sophy is very simple to use. It acts like a Python `dict` object, but in
91 | addition to normal dictionary operations, you can read slices of data that are
92 | returned efficiently using cursors. Similarly, bulk writes using `update()` use
93 | an efficient, atomic batch operation.
94 |
95 | Despite the simple APIs, Sophia has quite a few advanced features. There is too
96 | much to cover everything in this document, so be sure to check out the official
97 | [Sophia storage engine documentation](http://sophia.systems/v2.2/).
98 |
99 | The next section will show how to perform common actions with `sophy`.
100 |
101 | ## Using Sophy
102 |
103 | Let's begin by importing `sophy` and creating an environment. The environment
104 | can host multiple databases, each of which may have a different schema. In this
105 | example our database will store arbitrary binary data as the key and value.
106 | Finally we'll open the environment so we can start storing and retrieving data.
107 |
108 | ```python
109 | from sophy import Sophia, Schema, StringIndex
110 |
111 | # Instantiate our environment by passing a directory path which will store the
112 | # various data and metadata for our databases.
113 | env = Sophia('/path/to/store/data')
114 |
115 | # We'll define a very simple schema consisting of a single utf-8 string for the
116 | # key, and a single utf-8 string for the associated value.
117 | schema = Schema(key_parts=[StringIndex('key')],
118 | value_parts=[StringIndex('value')])
119 |
120 | # Create a key/value database using the schema above.
121 | db = env.add_database('example_db', schema)
122 |
123 | if not env.open():
124 | raise Exception('Unable to open Sophia environment.')
125 | ```
126 |
127 | ### CRUD operations
128 |
129 | Sophy databases use the familiar `dict` APIs for CRUD operations:
130 |
131 | ```python
132 |
133 | db['name'] = 'Huey'
134 | db['animal_type'] = 'cat'
135 | print(db['name'], 'is a', db['animal_type'])  # Huey is a cat
136 |
137 | 'name' in db # True
138 | 'color' in db # False
139 |
140 | db['temp_val'] = 'foo'
141 | del db['temp_val']
142 | print(db['temp_val'])  # raises a KeyError.
143 | ```
144 |
145 | Use `update()` for bulk-insert, and `multi_get()` for bulk-fetch. Unlike
146 | `__getitem__()`, calling `multi_get()` with a non-existent key will not raise
147 | an exception; instead, `None` is returned for that key.
148 |
149 | ```python
150 | db.update(k1='v1', k2='v2', k3='v3')
151 |
152 | for value in db.multi_get('k1', 'k3', 'kx'):
153 | print(value)
154 | # v1
155 | # v3
156 | # None
157 |
158 | result_dict = db.multi_get_dict(['k1', 'k3', 'kx'])
159 | # {'k1': 'v1', 'k3': 'v3'}
160 | ```
161 |
162 | ### Other dictionary methods
163 |
164 | Sophy databases also provide efficient implementations of `keys()`,
165 | `values()` and `items()`. Unlike dictionaries, however, iterating directly over
166 | a Sophy database will return the equivalent of `items()` (as opposed to
167 | just the keys):
168 |
169 | ```python
170 |
171 | db.update(k1='v1', k2='v2', k3='v3')
172 |
173 | list(db)
174 | # [('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3')]
175 |
176 |
177 | db.items()
178 | # same as above.
179 |
180 |
181 | db.keys()
182 | # ['k1', 'k2', 'k3']
183 |
184 |
185 | db.values()
186 | # ['v1', 'v2', 'v3']
187 | ```
188 |
189 | There are two ways to get the count of items in a database. You can use the
190 | `len()` function, which is not very efficient since it must allocate a cursor
191 | and iterate through the full database. An alternative is the `index_count`
192 | property, which may not be exact as it includes transactional duplicates and
193 | not-yet-merged duplicates.
194 |
195 | ```python
196 |
197 | print(len(db))
198 | # 4
199 |
200 | print(db.index_count)
201 | # 4
202 | ```
203 |
204 | ### Fetching ranges
205 |
206 | Because Sophia is an ordered data-store, performing ordered range scans is
207 | efficient. To retrieve a range of key-value pairs with Sophy, use the ordinary
208 | dictionary lookup with a `slice` instead.
209 |
210 | ```python
211 |
212 | db.update(k1='v1', k2='v2', k3='v3', k4='v4')
213 |
214 |
215 | # Slice key-ranges are inclusive:
216 | db['k1':'k3']
217 | # [('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3')]
218 |
219 |
220 | # Inexact matches are fine, too:
221 | db['k1.1':'k3.1']
222 | # [('k2', 'v2'), ('k3', 'v3')]
223 |
224 |
225 | # Leave the start or end empty to retrieve from the first/to the last key:
226 | db[:'k2']
227 | # [('k1', 'v1'), ('k2', 'v2')]
228 |
229 | db['k3':]
230 | # [('k3', 'v3'), ('k4', 'v4')]
231 |
232 |
233 | # To retrieve a range in reverse order, use the higher key first:
234 | db['k3':'k1']
235 | # [('k3', 'v3'), ('k2', 'v2'), ('k1', 'v1')]
236 | ```
237 |
238 | To retrieve a range in reverse order where the start or end is unspecified, you
239 | can pass in `True` as the `step` value of the slice to also indicate reverse:
240 |
241 | ```python
242 |
243 | db[:'k2':True]
244 | # [('k2', 'v2'), ('k1', 'v1')]
245 |
246 | db['k3'::True]
247 | # [('k4', 'v4'), ('k3', 'v3')]
248 |
249 | db[::True]
250 | # [('k4', 'v4'), ('k3', 'v3'), ('k2', 'v2'), ('k1', 'v1')]
251 | ```
252 |
253 | ### Cursors
254 |
255 | For finer-grained control over iteration, or to do prefix-matching, Sophy
256 | provides a cursor interface.
257 |
258 | The `cursor()` method accepts 5 parameters:
259 |
260 | * `order` (default=`>=`) -- semantics for matching the start key and ordering
261 | results.
262 | * `key` -- the start key
263 | * `prefix` -- search for prefix matches
264 | * `keys` -- (default=`True`) -- return keys while iterating
265 | * `values` -- (default=`True`) -- return values while iterating
266 |
267 | Suppose we were storing events in a database and were using an
268 | ISO-8601-formatted date-time as the key. Since ISO-8601 sorts
269 | lexicographically, we could retrieve events in correct order simply by
270 | iterating. To retrieve a particular slice of time, a prefix could be specified:
271 |
272 | ```python
273 |
274 | # Iterate over events for July, 2017:
275 | for timestamp, event_data in db.cursor(key='2017-07-01T00:00:00',
276 | prefix='2017-07-'):
277 | do_something()
278 | ```
279 |
280 | ### Transactions
281 |
282 | Sophia supports ACID transactions. Even better, a single transaction can cover
283 | operations to multiple databases in a given environment.
284 |
285 | Example usage:
286 |
287 | ```python
288 |
289 | account_balance = env.add_database('balance', ...)
290 | transaction_log = env.add_database('transaction_log', ...)
291 |
292 | # ...
293 |
294 | def transfer_funds(from_acct, to_acct, amount):
295 | with env.transaction() as txn:
296 | # To write to a database within a transaction, obtain a reference to
297 | # a wrapper object for the db:
298 | txn_acct_bal = txn[account_balance]
299 | txn_log = txn[transaction_log]
300 |
301 | # Transfer the asset by updating the respective balances. Note that we
302 | # are operating on the wrapper database, not the db instance.
303 | from_bal = txn_acct_bal[from_acct]
304 | txn_acct_bal[to_acct] = txn_acct_bal[to_acct] + amount
305 | txn_acct_bal[from_acct] = from_bal - amount
306 |
307 | # Log the transaction in the transaction_log database. Again, we use
308 | # the wrapper for the database:
309 | txn_log[from_acct, to_acct, get_timestamp()] = amount
310 | ```
311 |
312 | Multiple transactions are allowed to be open at the same time, but if there are
313 | conflicting changes, an exception will be thrown when attempting to commit the
314 | offending transaction:
315 |
316 | ```python
317 |
318 | # Create a basic k/v store. Schema.key_value() is a convenience/factory-method.
319 | kv = env.add_database('main', Schema.key_value())
320 |
321 | # ...
322 |
323 | # Instead of using the context manager, we'll call begin() explicitly so we
324 | # can show the interaction of 2 open transactions.
325 | txn = env.transaction().begin()
326 |
327 | t_kv = txn[kv]
328 | t_kv['k1'] = 'v1'
329 |
330 | txn2 = env.transaction().begin()
331 | t2_kv = txn2[kv]
332 |
333 | t2_kv['k1'] = 'v1-x'
334 |
335 | txn2.commit() # ERROR !!
336 | # SophiaError('txn is not finished, waiting for concurrent txn to finish.')
337 |
338 | txn.commit() # OK
339 |
340 | # Try again?
341 | txn2.commit() # ERROR !!
342 | # SophiaError('transaction rolled back by another concurrent transaction.')
343 | ```
344 |
345 | ## Index types, multi-field keys and values
346 |
347 | Sophia supports multi-field keys and values. Additionally, the individual
348 | fields can have different data-types. Sophy provides the following field
349 | types:
350 |
351 | * `StringIndex` - stores UTF8-encoded strings, e.g. text.
352 | * `BytesIndex` - stores bytestrings, e.g. binary data.
353 | * `JsonIndex` - stores arbitrary objects as UTF8-encoded JSON data.
354 | * `MsgPackIndex` - stores arbitrary objects using `msgpack` serialization.
355 | * `PickleIndex` - stores arbitrary objects using Python `pickle` library.
356 | * `UUIDIndex` - stores UUIDs.
357 | * `U64Index` and reversed, `U64RevIndex`
358 | * `U32Index` and reversed, `U32RevIndex`
359 | * `U16Index` and reversed, `U16RevIndex`
360 | * `U8Index` and reversed, `U8RevIndex`
361 | * `SerializedIndex` - which is basically a `BytesIndex` that accepts two
362 | functions: one for serializing the value to the db, and another for
363 | deserializing.
364 |
365 | To store arbitrary data encoded using msgpack, you could use `MsgPackIndex`:
366 |
367 | ```python
368 |
369 | schema = Schema(StringIndex('key'), MsgPackIndex('value'))
370 | db = sophia_env.add_database('main', schema)
371 | ```
372 |
373 | To declare a database with a multi-field key or value, you will pass the
374 | individual fields as arguments when constructing the `Schema` object. To
375 | initialize a schema where the key is composed of two strings and a 64-bit
376 | unsigned integer, and the value is composed of a string, you would write:
377 |
378 | ```python
379 |
380 | key = [StringIndex('last_name'), StringIndex('first_name'), U64Index('area_code')]
381 | value = [StringIndex('address_data')]
382 | schema = Schema(key_parts=key, value_parts=value)
383 |
384 | address_book = sophia_env.add_database('address_book', schema)
385 | ```
386 |
387 | To store data, we use the same dictionary methods as usual, just passing tuples
388 | instead of individual values:
389 |
390 | ```python
391 | sophia_env.open()
392 |
393 | address_book['kitty', 'huey', 66604] = '123 Meow St'
394 | address_book['puppy', 'mickey', 66604] = '1337 Woof-woof Court'
395 | ```
396 |
397 | To retrieve our data:
398 |
399 | ```python
400 | huey_address = address_book['kitty', 'huey', 66604]
401 | ```
402 |
403 | To delete a row:
404 |
405 | ```python
406 | del address_book['puppy', 'mickey', 66604]
407 | ```
408 |
409 | Indexing and slicing works as you would expect.
410 |
411 | **Note:** when working with a multi-part value, a tuple containing the value
412 | components will be returned. When working with a scalar value, instead of
413 | returning a 1-item tuple, the value itself is returned.
414 |
415 | ## Configuring and Administering Sophia
416 |
417 | Sophia can be configured using special properties on the `Sophia` and
418 | `Database` objects. Refer to the [configuration
419 | document](http://sophia.systems/v2.2/conf/sophia.html) for the details on the
420 | available options, including whether they are read-only, and the expected
421 | data-type.
422 |
423 | For example, to query Sophia's status, you can use the `status` property, which
424 | is a readonly setting returning a string:
425 |
426 | ```python
427 | print(env.status)
428 | "online"
429 | ```
430 |
431 | Other properties can be changed by assigning a new value to the property. For
432 | example, to read and then increase the number of threads used by the scheduler:
433 |
434 | ```python
435 | nthreads = env.scheduler_threads
436 | env.scheduler_threads = nthreads + 2
437 | ```
438 |
439 | Database-specific properties are available as well. For example to get the
440 | number of GET and SET operations performed on a database, you would write:
441 |
442 | ```python
443 | print(db.stat_get, 'get operations')
444 | print(db.stat_set, 'set operations')
445 | ```
446 |
447 | Refer to the [documentation](http://sophia.systems/v2.2/conf/sophia.html) for
448 | complete lists of settings. Dotted-paths are translated into
449 | underscore-separated attributes.
450 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = sophy
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. _api:
2 |
3 | Sophy API
4 | =========
5 |
6 | .. py:class:: SophiaError
7 |
8 | General exception class used to indicate error returned by Sophia database.
9 |
10 | Environment
11 | -----------
12 |
13 | .. py:class:: Sophia(path)
14 |
15 | :param str path: Directory path to store environment and databases.
16 |
17 | Environment object providing access to databases and for controlling
18 | transactions.
19 |
20 | Example of creating environment, attaching a database and reading/writing
21 | data:
22 |
23 | .. code-block:: python
24 |
25 | from sophy import *
26 |
27 |
28 | # Environment for managing one or more databases.
29 | env = Sophia('/tmp/sophia-test')
30 |
31 | # Schema describes the indexes that comprise the key and value portions
32 | # of a database.
33 | kv_schema = Schema([StringIndex('key')], [StringIndex('value')])
34 | db = env.add_database('kv', kv_schema)
35 |
36 | # We need to open the env after configuring the database(s), in order
37 | # to read/write data.
38 | assert env.open(), 'Failed to open environment!'
39 |
40 | # We can use dict-style APIs to read/write key/value pairs.
41 | db['k1'] = 'v1'
42 | assert db['k1'] == 'v1'
43 |
44 | # Close the env when finished.
45 | assert env.close(), 'Failed to close environment!'
46 |
47 | .. py:method:: open()
48 |
49 | :return: Boolean indicating success.
50 |
51 | Open the environment. The environment must be opened in order to read
52 | and write data to the configured databases.
53 |
54 | .. py:method:: close()
55 |
56 | :return: Boolean indicating success.
57 |
58 | Close the environment.
59 |
60 | .. py:method:: add_database(name, schema)
61 |
62 | :param str name: database name
63 | :param Schema schema: schema for keys and values.
64 | :return: a database instance
65 | :rtype: :py:class:`Database`
66 |
67 | Add or declare a database. Environment must be closed to add databases.
68 | The :py:class:`Schema` will declare the data-types and structure of the
69 | key- and value-portion of the database.
70 |
71 | .. code-block:: python
72 |
73 | env = Sophia('/path/to/db-env')
74 |
75 | # Declare an events database with a multi-part key (ts, type) and
76 | # a msgpack-serialized data field.
77 | events_schema = Schema(
78 | key_parts=[U64Index('timestamp'), StringIndex('type')],
79 | value_parts=[MsgPackIndex('data')])
80 | db = env.add_database('events', events_schema)
81 |
82 | # Open the environment for read/write access to the database.
83 | env.open()
84 |
85 | # We can now write to the database.
86 | db[current_time(), 'init'] = {'msg': 'event logging initialized'}
87 |
88 | .. py:method:: remove_database(name)
89 |
90 | :param str name: database name
91 |
92 | Remove a database from the environment. Environment must be closed to
93 | remove databases. This method does not really have any practical value
94 | but is provided for consistency.
95 |
96 | .. py:method:: get_database(name)
97 |
98 | :return: the database corresponding to the provided name
99 | :rtype: :py:class:`Database`
100 |
101 | Obtain a reference to the given database, provided the database has
102 | been added to the environment by a previous call to
103 | :py:meth:`~Sophia.add_database`.
104 |
105 | .. py:method:: __getitem__(name)
106 |
107 | Short-hand for :py:meth:`~Sophia.get_database`.
108 |
109 | .. py:method:: transaction()
110 |
111 | :return: a transaction handle.
112 | :rtype: :py:class:`Transaction`
113 |
114 | Create a transaction handle which can be used to execute a transaction
115 | on the databases in the environment. The returned transaction can be
116 | used as a context-manager.
117 |
118 | Example:
119 |
120 | .. code-block:: python
121 |
122 | env = Sophia('/tmp/sophia-test')
123 | db = env.add_database('test', Schema.key_value())
124 | env.open()
125 |
126 | with env.transaction() as txn:
127 | t_db = txn[db]
128 | t_db['k1'] = 'v1'
129 | t_db.update(k2='v2', k3='v3')
130 |
131 | # Transaction has been committed.
132 | print(db['k1'], db['k3']) # prints "v1", "v3"
133 |
134 | See :py:class:`Transaction` for more information.
135 |
136 |
137 | Database
138 | --------
139 |
140 | .. py:class:: Database()
141 |
142 | Database interface. This object is not created directly, but references can
143 | be obtained via :py:meth:`Sophia.add_database` or :py:meth:`Sophia.get_database`.
144 |
145 | For example:
146 |
147 | .. code-block:: python
148 |
149 | env = Sophia('/path/to/data')
150 |
151 | kv_schema = Schema(StringIndex('key'), MsgPackIndex('value'))
152 | kv_db = env.add_database('kv', kv_schema)
153 |
154 | # Another reference to "kv_db":
155 | kv_db = env.get_database('kv')
156 |
157 | # Same as above:
158 | kv_db = env['kv']
159 |
160 | .. py:method:: set(key, value)
161 |
162 | :param key: key corresponding to schema (e.g. scalar or tuple).
163 | :param value: value corresponding to schema (e.g. scalar or tuple).
164 | :return: No return value.
165 |
166 | Store the value at the given key. For single-index keys or values, a
167 | scalar value may be provided as the key or value. If a composite or
168 | multi-index key or value is used, then a ``tuple`` must be provided.
169 |
170 | Examples:
171 |
172 | .. code-block:: python
173 |
174 | simple = Schema(StringIndex('key'), StringIndex('value'))
175 | simple_db = env.add_database('simple', simple)
176 |
177 | composite = Schema(
178 | [U64Index('timestamp'), StringIndex('type')],
179 | [MsgPackIndex('data')])
180 | composite_db = env.add_database('composite', composite)
181 |
182 | env.open() # Open env to access databases.
183 |
184 | # Set k1=v1 in the simple key/value database.
185 | simple_db.set('k1', 'v1')
186 |
187 | # Set new value in composite db. Note the key is a tuple and, since
188 | # the value is serialized using msgpack, we can transparently store
189 | # data-types like dicts.
190 | composite_db.set((current_time(), 'evt_type'), {'msg': 'foo'})
191 |
192 | .. py:method:: get(key[, default=None])
193 |
194 | :param key: key corresponding to schema (e.g. scalar or tuple).
195 | :param default: default value if key does not exist.
196 | :return: value of given key or default value.
197 |
198 | Get the value at the given key. If the key does not exist, the default
199 | value is returned.
200 |
201 | If a multi-part key is defined for the given database, the key must be
202 | a tuple.
203 |
204 | Example:
205 |
206 | .. code-block:: python
207 |
208 | simple_db.set('k1', 'v1')
209 | simple_db.get('k1') # Returns "v1".
210 |
211 | simple_db.get('not-here') # Returns None.
212 |
213 | .. py:method:: delete(key)
214 |
215 | :param key: key corresponding to schema (e.g. scalar or tuple).
216 | :return: No return value
217 |
218 | Delete the given key, if it exists. If a multi-part key is defined for
219 | the given database, the key must be a tuple.
220 |
221 | Example:
222 |
223 | .. code-block:: python
224 |
225 | simple_db.set('k1', 'v1')
226 | simple_db.delete('k1') # Deletes "k1" from database.
227 |
228 | simple_db.exists('k1') # False.
229 |
230 | .. py:method:: exists(key)
231 |
232 | :param key: key corresponding to schema (e.g. scalar or tuple).
233 | :return: Boolean indicating if key exists.
234 | :rtype: bool
235 |
236 | Return whether the given key exists. If a multi-part key is defined for
237 | the given database, the key must be a tuple.
238 |
239 | .. py:method:: multi_set([__data=None[, **kwargs]])
240 |
241 | :param dict __data: Dictionary of key/value pairs to set.
242 | :param kwargs: Specify key/value pairs as keyword-arguments.
243 | :return: No return value
244 |
245 | Set multiple key/value pairs efficiently.
246 |
247 | .. py:method:: multi_get(*keys)
248 |
249 | :param keys: key(s) to retrieve
250 | :return: a list of values associated with the given keys. If a key does
251 | not exist a ``None`` will be indicated for the value.
252 | :rtype: list
253 |
254 | Get multiple values efficiently. Returned as a list of values
255 | corresponding to the ``keys`` argument, with missing values as
256 | ``None``.
257 |
258 | Example:
259 |
260 | .. code-block:: python
261 |
262 | db.update(k1='v1', k2='v2', k3='v3')
263 | db.multi_get('k1', 'k3', 'k-nothere')
264 | # ['v1', 'v3', None]
265 |
266 | .. py:method:: multi_get_dict(keys)
267 |
268 | :param list keys: list of keys to get
269 |         :return: a dict mapping each key to its value. Keys that do not
270 |             exist are omitted from the returned dict.
271 |         :rtype: dict
272 |
273 | Get multiple values efficiently. Returned as a dict of key/value pairs.
274 | Missing values are not represented in the returned dict.
275 |
276 | Example:
277 |
278 | .. code-block:: python
279 |
280 | db.update(k1='v1', k2='v2', k3='v3')
281 | db.multi_get_dict(['k1', 'k3', 'k-nothere'])
282 | # {'k1': 'v1', 'k3': 'v3'}
283 |
284 | .. py:method:: multi_delete(*keys)
285 |
286 | :param keys: key(s) to delete
287 | :return: No return value
288 |
289 | Efficiently delete multiple keys.
290 |
291 | .. py:method:: get_range(start=None, stop=None, reverse=False)
292 |
293 | :param start: start key (omit to start at first record).
294 | :param stop: stop key (omit to stop at the last record).
295 | :param bool reverse: return range in reverse.
296 | :return: a generator that yields the requested key/value pairs.
297 |
298 | Fetch a range of key/value pairs from the given start-key, up-to and
299 | including the stop-key (if given).
300 |
301 | .. py:method:: keys()
302 |
303 | Return a cursor for iterating over the keys in the database.
304 |
305 | .. py:method:: values()
306 |
307 | Return a cursor for iterating over the values in the database.
308 |
309 | .. py:method:: items()
310 |
311 | Return a cursor for iterating over the key/value pairs in the database.
312 |
313 | .. py:method:: __getitem__(key_or_slice)
314 |
315 | :param key_or_slice: key or range of keys to retrieve.
316 | :return: value of given key, or an iterator over the range of keys.
317 | :raises: KeyError if single key requested and does not exist.
318 |
319 | Retrieve a single value or a range of values, depending on whether the
320 | key represents a single row or a slice of rows.
321 |
322 | Additionally, if a slice is given, the start and stop values can be
323 | omitted to indicate you wish to start from the first or last key,
324 | respectively.
325 |
326 | .. py:method:: __setitem__(key, value)
327 |
328 | Equivalent to :py:meth:`~Database.set`.
329 |
330 | .. py:method:: __delitem__(key)
331 |
332 | Equivalent to :py:meth:`~Database.delete`.
333 |
334 | .. py:method:: __contains__(key)
335 |
336 | Equivalent to :py:meth:`~Database.exists`.
337 |
338 | .. py:method:: __iter__()
339 |
340 | Equivalent to :py:meth:`~Database.items`.
341 |
342 | .. py:method:: __len__()
343 |
344 | Equivalent to iterating over all keys and returning count. This is the
345 | most accurate way to get the total number of keys, but is not very
346 | efficient. An alternative is to use the :py:attr:`Database.index_count`
347 | property, which returns an approximation of the number of keys in the
348 | database.
349 |
350 | .. py:method:: cursor(order='>=', key=None, prefix=None, keys=True, values=True)
351 |
352 | :param str order: ordering semantics (default is ">=")
353 | :param key: key to seek to before iterating.
354 | :param prefix: string prefix to match.
355 | :param bool keys: return keys when iterating.
356 | :param bool values: return values when iterating.
357 |
358 | Create a cursor with the given semantics. Typically you will want both
359 | ``keys=True`` and ``values=True`` (the defaults), which will cause the
360 | cursor to yield a 2-tuple consisting of ``(key, value)`` during
361 | iteration.
362 |
363 |
364 | Transaction
365 | -----------
366 |
367 | .. py:class:: Transaction()
368 |
369 | Transaction handle, used for executing one or more operations atomically.
370 | This class is not created directly - use :py:meth:`Sophia.transaction`.
371 |
372 | The transaction can be used as a context-manager. To read or write during a
373 | transaction, you should obtain a transaction-specific handle to the
374 | database you are operating on.
375 |
376 | Example:
377 |
378 | .. code-block:: python
379 |
380 | env = Sophia('/tmp/my-env')
381 | db = env.add_database('kv', Schema.key_value())
382 | env.open()
383 |
384 | with env.transaction() as txn:
385 | tdb = txn[db] # Obtain reference to "db" in the transaction.
386 | tdb['k1'] = 'v1'
387 | tdb.update(k2='v2', k3='v3')
388 |
389 | # At the end of the wrapped block, the transaction is committed.
390 | # The writes have been recorded:
391 | print(db['k1'], db['k3'])
392 | # ('v1', 'v3')
393 |
394 | .. py:method:: begin()
395 |
396 | Begin a transaction.
397 |
398 | .. py:method:: commit()
399 |
400 | :raises: SophiaError
401 |
402 | Commit all changes. An exception can occur if:
403 |
404 | 1. The transaction was rolled back, either explicitly or implicitly due
405 | to conflicting changes having been committed by a different
406 | transaction. **Not recoverable**.
407 | 2. A concurrent transaction is open and must be committed before this
408 | transaction can commit. **Possibly recoverable**.
409 |
410 | .. py:method:: rollback()
411 |
412 | Roll-back any changes made in the transaction.
413 |
414 | .. py:method:: __getitem__(db)
415 |
416 | :param Database db: database to reference during transaction
417 | :return: special database-handle for use in transaction
418 | :rtype: :py:class:`DatabaseTransaction`
419 |
420 | Obtain a reference to the database for use within the transaction. This
421 | object supports the same APIs as :py:class:`Database`, but any reads or
422 | writes will be made within the context of the transaction.
423 |
424 |
425 | Schema Definition
426 | -----------------
427 |
428 | .. py:class:: Schema(key_parts, value_parts)
429 |
430 | :param list key_parts: a list of ``Index`` objects (or a single index
431 | object) to use as the key of the database.
432 | :param list value_parts: a list of ``Index`` objects (or a single index
433 | object) to use for the values stored in the database.
434 |
435 | The schema defines the structure of the keys and values for a given
436 | :py:class:`Database`. They can be comprised of a single index-type or
437 | multiple indexes for composite keys or values.
438 |
439 | Example:
440 |
441 | .. code-block:: python
442 |
443 | # Simple schema defining text keys and values.
444 | simple = Schema(StringIndex('key'), StringIndex('value'))
445 |
446 | # Schema with composite key for storing timestamps and event-types,
447 | # along with msgpack-serialized data as the value.
448 | event_schema = Schema(
449 | [U64Index('timestamp'), StringIndex('type')],
450 | [MsgPackIndex('value')])
451 |
452 | Schemas are used when adding databases using the
453 | :py:meth:`Sophia.add_database` method.
454 |
455 | .. py:method:: add_key(index)
456 |
457 | :param BaseIndex index: an index object to add to the key parts.
458 |
459 | Add an index to the key. Allows :py:class:`Schema` to be built-up
460 | programmatically.
461 |
462 | .. py:method:: add_value(index)
463 |
464 | :param BaseIndex index: an index object to add to the value parts.
465 |
466 | Add an index to the value. Allows :py:class:`Schema` to be built-up
467 | programmatically.
468 |
469 | .. py:classmethod:: key_value()
470 |
471 | Short-hand for creating a simple text schema consisting of a single
472 | :py:class:`StringIndex` for both the key and the value.
473 |
474 |
475 | .. py:class:: BaseIndex(name)
476 |
477 | :param str name: Name for the key- or value-part the index represents.
478 |
479 | Indexes are used to define the key and value portions of a
480 | :py:class:`Schema`. Traditional key/value databases typically only
481 | supported a single-value, single-datatype key and value (usually bytes).
482 | Sophia is different in that keys or values can be comprised of multiple
483 | parts with differing data-types.
484 |
485 | For example, to emulate a typical key/value store:
486 |
487 | .. code-block:: python
488 |
489 | schema = Schema([BytesIndex('key')], [BytesIndex('value')])
490 | db = env.add_database('old_school', schema)
491 |
492 | Suppose we are storing time-series event logs. We could use a 64-bit
493 | integer for the timestamp (in micro-seconds) as well as a key to denote
494 | the event-type. The value could be arbitrary msgpack-encoded data:
495 |
496 | .. code-block:: python
497 |
498 | key = [U64Index('timestamp'), StringIndex('type')]
499 | value = [MsgPackIndex('value')]
500 | events = env.add_database('events', Schema(key, value))
501 |
502 | .. py:class:: SerializedIndex(name, serialize, deserialize)
503 |
504 | :param str name: Name for the key- or value-part the index represents.
505 | :param serialize: a callable that accepts data and returns bytes.
506 | :param deserialize: a callable that accepts bytes and deserializes the data.
507 |
508 | The :py:class:`SerializedIndex` can be used to transparently store data as
509 | bytestrings. For example, you could use a library like ``msgpack`` or
510 | ``pickle`` to transparently store and retrieve Python objects in the
511 | database:
512 |
513 | .. code-block:: python
514 |
515 | key = StringIndex('key')
516 | value = SerializedIndex('value', pickle.dumps, pickle.loads)
517 | pickled_db = env.add_database('data', Schema([key], [value]))
518 |
519 | **Note**: ``sophy`` already provides indexes for :py:class:`JsonIndex`,
520 | :py:class:`MsgPackIndex` and :py:class:`PickleIndex`.
521 |
522 | .. py:class:: BytesIndex(name)
523 |
524 | Store arbitrary binary data in the database.
525 |
526 | .. py:class:: StringIndex(name)
527 |
528 | Store text data in the database as UTF8-encoded bytestrings. When reading
529 | from a :py:class:`StringIndex`, data is decoded and returned as unicode.
530 |
531 | .. py:class:: JsonIndex(name)
532 |
533 | Store data as UTF8-encoded JSON. Python objects will be transparently
534 | serialized and deserialized when writing and reading, respectively.
535 |
536 | .. py:class:: MsgPackIndex(name)
537 |
538 | Store data using the msgpack serialization format. Python objects will
539 | be transparently serialized and deserialized when writing and reading.
540 |
541 | **Note**: Requires the ``msgpack-python`` library.
542 |
543 | .. py:class:: PickleIndex(name)
544 |
545 | Store data using Python's pickle serialization format. Python objects will
546 | be transparently serialized and deserialized when writing and reading.
547 |
548 | .. py:class:: UUIDIndex(name)
549 |
550 | Store UUIDs. Python ``uuid.UUID()`` objects will be stored as raw bytes and
551 | decoded to ``uuid.UUID()`` instances upon retrieval.
552 |
553 | .. py:class:: U64Index(name)
554 | .. py:class:: U32Index(name)
555 | .. py:class:: U16Index(name)
556 | .. py:class:: U8Index(name)
557 |
558 | Store unsigned integers of the given sizes.
559 |
560 | .. py:class:: U64RevIndex(name)
561 | .. py:class:: U32RevIndex(name)
562 | .. py:class:: U16RevIndex(name)
563 | .. py:class:: U8RevIndex(name)
564 |
565 | Store unsigned integers of the given sizes in reverse order.
566 |
567 |
568 | Cursor
569 | ------
570 |
571 | .. py:class:: Cursor()
572 |
573 | Cursor handle for a :py:class:`Database`. This object is not created
574 | directly but through the :py:meth:`Database.cursor` method or one of the
575 | database methods that returns a row iterator (e.g.
576 | :py:meth:`Database.items`).
577 |
578 | Cursors are iterable and, depending how they were configured, can return
579 | keys, values or key/value pairs.
580 |
581 | .. _settings:
582 |
583 | Settings
584 | --------
585 |
586 | Sophia supports a wide range of settings and configuration options. These
587 | settings are also documented in the `Sophia documentation <http://sophia.systems>`_.
588 |
589 | Environment settings
590 | ^^^^^^^^^^^^^^^^^^^^
591 |
592 | The following settings are available as properties on :py:class:`Sophia`:
593 |
594 | =============================== ============= ================================================
595 | Setting Type Description
596 | =============================== ============= ================================================
597 | version string, ro Get current Sophia version
598 | version_storage string, ro Get current Sophia storage version
599 | build string, ro Get git commit hash of build
600 | status string, ro Get environment status (eg online)
601 | errors int, ro Get number of errors
602 | **error** string, ro Get last error description
603 | path string, ro Get current Sophia environment directory
604 | ------------------------------- ------------- ------------------------------------------------
605 | **Backups**
606 | ------------------------------- ------------- ------------------------------------------------
607 | **backup_path** string Set backup path
608 | **backup_run** method Start backup in background (non-blocking)
609 | backup_active int, ro Show if backup is running
610 | backup_last int, ro Show ID of last-completed backup
611 | backup_last_complete int, ro Show if last backup succeeded
612 | ------------------------------- ------------- ------------------------------------------------
613 | **Scheduler**
614 | ------------------------------- ------------- ------------------------------------------------
615 | scheduler_threads int Get or set number of worker threads
616 | scheduler_trace(thread_id) method Get a worker trace for given thread
617 | ------------------------------- ------------- ------------------------------------------------
618 | **Transaction Manager**
619 | ------------------------------- ------------- ------------------------------------------------
620 | transaction_online_rw int, ro Number of active read/write transactions
621 | transaction_online_ro int, ro Number of active read-only transactions
622 | transaction_commit int, ro Total number of completed transactions
623 | transaction_rollback int, ro Total number of transaction rollbacks
624 | transaction_conflict int, ro Total number of transaction conflicts
625 | transaction_lock int, ro Total number of transaction locks
626 | transaction_latency string, ro Average transaction latency from start to end
627 | transaction_log string, ro Average transaction log length
628 | transaction_vlsn int, ro Current VLSN
629 | transaction_gc int, ro SSI GC queue size
630 | ------------------------------- ------------- ------------------------------------------------
631 | **Metrics**
632 | ------------------------------- ------------- ------------------------------------------------
633 | metric_lsn int, ro Current log sequential number
634 | metric_tsn int, ro Current transaction sequential number
635 | metric_nsn int, ro Current node sequential number
636 | metric_dsn int, ro Current database sequential number
637 | metric_bsn int, ro Current backup sequential number
638 | metric_lfsn int, ro Current log file sequential number
639 | ------------------------------- ------------- ------------------------------------------------
640 | **Write-ahead Log**
641 | ------------------------------- ------------- ------------------------------------------------
642 | log_enable int Enable or disable transaction log
643 | log_path string Get or set folder for log directory
644 | log_sync int Sync transaction log on every commit
645 | log_rotate_wm int Create a new log after "rotate_wm" updates
646 | log_rotate_sync int Sync log file on every rotation
647 | log_rotate method Force Sophia to rotate log file
648 | log_gc method Force Sophia to garbage-collect log file pool
649 | log_files int, ro Number of log files in the pool
650 | =============================== ============= ================================================
651 |
652 | Database settings
653 | ^^^^^^^^^^^^^^^^^
654 |
655 | The following settings are available as properties on :py:class:`Database`. By
656 | default, Sophia uses ``pread(2)`` to read from disk. When ``mmap``-mode is on
657 | (by default), Sophia handles all requests by directly accessing memory-mapped
658 | node files.
659 |
660 | =============================== ============= ===================================================
661 | Setting Type Description
662 | =============================== ============= ===================================================
663 | database_name string, ro Get database name
664 | database_id int, ro Database sequential ID
665 | database_path string, ro Directory for storing data
666 | **mmap** int Enable or disable mmap-mode
667 | direct_io int Enable or disable ``O_DIRECT`` mode.
668 | **sync** int Sync node file on compaction completion
669 | expire int Enable or disable key expiration
670 | **compression** string Specify compression type: lz4, zstd, none (default)
671 | limit_key int, ro Scheme key size limit
672 | limit_field int Scheme field size limit
673 | ------------------------------- ------------- ---------------------------------------------------
674 | **Index**
675 | ------------------------------- ------------- ---------------------------------------------------
676 | index_memory_used int, ro Memory used by database for in-memory key indexes
677 | index_size int, ro Sum of nodes size in bytes (e.g. database size)
678 | index_size_uncompressed int, ro Full database size before compression
679 | **index_count** int, ro Total number of keys in db, includes unmerged dupes
680 | index_count_dup int, ro Total number of transactional duplicates
681 | index_read_disk int, ro Number of disk reads since start
682 | index_read_cache int, ro Number of cache reads since start
683 | index_node_count int, ro Number of active nodes
684 | index_page_count int, ro Total number of pages
685 | ------------------------------- ------------- ---------------------------------------------------
686 | **Compaction**
687 | ------------------------------- ------------- ---------------------------------------------------
688 | **compaction_cache** int Total write cache size used for compaction
689 | compaction_checkpoint int
690 | compaction_node_size int Set a node file size in bytes.
691 | compaction_page_size int Set size of page
692 | compaction_page_checksum int Validate checksum during compaction
693 | compaction_expire_period int Run expire check process every ``N`` seconds
694 | compaction_gc_wm int GC starts when watermark value reaches ``N`` dupes
695 | compaction_gc_period int Check for a gc every ``N`` seconds
696 | ------------------------------- ------------- ---------------------------------------------------
697 | **Performance**
698 | ------------------------------- ------------- ---------------------------------------------------
699 | stat_documents_used int, ro Memory used by allocated document
700 | stat_documents int, ro Number of currently allocated documents
701 | stat_field string, ro Average field size
702 | stat_set int, ro Total number of Set operations
703 | stat_set_latency string, ro Average Set latency
704 | stat_delete int, ro Total number of Delete operations
705 | stat_delete_latency string, ro Average Delete latency
706 | stat_get int, ro Total number of Get operations
707 | stat_get_latency string, ro Average Get latency
708 | stat_get_read_disk string, ro Average disk reads by Get operation
709 | stat_get_read_cache string, ro Average cache reads by Get operation
710 | stat_pread int, ro Total number of pread operations
711 | stat_pread_latency string, ro Average pread latency
712 | stat_cursor int, ro Total number of cursor operations
713 | stat_cursor_latency string, ro Average cursor latency
714 | stat_cursor_read_disk string, ro Average disk reads by Cursor operation
715 | stat_cursor_read_cache string, ro Average cache reads by Cursor operation
716 | stat_cursor_ops                 string, ro    Average number of keys read by Cursor operation
717 | ------------------------------- ------------- ---------------------------------------------------
718 | **Scheduler**
719 | ------------------------------- ------------- ---------------------------------------------------
720 | scheduler_gc int, ro Show if GC operation is in progress
721 | scheduler_expire int, ro Show if expire operation is in progress
722 | scheduler_backup int, ro Show if backup operation is in progress
723 | scheduler_checkpoint int, ro
724 | =============================== ============= ===================================================
725 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | # import os
16 | # import sys
17 | # sys.path.insert(0, os.path.abspath('.'))
18 |
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = 'sophy'
23 | copyright = '2018, Charles Leifer'
24 | author = 'Charles Leifer'
25 |
26 | # The short X.Y version
27 | version = ''
28 | # The full version, including alpha/beta/rc tags
29 | release = '0.4.2'
30 |
31 |
32 | # -- General configuration ---------------------------------------------------
33 |
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 |
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 | ]
43 |
44 | # Add any paths that contain templates here, relative to this directory.
45 | templates_path = ['_templates']
46 |
47 | # The suffix(es) of source filenames.
48 | # You can specify multiple suffix as a list of string:
49 | #
50 | # source_suffix = ['.rst', '.md']
51 | source_suffix = '.rst'
52 |
53 | # The master toctree document.
54 | master_doc = 'index'
55 |
56 | # The language for content autogenerated by Sphinx. Refer to documentation
57 | # for a list of supported languages.
58 | #
59 | # This is also used if you do content translation via gettext catalogs.
60 | # Usually you set "language" from the command line for these cases.
61 | language = None
62 |
63 | # List of patterns, relative to source directory, that match files and
64 | # directories to ignore when looking for source files.
65 | # This pattern also affects html_static_path and html_extra_path .
66 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
67 |
68 | # The name of the Pygments (syntax highlighting) style to use.
69 | pygments_style = 'sphinx'
70 |
71 |
72 | # -- Options for HTML output -------------------------------------------------
73 |
74 | # The theme to use for HTML and HTML Help pages. See the documentation for
75 | # a list of builtin themes.
76 | #
77 | html_theme = 'alabaster'
78 |
79 | # Theme options are theme-specific and customize the look and feel of a theme
80 | # further. For a list of options available for each theme, see the
81 | # documentation.
82 | #
83 | # html_theme_options = {}
84 |
85 | # Add any paths that contain custom static files (such as style sheets) here,
86 | # relative to this directory. They are copied after the builtin static files,
87 | # so a file named "default.css" will overwrite the builtin "default.css".
88 | html_static_path = ['_static']
89 |
90 | # Custom sidebar templates, must be a dictionary that maps document names
91 | # to template names.
92 | #
93 | # The default sidebars (for documents that don't match any pattern) are
94 | # defined by theme itself. Builtin themes are using these templates by
95 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
96 | # 'searchbox.html']``.
97 | #
98 | # html_sidebars = {}
99 |
100 |
101 | # -- Options for HTMLHelp output ---------------------------------------------
102 |
103 | # Output file base name for HTML help builder.
104 | htmlhelp_basename = 'sophydoc'
105 |
106 |
107 | # -- Options for LaTeX output ------------------------------------------------
108 |
109 | latex_elements = {
110 | # The paper size ('letterpaper' or 'a4paper').
111 | #
112 | # 'papersize': 'letterpaper',
113 |
114 | # The font size ('10pt', '11pt' or '12pt').
115 | #
116 | # 'pointsize': '10pt',
117 |
118 | # Additional stuff for the LaTeX preamble.
119 | #
120 | # 'preamble': '',
121 |
122 | # Latex figure (float) alignment
123 | #
124 | # 'figure_align': 'htbp',
125 | }
126 |
127 | # Grouping the document tree into LaTeX files. List of tuples
128 | # (source start file, target name, title,
129 | # author, documentclass [howto, manual, or own class]).
130 | latex_documents = [
131 | (master_doc, 'sophy.tex', 'sophy Documentation',
132 | 'Charles Leifer', 'manual'),
133 | ]
134 |
135 |
136 | # -- Options for manual page output ------------------------------------------
137 |
138 | # One entry per manual page. List of tuples
139 | # (source start file, name, description, authors, manual section).
140 | man_pages = [
141 | (master_doc, 'sophy', 'sophy Documentation',
142 | [author], 1)
143 | ]
144 |
145 |
146 | # -- Options for Texinfo output ----------------------------------------------
147 |
148 | # Grouping the document tree into Texinfo files. List of tuples
149 | # (source start file, target name, title, author,
150 | # dir menu entry, description, category)
151 | texinfo_documents = [
152 | (master_doc, 'sophy', 'sophy Documentation',
153 | author, 'sophy', 'One line description of project.',
154 | 'Miscellaneous'),
155 | ]
156 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. sophy documentation master file, created by
2 | sphinx-quickstart on Sun Apr 22 20:55:15 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | .. image:: http://media.charlesleifer.com/blog/photos/sophia-logo.png
7 | :target: http://sophia.systems
8 | :alt: sophia database
9 |
10 | sophy
11 | =====
12 |
13 | Python binding for `sophia <http://sophia.systems>`_ embedded database, v2.2.
14 |
15 | * Written in Cython for speed and low-overhead
16 | * Clean, memorable APIs
17 | * Comprehensive support for Sophia's features
18 | * Supports Python 2 and 3.
19 | * No 3rd-party dependencies besides Cython (for building).
20 |
21 | About Sophia:
22 |
23 | * Ordered key/value store
24 | * Keys and values can be composed of multiple fields of differing data-types
25 | * ACID transactions
26 | * MVCC, optimistic, non-blocking concurrency with multiple readers and writers.
27 | * Multiple databases per environment
28 | * Multiple- and single-statement transactions across databases
29 | * Prefix searches
30 | * Automatic garbage collection and key expiration
31 | * Hot backup
32 | * Compression
33 | * Multi-threaded compaction
34 | * ``mmap`` support, direct I/O support
35 | * APIs for variety of statistics on storage engine internals
36 | * BSD licensed
37 |
38 | Some ideas of where Sophia might be a good fit:
39 |
40 | * Running on application servers, low-latency / high-throughput
41 | * Time-series
42 | * Analytics / Events / Logging
43 | * Full-text search
44 | * Secondary-index for external data-store
45 |
46 | Limitations:
47 |
48 | * Not tested on Windoze.
49 |
50 | If you encounter any bugs in the library, please `open an issue <https://github.com/coleifer/sophy/issues>`_,
51 | including a description of the bug and any related traceback.
52 |
53 | .. image:: http://media.charlesleifer.com/blog/photos/sophy-logo.png
54 | :alt: Sophy logo
55 |
56 | .. toctree::
57 | :maxdepth: 2
58 | :caption: Contents:
59 | :glob:
60 |
61 | installation
62 | quickstart
63 | api
64 |
65 |
66 |
67 | Indices and tables
68 | ==================
69 |
70 | * :ref:`genindex`
71 | * :ref:`modindex`
72 | * :ref:`search`
73 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | .. _installation:
2 |
3 | Installing
4 | ==========
5 |
6 | Up-to-date source code for `sophia <http://sophia.systems>`_ is bundled with
7 | the ``sophy`` source code, so the only thing you need to build is `Cython <https://cython.org>`_.
8 | If Cython is not installed, then the pre-generated C source files will be used.
9 |
10 | `sophy <https://github.com/coleifer/sophy>`_ can be installed directly from the
11 | source or from `pypi <https://pypi.org/project/sophy/>`_ using ``pip``.
12 |
13 | Installing with pip
14 | -------------------
15 |
16 | To install from PyPI:
17 |
18 | .. code-block:: bash
19 |
20 | $ pip install cython # optional
21 | $ pip install sophy
22 |
23 | To install the very latest version, you can install with git:
24 |
25 | .. code-block:: bash
26 |
27 | $ pip install -e git+https://github.com/coleifer/sophy#egg=sophy
28 |
29 | Obtaining the source code
30 | -------------------------
31 |
32 | The source code is hosted on `github <https://github.com/coleifer/sophy>`_ and
33 | can be obtained and installed:
34 |
35 | .. code-block:: bash
36 |
37 |     $ git clone https://github.com/coleifer/sophy
38 | $ cd sophy
39 | $ python setup.py build
40 | $ python setup.py install
41 |
42 | Running the tests
43 | -----------------
44 |
45 | Unit-tests and integration tests are distributed with the source and can be run
46 | from the root of the checkout:
47 |
48 | .. code-block:: bash
49 |
50 | $ python tests.py
51 |
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | .. _quickstart:
2 |
3 | Quick-start
4 | ===========
5 |
6 | Sophy is very simple to use. It acts like a Python ``dict`` object, but in
7 | addition to normal dictionary operations, you can read slices of data that are
8 | returned efficiently using cursors. Similarly, bulk writes using ``update()``
9 | use an efficient, atomic batch operation.
10 |
11 | Despite the simple APIs, Sophia has quite a few advanced features. There is too
12 | much to cover everything in this document, so be sure to check out the official
13 | `Sophia storage engine documentation <http://sophia.systems>`_.
14 |
15 | The next section will show how to perform common actions with ``sophy``.
16 |
17 | Using Sophy
18 | -----------
19 |
20 | Let's begin by importing ``sophy`` and creating an *environment*. The
21 | environment can host multiple *databases*, each of which may have a different
22 | *schema*. In this example our database will store UTF-8 strings as the key and
23 | value (though other data-types are supported). Finally we'll open the
24 | environment so we can start storing and retrieving data.
25 |
26 | .. code-block:: python
27 |
28 | from sophy import Sophia, Schema, StringIndex
29 |
30 | # Instantiate our environment by passing a directory path which will store
31 | # the various data and metadata for our databases.
32 | env = Sophia('/tmp/sophia-example')
33 |
34 | # We'll define a very simple schema consisting of a single utf-8 string for
35 | # the key, and a single utf-8 string for the associated value. Note that
36 | # the key or value accepts multiple indexes, allowing for composite
37 | # data-types.
38 | schema = Schema([StringIndex('key')], [StringIndex('value')])
39 |
40 | # Create a key/value database using the schema above.
41 | db = env.add_database('example_db', schema)
42 |
43 | if not env.open():
44 | raise Exception('Unable to open Sophia environment.')
45 |
46 | In the above example we used :py:class:`StringIndex` which stores UTF8-encoded
47 | string data. The following index types are available:
48 |
49 | * :py:class:`StringIndex` - UTF8-encoded string data (text, in other words).
50 | * :py:class:`BytesIndex` - bytestrings (binary data).
51 | * :py:class:`JsonIndex` - store value as UTF8-encoded JSON.
52 | * :py:class:`MsgPackIndex` - store arbitrary data using msgpack encoding.
53 | * :py:class:`PickleIndex` - store arbitrary data using python pickle module.
54 | * :py:class:`UUIDIndex` - store UUIDs.
55 | * :py:class:`SerializedIndex` - index that accepts serialize/deserialize
56 | functions and can be used for msgpack or pickled data, for example.
57 | * :py:class:`U64Index` - store 64-bit unsigned integers.
58 | * :py:class:`U32Index` - store 32-bit unsigned integers.
59 | * :py:class:`U16Index` - store 16-bit unsigned integers.
60 | * :py:class:`U8Index` - store 8-bit unsigned integers (or single bytes).
61 | * There are also :py:class:`U64RevIndex`, :py:class:`U32RevIndex`,
62 | :py:class:`U16RevIndex` and :py:class:`U8RevIndex` for storing integers in
63 | reverse order.
64 |
65 | CRUD operations
66 | ---------------
67 |
68 | Sophy databases use the familiar ``dict`` APIs for CRUD operations:
69 |
70 | .. code-block:: pycon
71 |
72 | >>> db['name'] = 'Huey'
73 | >>> db['animal_type'] = 'cat'
74 | >>> print(db['name'], 'is a', db['animal_type'])
75 | Huey is a cat
76 |
77 | >>> 'name' in db
78 | True
79 | >>> 'color' in db
80 | False
81 |
82 | >>> del db['name']
83 | >>> del db['animal_type']
84 | >>> print(db['name']) # raises a KeyError.
85 | KeyError: ('name',)
86 |
87 | To insert multiple items efficiently, use the :py:meth:`Database.update`
88 | method. Multiple items can be retrieved or deleted efficiently using
89 | :py:meth:`Database.multi_get`, :py:meth:`Database.multi_get_dict`, and
90 | :py:meth:`Database.multi_delete`:
91 |
92 | .. code-block:: pycon
93 |
94 | >>> db.update(k1='v1', k2='v2', k3='v3')
95 | >>> for value in db.multi_get('k1', 'k3', 'kx'):
96 | ... print(value)
97 |
98 | v1
99 | v3
100 | None
101 |
102 | >>> db.multi_get_dict(['k1', 'k3', 'kx'])
103 | {'k1': 'v1', 'k3': 'v3'}
104 |
105 | >>> db.multi_delete('k1', 'k3', 'kx')
106 | >>> 'k1' in db
107 | False
108 |
109 | Other dictionary methods
110 | ------------------------
111 |
112 | Sophy databases also provide efficient implementations of
113 | :py:meth:`~Database.keys`, :py:meth:`~Database.values` and
114 | :py:meth:`~Database.items` for iterating over the data-set. Unlike
115 | dictionaries, however, iterating directly over a Sophy :py:class:`Database`
116 | will return the equivalent of the :py:meth:`~Database.items` method (as opposed
117 | to just the keys).
118 |
119 | .. note::
120 | Sophia is an ordered key/value store, so iteration will return items in the
121 | order defined by their index. So for strings and bytes, this is
122 | lexicographic ordering. For integers it can be ascending or descending.
123 |
124 | .. code-block:: pycon
125 |
126 | >>> db.update(k1='v1', k2='v2', k3='v3')
127 | >>> list(db)
128 | [('k1', 'v1'),
129 | ('k2', 'v2'),
130 | ('k3', 'v3')]
131 |
132 | >>> db.items() # Returns a Cursor, which can be iterated.
133 |
134 | >>> [item for item in db.items()]
135 | [('k1', 'v1'),
136 | ('k2', 'v2'),
137 | ('k3', 'v3')]
138 |
139 | >>> list(db.keys())
140 | ['k1', 'k2', 'k3']
141 |
142 | >>> list(db.values())
143 | ['v1', 'v2', 'v3']
144 |
145 | There are two ways to get the count of items in a database. You can use the
146 | ``len()`` function, which is not very efficient since it must allocate a cursor
147 | and iterate through the full database. An alternative is the
148 | :py:attr:`Database.index_count` property, which may not be exact as it includes
149 | transaction duplicates and not-yet-merged duplicates:
150 |
151 | .. code-block:: pycon
152 |
153 | >>> len(db)
154 | 3
155 | >>> db.index_count
156 | 3
157 |
158 | Range queries
159 | -------------
160 |
161 | Because Sophia is an ordered data-store, performing ordered range scans is
162 | efficient. To retrieve a range of key-value pairs with Sophy, use the ordinary
163 | dictionary lookup with a ``slice`` as the index:
164 |
165 | .. code-block:: python
166 |
167 | >>> db.update(k1='v1', k2='v2', k3='v3', k4='v4')
168 | >>> db['k1':'k3']
169 |
170 |
171 | >>> list(db['k1':'k3']) # NB: other examples omit list() for clarity.
172 | [('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3')]
173 |
174 | >>> db['k1.x':'k3.x'] # Inexact matches are OK, too.
175 | [('k2', 'v2'), ('k3', 'v3')]
176 |
177 | >>> db[:'k2'] # Omitting start or end retrieves from first/last key.
178 | [('k1', 'v1'), ('k2', 'v2')]
179 |
180 | >>> db['k3':]
181 | [('k3', 'v3'), ('k4', 'v4')]
182 |
183 | >>> db['k3':'k1'] # To retrieve a range in reverse, use the higher key first.
184 | [('k3', 'v3'), ('k2', 'v2'), ('k1', 'v1')]
185 |
186 | To retrieve a range in reverse order where the start or end is unspecified, you
187 | can pass in ``True`` as the ``step`` value of the slice to also indicate
188 | reverse:
189 |
190 | .. code-block:: pycon
191 |
192 | >>> db[:'k2':True] # Start-to-"k2" in reverse.
193 | [('k2', 'v2'), ('k1', 'v1')]
194 |
195 | >>> db['k3'::True]
196 | [('k4', 'v4'), ('k3', 'v3')]
197 |
198 | >>> db[::True]
199 | [('k4', 'v4'), ('k3', 'v3'), ('k2', 'v2'), ('k1', 'v1')]
200 |
201 | Cursors
202 | -------
203 |
204 | For finer-grained control over iteration, or to do prefix-matching, Sophy
205 | provides a :py:class:`Cursor` interface.
206 |
207 | The :py:meth:`~Database.cursor` method accepts five parameters:
208 |
209 | * ``order`` (default=``>=``) - semantics for matching the start key and
210 | ordering results.
211 | * ``key`` - the start key
212 | * ``prefix`` - search for prefix matches
213 | * ``keys`` - (default=``True``) -- return keys while iterating
214 | * ``values`` - (default=``True``) -- return values while iterating
215 |
216 | Suppose we were storing events in a database and were using an
217 | ISO-8601-formatted date-time as the key. Since ISO-8601 sorts
218 | lexicographically, we could retrieve events in correct order simply by
219 | iterating. To retrieve a particular slice of time, a prefix could be specified:
220 |
221 | .. code-block:: python
222 |
223 | # Iterate over events for July, 2017:
224 | cursor = db.cursor(key='2017-07-01T00:00:00', prefix='2017-07-')
225 | for timestamp, event_data in cursor:
226 | process_event(timestamp, event_data)
227 |
228 | Transactions
229 | ------------
230 |
231 | Sophia supports ACID transactions. Even better, a single transaction can cover
232 | operations to multiple databases in a given environment.
233 |
234 | Example of using :py:meth:`Sophia.transaction`:
235 |
236 | .. code-block:: python
237 |
238 | account_balance = env.add_database('balance', ...)
239 | transaction_log = env.add_database('transaction_log', ...)
240 |
241 | # ...
242 |
243 |     def transfer_funds(from_acct, to_acct, amount):
244 |         with env.transaction() as txn:
245 |             # To write to a database within a transaction, obtain a reference to
246 |             # a wrapper object for the db:
247 |             txn_acct_bal = txn[account_balance]
248 |             txn_log = txn[transaction_log]
249 | 
250 |             # Transfer the asset by updating the respective balances. Note that we
251 |             # are operating on the wrapper database, not the db instance.
252 |             from_bal = txn_acct_bal[from_acct]
253 |             txn_acct_bal[to_acct] = from_bal + amount
254 |             txn_acct_bal[from_acct] = from_bal - amount
255 | 
256 |             # Log the transaction in the transaction_log database. Again, we use
257 |             # the wrapper for the database:
258 |             txn_log[from_acct, to_acct, get_timestamp()] = amount
259 |
260 | Multiple transactions are allowed to be open at the same time, but if there are
261 | conflicting changes, an exception will be thrown when attempting to commit the
262 | offending transaction:
263 |
264 | .. code-block:: pycon
265 |
266 | # Create a basic k/v store. Schema.key_value() is a convenience method
267 | # for string key / string value.
268 | >>> kv = env.add_database('main', Schema.key_value())
269 |
270 | # Open the environment in order to access the new db.
271 | >>> env.open()
272 |
273 | # Instead of using the context manager, we'll call begin() explicitly so we
274 | # can show the interaction of 2 open transactions.
275 | >>> txn = env.transaction().begin()
276 |
277 | >>> t_kv = txn[kv] # Obtain reference to kv database in transaction.
278 | >>> t_kv['k1'] = 'v1' # Set k1=v1.
279 |
280 | >>> txn2 = env.transaction().begin() # Start a 2nd transaction.
281 | >>> t2_kv = txn2[kv] # Obtain a reference to the "kv" db in 2nd transaction.
282 | >>> t2_kv['k1'] = 'v1-x' # Set k1=v1-x
283 |
284 | >>> txn2.commit() # ERROR !!
285 | SophiaError
286 |     ...
287 |     SophiaError('transaction is not finished, waiting for a concurrent transaction to finish.')
288 |
289 | >>> txn.commit() # OK
290 |
291 | >>> txn2.commit() # Retry committing 2nd transaction. ERROR !!
292 | SophiaError
293 | ...
294 |     SophiaError('transaction was rolled back by another concurrent transaction.')
295 |
296 | Sophia detected a conflict and rolled-back the 2nd transaction.
297 |
298 | Index types, multi-field keys and values
299 | ----------------------------------------
300 |
301 | Sophia supports multi-field keys and values. Additionally, the individual
302 | fields can have different data-types. Sophy provides the following field
303 | types:
304 |
305 | * :py:class:`StringIndex` - UTF8-encoded string data (text, in other words).
306 | * :py:class:`BytesIndex` - bytestrings (binary data).
307 | * :py:class:`JsonIndex` - store value as UTF8-encoded JSON.
308 | * :py:class:`MsgPackIndex` - store arbitrary data using msgpack encoding.
309 | * :py:class:`PickleIndex` - store arbitrary data using python pickle module.
310 | * :py:class:`UUIDIndex` - store UUIDs.
311 | * :py:class:`SerializedIndex` - index that accepts serialize/deserialize
312 | functions and can be used for custom serialization formats.
313 | * :py:class:`U64Index` - store 64-bit unsigned integers.
314 | * :py:class:`U32Index` - store 32-bit unsigned integers.
315 | * :py:class:`U16Index` - store 16-bit unsigned integers.
316 | * :py:class:`U8Index` - store 8-bit unsigned integers (or single bytes).
317 | * There are also :py:class:`U64RevIndex`, :py:class:`U32RevIndex`,
318 | :py:class:`U16RevIndex` and :py:class:`U8RevIndex` for storing integers in
319 | reverse order.
320 |
321 | To store arbitrary data encoded using msgpack, for example:
322 |
323 | .. code-block:: python
324 |
325 | schema = Schema(StringIndex('key'), MsgPackIndex('value'))
326 | db = sophia_env.add_database('main', schema)
327 |
328 | If you have a custom serialization library you would like to use, you can use
329 | :py:class:`SerializedIndex`, passing the serialize/deserialize callables:
330 |
331 | .. code-block:: python
332 |
333 | # Equivalent to previous msgpack example.
334 | import msgpack
335 |
336 | schema = Schema(StringIndex('key'),
337 | SerializedIndex('value', msgpack.packb, msgpack.unpackb))
338 | db = sophia_env.add_database('main', schema)
339 |
340 | To declare a database with a multi-field key or value, you will pass the
341 | individual fields as arguments when constructing the :py:class:`Schema` object.
342 | To initialize a schema where the key is composed of two strings and a 64-bit
343 | unsigned integer, and the value is composed of a string, you would write:
344 |
345 | .. code-block:: python
346 |
347 | # Declare a schema consisting of a multi-part key and a string value.
348 | key_parts = [StringIndex('last_name'),
349 | StringIndex('first_name'),
350 | U64Index('area_code')]
351 | value_parts = [StringIndex('address_data')]
352 | schema = Schema(key_parts, value_parts)
353 |
354 | # Create a database using the above schema.
355 | address_book = env.add_database('address_book', schema)
356 | env.open()
357 |
358 | To store data, we use the same dictionary methods as usual, just passing tuples
359 | instead of individual values:
360 |
361 | .. code-block:: python
362 |
363 | address_book['kitty', 'huey', 66604] = '123 Meow St'
364 | address_book['puppy', 'mickey', 66604] = '1337 Woof-woof Court'
365 |
366 | To retrieve our data:
367 |
368 | .. code-block:: pycon
369 |
370 | >>> address_book['kitty', 'huey', 66604]
371 |     '123 Meow St'
372 |
373 | To delete a row:
374 |
375 | .. code-block:: pycon
376 |
377 | >>> del address_book['puppy', 'mickey', 66604]
378 |
379 | Indexing and slicing works as you would expect, with tuples being returned
380 | instead of scalar values where appropriate.
381 |
382 | .. note::
383 | When working with a multi-part value, a tuple containing the value
384 | components will be returned. When working with a scalar value, instead of
385 | returning a 1-item tuple, the value itself is returned.
386 |
387 | Configuring and Administering Sophia
388 | ------------------------------------
389 |
390 | Sophia can be configured using special properties on the :py:class:`Sophia` and
391 | :py:class:`Database` objects. Refer to the :ref:`settings configuration document `
392 | for the details on the available options, including whether they are read-only,
393 | and the expected data-type.
394 |
395 | For example, to query Sophia's status, you can use the :py:attr:`Sophia.status`
396 | property, which is a readonly setting returning a string:
397 |
398 | .. code-block:: pycon
399 |
400 | >>> print(env.status)
401 | online
402 |
403 | Other properties can be changed by assigning a new value to the property. For
404 | example, to read and then increase the number of threads used by the scheduler:
405 |
406 | .. code-block:: pycon
407 |
408 | >>> env.scheduler_threads
409 | 6
410 | >>> env.scheduler_threads = 8
411 |
412 | Database-specific properties are available as well. For example to get the
413 | number of GET and SET operations performed on a database, you would write:
414 |
415 | .. code-block:: pycon
416 |
417 | >>> print(db.stat_get, 'get operations')
418 | 24 get operations
419 | >>> print(db.stat_set, 'set operations')
420 | 33 set operations
421 |
422 | Refer to the :ref:`settings configuration table ` for a complete
423 | list of available settings.
424 |
425 | Backups
426 | -------
427 |
428 | Sophia can create a backup of the database while it is running. To configure
429 | backups, you will need to set the path for backups before opening the
430 | environment:
431 |
432 | .. code-block:: python
433 |
434 | env = Sophia('/path/to/data')
435 | env.backup_path = '/path/for/backup-data/'
436 |
437 | env.open()
438 |
439 | At any time while the environment is open, you can call the ``backup_run()``
440 | method, and a backup will be started in a background thread:
441 |
442 | .. code-block:: python
443 |
444 | env.backup_run()
445 |
446 | Backups will be placed in numbered folders inside the ``backup_path`` specified
447 | during environment configuration. You can query the backup status and get the
448 | ID of the last-completed backup:
449 |
450 | .. code-block:: python
451 |
452 | env.backup_active # Returns 1 if running, 0 if completed/idle
453 | env.backup_last # Get ID of last-completed backup
454 | env.backup_last_complete # Returns 1 if last backup succeeded
455 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel", "cython"]
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from setuptools.extension import Extension
3 | try:
4 | from Cython.Build import cythonize
5 | except ImportError:
6 | import warnings
7 | cython_installed = False
8 | warnings.warn('Cython not installed, using pre-generated C source file.')
9 | else:
10 | cython_installed = True
11 |
12 |
13 | if cython_installed:
14 | python_source = 'sophy.pyx'
15 | else:
16 | python_source = 'sophy.c'
17 | cythonize = lambda obj: obj
18 |
19 | library_source = 'src/sophia.c'
20 |
21 | sophy = Extension(
22 | 'sophy',
23 | #extra_compile_args=['-g', '-O0'],
24 | #extra_link_args=['-g'],
25 | sources=[python_source, library_source])
26 |
27 | setup(
28 | name='sophy',
29 | version='0.6.4',
30 | description='Python bindings for the sophia database.',
31 | author='Charles Leifer',
32 | author_email='',
33 | ext_modules=cythonize([sophy]),
34 | )
35 |
--------------------------------------------------------------------------------
/sophy.pyx:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 | cimport cython
3 | from cpython.bytes cimport PyBytes_AsStringAndSize
4 | from cpython.bytes cimport PyBytes_Check
5 | from cpython.unicode cimport PyUnicode_AsUTF8String
6 | from cpython.unicode cimport PyUnicode_Check
7 | from cpython.version cimport PY_MAJOR_VERSION
8 | from libc.stdint cimport int64_t
9 | from libc.stdint cimport uint8_t
10 | from libc.stdint cimport uint16_t
11 | from libc.stdint cimport uint32_t
12 | from libc.stdint cimport uint64_t
13 |
14 | import json
15 | import uuid
16 | from pickle import dumps as pdumps
17 | from pickle import loads as ploads
18 | try:
19 | from msgpack import packb as msgpack_packb
20 | from msgpack import unpackb as msgpack_unpackb
21 | mpackb = lambda o: msgpack_packb(o, use_bin_type=True)
22 | munpackb = lambda b: msgpack_unpackb(b, raw=False)
23 | except ImportError:
24 | mpackb = munpackb = None
25 |
26 |
27 | cdef extern from "src/sophia.h" nogil:
28 | cdef void *sp_env()
29 | cdef void *sp_document(void *)
30 | cdef int sp_setstring(void*, const char*, const void*, int)
31 | cdef int sp_setint(void*, const char*, int64_t)
32 | cdef void *sp_getobject(void*, const char*)
33 | cdef void *sp_getstring(void*, const char*, int*)
34 | cdef int64_t sp_getint(void*, const char*)
35 | cdef int sp_open(void *)
36 | cdef int sp_destroy(void *)
37 | cdef int sp_set(void*, void*)
38 | cdef int sp_upsert(void*, void*)
39 | cdef int sp_delete(void*, void*)
40 | cdef void *sp_get(void*, void*)
41 | cdef void *sp_cursor(void*)
42 | cdef void *sp_begin(void *)
43 | cdef int sp_prepare(void *)
44 | cdef int sp_commit(void *)
45 |
46 |
47 | class SophiaError(Exception): pass
48 |
49 |
50 | cdef bint IS_PY3K = PY_MAJOR_VERSION == 3
51 |
52 |
53 | cdef inline unicode decode(key):
54 | cdef unicode ukey
55 | if PyBytes_Check(key):
56 | ukey = key.decode('utf-8')
57 | elif PyUnicode_Check(key):
58 | ukey = key
59 | elif key is None:
60 | return None
61 | else:
62 | ukey = unicode(key)
63 | return ukey
64 |
65 |
66 | cdef inline bytes encode(key):
67 | cdef bytes bkey
68 | if PyUnicode_Check(key):
69 | bkey = PyUnicode_AsUTF8String(key)
70 | elif PyBytes_Check(key):
71 | bkey = key
72 | elif key is None:
73 | return None
74 | else:
75 | bkey = PyUnicode_AsUTF8String(unicode(key))
76 | return bkey
77 |
78 |
79 | cdef inline _getstring(void *obj, const char *key):
80 | cdef:
81 | char *buf
82 | int nlen
83 |
84 | buf = sp_getstring(obj, key, &nlen)
85 | if buf:
86 | return buf[:nlen - 1]
87 |
88 |
89 | cdef inline _getustring(void *obj, const char *key):
90 | cdef:
91 | char *buf
92 | int nlen
93 |
94 | buf = sp_getstring(obj, key, &nlen)
95 | if buf:
96 | return decode(buf[:nlen - 1])
97 |
98 |
99 | cdef inline _check(void *env, int rc):
100 | if rc == -1:
101 | error = _getustring(env, 'sophia.error')
102 | if error:
103 | raise SophiaError(error)
104 | else:
105 | raise SophiaError('unknown error occurred.')
106 |
107 |
108 | cdef inline check_open(Sophia env):
109 | if not env.is_open:
110 | raise SophiaError('Sophia environment is closed.')
111 |
112 |
113 | cdef class Configuration(object):
114 | cdef:
115 | dict settings
116 | Sophia env
117 |
118 | def __cinit__(self, Sophia env):
119 | self.env = env
120 |
121 | # A dictionary mapping the setting key (bytes) to the value, which is
122 | # either a bytestring or integer.
123 | self.settings = {}
124 |
125 | def set_option(self, key, value):
126 | cdef bytes bkey = encode(key)
127 |
128 | if isinstance(value, bool):
129 | value = value and 1 or 0
130 | elif isinstance(value, basestring):
131 | value = encode(value)
132 | elif not isinstance(value, int):
133 | raise ValueError('Setting value must be bool, int or string.')
134 |
135 | self.settings[bkey] = value
136 | if self.env.is_open:
137 | self._set(bkey, value)
138 |
139 | def get_option(self, key, is_string=True):
140 | check_open(self.env)
141 | cdef bytes bkey = encode(key)
142 | if is_string:
143 | return _getustring(self.env.env, bkey)
144 | else:
145 | return sp_getint(self.env.env, bkey)
146 |
147 | cdef clear_option(self, key):
148 | try:
149 | del self.settings[encode(key)]
150 | except KeyError:
151 | pass
152 |
153 | cdef int _set(self, key, value) except -1:
154 | cdef int rc
155 |
156 | if isinstance(value, int):
157 | rc = sp_setint(self.env.env, key, value)
158 | elif isinstance(value, bytes):
159 | rc = sp_setstring(self.env.env, key,
160 | value, 0)
161 | else:
162 | raise ValueError('Invalid setting detected: %s=%s' % (key, value))
163 |
164 | if rc == -1:
165 | error = _getustring(self.env.env, 'sophia.error')
166 | if error:
167 | raise SophiaError(error)
168 | else:
169 | raise SophiaError('unknown error occurred.')
170 |
171 | return rc
172 |
173 | cdef configure(self):
174 | for key, value in self.settings.items():
175 | self._set(key, value)
176 |
177 |
178 | def __config__(name, is_string=False, is_readonly=False):
179 | def _getter(self):
180 | return self.config.get_option(name, is_string)
181 | if is_readonly:
182 | return property(_getter)
183 | def _setter(self, value):
184 | self.config.set_option(name, value)
185 | return property(_getter, _setter)
186 |
187 | def __config_ro__(name, is_string=False):
188 | return __config__(name, is_string, True)
189 |
190 | def __operation__(name):
191 | def _method(self):
192 | self.config.set_option(name, 0)
193 | return _method
194 |
195 | def __dbconfig__(name, is_string=False, is_readonly=False):
196 | def _getter(self):
197 | return self.env.config.get_option('.'.join(('db', self.name, name)),
198 | is_string)
199 | if is_readonly:
200 | return property(_getter)
201 | def _setter(self, value):
202 | self.env.config.set_option('.'.join(('db', self.name, name)), value)
203 | return property(_getter, _setter)
204 |
205 | def __dbconfig_ro__(name, is_string=False):
206 | return __dbconfig__(name, is_string, True)
207 |
208 | def __dbconfig_s__(name, is_readonly=False):
209 | return __dbconfig__(name, True, is_readonly)
210 |
211 |
212 | cdef class Sophia(object):
213 | cdef:
214 | bint is_open
215 | bytes bpath
216 | readonly Configuration config
217 | dict database_lookup
218 | list databases
219 | readonly unicode path
220 | void *env
221 |
222 | def __cinit__(self):
223 | self.env = 0
224 |
225 | def __init__(self, path):
226 | self.config = Configuration(self)
227 | self.is_open = False
228 | self.database_lookup = {}
229 | self.databases = []
230 | self.path = decode(path)
231 | self.bpath = encode(path)
232 |
233 | def add_database(self, name, Schema schema):
234 | cdef Database db
235 |
236 | if self.is_open:
237 | raise SophiaError('cannot add database to open environment.')
238 |
239 | name = encode(name) # Always store name internally as bytestring.
240 | db = Database(self, name, schema)
241 | self.databases.append(db)
242 | self.database_lookup[name] = db
243 | return db
244 |
245 | def remove_database(self, name):
246 | if self.is_open:
247 | raise SophiaError('cannot remove database from open environment.')
248 |
249 | db = self.database_lookup.pop(encode(name))
250 | self.databases.remove(db)
251 |
252 | def get_database(self, name):
253 | return self.database_lookup[encode(name)]
254 |
255 | def __getitem__(self, name):
256 | return self.database_lookup[encode(name)]
257 |
258 | cdef configure_database(self, Database db):
259 | cdef:
260 | BaseIndex index
261 | bytes bname = encode(db.name)
262 | bytes iname
263 | int i
264 |
265 | self.set_string(b'db', bname)
266 |
267 | for i, index in enumerate(db.schema.key):
268 | # db..scheme =
269 | # db..scheme. = ,key(i)
270 | iname = encode(index.name)
271 | self.set_string(b'.'.join((b'db', bname, b'scheme')), index.bname)
272 | self.set_string(b'.'.join((b'db', bname, b'scheme', index.bname)),
273 | encode('%s,key(%d)' % (index.data_type, i)))
274 |
275 | for index in db.schema.value:
276 | self.set_string(b'.'.join((b'db', bname, b'scheme')), index.bname)
277 | self.set_string(b'.'.join((b'db', bname, b'scheme', index.bname)),
278 | encode(index.data_type))
279 |
280 | db.db = sp_getobject(self.env, b'db.' + bname)
281 |
282 | def open(self):
283 | if self.is_open:
284 | return False
285 |
286 | cdef Database db
287 |
288 | self.env = sp_env()
289 | self.set_string(b'sophia.path', self.bpath)
290 |
291 | for db in self.databases:
292 | self.configure_database(db)
293 |
294 | self.config.configure()
295 |
296 | cdef int rc = sp_open(self.env)
297 | _check(self.env, rc)
298 |
299 | self.is_open = True
300 | return self.is_open
301 |
302 | def close(self):
303 | if not self.is_open or not self.env:
304 | return False
305 | self.is_open = False
306 | sp_destroy(self.env)
307 | self.env = 0
308 | return True
309 |
310 | def __dealloc__(self):
311 | if self.is_open and self.env:
312 | sp_destroy(self.env)
313 |
314 | cdef set_string(self, const char *key, const char *value):
315 | sp_setstring(self.env, key, value, 0)
316 |
317 | cpdef Transaction transaction(self):
318 | return Transaction(self)
319 |
320 | version = __config_ro__('sophia.version', is_string=True)
321 | version_storage = __config_ro__('sophia.version_storage', is_string=True)
322 | build = __config_ro__('sophia.build', is_string=True)
323 | status = __config_ro__('sophia.status', is_string=True)
324 | errors = __config_ro__('sophia.errors')
325 | error = __config_ro__('sophia.error', is_string=True)
326 |
327 | backup_path = __config__('backup.path', is_string=True)
328 | backup_run = __operation__('backup.run')
329 | backup_active = __config_ro__('backup.active')
330 | backup_last = __config_ro__('backup.last')
331 | backup_last_complete = __config_ro__('backup.last_complete')
332 |
333 | scheduler_threads = __config__('scheduler.threads')
334 | def scheduler_trace(self, thread_id):
335 | return self.config.get_option('scheduler.%s.trace' % thread_id)
336 |
337 | transaction_online_rw = __config_ro__('transaction.online_rw')
338 | transaction_online_ro = __config_ro__('transaction.online_ro')
339 | transaction_commit = __config_ro__('transaction.commit')
340 | transaction_rollback = __config_ro__('transaction.rollback')
341 | transaction_conflict = __config_ro__('transaction.conflict')
342 | transaction_lock = __config_ro__('transaction.lock')
343 | transaction_latency = __config_ro__('transaction.latency', is_string=True)
344 | transaction_log = __config_ro__('transaction.log', is_string=True)
345 | transaction_vlsn = __config_ro__('transaction.vlsn')
346 | transaction_gc = __config_ro__('transaction.gc')
347 |
348 | metric_lsn = __config_ro__('metric.lsn')
349 | metric_tsn = __config_ro__('metric.tsn')
350 | metric_nsn = __config_ro__('metric.nsn')
351 | metric_dsn = __config_ro__('metric.dsn')
352 | metric_bsn = __config_ro__('metric.bsn')
353 | metric_lfsn = __config_ro__('metric.lfsn')
354 |
355 | log_enable = __config__('log.enable')
356 | log_path = __config__('log.path', is_string=True)
357 | log_sync = __config__('log.sync')
358 | log_rotate_wm = __config__('log.rotate_wm')
359 | log_rotate_sync = __config__('log.rotate_sync')
360 | log_rotate = __operation__('log.rotate')
361 | log_gc = __operation__('log.gc')
362 | log_files = __config_ro__('log.files')
363 |
364 |
365 | cdef class Transaction(object):
366 | cdef:
367 | Sophia env
368 | void *txn
369 |
370 | def __cinit__(self, Sophia env):
371 | self.env = env
372 | self.txn = 0
373 |
374 | def __dealloc__(self):
375 | if self.env.is_open and self.txn:
376 | sp_destroy(self.txn)
377 |
378 | cdef _reset(self, bint begin):
379 | self.txn = 0
380 | if begin:
381 | self.begin()
382 |
383 | def begin(self):
384 | check_open(self.env)
385 | if self.txn:
386 | raise SophiaError('This transaction has already been started.')
387 |
388 | self.txn = sp_begin(self.env.env)
389 | return self
390 |
391 | def commit(self, begin=True):
392 | check_open(self.env)
393 | if not self.txn:
394 | raise SophiaError('Transaction is not currently open. Cannot '
395 | 'commit.')
396 |
397 | cdef int rc = sp_commit(self.txn)
398 | if rc == 1:
399 | self.txn = 0
400 | raise SophiaError('transaction was rolled back by another '
401 | 'concurrent transaction.')
402 | elif rc == 2:
403 | # Do not clear out self.txn because we may be able to commit later.
404 | raise SophiaError('transaction is not finished, waiting for a '
405 | 'concurrent transaction to finish.')
406 | self._reset(begin)
407 |
408 | def rollback(self, begin=True):
409 | check_open(self.env)
410 | if not self.txn:
411 | raise SophiaError('Transaction is not currently open. Cannot '
412 | 'rollback.')
413 | sp_destroy(self.txn)
414 | self._reset(begin)
415 |
416 | def __enter__(self):
417 | self.begin()
418 | return self
419 |
420 | def __exit__(self, exc_type, exc_val, exc_tb):
421 | if exc_type:
422 | self.rollback(False)
423 | else:
424 | try:
425 | self.commit(False)
426 | except:
427 | self.rollback(False)
428 | raise
429 |
430 | def __getitem__(self, database):
431 | if not isinstance(database, Database):
432 | raise SophiaError('Transaction __getitem__ value must be a '
433 | 'Database instance.')
434 | return DatabaseTransaction(database, self)
435 |
436 | cdef Database get_database(self, Database database):
437 | return DatabaseTransaction(database, self)
438 |
439 |
440 | SCHEMA_STRING = 'string'
441 | SCHEMA_U64 = 'u64'
442 | SCHEMA_U32 = 'u32'
443 | SCHEMA_U16 = 'u16'
444 | SCHEMA_U8 = 'u8'
445 | SCHEMA_U64_REV = 'u64_rev'
446 | SCHEMA_U32_REV = 'u32_rev'
447 | SCHEMA_U16_REV = 'u16_rev'
448 | SCHEMA_U8_REV = 'u8_rev'
449 |
450 |
451 | cdef class BaseIndex(object):
452 | cdef:
453 | bytes bname
454 | object name
455 |
456 | by_reference = False
457 | data_type = ''
458 |
459 | def __init__(self, name):
460 | self.name = name
461 | self.bname = encode(name)
462 |
463 | cdef set_key(self, void *obj, value): pass
464 | cdef get_key(self, void *obj): pass
465 |
466 |
467 | cdef class SerializedIndex(BaseIndex):
468 | cdef object _serialize, _deserialize
469 |
470 | by_reference = True
471 | data_type = SCHEMA_STRING
472 |
473 | def __init__(self, name, serialize, deserialize):
474 | self.name = name
475 | self.bname = encode(name)
476 | self._serialize = serialize
477 | self._deserialize = deserialize
478 |
479 | cdef set_key(self, void *obj, value):
480 | cdef:
481 | bytes bvalue
482 | char *buf
483 | Py_ssize_t buflen
484 |
485 | bvalue = self._serialize(value)
486 | if not PyBytes_Check(bvalue):
487 | bvalue = encode(bvalue)
488 |
489 | PyBytes_AsStringAndSize(bvalue, &buf, &buflen)
490 | sp_setstring(obj, self.bname, buf, buflen + 1)
491 | return bvalue
492 |
493 | cdef get_key(self, void *obj):
494 | cdef:
495 | char *buf
496 | int buflen
497 |
498 | buf = sp_getstring(obj, self.bname, &buflen)
499 | if buf:
500 | return self._deserialize(buf[:buflen - 1])
501 |
502 |
503 | cdef class BytesIndex(BaseIndex):
504 | by_reference = True
505 | data_type = SCHEMA_STRING
506 |
507 | cdef set_key(self, void *obj, value):
508 | cdef:
509 | bytes bvalue = encode(value)
510 | char *buf
511 | Py_ssize_t buflen
512 |
513 | PyBytes_AsStringAndSize(bvalue, &buf, &buflen)
514 | sp_setstring(obj, self.bname, buf, buflen + 1)
515 | return bvalue
516 |
517 | cdef get_key(self, void *obj):
518 | return _getstring(obj, self.bname)
519 |
520 |
521 | cdef class StringIndex(BytesIndex):
522 | by_reference = True
523 | data_type = SCHEMA_STRING
524 |
525 | cdef get_key(self, void *obj):
526 | return _getustring(obj, self.bname)
527 |
528 |
529 | cdef class U64Index(BaseIndex):
530 | data_type = SCHEMA_U64
531 |
532 | cdef set_key(self, void *obj, value):
533 | cdef:
534 | uint64_t ival = value
535 | sp_setint(obj, self.bname, ival)
536 |
537 | cdef get_key(self, void *obj):
538 | return sp_getint(obj, self.bname)
539 |
540 |
541 | cdef class U32Index(U64Index):
542 | data_type = SCHEMA_U32
543 |
544 | cdef set_key(self, void *obj, value):
545 | cdef:
546 | uint32_t ival = value
547 | sp_setint(obj, self.bname, ival)
548 |
549 |
550 | cdef class U16Index(U64Index):
551 | data_type = SCHEMA_U16
552 |
553 | cdef set_key(self, void *obj, value):
554 | cdef:
555 | uint16_t ival = value
556 | sp_setint(obj, self.bname, ival)
557 |
558 |
559 | cdef class U8Index(U64Index):
560 | data_type = SCHEMA_U8
561 |
562 | cdef set_key(self, void *obj, value):
563 | cdef:
564 | uint8_t ival = value
565 | sp_setint(obj, self.bname, ival)
566 |
567 |
568 | cdef class U64RevIndex(U64Index):
569 | data_type = SCHEMA_U64_REV
570 |
571 | cdef class U32RevIndex(U32Index):
572 | data_type = SCHEMA_U32_REV
573 |
574 | cdef class U16RevIndex(U16Index):
575 | data_type = SCHEMA_U16_REV
576 |
577 | cdef class U8RevIndex(U8Index):
578 | data_type = SCHEMA_U8_REV
579 |
580 |
581 | cdef class JsonIndex(SerializedIndex):
582 | def __init__(self, name):
583 | jdumps = lambda v: json.dumps(v, separators=(',', ':')).encode('utf-8')
584 | jloads = lambda v: json.loads(v.decode('utf-8'))
585 | super(JsonIndex, self).__init__(name, jdumps, jloads)
586 |
587 | cdef class MsgPackIndex(SerializedIndex):
588 | def __init__(self, name):
589 | if mpackb is None or munpackb is None:
590 | raise SophiaError('msgpack-python library not installed!')
591 | super(MsgPackIndex, self).__init__(name, mpackb, munpackb)
592 |
593 | cdef class PickleIndex(SerializedIndex):
594 | def __init__(self, name):
595 | super(PickleIndex, self).__init__(name, pdumps, ploads)
596 |
597 | cdef class UUIDIndex(SerializedIndex):
598 | def __init__(self, name):
599 | uuid_encode = lambda u: u.bytes
600 | uuid_decode = lambda b: uuid.UUID(bytes=b)
601 | super(UUIDIndex, self).__init__(name, uuid_encode, uuid_decode)
602 |
603 |
604 | cdef normalize_tuple(Schema schema, tuple t):
605 | # This function is used when doing range comparisons to ensure we don't
606 | # accidentally try to compare unicode <-> bytes.
607 | cdef:
608 | BaseIndex index
609 | list accum = []
610 |
611 | for index, value in zip(schema.key, t):
612 | if isinstance(index, StringIndex):
613 | value = decode(value)
614 | elif isinstance(index, BytesIndex):
615 | value = encode(value)
616 | accum.append(value)
617 | return tuple(accum)
618 |
619 |
620 | cdef normalize_value(Schema schema, i):
621 | cdef BaseIndex idx = schema.key[0]
622 | if isinstance(idx, StringIndex):
623 | return decode(i)
624 | elif isinstance(idx, BytesIndex):
625 | return encode(i)
626 | return i
627 |
628 |
629 | @cython.freelist(256)
630 | cdef class Document(object):
631 | cdef:
632 | list refs
633 | void *handle
634 |
635 | def __cinit__(self):
636 | self.handle = 0
637 | self.refs = []
638 |
639 | cdef release_refs(self):
640 | self.refs = []
641 |
642 |
643 | cdef Document create_document(void *handle):
644 | cdef Document doc = Document.__new__(Document)
645 | doc.handle = handle
646 | return doc
647 |
648 |
cdef class Schema(object):
    """
    Describes the key and value layout of a database: an ordered list of
    index objects for the key fields and another for the value fields.
    """
    cdef:
        bint multi_key, multi_value
        list key
        list value
        readonly int key_length
        readonly int value_length

    def __init__(self, key_parts=None, value_parts=None):
        # key_parts / value_parts: a single BaseIndex or an iterable of
        # BaseIndex instances; a bare index is promoted to a 1-tuple.
        cdef:
            BaseIndex index

        self.key = []
        self.value = []
        if key_parts is not None:
            if isinstance(key_parts, BaseIndex):
                key_parts = (key_parts,)
            for index in key_parts:
                self.add_key(index)
        if value_parts is not None:
            if isinstance(value_parts, BaseIndex):
                value_parts = (value_parts,)
            for index in value_parts:
                self.add_value(index)

    def add_key(self, BaseIndex index):
        # Append a key field and refresh the cached length/multi flags.
        self.key.append(index)
        self.key_length = len(self.key)
        self.multi_key = self.key_length > 1

    def add_value(self, BaseIndex index):
        # Append a value field and refresh the cached length/multi flags.
        self.value.append(index)
        self.value_length = len(self.value)
        self.multi_value = self.value_length > 1

    cdef set_key(self, Document doc, tuple parts):
        # Write each key field of `parts` into the document. Indexes that
        # store by reference return an object that must outlive the write;
        # those are parked on doc.refs.
        cdef:
            BaseIndex index
            int i

        if len(parts) != self.key_length:
            raise ValueError('key must be a %s-tuple' % self.key_length)

        for i, index in enumerate(self.key):
            ref = index.set_key(doc.handle, parts[i])
            if index.by_reference:
                doc.refs.append(ref)

    cdef tuple get_key(self, Document doc):
        # Read every key field from the document, in schema order.
        cdef:
            BaseIndex index
            list accum = []

        for index in self.key:
            accum.append(index.get_key(doc.handle))
        return tuple(accum)

    cdef set_value(self, Document doc, tuple parts):
        # Write each value field of `parts` into the document.
        # NOTE(review): reuses BaseIndex.set_key — presumably the index
        # writes whatever field it is named for, key or value; confirm
        # against the BaseIndex implementation.
        cdef:
            BaseIndex index
            int i

        if len(parts) != self.value_length:
            raise ValueError('value must be a %s-tuple' % self.value_length)

        for i, index in enumerate(self.value):
            ref = index.set_key(doc.handle, parts[i])
            if index.by_reference:
                doc.refs.append(ref)

    cdef tuple get_value(self, Document doc):
        # Read every value field from the document, in schema order.
        cdef:
            BaseIndex index
            list accum = []

        for index in self.value:
            accum.append(index.get_key(doc.handle))
        return tuple(accum)

    @classmethod
    def key_value(cls):
        # Convenience: the simplest schema, one string key -> one string
        # value.
        return Schema([StringIndex('key')], [StringIndex('value')])
731 |
732 |
cdef class Database(object):
    """
    A single sophia database (keyspace) within an environment. Provides
    dict-like access (get/set/delete, slicing for ranges) on top of the
    sp_* C API, serializing keys/values through the attached Schema.
    """
    cdef:
        bytes bname
        Schema schema
        void *db
        readonly Sophia env
        readonly name

    def __cinit__(self):
        self.db = 0

    def __init__(self, Sophia env, name, schema):
        self.env = env
        self.name = decode(name)
        self.bname = encode(name)
        self.schema = schema

    def __dealloc__(self):
        # The db handle is owned by the environment; just forget it.
        self.db = 0

    cdef void *_get_target(self) except NULL:
        # Hook: subclasses (DatabaseTransaction) redirect reads/writes to
        # a transaction handle instead of the raw db handle.
        return self.db

    cdef _set(self, tuple key, tuple value):
        # Low-level write of an already-tuplized key/value pair.
        cdef:
            void *handle = sp_document(self.db)
            Document doc = create_document(handle)

        self.schema.set_key(doc, key)
        self.schema.set_value(doc, value)
        sp_set(self._get_target(), doc.handle)
        doc.release_refs()

    def set(self, key, value):
        # Store `value` at `key`; scalars are promoted to 1-tuples to
        # match the schema interface.
        check_open(self.env)
        key = (key,) if not isinstance(key, tuple) else key
        value = (value,) if not isinstance(value, tuple) else value
        return self._set(key, value)

    cdef tuple _get(self, tuple key):
        # Low-level read; returns the value tuple or None when missing.
        cdef:
            void *handle = sp_document(self.db)
            void *result
            Document doc = create_document(handle)

        self.schema.set_key(doc, key)
        result = sp_get(self._get_target(), doc.handle)
        doc.release_refs()
        if not result:
            return

        # sp_get returned a new document; read the value out of it and
        # destroy it before returning.
        doc.handle = result
        data = self.schema.get_value(doc)
        sp_destroy(result)
        return data

    def get(self, key, default=None):
        check_open(self.env)
        data = self._get((key,) if not isinstance(key, tuple) else key)
        if data is None:
            return default

        # Single-field values are unwrapped from their 1-tuple.
        return data if self.schema.multi_value else data[0]

    cdef _exists(self, tuple key):
        # Like _get() but only checks presence, discarding the result.
        cdef:
            void *handle = sp_document(self.db)
            void *result
            Document doc = create_document(handle)

        self.schema.set_key(doc, key)
        result = sp_get(self._get_target(), doc.handle)
        doc.release_refs()
        if result:
            sp_destroy(result)
            return True
        return False

    cdef _delete(self, tuple key):
        cdef:
            int ret
            void *handle = sp_document(self.db)
            Document doc = create_document(handle)
        self.schema.set_key(doc, key)
        ret = sp_delete(self._get_target(), doc.handle)
        doc.release_refs()
        return ret

    def delete(self, key):
        check_open(self.env)
        return self._delete((key,) if not isinstance(key, tuple) else key)

    def multi_delete(self, keys):
        # Delete each key in turn (no implicit transaction).
        check_open(self.env)
        for key in keys:
            self._delete((key,) if not isinstance(key, tuple) else key)

    def __getitem__(self, key):
        # d[key] -> value; d[start:stop:reverse] -> range generator;
        # raises KeyError when a plain key is missing.
        check_open(self.env)
        if isinstance(key, slice):
            return self.get_range(key.start, key.stop, key.step)
        else:
            key = (key,) if not isinstance(key, tuple) else key
            data = self._get(key)
            if data is None:
                raise KeyError(key)
            return data if self.schema.multi_value else data[0]

    def exists(self, key):
        check_open(self.env)
        return self._exists((key,) if not isinstance(key, tuple) else key)

    def __setitem__(self, key, value):
        self.set(key, value)

    def __delitem__(self, key):
        self.delete(key)

    def __contains__(self, key):
        return self.exists(key)

    cdef _update(self, dict _data, dict k):
        # Write every key/value pair from both dicts (positional dict
        # first, then keyword args).
        cdef tuple tkey, tvalue
        for source in (_data, k):
            if not source: continue
            for key in source:
                tkey = (key,) if not isinstance(key, tuple) else key
                value = source[key]
                tvalue = (value,) if not isinstance(value, tuple) else value
                self._set(tkey, tvalue)

    def update(self, dict _data=None, **kwargs):
        # Bulk-write inside a single transaction, so the batch commits
        # (or rolls back) atomically.
        cdef Transaction txn
        check_open(self.env)
        with self.env.transaction() as txn:
            txn.get_database(self)._update(_data, kwargs)

    multi_set = update

    def multi_get(self, keys):
        # Missing keys yield None in the result list.
        return [self.get(key) for key in keys]

    def multi_get_dict(self, keys):
        # Like multi_get(), but missing keys are simply omitted.
        cdef dict accum = {}
        for key in keys:
            try:
                accum[key] = self[key]
            except KeyError:
                pass
        return accum

    def get_range(self, start=None, stop=None, reverse=False):
        """
        Generator over (key, value) pairs between start and stop,
        inclusive. Omitted bounds extend to the ends of the keyspace;
        reversed bounds (start > stop) imply descending iteration.
        """
        cdef Cursor cursor
        first = start is None
        last = stop is None

        # Single-key schemas accept bare values or 1-tuples.
        if not self.schema.multi_key:
            if isinstance(start, tuple):
                start = start[0]
            if isinstance(stop, tuple):
                stop = stop[0]

        # Reconcile the requested direction with the bound ordering.
        if reverse:
            if (first and not last) or (last and not first):
                start, stop = stop, start
            if (not first and not last) and (start < stop):
                start, stop = stop, start
        elif (not first and not last) and (start > stop):
            reverse = True

        # We need to normalize the stop key to avoid invalid comparisons
        # between bytes <-> unicode when detecting whether to stop iterating.
        if stop is not None:
            if self.schema.multi_key:
                stop = normalize_tuple(self.schema, stop)
            else:
                stop = normalize_value(self.schema, stop)

        order = '<=' if reverse else '>='
        cursor = self.cursor(order=order, key=start)
        for key, value in cursor:
            # NOTE(review): a falsy stop key (e.g. b'' or '') skips the
            # bound check below; `if stop is not None` may be what was
            # intended — confirm.
            if stop:
                if reverse and key < stop:
                    break
                elif not reverse and key > stop:
                    break

            yield (key, value)

    def keys(self):
        return self.cursor(values=False)

    def values(self):
        return self.cursor(keys=False)

    def items(self):
        return self.cursor()

    def __iter__(self):
        return iter(self.cursor())

    def __len__(self):
        # O(n): walks the whole database with a keyless/valueless cursor.
        cdef:
            int i = 0
            Cursor curs = self.cursor(keys=False, values=False)
        for _ in curs: i += 1
        return i

    cpdef Cursor cursor(self, order='>=', key=None, prefix=None, keys=True,
                        values=True):
        # order: one of '>=', '>', '<=', '<' relative to `key`.
        check_open(self.env)
        return Cursor(db=self, order=order, key=key, prefix=prefix, keys=keys,
                      values=values)

    # Configuration properties exposed through the sophia settings tree
    # (see the __dbconfig__* descriptor factories defined earlier in this
    # file). _ro_ variants are read-only; is_string/True marks string-
    # valued settings.
    database_name = __dbconfig_ro__('name', is_string=True)
    database_id = __dbconfig_ro__('id')
    database_path = __dbconfig_ro__('path', is_string=True)

    mmap = __dbconfig__('mmap')
    direct_io = __dbconfig__('direct_io')
    sync = __dbconfig__('sync')
    expire = __dbconfig__('expire')
    compression = __dbconfig_s__('compression')  # lz4, zstd, none

    limit_key = __dbconfig_ro__('limit.key')
    limit_field = __dbconfig__('limit.field')

    index_memory_used = __dbconfig_ro__('index.memory_used')
    index_size = __dbconfig_ro__('index.size')
    index_size_uncompressed = __dbconfig_ro__('index.size_uncompressed')
    index_count = __dbconfig_ro__('index.count')
    index_count_dup = __dbconfig_ro__('index.count_dup')
    index_read_disk = __dbconfig_ro__('index.read_disk')
    index_read_cache = __dbconfig_ro__('index.read_cache')
    index_node_count = __dbconfig_ro__('index.node_count')
    index_page_count = __dbconfig_ro__('index.page_count')

    compaction_cache = __dbconfig__('compaction.cache')
    compaction_checkpoint = __dbconfig__('compaction.checkpoint')
    compaction_node_size = __dbconfig__('compaction.node_size')
    compaction_page_size = __dbconfig__('compaction.page_size')
    compaction_page_checksum = __dbconfig__('compaction.page_checksum')
    compaction_expire_period = __dbconfig__('compaction.expire_period')
    compaction_gc_wm = __dbconfig__('compaction.gc_wm')
    compaction_gc_period = __dbconfig__('compaction.gc_period')

    stat_documents_used = __dbconfig_ro__('stat.documents_used')
    stat_documents = __dbconfig_ro__('stat.documents')
    stat_field = __dbconfig_ro__('stat.field', is_string=True)
    stat_set = __dbconfig_ro__('stat.set')
    stat_set_latency = __dbconfig_ro__('stat.set_latency', is_string=True)
    stat_delete = __dbconfig_ro__('stat.delete')
    stat_delete_latency = __dbconfig_ro__('stat.delete_latency', True)
    stat_get = __dbconfig_ro__('stat.get')
    stat_get_latency = __dbconfig_ro__('stat.get_latency', is_string=True)
    stat_get_read_disk = __dbconfig_ro__('stat.get_read_disk', is_string=True)
    stat_get_read_cache = __dbconfig_ro__('stat.get_read_cache', True)
    stat_pread = __dbconfig_ro__('stat.pread')
    stat_pread_latency = __dbconfig_ro__('stat.pread_latency', is_string=True)
    stat_cursor = __dbconfig_ro__('stat.cursor')
    stat_cursor_latency = __dbconfig_ro__('stat.cursor_latency', True)
    stat_cursor_read_disk = __dbconfig_ro__('stat.cursor_read_disk', True)
    stat_cursor_read_cache = __dbconfig_ro__('stat.cursor_read_cache', True)
    stat_cursor_ops = __dbconfig_ro__('stat.cursor_ops', True)

    scheduler_checkpoint = __dbconfig_ro__('scheduler.checkpoint')
    scheduler_gc = __dbconfig_ro__('scheduler.gc')
    scheduler_expire = __dbconfig_ro__('scheduler.expire')
    scheduler_backup = __dbconfig_ro__('scheduler.backup')
1002 |
1003 |
cdef class DatabaseTransaction(Database):
    # A view of a Database whose reads and writes are routed through an
    # active Transaction handle instead of the raw db handle.
    cdef:
        Transaction transaction

    def __init__(self, Database db, Transaction transaction):
        super(DatabaseTransaction, self).__init__(db.env, db.name, db.schema)
        self.transaction = transaction
        self.db = db.db

    cdef void *_get_target(self) except NULL:
        # All sp_set/sp_get/sp_delete calls in the base class go through
        # this hook, so they hit the transaction rather than the db.
        if not self.transaction.txn:
            raise SophiaError('Transaction is not active.')
        return self.transaction.txn
1017 |
1018 |
@cython.freelist(32)
cdef class Cursor(object):
    """
    Iterator over a database. Each call to __iter__ opens a fresh sophia
    cursor positioned by `key`/`order`/`prefix`; __next__ advances it and
    decodes keys and/or values per the `keys`/`values` flags.
    """
    cdef:
        Database db
        Document current_item
        readonly bint keys
        readonly bint values
        readonly bytes order
        readonly bytes prefix
        readonly key
        void *cursor

    def __cinit__(self, Database db, order='>=', key=None, prefix=None,
                  keys=True, values=True):
        self.db = db
        self.order = encode(order)
        if key:
            self.key = (key,) if not isinstance(key, tuple) else key
        self.prefix = encode(prefix) if prefix else None
        self.keys = keys
        self.values = values
        self.current_item = None
        self.cursor = 0

    def __dealloc__(self):
        # If the environment is already closed, sophia has destroyed the
        # cursor for us; destroying again would be invalid.
        if not self.db.env.is_open:
            return

        if self.cursor:
            sp_destroy(self.cursor)

    def __iter__(self):
        check_open(self.db.env)
        # Re-entering iteration discards any previous cursor.
        if self.cursor:
            sp_destroy(self.cursor)
            self.cursor = 0

        self.cursor = sp_cursor(self.db.env.env)
        cdef void *handle = sp_document(self.db.db)
        self.current_item = create_document(handle)
        if self.key:
            self.db.schema.set_key(self.current_item, self.key)
        sp_setstring(self.current_item.handle, b'order', self.order, 0)
        if self.prefix:
            sp_setstring(self.current_item.handle, b'prefix',
                         self.prefix,
                         (sizeof(char) * len(self.prefix)))
        return self

    def __next__(self):
        # sp_get() consumes the positioning document and returns the next
        # one; NULL means the cursor is exhausted.
        cdef void *handle = sp_get(self.cursor, self.current_item.handle)
        if not handle:
            sp_destroy(self.cursor)
            self.cursor = 0
            raise StopIteration
        else:
            self.current_item.handle = handle

        cdef:
            Schema schema = self.db.schema
            tuple key, value

        # Single-field keys/values are unwrapped from their 1-tuples.
        if self.keys and self.values:
            key = schema.get_key(self.current_item)
            value = schema.get_value(self.current_item)
            return (key if schema.multi_key else key[0],
                    value if schema.multi_value else value[0])
        elif self.keys:
            key = schema.get_key(self.current_item)
            return key if schema.multi_key else key[0]
        elif self.values:
            value = schema.get_value(self.current_item)
            return value if schema.multi_value else value[0]
1092 |
--------------------------------------------------------------------------------
/src/sophia.h:
--------------------------------------------------------------------------------
#ifndef SOPHIA_H_
#define SOPHIA_H_

/*
 * sophia database
 * sphia.org
 *
 * Copyright (c) Dmitry Simonenko
 * BSD License
 */

#ifdef __cplusplus
extern "C" {
#endif

/* The header names were stripped from these two includes (extraction
 * artifact); restored per upstream sophia.h. <stdint.h> is required for
 * the int64_t used in the prototypes below. */
#include <stdlib.h>
#include <stdint.h>

#if __GNUC__ >= 4
#  define SP_API __attribute__((visibility("default")))
#else
#  define SP_API
#endif

SP_API void *sp_env(void);
SP_API void *sp_document(void*);
SP_API int sp_setstring(void*, const char*, const void*, int);
SP_API int sp_setint(void*, const char*, int64_t);
SP_API void *sp_getobject(void*, const char*);
SP_API void *sp_getstring(void*, const char*, int*);
SP_API int64_t sp_getint(void*, const char*);
SP_API int sp_open(void*);
SP_API int sp_destroy(void*);
SP_API int sp_set(void*, void*);
SP_API int sp_upsert(void*, void*);
SP_API int sp_delete(void*, void*);
SP_API void *sp_get(void*, void*);
SP_API void *sp_cursor(void*);
SP_API void *sp_begin(void*);
SP_API int sp_prepare(void*);
SP_API int sp_commit(void*);

#ifdef __cplusplus
}
#endif

#endif
48 |
--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import shutil
4 | import sys
5 | import unittest
6 | import uuid
7 |
8 | from sophy import *
9 |
10 |
# Names for the throwaway database and on-disk directory used by the
# test-suite; the directory is removed between tests by cleanup().
DB_NAME = 'db-test'
TEST_DIR = 'sophia-test'
13 |
14 |
def cleanup():
    """Remove the on-disk test database directory, if it exists."""
    if not os.path.exists(TEST_DIR):
        return
    shutil.rmtree(TEST_DIR)
18 |
19 |
class BaseTestCase(unittest.TestCase):
    """Create a fresh Sophia environment with the declared databases for
    each test, and tear it down (closing + wiping the data dir) after."""

    # Subclasses override with their own (name, schema) pairs.
    databases = (
        ('main', Schema([StringIndex('key')], [StringIndex('value')])),
    )

    def setUp(self):
        cleanup()
        self.env = self.create_env()
        for db_name, db_schema in self.databases:
            self.env.add_database(db_name, db_schema)
        assert self.env.open()

    def tearDown(self):
        assert self.env.close()
        cleanup()

    def create_env(self):
        # Hook so subclasses can customize environment construction.
        return Sophia(TEST_DIR)
38 |
39 |
class TestConfigurationStability(unittest.TestCase):
    """Verify which settings persist across env close/re-open cycles and
    across entirely new env/db handles pointed at the same directory."""

    def setUp(self):
        cleanup()
        self.env = Sophia(TEST_DIR)

    def tearDown(self):
        self.env.close()
        cleanup()

    def test_configuration_stability(self):
        # Configure before open(); settings must survive the open.
        self.env.scheduler_threads = 2
        schema = Schema([StringIndex('k'), U16Index('ki')], StringIndex('val'))
        db = self.env.add_database('main', schema)
        db.compression = 'lz4'
        self.env.open()
        self.assertEqual(self.env.scheduler_threads, 2)
        self.assertEqual(db.compression, 'lz4')
        self.assertEqual(db.mmap, 1)
        self.assertEqual(db.sync, 1)

        n = 1000
        for i in range(n):
            db['k%064d' % i, i] = 'v%0256d' % i

        for i in range(n):
            self.assertEqual(db['k%064d' % i, i], 'v%0256d' % i)

        self.assertTrue(self.env.close())

        # Start fresh with new env/db objects and validate config persists.
        env2 = Sophia(TEST_DIR)
        db2 = env2.add_database('main', schema)
        self.assertTrue(env2.open())

        # Scheduler threads does not persist.
        self.assertFalse(env2.scheduler_threads == 2)

        # Compression persists.
        self.assertEqual(db2.compression, 'lz4')

        # We can re-read the data.
        for i in range(n):
            self.assertEqual(db2['k%064d' % i, i], 'v%0256d' % i)

        db2['kx', 0] = 'vx'
        self.assertTrue(env2.close())

        # And re-open our original env.
        self.assertTrue(self.env.open())

        # Compression persists.
        self.assertEqual(db.compression, 'lz4')

        # We can re-read the data using our original db handle.
        for i in range(n):
            self.assertEqual(db['k%064d' % i, i], 'v%0256d' % i)

        # Data written through the second env is visible to the first.
        self.assertEqual(db['kx', 0], 'vx')
98 |
99 |
class TestConfiguration(BaseTestCase):
    """Smoke-test the read-only environment configuration properties."""

    def test_version(self):
        reported = self.env.version
        self.assertEqual(reported, '2.2')

    def test_status(self):
        reported = self.env.status
        self.assertEqual(reported, 'online')
106 |
107 |
class TestBasicOperations(BaseTestCase):
    """CRUD, iteration, ranges, transactions and cursors against the
    default single-string-key/single-string-value 'main' database."""

    def test_crud(self):
        db = self.env['main']
        vals = (('huey', 'cat'), ('mickey', 'dog'), ('zaizee', 'cat'))
        for key, value in vals:
            db[key] = value
        for key, value in vals:
            self.assertEqual(db[key], value)
            self.assertTrue(key in db)

        del db['mickey']
        self.assertFalse('mickey' in db)
        self.assertRaises(KeyError, lambda: db['mickey'])

        db['huey'] = 'kitten'
        self.assertEqual(db['huey'], 'kitten')
        db.delete('huey')
        self.assertEqual(db.multi_get(['huey']), [None])

        # Method-style API mirrors the item-style API.
        db.set('k1', 'v1')
        db.set('k2', 'v2')
        self.assertEqual(db.get('k1'), 'v1')
        self.assertEqual(db.get('k2'), 'v2')
        self.assertTrue(db.get('k3') is None)
        self.assertEqual(db.get('k3', 'xx'), 'xx')
        db.delete('k1')
        self.assertTrue(db.get('k1') is None)

    def test_iterables(self):
        db = self.env['main']
        for i in range(4):
            db['k%s' % i] = 'v%s' % i

        items = list(db)
        self.assertEqual(items, [('k0', 'v0'), ('k1', 'v1'), ('k2', 'v2'),
                                 ('k3', 'v3')])
        self.assertEqual(list(db.items()), items)

        self.assertEqual(list(db.keys()), ['k0', 'k1', 'k2', 'k3'])
        self.assertEqual(list(db.values()), ['v0', 'v1', 'v2', 'v3'])
        self.assertEqual(len(db), 4)
        self.assertEqual(db.index_count, 4)

    def test_multi_get_set(self):
        db = self.env['main']
        for i in range(4):
            db['k%s' % i] = 'v%s' % i

        self.assertEqual(db.multi_get(['k0', 'k3', 'k99']), ['v0', 'v3', None])
        self.assertEqual(db.multi_get_dict(['k0', 'k3', 'k99']),
                         {'k0': 'v0', 'k3': 'v3'})

        db.update(k0='v0-e', k3='v3-e', k99='v99-e')
        self.assertEqual(list(db), [('k0', 'v0-e'), ('k1', 'v1'), ('k2', 'v2'),
                                    ('k3', 'v3-e'), ('k99', 'v99-e')])

    def test_get_range(self):
        db = self.env['main']
        for i in range(4):
            db['k%s' % i] = 'v%s' % i

        # Bare keys and 1-tuples are interchangeable for a single-key db.
        for k1, k2 in (('k1', 'k2'), (('k1',), 'k2'), ('k1', ('k2',)),
                       (('k1',), ('k2',))):
            self.assertEqual(list(db.get_range(k1, k2)), [
                ('k1', 'v1'), ('k2', 'v2')])

        # Forward ranges, including bounds that fall between stored keys.
        self.assertEqual(list(db['k1':'k2']), [('k1', 'v1'), ('k2', 'v2')])
        self.assertEqual(list(db['k01':'k21']), [('k1', 'v1'), ('k2', 'v2')])
        self.assertEqual(list(db['k2':]), [('k2', 'v2'), ('k3', 'v3')])
        self.assertEqual(list(db[:'k1']), [('k0', 'v0'), ('k1', 'v1')])
        self.assertEqual(list(db['k2':'kx']), [('k2', 'v2'), ('k3', 'v3')])
        self.assertEqual(list(db['a1':'k1']), [('k0', 'v0'), ('k1', 'v1')])
        self.assertEqual(list(db[:'a1']), [])
        self.assertEqual(list(db['z1':]), [])
        self.assertEqual(list(db[:]), [('k0', 'v0'), ('k1', 'v1'),
                                       ('k2', 'v2'), ('k3', 'v3')])

        # Reverse ranges: either start > stop, or step=True in the slice.
        self.assertEqual(list(db['k2':'k1']), [('k2', 'v2'), ('k1', 'v1')])
        self.assertEqual(list(db['k21':'k01']), [('k2', 'v2'), ('k1', 'v1')])
        self.assertEqual(list(db['k2'::True]), [('k3', 'v3'), ('k2', 'v2')])
        self.assertEqual(list(db[:'k1':True]), [('k1', 'v1'), ('k0', 'v0')])
        self.assertEqual(list(db['kx':'k2']), [('k3', 'v3'), ('k2', 'v2')])
        self.assertEqual(list(db['k1':'a1']), [('k1', 'v1'), ('k0', 'v0')])
        self.assertEqual(list(db[:'a1':True]), [])
        self.assertEqual(list(db['z1'::True]), [])
        self.assertEqual(list(db[::True]), [('k3', 'v3'), ('k2', 'v2'),
                                            ('k1', 'v1'), ('k0', 'v0')])

        self.assertEqual(list(db['k1':'k2':True]),
                         [('k2', 'v2'), ('k1', 'v1')])
        self.assertEqual(list(db['k2':'k1':True]),
                         [('k2', 'v2'), ('k1', 'v1')])

    def test_open_close(self):
        db = self.env['main']
        db['k1'] = 'v1'
        db['k2'] = 'v2'
        self.assertTrue(self.env.close())
        self.assertTrue(self.env.open())
        # Re-opening an already-open env is a no-op returning False.
        self.assertFalse(self.env.open())

        self.assertEqual(db['k1'], 'v1')
        self.assertEqual(db['k2'], 'v2')
        db['k2'] = 'v2-e'

        self.assertTrue(self.env.close())
        self.assertTrue(self.env.open())
        self.assertEqual(db['k2'], 'v2-e')

    def test_transaction(self):
        db = self.env['main']
        db['k1'] = 'v1'
        db['k2'] = 'v2'

        # Changes made through the transactional db wrapper commit on
        # context-manager exit.
        with self.env.transaction() as txn:
            txn_db = txn[db]
            self.assertEqual(txn_db['k1'], 'v1')
            txn_db['k1'] = 'v1-e'
            del txn_db['k2']
            txn_db['k3'] = 'v3'

        self.assertEqual(db['k1'], 'v1-e')
        self.assertRaises(KeyError, lambda: db['k2'])
        self.assertEqual(db['k3'], 'v3')

    def test_rollback(self):
        db = self.env['main']
        db['k1'] = 'v1'
        db['k2'] = 'v2'
        with self.env.transaction() as txn:
            txn_db = txn[db]
            self.assertEqual(txn_db['k1'], 'v1')
            txn_db['k1'] = 'v1-e'
            del txn_db['k2']
            # rollback() discards pending changes; the txn stays usable.
            txn.rollback()
            txn_db['k3'] = 'v3'

        self.assertEqual(db['k1'], 'v1')
        self.assertEqual(db['k2'], 'v2')
        self.assertEqual(db['k3'], 'v3')

    def test_multiple_transaction(self):
        db = self.env['main']
        db['k1'] = 'v1'
        txn = self.env.transaction()
        txn.begin()

        txn_db = txn[db]
        txn_db['k2'] = 'v2'
        txn_db['k3'] = 'v3'

        txn2 = self.env.transaction()
        txn2.begin()

        txn2_db = txn2[db]
        txn2_db['k1'] = 'v1-e'
        txn2_db['k4'] = 'v4'

        # Non-overlapping write-sets: both transactions commit cleanly.
        txn.commit()
        txn2.commit()

        self.assertEqual(list(db), [('k1', 'v1-e'), ('k2', 'v2'), ('k3', 'v3'),
                                    ('k4', 'v4')])

    def test_transaction_conflict(self):
        db = self.env['main']
        db['k1'] = 'v1'
        txn = self.env.transaction()
        txn.begin()

        txn_db = txn[db]
        txn_db['k2'] = 'v2'
        txn_db['k3'] = 'v3'

        txn2 = self.env.transaction()
        txn2.begin()

        txn2_db = txn2[db]
        txn2_db['k2'] = 'v2-e'

        # txn is not finished, waiting for concurrent txn to finish.
        self.assertRaises(SophiaError, txn2.commit)
        txn.commit()

        # txn2 was rolled back by another concurrent txn.
        self.assertRaises(SophiaError, txn2.commit)

        # Only changes from txn are present.
        self.assertEqual(list(db), [('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3')])

    def test_cursor(self):
        db = self.env['main']
        db.update(k1='v1', k2='v2', k3='v3')

        curs = db.cursor()
        self.assertEqual(
            list(curs),
            [('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3')])

        curs = db.cursor(order='<')
        self.assertEqual(
            list(curs),
            [('k3', 'v3'), ('k2', 'v2'), ('k1', 'v1')])
311 |
312 |
class TestGetRangeNormalizeValues(BaseTestCase):
    """get_range() must accept str or bytes bounds regardless of whether
    the key index is a StringIndex or BytesIndex (normalization layer)."""

    databases = (
        ('single_u', Schema(StringIndex('key'), U8Index('value'))),
        ('single_b', Schema(BytesIndex('key'), U8Index('value'))),
        ('multi_u', Schema([StringIndex('k0'), StringIndex('k1')],
                           [U8Index('value')])),
        ('multi_b', Schema([BytesIndex('k0'), BytesIndex('k1')],
                           [U8Index('value')])),
        ('multi_ub', Schema([StringIndex('k0'), BytesIndex('k1')],
                            [U8Index('value')])),
    )

    def test_get_range_normalized_single(self):
        def assertRange(db, start, stop, exp):
            self.assertEqual([v for _, v in db.get_range(start, stop)], exp)

        for db_name in ('single_u', 'single_b'):
            db = self.env[db_name]
            for i in range(10):
                db['k%s' % i] = i

            # Same range queried with str and with bytes bounds.
            assertRange(db, 'k2', 'k45', [2, 3, 4])
            assertRange(db, b'k2', b'k45', [2, 3, 4])

    def test_get_range_normalized_multi(self):
        def assertRange(db, start, stop, exp):
            self.assertEqual([v for _, v in db.get_range(start, stop)], exp)

        for db_name in ('multi_u', 'multi_b', 'multi_ub'):
            db = self.env[db_name]
            for i in range(10):
                db['k%s' % i, 'x%s' % i] = i

            # Mixed str/bytes tuples should all normalize identically.
            assertRange(db, ('k2', 'x2'), ('k45', 'x45'), [2, 3, 4])
            assertRange(db, (b'k2', b'x2'), (b'k45', b'x45'), [2, 3, 4])
            assertRange(db, (b'k2', 'x2'), (b'k45', 'x45'), [2, 3, 4])
            assertRange(db, ('k2', b'x2'), ('k45', b'x45'), [2, 3, 4])
350 |
351 |
class TestValidation(BaseTestCase):
    """Key/value arity validation: tuples of the wrong length must raise
    ValueError and must not write anything to the database."""

    databases = (
        ('single', Schema(StringIndex('key'), U8Index('value'))),
        ('multi', Schema((U8Index('k1'), StringIndex('k2')),
                         (U8Index('v1'), StringIndex('v2')))),
    )

    def test_validate_single(self):
        db = self.env['single']
        # Scalars and 1-tuples are equivalent on a single-field schema.
        db.set('k1', 1)
        db.set(('k2',), 2)
        db.set('k3', (3,))
        db.set(('k4',), (4,))
        for i in range(1, 5):
            self.assertTrue(db.exists('k%s' % i))
            self.assertTrue(db.exists(('k%s' % i,)))
            self.assertEqual(db.get('k%s' % i), i)
            self.assertEqual(db.get(('k%s' % i,)), i)

        # Invalid key- and value-lengths.
        self.assertRaises(ValueError, db.set, ('k1', 1), 100)
        self.assertRaises(ValueError, db.set, 'k1', (101, 102))
        self.assertRaises(ValueError, db.get, ('k1', 1))
        self.assertRaises(ValueError, db.exists, ('k1', 1))

        # Bulk-operations.
        self.assertRaises(ValueError, db.update, {('k1', 1): 100})
        self.assertRaises(ValueError, db.update, {'k1': (101, 102)})
        self.assertRaises(ValueError, db.multi_get, [('k1', 1)])
        self.assertRaises(ValueError, db.multi_get_dict, [('k1', 1)])
        self.assertRaises(ValueError, db.multi_delete, [('k1', 1)])

        # No bogus data was written.
        self.assertEqual(db['k1'], 1)

    def test_validate_multi(self):
        db = self.env['multi']
        db.set((1, 'k1'), (11, 'v1'))

        self.assertTrue(db.exists((1, 'k1')))
        self.assertEqual(db.get((1, 'k1')), (11, 'v1'))

        # Invalid key- and value-lengths.
        self.assertRaises(ValueError, db.set, 1, (101, 'v1'))
        self.assertRaises(ValueError, db.set, (1, 'k1'), 102)
        self.assertRaises(ValueError, db.set, (1, 'k1', 2), (101, 'v1', 102))
        self.assertRaises(ValueError, db.get, 1)
        self.assertRaises(ValueError, db.get, (1, 'k1', 101))
        self.assertRaises(ValueError, db.exists, 1)
        self.assertRaises(ValueError, db.exists, (1, 'k1', 101))

        # Bulk-operations.
        self.assertRaises(ValueError, db.update, {1: (100, 'v1')})
        self.assertRaises(ValueError, db.update, {(1, 'k1'): 100})
        self.assertRaises(ValueError, db.update, {(1, 'k1', 2): 100})
        self.assertRaises(ValueError, db.update, {(1, 'k1', 2): (12, 'v1', 2)})
        for k in (1, (1, 'k1', 2)):
            self.assertRaises(ValueError, db.multi_get, [k])
            self.assertRaises(ValueError, db.multi_get_dict, [k])
            self.assertRaises(ValueError, db.multi_delete, [k])

        # No bogus data was written.
        self.assertEqual(db[1, 'k1'], (11, 'v1'))
415 |
416 |
class TestCursorOptions(BaseTestCase):
    """Cursor behavior with combinations of order, start-key and prefix,
    on both single-key and composite-key schemas."""

    databases = (
        ('main', Schema(StringIndex('key'), U16Index('value'))),
        ('secondary', Schema([StringIndex('key_a'), StringIndex('key_b')],
                             U8Index('value'))),
    )

    def test_cursor_options(self):
        db = self.env['main']

        k_tmpl = 'log:%08x:%08x:record%s'
        for i in range(16):
            db[k_tmpl % (i, i, i)] = i

        def assertCursor(cursor, indexes):
            self.assertEqual(list(cursor), [
                (k_tmpl % (i, i, i), i) for i in indexes])

        # Default ordering.
        assertCursor(db.cursor(), range(16))

        # Reverse ordering.
        assertCursor(db.cursor(order='<='), reversed(range(16)))

        # Default ordering with prefix.
        assertCursor(db.cursor(prefix='log:'), range(16))

        # Reverse ordering with prefix. Note that we have to specify a
        # start-key, which is probably indicative of a bug (see sophia #167).
        assertCursor(db.cursor(order='<=', prefix='log:'), []) # XXX: bug?
        assertCursor(db.cursor(order='<=', prefix='log:', key='m'),
                     reversed(range(16)))

        # Use the following key as a starting-point.
        key = k_tmpl % (12, 12, 12)

        # Iterate up from log:0000000c:0000000c:recordc (inclusive).
        assertCursor(db.cursor(prefix='log:', key=key), range(12, 16))
        # Iterate up from log:0000000c:0000000c:recordc (exclusive).
        assertCursor(db.cursor(prefix='log:', key=key, order='>'),
                     range(13, 16))
        # Iterate down from log:0000000c:0000000c:recordc (inclusive).
        assertCursor(db.cursor(prefix='log:', key=key, order='<='),
                     reversed(range(13)))
        # Iterate down from log:0000000c:0000000c:recordc (exclusive).
        assertCursor(db.cursor(prefix='log:', key=key, order='<'),
                     reversed(range(12)))

    def test_cursor_options_multikey(self):
        db = self.env['secondary']
        ka_tmpl = 'log:%08x'
        kb_tmpl = 'evt:%08x'
        for i in range(4):
            for j in range(4):
                db[ka_tmpl % i, kb_tmpl % j] = (4 * i) + j

        def assertCursor(cursor, indexes):
            self.assertEqual(list(cursor), [
                ((ka_tmpl % (i // 4), kb_tmpl % (i % 4)), i) for i in indexes])

        # Default and reverse ordering.
        assertCursor(db.cursor(), range(16))
        assertCursor(db.cursor(order='<='), reversed(range(16)))

        # Default and reverse ordering with prefix.
        assertCursor(db.cursor(prefix='log:'), range(16))
        assertCursor(db.cursor(order='<=', prefix='log:'), []) # XXX: bug?
        assertCursor(db.cursor(order='<=', prefix='log:', key=('m', '')),
                     reversed(range(16)))

        ka = ka_tmpl % 2
        kb = kb_tmpl % 2
        for prefix in (None, 'log:'):
            assertCursor(db.cursor(prefix=prefix, key=(ka, kb)), range(10, 16))
            assertCursor(db.cursor(prefix=prefix, key=(ka, kb), order='>'),
                         range(11, 16))
            assertCursor(db.cursor(prefix=prefix, key=(ka, kb), order='<='),
                         reversed(range(11)))
            assertCursor(db.cursor(prefix=prefix, key=(ka, kb), order='<'),
                         reversed(range(10)))

        assertCursor(db.cursor(prefix=ka), range(8, 12))
        assertCursor(db.cursor(prefix=ka, order='<='), []) # XXX: bug?

        # The second key does not factor into the prefix.
        assertCursor(db.cursor(prefix='evt:'), [])
        assertCursor(db.cursor(prefix='evt:', order='<='), [])
504 |
505 |
class TestMultipleDatabases(BaseTestCase):
    """Isolation and transactional behavior across two databases sharing
    one environment."""

    databases = (
        ('main', Schema([StringIndex('key')], [StringIndex('value')])),
        ('secondary', Schema([StringIndex('key')], [StringIndex('value')])),
    )

    def test_multiple_databases(self):
        main = self.env['main']
        scnd = self.env['secondary']

        main.update(k1='v1', k2='v2', k3='v3')
        scnd.update(k1='v1_2', k2='v2_2', k3='v3_2')

        # Deletes in one db must not leak into the other.
        del main['k1']
        del scnd['k2']
        self.assertRaises(KeyError, lambda: main['k1'])
        self.assertRaises(KeyError, lambda: scnd['k2'])

        self.assertEqual(list(main), [('k2', 'v2'), ('k3', 'v3')])
        self.assertEqual(list(scnd), [('k1', 'v1_2'), ('k3', 'v3_2')])

    def test_multiple_db_txn(self):
        main = self.env['main']
        scnd = self.env['secondary']

        main.update(k1='v1', k2='v2')
        scnd.update(k1='v1_2', k2='v2_2')

        # A single transaction can span both databases.
        with self.env.transaction() as txn:
            t_main = txn[main]
            t_scnd = txn[scnd]

            del t_main['k1']
            t_main['k2'] = 'v2-e'
            t_main['k3'] = 'v3'
            del t_scnd['k2']
            t_scnd['k1'] = 'v1_2-e'

        self.assertEqual(list(main), [('k2', 'v2-e'), ('k3', 'v3')])
        self.assertEqual(list(scnd), [('k1', 'v1_2-e')])

        with self.env.transaction() as txn:
            t_main = txn[main]
            t_scnd = txn[scnd]
            del t_main['k2']
            t_scnd['k3'] = 'v3_2'
            # rollback() discards the pending changes to both databases,
            # then the txn remains usable for further writes.
            txn.rollback()
            self.assertEqual(t_main['k2'], 'v2-e')
            self.assertRaises(KeyError, lambda: t_scnd['k3'])
            t_main['k3'] = 'v3-e'
            t_scnd['k2'] = 'v2_2-e'

        self.assertEqual(list(main), [('k2', 'v2-e'), ('k3', 'v3-e')])
        self.assertEqual(list(scnd), [('k1', 'v1_2-e'), ('k2', 'v2_2-e')])

    def test_open_close(self):
        self.assertTrue(self.env.close())
        self.assertTrue(self.env.open())

    def test_add_db(self):
        schema = Schema([StringIndex('key')], [StringIndex('value')])
        # Databases cannot be added while the environment is open.
        self.assertRaises(SophiaError, self.env.add_database, 'db-3', schema)
        self.env.close()

        self.env.add_database('db-3', schema)
        self.env.open()
        db = self.env['db-3']
        db['k1'] = 'v1'
        self.assertEqual(db['k1'], 'v1')
575 |
576 |
class TestMultiKeyValue(BaseTestCase):
    """Composite (multi-part) keys and values over mixed index types."""
    databases = (
        ('main',
         Schema([U64Index('year'), U32Index('month'), U16Index('day'),
                 StringIndex('event')],
                [StringIndex('source'), StringIndex('data')])),
        ('numbers',
         Schema([U16RevIndex('key')],
                [U16Index('v1'), U16Index('v2'), U16Index('v3'),
                 U16Index('v4'), U8Index('v5')])),
    )
    # (year, month, day, event) -> (source, data)
    test_data = (
        ((2017, 1, 1, 'holiday'), ('us', 'new years')),
        ((2017, 5, 29, 'holiday'), ('us', 'memorial day')),
        ((2017, 7, 4, 'holiday'), ('us', 'independence day')),
        ((2017, 9, 4, 'holiday'), ('us', 'labor day')),
        ((2017, 11, 23, 'holiday'), ('us', 'thanksgiving')),
        ((2017, 12, 25, 'holiday'), ('us', 'christmas')),
        ((2017, 7, 1, 'birthday'), ('private', 'huey')),
        ((2017, 5, 1, 'birthday'), ('private', 'mickey')),
    )

    def setUp(self):
        super(TestMultiKeyValue, self).setUp()
        self.db = self.env['main']

    def test_multi_key_crud(self):
        """Store, read back, then delete a row keyed by a 4-part tuple."""
        for key, value in self.test_data:
            self.db[key] = value

        for key, value in self.test_data:
            self.assertEqual(self.db[key], value)

        # Fixed: previously this deleted (2017, 11, 12, 'holiday'), a key
        # that was never inserted, so the KeyError check below passed
        # vacuously. Use the real thanksgiving key (day=23, per test_data)
        # so deletion is actually exercised.
        del self.db[2017, 11, 23, 'holiday']
        self.assertRaises(KeyError, lambda: self.db[2017, 11, 23, 'holiday'])

    def test_iteration(self):
        """Iteration yields rows sorted by their composite key."""
        for key, value in self.test_data:
            self.db[key] = value

        self.assertEqual(list(self.db), sorted(self.test_data))
        self.assertEqual(list(self.db.items()), sorted(self.test_data))
        self.assertEqual(list(self.db.keys()),
                         sorted(key for key, _ in self.test_data))
        self.assertEqual(list(self.db.values()),
                         [value for _, value in sorted(self.test_data)])

    def test_update_multiget(self):
        """multi_get/multi_get_dict resolve several composite keys at once."""
        self.db.update(dict(self.test_data))
        events = ((2017, 1, 1, 'holiday'),
                  (2017, 12, 25, 'holiday'),
                  (2017, 7, 1, 'birthday'))
        self.assertEqual(self.db.multi_get(events), [
            ('us', 'new years'),
            ('us', 'christmas'),
            ('private', 'huey')])
        self.assertEqual(self.db.multi_get_dict(events), {
            (2017, 1, 1, 'holiday'): ('us', 'new years'),
            (2017, 12, 25, 'holiday'): ('us', 'christmas'),
            (2017, 7, 1, 'birthday'): ('private', 'huey')})

    def test_ranges(self):
        """Slicing with composite-key bounds; third slice arg reverses."""
        self.db.update(dict(self.test_data))
        items = self.db[(2017, 2, 1, ''):(2017, 6, 1, '')]
        self.assertEqual(list(items), [
            ((2017, 5, 1, 'birthday'), ('private', 'mickey')),
            ((2017, 5, 29, 'holiday'), ('us', 'memorial day'))])

        # Open-ended start: everything up to the stop key.
        items = self.db[:(2017, 2, 1, '')]
        self.assertEqual(list(items), [
            ((2017, 1, 1, 'holiday'), ('us', 'new years'))])

        # step=True reverses the iteration order.
        items = self.db[(2017, 11, 1, '')::True]
        self.assertEqual(list(items), [
            ((2017, 12, 25, 'holiday'), ('us', 'christmas')),
            ((2017, 11, 23, 'holiday'), ('us', 'thanksgiving'))])

    def test_rev_indexes(self):
        """U16RevIndex keys iterate in descending order (99, 98, ...)."""
        nums = self.env['numbers']
        for i in range(100):
            key, v1, v2, v3, v4, v5 = range(i, 6 + i)
            nums[key] = (v1, v2, v3, v4, v5)

        self.assertEqual(len(nums), 100)
        self.assertEqual(nums[0], (1, 2, 3, 4, 5))
        self.assertEqual(nums[99], (100, 101, 102, 103, 104))

        # With a reversed index, slice bounds follow the reversed ordering.
        self.assertEqual(list(nums[:2]), [])
        self.assertEqual(list(nums[2:]), [
            (2, (3, 4, 5, 6, 7)),
            (1, (2, 3, 4, 5, 6)),
            (0, (1, 2, 3, 4, 5))])

        self.assertEqual(list(nums.keys())[:3], [99, 98, 97])
        self.assertEqual(list(nums.values())[:3], [
            (100, 101, 102, 103, 104),
            (99, 100, 101, 102, 103),
            (98, 99, 100, 101, 102)])

    def test_bounds(self):
        """U8Index stores the extremes of its range (0 and 255)."""
        nums = self.env['numbers']
        nums[0] = (0, 0, 0, 0, 0)
        self.assertEqual(nums[0], (0, 0, 0, 0, 0))

        nums[1] = (0, 0, 0, 0, 255)
        self.assertEqual(nums[1], (0, 0, 0, 0, 255))
683 |
684 |
class TestEventSchema(BaseTestCase):
    """(timestamp, type) keys mapped to pickled event payloads."""
    databases = (
        ('main',
         Schema([U64Index('timestamp'), StringIndex('type')],
                [SerializedIndex('data', pickle.dumps, pickle.loads)])),
    )

    def setUp(self):
        super(TestEventSchema, self).setUp()
        self.db = self.env['main']

    def test_events_examples(self):
        """Store events; read back by exact key, range, and cursor."""
        def ts(offset):
            # Fixed epoch base keeps the keys deterministic.
            return 1000000000 + offset

        self.db[ts(1), 'init'] = {'msg': 'starting up'}
        self.db[ts(2), 'info'] = {'msg': 'info1'}
        self.db[ts(3), 'info'] = {'msg': 'info2'}
        self.db[ts(3), 'warning'] = {'msg': 'warn1'}
        self.db[ts(4), 'info'] = {'msg': 'info3'}
        self.db[ts(4), 'error'] = {'msg': 'error1'}

        self.assertEqual(self.db[ts(3), 'info'], {'msg': 'info2'})
        self.assertEqual(self.db[ts(4), 'info'], {'msg': 'info3'})
        self.assertRaises(KeyError, lambda: self.db[ts(4), 'xx'])

        # Forward range scan, stop key exclusive of ts(3) rows.
        lo, hi = (ts(1), ''), (ts(3), '')
        rows = self.db.get_range(start=lo, stop=hi)
        self.assertEqual(list(rows), [
            ((ts(1), 'init'), {'msg': 'starting up'}),
            ((ts(2), 'info'), {'msg': 'info1'}),
        ])

        # Reverse range scan over the same start with a later stop key.
        rows = self.db.get_range(start=lo, stop=(ts(4), 'f'), reverse=True)
        self.assertEqual(list(rows), [
            ((ts(4), 'error'), {'msg': 'error1'}),
            ((ts(3), 'warning'), {'msg': 'warn1'}),
            ((ts(3), 'info'), {'msg': 'info2'}),
            ((ts(2), 'info'), {'msg': 'info1'}),
            ((ts(1), 'init'), {'msg': 'starting up'}),
        ])

        # Key-only cursors: strictly-less-than, then greater-or-equal.
        curs = self.db.cursor(order='<', key=(ts(3), 'info'), values=False)
        self.assertEqual(list(curs), [(ts(2), 'info'), (ts(1), 'init')])

        curs = self.db.cursor(order='>=', key=(ts(3), 'info'), values=False)
        self.assertEqual(list(curs), [
            (ts(3), 'info'),
            (ts(3), 'warning'),
            (ts(4), 'error'),
            (ts(4), 'info')])
737 |
738 |
class TestMultiKeyCursor(BaseTestCase):
    """Range/cursor behavior over composite integer and bytes keys.

    Renamed from ``TestMultiKeyValue``: a class of that name is already
    declared earlier in this file, and the duplicate definition shadowed
    it, so only one of the two test classes was ever collected and run by
    unittest. The rename restores both suites.
    """
    databases = (
        ('main',
         Schema([U32Index('a'), U32Index('b'), U32Index('c')],
                [U32Index('value')])),
        ('secondary',
         Schema([BytesIndex('a'), U32Index('b')],
                [U32Index('value')])),
    )

    def setUp(self):
        super(TestMultiKeyCursor, self).setUp()
        self.db = self.env['main']

    def test_cursor_ops(self):
        """Slicing traverses composite u32 keys in lexicographic order."""
        for i in range(10):
            for j in range(5):
                for k in range(3):
                    self.db[i, j, k] = i * j * k

        data = self.db[(3, 3, 0):(4, 2, 1)]
        self.assertEqual(list(data), [
            ((3, 3, 0), 0),
            ((3, 3, 1), 9),
            ((3, 3, 2), 18),
            ((3, 4, 0), 0),
            ((3, 4, 1), 12),
            ((3, 4, 2), 24),
            ((4, 0, 0), 0),
            ((4, 0, 1), 0),
            ((4, 0, 2), 0),
            ((4, 1, 0), 0),
            ((4, 1, 1), 4),
            ((4, 1, 2), 8),
            ((4, 2, 0), 0),
            ((4, 2, 1), 8),
        ])

    def test_ordering_string(self):
        """BytesIndex keys sort bytewise; str keys are written, bytes read."""
        db = self.env['secondary']
        db['a', 0] = 1
        db['b', 1] = 2
        db['b', 0] = 3
        db['d', 0] = 4
        db['c', 9] = 5
        db['c', 3] = 6

        data = list(db[(b'b', 0):(b'\xff', 5)])
        self.assertEqual(data, [
            ((b'b', 0), 3),
            ((b'b', 1), 2),
            ((b'c', 3), 6),
            ((b'c', 9), 5),
            ((b'd', 0), 4)])

        data = list(db[(b'\x00', 0):(b'b', 5)])
        self.assertEqual(data, [
            ((b'a', 0), 1),
            ((b'b', 0), 3),
            ((b'b', 1), 2)])

        # Bounds need not be existing keys; they only delimit the scan.
        data = list(db[(b'bb', 0):(b'cc', 5)])
        self.assertEqual(data, [
            ((b'c', 3), 6),
            ((b'c', 9), 5)])
804 |
805 |
class TestStringVsBytes(BaseTestCase):
    """StringIndex round-trips unicode text; BytesIndex stores raw bytes."""
    databases = (
        ('string',
         Schema([StringIndex('key')],
                [StringIndex('value')])),
        ('bytes',
         Schema([BytesIndex('key')],
                [BytesIndex('value')])),
    )

    def setUp(self):
        super(TestStringVsBytes, self).setUp()
        self.sdb = self.env['string']
        self.bdb = self.env['bytes']

    def test_string_encoding(self):
        """Unicode keys can be looked up by their utf-8 encoded form."""
        self.sdb[u'k1'] = u'v1'
        self.assertEqual(self.sdb[u'k1'], u'v1')

        fancy = u'\u2036hello\u2033'
        raw = fancy.encode('utf-8')
        self.sdb[fancy] = fancy
        # Querying the string db with the encoded bytes finds the row.
        self.assertEqual(self.sdb[raw], fancy)

        self.bdb[raw] = raw
        self.assertEqual(self.bdb[raw], raw)

        # BytesIndex accepts data that is not valid utf-8.
        self.bdb[b'\xff'] = b'\xff'
        self.assertEqual(self.bdb[b'\xff'], b'\xff')
835 |
836 |
class TestSerializedIndex(BaseTestCase):
    """SerializedIndex pickles/unpickles values transparently."""
    databases = (
        ('main',
         Schema(StringIndex('key'),
                SerializedIndex('value', pickle.dumps, pickle.loads))),
    )

    def setUp(self):
        super(TestSerializedIndex, self).setUp()
        self.db = self.env['main']

    def test_serialize_deserialize(self):
        """Arbitrary picklable objects round-trip, including None."""
        fixtures = (('k1', 'v1'),
                    ('k2', {'foo': 'bar', 'baz': 1}),
                    ('k3', None))
        for key, value in fixtures:
            self.db[key] = value

        self.assertEqual(self.db['k1'], 'v1')
        self.assertEqual(self.db['k2'], {'foo': 'bar', 'baz': 1})
        self.assertTrue(self.db['k3'] is None)
        self.assertRaises(KeyError, lambda: self.db['k4'])

        # Range reads deserialize values as well.
        self.assertEqual(list(self.db['k1':'k2']), [
            ('k1', 'v1'),
            ('k2', {'foo': 'bar', 'baz': 1})])
862 |
863 |
class TestSerializedIndexImplementations(BaseTestCase):
    """Built-in serialized index types: json, pickle and uuid."""
    databases = (
        ('json',
         Schema(StringIndex('key'), JsonIndex('value'))),
        ('pickle',
         Schema(StringIndex('key'), PickleIndex('value'))),
        ('uuid',
         Schema(UUIDIndex('key'), StringIndex('value'))),
    )

    def setUp(self):
        super(TestSerializedIndexImplementations, self).setUp()
        self.jdb = self.env['json']
        self.pdb = self.env['pickle']

    def _do_test(self, db):
        # Shared CRUD + range checks reused by the json and pickle tests.
        db['k1'] = 'v1'
        db['k2'] = {'foo': 'bar', 'baz': 1}
        db['k3'] = None

        self.assertEqual(db['k1'], 'v1')
        self.assertEqual(db['k2'], {'foo': 'bar', 'baz': 1})
        self.assertTrue(db['k3'] is None)
        self.assertRaises(KeyError, lambda: db['k4'])

        self.assertEqual(list(db['k1':'k2']), [
            ('k1', 'v1'),
            ('k2', {'foo': 'bar', 'baz': 1})])

    def test_json(self):
        self._do_test(self.jdb)

    def test_pickle(self):
        self._do_test(self.pdb)

    def test_uuid(self):
        """UUID keys round-trip through the UUIDIndex."""
        key_a, key_b = uuid.uuid4(), uuid.uuid4()

        db = self.env['uuid']
        db[key_a] = 'u1'
        db[key_b] = 'u2'
        self.assertEqual(db[key_a], 'u1')
        self.assertEqual(db[key_b], 'u2')

        self.assertEqual(set(db.keys()), set((key_a, key_b)))
912 |
913 |
if __name__ == '__main__':
    # Passing sys.argv through lets unittest's CLI select individual
    # tests, e.g. ``python tests.py TestMultipleDatabases``.
    unittest.main(argv=sys.argv)
916 |
--------------------------------------------------------------------------------