├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── btree.ss ├── build.ss ├── content-addressing.ss ├── db-queue.ss ├── db.ss ├── ebs.ss ├── gerbil.pkg ├── kvs-leveldb.ss ├── kvs-mux.ss ├── kvs-postgres.ss ├── kvs-sql.ss ├── kvs-sqlite.ss ├── kvs.ss ├── merkle-trie.ss ├── persist.md ├── persist.ss ├── pics ├── haddock1.webp ├── haddock2.webp ├── haddock3.webp ├── haddock4.webp ├── haddock5.webp └── haddock6.webp ├── slides-2025-lambdaconf.rkt ├── t ├── content-addressing-test.ss ├── db-test.ss ├── ebs-test.ss ├── kvs-leveldb-test.ss ├── kvs-mux-test.ss ├── kvs-sqlite-test.ss ├── kvs-test.ss ├── merkle-trie-test.ss └── persist-test.ss └── unit-tests.ss /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | defaults: 6 | run: 7 | shell: bash 8 | 9 | jobs: 10 | build_job: 11 | runs-on: ubuntu-latest 12 | # This is a big container with all Gerbil dependencies, Racket, geth... 13 | container: ghcr.io/glow-lang/glow:devel 14 | defaults: 15 | run: 16 | shell: bash 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v1 20 | - name: Configure git on docker 21 | # See https://github.com/actions/runner/issues/2033 22 | run: git config --global --add safe.directory $GITHUB_WORKSPACE 23 | - name: Build and run the unit-tests 24 | run: ./build.ss build-and-test 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | manifest.ss 2 | version.ss 3 | build-deps 4 | run/ 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gerbil-persist 2 | 3 | Gerbil-persist is a package to persist concurrent processes as well as data. 4 | It aims at implementing robust [**Orthogonal Persistence**](persist.md). 5 | The document linked above explains the programming model we are aiming at. 6 | See our upcoming presentation at LambdaConf 2025 [[slides](http://fare.tunes.org/files/cs/persist/slides-2025-lambdaconf.html), videos to be posted here after the conference]. 7 | 8 | The rest of this README explains where we’re at. 9 | ***The implementation is nowhere near finished; actually it is barely begun.*** 10 | 11 | ## A Persistence Short Story 12 | 13 | Ever had your browser cache purged? Ever changed browser? 14 | Experienced a power loss? Got your laptop stolen? 15 | Had Russian orcs bomb your datacenter? 16 | All in the midst of a blockchain interaction with valuable assets at stake? 17 | With gerbil-persist, the precious private off-chain data 18 | based on which you manage your tokens will persist 19 | across all these adversarial events, 20 | onto your next browser on your next computer in the next country. 21 | 22 | Your "decentralized application" or "wallet" will persist their data, 23 | completely encrypted, onto the Cloud—without the application having to add a 24 | single line of code to handle persistence, indeed with its having to explicitly 25 | add lines of code to locally disable persistence where performance demands it. 26 | Storage providers (whether centralized, or decentralized on Swarm or Filecoin) 27 | will not be able to decrypt any data without the master key. 28 | Row-level encryption will ensure data can be updated in increments 29 | proportional in size to the change (plus logarithmic indexes). 30 | Indexes are maintained entirely client-side, and 31 | servers only see a unindexed random-looking key value store. 32 | Data replication will add robustness at the expense of latency. 33 | Replication configuration can be stored in a DHT (e.g. ENS or other contract). 34 | 35 | ## Library Status 36 | 37 | The *current* working code is in [db.ss](db.ss), and uses 38 | [LevelDB](https://github.com/google/leveldb) as underlying key-value store. 39 | 40 | However, we are moving towards abstracting away the underlying store, 41 | and instead supporting [Sqlite](https://www.sqlite.org/index.html) 42 | as default on Gambit C, and 43 | [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API) 44 | on Gambit JS. 45 | 46 | We have not started work on the persistence of processes, 47 | but we have finally identified the details of a proper programming model for it: 48 | [**Orthogonal Persistence**](persist.md). 49 | 50 | ## Copyright and License 51 | 52 | Copyright 2020-2024 Mutual Knowledge Systems, Inc. All rights reserved. 53 | Gerbil-Persist is distributed under 54 | the Apache License, version 2.0. See the file [LICENSE](LICENSE). 55 | 56 | ## Our Persistence Model 57 | 58 | ### Our Model In a Nutshell 59 | 60 | For those familiar with the relevant concepts, our Persistence model is 61 | designed to be the storage layer underlying a system with [Orthogonal Persistence](persist.md) 62 | while embodying the paradigm of content-addressed (merkleized) data. 63 | 64 | As for the implementation, we create a least-common denominator abstraction 65 | reducing any of the most ubiquitous databases to a mere key value store, 66 | valued only for its key "ACID" properties. We add a row-level encryption layer, 67 | and we rebuild all the rest of the functionality we need on top of that. 68 | 69 | ### Persistence 70 | 71 | Persistence means that in case the system stops then is restarted, 72 | the user-visible state of the software will survive the interruption 73 | and computation will resume from that state, that was "persisted". 74 | 75 | The system may stop for many reasons: 76 | - The computer's power may go down 77 | (because its laptop batteries are empty, its desktop is being moved, 78 | the grid experiences interruptions, a man trips on a power cord, 79 | a disjunctor is tripped, or enemies cut the power). 80 | - The underlying operating system process may be killed 81 | (because a user closed its window or asked for it to be killed, 82 | because the operating system ran out of resources and killed it, 83 | or because some hardware malfunction otherwise occurred). 84 | - One of the internal or external services the system crucially 85 | depends on may itself be victim of some failure. 86 | 87 | Whichever it the cause the system was stopped, 88 | assuming the system state wasn't also corrupted 89 | (at which point nothing more can be done, but which can be prevented 90 | through hardware redundancy, which we will consider as a separate issue), 91 | persistence means that computations will resume from 92 | where the system was at the time it stopped. 93 | 94 | ### Concurrency 95 | 96 | The user can define many concurrent processes that will keep executing 97 | and mutating their state (a.k.a. mutators), while the system will persist 98 | the state of these processes, so that in case the system is stopped, 99 | the processes can be resumed in the state they were in at the time 100 | the system was stopped. 101 | 102 | Now while user processes may run concurrently with each other, 103 | the persistence layer itself is single-threaded and 104 | linearizes access to the database. This ensures that the data accesses 105 | from all user threads constitute a consistent state transition 106 | once assembled into a database transaction, such that 107 | processes can be suitably restored from the resulting state. 108 | 109 | The linearization constraint could conceivably be relaxed somewhat 110 | by having separate transactions and a clever locking mechanism; 111 | but such elaborate strategies would work neither 112 | on the simple key value stores that we want to support as backends, 113 | nor with the simple validation strategy we intend to use (see below). 114 | If more parallelism is desired, the simple obvious solution is 115 | to use several independent sequential encrypted stores instead of a single one. 116 | 117 | (Note that Gerbil compiles to Gambit itself by default compiles to a 118 | monoprocessing execution model. Gambit can also compile to a multiprocessing 119 | execution model, but the Gambit runtime is suspected to still contain 120 | a lot of functions that assume monoprocessing, and that need to be identified 121 | and updated before it is safe to use multiprocessing with Gambit. 122 | Therefore, no actual parallelism is lost with our current strategy, 123 | and running multiple separate Unix processes each with their separate database 124 | is necessary for parallel execution with Gerbil. 125 | On the other hand, Gerbil has a lot of infrastructure for programming with 126 | actors both local or remote, which was actually the original motivation 127 | for writing Gerbil.) 128 | 129 | ### Restorability Responsibility 130 | 131 | The user is responsible for ensuring that the stored data is indeed sufficient 132 | to restore his processes when the machine restarts to a state equivalent 133 | to the one saved at the time just before the machine stopped. 134 | 135 | This may be trivial (if the process just passively serve the data), or may 136 | require careful code generation (if the processes implement arbitrary 137 | activities in a Turing-capable language). 138 | Indeed, to represent arbitrary processes, you'd have to materialize the 139 | stack frames of their programs as data structures --- 140 | first-class continuations as marshallable data structures; 141 | that in addition to mapping the storage model of the programming language 142 | to that of the database. 143 | 144 | Doing these transformations is tedious and error-prone, 145 | and better done by an automated program and by a human compiler. 146 | Our persistence system does not presently assist with such code generation, but 147 | in the future a virtual machine and a compiler for a Scheme dialect 148 | could be provided that offers a rich enough programming model 149 | while automatically enforcing the persistence invariants. 150 | 151 | ### Transactionality 152 | 153 | A user process can run code in an atomic block to obtain a guarantee that 154 | all changes in the block will take effect in the same database transaction. 155 | The system will either put the process to sleep until it can safely start 156 | the block, or it will let the process run until completion of the block 157 | before it commits anything. 158 | To avoid long pauses, atomic blocks must always be short; 159 | long atomic blocks in one user process may cause the entire system 160 | to experience long delays, deadlocks, and/or memory overflow. 161 | 162 | A user process can also wait for its so far scheduled modifications to be 163 | fully committed before it proceeds (possibly right out of an atomic block), 164 | i.e. the equivalent of a Unix `fdatasync()`. 165 | The system will put the process to sleep until after the commit is complete. 166 | Waiting for commits is required before communication of signed commitment 167 | messages to outside parties (e.g. blockchain transactions), but not before 168 | communication to other inside parties within the same persistence environment. 169 | Whether the human at the console is considered "inside" or "outside" in this 170 | context is an interesting topic. Presumably the user interface would display 171 | relevant changes in different colors whether or not they have been fully 172 | committed. 173 | 174 | The user may not assume at which point during process execution a commit 175 | may or may not happen, except through its uses of the two facilities above. 176 | The system is then free to choose when to commit changes in order to 177 | maximize time spent doing useful computations versus either sleeping idle 178 | waiting for disk or doing administrative work supporting the useful 179 | computations but not useful in itself. 180 | The system implementer may choose any heuristics, but would presumably pick one 181 | that ensures changes are committed in a timely manner, rather than accumulate 182 | until a giant pause is necessary to commit them all, or worse, 183 | memory runs out, the process is killed, and all is lost before 184 | they had a chance of being committed. 185 | 186 | However, the user *may* assume that changes to the database *will* 187 | contain an atomic set of changes, such that no change is included 188 | without all the previous changes in the creation sequence. 189 | 190 | 191 | ### Transaction Schedule 192 | 193 | In practice, the system triggers a database transaction based on the following: 194 | - a timer set some fixed time after the previous transaction was committed, or 195 | - at least one process waiting for data to be committed, and at least 196 | some time has elapsed since the last transaction was committed. 197 | 198 | Indeed, if the system is already waiting for a database transaction to 199 | complete, then there's no point in trying to build a new transaction yet. 200 | Moreover, it doesn't make sense to issue new transaction much faster 201 | than it takes for a transaction to be committed, and so the runtime system 202 | may time how long it took for the previous commit to happen, 203 | and wait for at least as long before it issues a new transaction 204 | --- unless the system is otherwise idle in which case it may proceed. 205 | 206 | Nevertheless, if after some further delay, nothing has triggered a commit yet, 207 | and some changes have already been accumulating by the mutators, then 208 | the system will automatically issue a transaction and flush these changes 209 | to the database. 210 | The delay should be somewhat proportional to the time it takes for 211 | a transaction to go through. This would be something like 20ms or so 212 | if the database only writes to a local disk, but could be 500ms or more 213 | if synchronously writing to several database replicas in remote data centers. 214 | 215 | ### Transaction Waves 216 | 217 | On most database backends, the database connection is wholly unavailable 218 | until after the latest prepared transaction is committed anyway. 219 | So any new transaction will have to wait for the previous one to be committed 220 | before reading from the database, much less writing to it. 221 | *If* commits are done in the foreground, all user processes are effectively 222 | stopped until the database transaction is committed. 223 | 224 | *If* on the other hand, commits are done in the background, 225 | user processes that are not waiting for commit can keep mutating their state, 226 | but must do so without actually touching the database. 227 | In that case, they could instead be reading from a cache and writing 228 | to that cache, and only block when actual database access is required. 229 | The cache could contain the entirety of the data, or only part of it 230 | at which point attempts to read beyond the cache would block until the 231 | database is available again. Writes to the cache would also be queued 232 | so as to be issued to the database engine once it is available again. 233 | Transactions would then come in overlapping waves, wherein a new wave starts 234 | as the old starts being committed, becomes the only wave once the old one 235 | is committed, and eventually becomes the old wave as it is committed 236 | and a new wave replaces it. 237 | Reading and writing then happen using the current wave, which 238 | either directly accesses the database, or only the cache, depending on 239 | which stage this wave is at. 240 | 241 | Especially with synchronous remote replicas, you'll want a lot of successive waves 242 | being pipelined in parallel. 243 | 244 | ### Conflicts and Rollbacks 245 | 246 | Our persistence system assumes that it has exclusive control on the 247 | underlying database, so that there is no conflict with external writers. 248 | We also assumes that user processes will avoid any conflict between 249 | each other, by using the usual mutual exclusion mechanisms (mutex, etc.). 250 | 251 | Our persistence system does not offer any rollback facility: 252 | individual user processes run independently from each other yet partake in 253 | the same database transaction, therefore rollback of one process would 254 | rollback all changes from all processes. 255 | In a way, this is what happens if the entire system is stopped: 256 | all modifications of all user processes are rolled back and 257 | all user processes are reset to the state they were at 258 | as of the latest committed transaction. 259 | 260 | If somehow, the application calls for concurrent access to some resources, 261 | speculative evaluation under uncertain conflict detection and resolution, and 262 | rollback of whichever user process loses a dynamically detected conflict, 263 | then the user application must implement its own system of 264 | transactions, speculation, conflict detection and resolution, rollback, etc., 265 | on top of what the system provides. 266 | 267 | ### Encryption Model 268 | 269 | Our encryption model is explained in more details in this document: 270 | https://mukn.notion.site/Encrypted-Databases-a-Private-Low-Level-Storage-Model-582fd2775289465cb879d6acbfd7ff11 271 | 272 | We use row-based encryption, such that the database can be hosted by untrusted 273 | third parties each of whom may make their copy of the data unavailable, but 274 | may neither decode any of the data nor tamper with it. 275 | The only information they can extract is the size distribution of rows in 276 | the database at large and in each transaction (and even that could be reduced 277 | to overall size, at the cost of padding and chunking). 278 | 279 | Row-based encryption means that transactions only require incremental sending 280 | of data proportional to the amount of change effected, rather than encrypting 281 | and sending the entire database as with a simple file-level encryption. 282 | Also, unlike naive block-level encryption, row-based encryption allows for 283 | salt to be changed with every update to a row, preventing trivial cryptanalysis 284 | by the storage host XORing the multiple versions of a same block. 285 | 286 | ### Verifiable Integrity 287 | 288 | The usage model is that the user has a copy of the entire database locally, 289 | but uses remote backups that he doesn't fully trust, 290 | except to keep his data available. 291 | If the user loses his local copy, and/or once in a while 292 | just to verify the integrity of remote copies, the user will download 293 | the entirety of the database from one and/or a collection of his providers, 294 | and check that the root node is indeed the most recent version, 295 | and that the rest of the data is correctly content-addressed from that root. 296 | The root node also includes a checksum of everything 297 | (including the rest of the root node). 298 | All data is encrypted using a symmetric key derived from the user passphrase 299 | used as the seed of a suitable KDF (key derivation function). 300 | 301 | (The root node could also include a signature using an asymmetric key derived 302 | from the same seed, but I think this will be redundant and not needed.) 303 | 304 | ### Low-level Data Representation 305 | 306 | Our encryption model allows for content-addressing of pure persistent data 307 | structures plus a number of mutable rows that start with some random salt 308 | that changes at every update. For integrity-preservation purposes, 309 | a hash of the content of all mutable rows is kept in the root node. 310 | The rest of the data is accessed from the root node via the usual 311 | content-addressing mechanism, that has a built-in integrity mechanism. 312 | 313 | The integrity checker also verifies that no extra rows are present in the 314 | database: unreferenced rows are deleted by a reference counting 315 | garbage collector, which always work since the content-addressing model 316 | does not allow for cycles in the content-addressing directed graph; 317 | cyclic data structures can still be represented indirectly using integer 318 | indices as explicit pointers into an array. The reference counts themselves 319 | need only be explicitly stored when strictly larger than one, and 320 | can be stored in a separate table: a count of zero is represented 321 | by the row being absent, and a count of one by the row being present but 322 | its reference counting entry being absent from the counting table. 323 | Each index, each user-defined table, uses a hash of its type and name 324 | descriptor as part of its encryption salt, thereby avoiding undesired 325 | collisions, including with the reference counting table itself. 326 | 327 | Tables will be represented with B-tree of order 16 (according to Knuth's 328 | definition: maximum number of children in a non-root node), wherein short 329 | entries (7 256-bit words or less) are inlined (if their type allows for 330 | inlining) and larger entries are content-addressed. 331 | If a deterministic encoding is preferred, a patricia tree may be used instead, 332 | there again with or without inlining of leaf nodes. 333 | 334 | The top-level entry is stored checksummed and encrypted with a random salt 335 | at a storage key that also serves as checksum of the database master key. 336 | The cleartext is a structure containing a timestamp and transaction count 337 | that make it possible to identify which copy is most up-to-date, 338 | content-addressed links to a schema descriptor, a top-level object, 339 | and a table of indexes, including the reference counting table and 340 | an index of mutable cells (if we allow any beside the top-level entry). 341 | 342 | ### Pre-Commit Hook 343 | 344 | Whenever an actual commit is scheduled, a pre-commit hook is run 345 | that may do the following: 346 | - Tell all processes to either roll back or roll forward to a stable state, 347 | where applicable, then provide all its state as a single merkleized entity. 348 | - Recompute a common content-addressed index of all these processes that 349 | between commits evolve independently, so that all data may be accessible 350 | from a single mutable state cell. 351 | 352 | ## History 353 | 354 | This code started as a port from my OCaml library `legilogic_lib`, 355 | refactored, enhanced, and rewritten again. 356 | That OCaml library was already written to support multiple concurrent 357 | activities exchanging messages with blockchain applications, 358 | though the persistence model hasn't been clarified until much later. 359 | 360 | In the OCaml variant, every function was defined twice, 361 | as a client half sending a request and some server spaghetti code 362 | processing it, with some data in between and plenty of promises 363 | to communicate (like Gerbil `std/misc/completion`, just separating the 364 | `post!` and `wait!` capabilities). Instead, in Gerbil, I use locking 365 | so functions can be defined only once, at the cost of having to 366 | explicitly use the fields of a struct to share state instead of 367 | just nicely scoped variables. 368 | 369 | Also, I added a timer for deferred triggering of batches by transaction. 370 | I thought I had implemented that in OCaml, but that wasn't currently 371 | in the master branch of legicash-facts. This is made necessary in Gambit, 372 | because Gambit is lacking the OCaml feature allowing to run a FFI function 373 | (in this case, the leveldb in parallel with other OCaml functions). 374 | This parallelism in OCaml naturally allowed the workers to synchronize with 375 | the speed of the batch commits, but that won't work on Gambit, 376 | until we debug the Gambit SMP support. 377 | 378 | Final difference from OCaml, we use parameters for dynamic binding of the 379 | database context, where OCaml could only use global variables, static binding, 380 | and/or an explicit reader monad. 381 | 382 | ## Major Refactoring Underway 383 | 384 | We are in the process of deeply changing Gerbil-Persist. 385 | 386 | In particular we are (1) abstracting away the underlying key-value store, and 387 | (2) [encrypting all data in this underlying store](https://mukn.notion.site/Encrypted-Databases-a-Private-Low-Level-Storage-Model-582fd2775289465cb879d6acbfd7ff11). 388 | 389 | ### Basically done 390 | - Replace LevelDB by an abstraction over key value stores (see kvs.ss, kvs-leveldb.ss). 391 | - Use sqlite as second backend, intended as default for embedding on Gambit-C (kvs-sqlite.ss). 392 | - Add first layer of encryption: encrypted byte store atop kvs (ebs.ss). 393 | 394 | ### Needs testing 395 | - Rewrite a kvs-based variant of db.ss handle, queue, and merge multiple transactions (kvs-mux.ss). 396 | 397 | ### Short term planned changes 398 | - Add second layer of encryption: btrees on top of content-addressed store (btree.ss). 399 | - Add third layer of encryption: user-given db schema using gerbil-poo type descriptors (schema.ss). 400 | - Add proper support for reference counting / linear data structures 401 | 402 | ### Medium term planned changes 403 | - Support [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API) on Gambit-JS. 404 | - Implement asynchronous commits and transaction waves with a cache. 405 | - Implement persistent bytecode interpreter 406 | (based on the VM from SICP? Chibi? TinyScheme? Guile? cbpv? Ribbit? other?) 407 | - Implement compiler for persistent variant of Scheme 408 | - Support synchronous data replication on multiple remote IPFS providers 409 | (like [web3.storage](https://web3.storage), or any other known 410 | [IPFS pinning service](https://sourceforge.net/software/ipfs-pinning/)) 411 | 412 | ### Long term unplanned hopes 413 | - Support [PostgreSQL](https://www.postgresql.org/) on Gambit-C. 414 | - Support [CockroachDB](https://www.cockroachlabs.com/) as a replicated key-value store, 415 | - Implement our own shared-memory object database in the style of 416 | [manardb](https://github.com/danlentz/manardb). 417 | - Support synchronous data replication on [EthSwarm](https://www.ethswarm.org/) as well as IPFS. 418 | 419 | ## Bibliography 420 | 421 | * [Ribbit](https://github.com/udem-dlteam/ribbit#research-and-papers) 422 | * [Scope Sets](https://users.cs.utah.edu/plt/scope-sets/) 423 | * [Defunctionalization in Roc](https://github.com/roc-lang/roc/issues/5969) 424 | 425 | * [RRB-vectors](https://twitter.com/hyPiRion/status/1731725164084174949) 426 | * [The taming of the B-trees](https://www.scylladb.com/2021/11/23/the-taming-of-the-b-trees/) 427 | * [Eytzinger Binary Search](https://algorithmica.org/en/eytzinger) 428 | * [TreeLine: An Update-In-Place Key-Value Store for Modern Storage (2023)](https://www.vldb.org/pvldb/vol16/p99-yu.pdf) 429 | * [MyRocks: LSM-Tree Database Storage Engine Serving Facebook's Social Graph (2020)](https://www.vldb.org/pvldb/vol13/p3217-matsunobu.pdf) 430 | * See [Andy Pavlo](https://www.cs.cmu.edu/~pavlo/)'s course at [CMU](https://db.cs.cmu.edu/). 431 | * [Peer-to-Peer Ordered Search Indexes](https://0fps.net/2020/12/19/peer-to-peer-ordered-search-indexes/) 432 | * [An Introduction to Bε-trees and Write-Optimization](https://www.usenix.org/publications/login/oct15/bender) 433 | * [Revisiting B+-tree vs. LSM-tree](https://www.usenix.org/publications/loginonline/revisit-b-tree-vs-lsm-tree-upon-arrival-modern-storage-hardware-built) 434 | * [DBOS.dev](https://DBOS.dev) puts the OS on top of the DB rather than the other way around. 435 | * [BlueStore backend for Ceph distributed filesystem](https://x.com/petereliaskraft/status/1906420979896893823) 436 | * [A Transaction Model](https://jimgray.azurewebsites.net/papers/A%20Transaction%20Model%20RJ%202895.pdf) by Jim Gray (1980) 437 | * [Logica](https://logica.dev/), logic programming language that compiles to SQL 438 | * [PostgREST](https://docs.postgrest.org/en/v12/), REST app server in pgsql 439 | -------------------------------------------------------------------------------- /btree.ss: -------------------------------------------------------------------------------- 1 | ;;;; Pure Btrees on top of encrypted content-addressed database 2 | 3 | ;; TODO: Port btree's from CL: https://github.com/danlentz/cl-btree 4 | ;; Maybe also from ocaml: https://github.com/djs55/ocaml-btree https://github.com/mransan/btree 5 | ;; https://en.wikipedia.org/wiki/B-tree 6 | 7 | (import 8 | ./kvs) 9 | 10 | (export #t) 11 | 12 | -------------------------------------------------------------------------------- /build.ss: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gxi 2 | ;; -*- Gerbil -*- 3 | ;; This is the main build file for Gerbil-persist. Invoke it using 4 | ;; ./build.ss [cmd] 5 | ;; where [cmd] is typically left empty (same as "compile") 6 | ;; Note that may you need to first: 7 | ;; gxpkg install github.com/fare/gerbil-utils 8 | ;; gxpkg install github.com/fare/gerbil-crypto 9 | ;; gxpkg install github.com/fare/gerbil-poo 10 | 11 | (import :std/cli/multicall :std/misc/process :clan/building) 12 | (init-build-environment! 13 | name: "Gerbil-persist" 14 | deps: '("clan" "clan/poo" "clan/crypto")) 15 | 16 | (define-entry-point (build-and-test) 17 | (help: "Run all build and test commands" getopt: []) 18 | (compile) 19 | (run-process/batch ["./unit-tests.ss"]) 20 | (run-process/batch ["./unit-tests.ss" "integration"])) 21 | -------------------------------------------------------------------------------- /content-addressing.ss: -------------------------------------------------------------------------------- 1 | ;; Content Addressing 2 | (export #t) 3 | (import 4 | (for-syntax :clan/syntax) 5 | :gerbil/gambit 6 | :std/format :std/lazy :std/misc/completion :std/misc/hash :std/misc/ports :std/sugar 7 | :clan/base :clan/concurrency :clan/io :clan/string 8 | :clan/poo/object :clan/poo/mop :clan/poo/fun :clan/poo/io :clan/poo/type :clan/poo/brace 9 | :clan/crypto/keccak 10 | ./db ./db-queue ./persist) 11 | 12 | ;; Should the structures belo POO traits instead? Probably not until POO traits are efficient, too! 13 | ;; Should that be called a DigestingContext and ContentAddressingContext, 14 | ;; to be provided statically or dynamically to make ContentAddressable objects? 15 | ;; Note that in a given application, a *same* object may be considered from the point of view 16 | ;; of *several* Digesting contexts, because it may be tracked similtaneously in multiple 17 | ;; blockchains or registries. 18 | ;; But usually, there content-addressing is tied to a specific database, 19 | ;; and uses a fixed digesting context that is extended with extra caches. 20 | 21 | (defstruct Digesting 22 | (sexp ;; : SExp ;; how to print it. 23 | Digest ;; : Type ;; some fixed-size Bytes. 24 | digest)) ;; : Digest <- Bytes 25 | 26 | (defstruct (ContentAddressing Digesting) 27 | (key-prefix ;; : Bytes 28 | mutex)) ;; : Mutex 29 | 30 | ;; : ContentAddressing 31 | (def keccak-addressing 32 | (make-ContentAddressing 33 | 'keccak-addressing 34 | (BytesN 32) 35 | keccak256<-bytes 36 | (string->bytes "K2") 37 | (make-mutex 'k2cas))) 38 | 39 | ;; : (Parameter ContentAddressing) 40 | (def current-content-addressing (make-parameter keccak-addressing)) 41 | 42 | ;; : Digest <- Bytes ?Digesting 43 | (def (digest<-bytes bytes (digesting (current-content-addressing))) 44 | ((Digesting-digest digesting) bytes)) 45 | 46 | ;; : Digest <- T:Type T ?Digesting 47 | (def (digest<- type value (digesting (current-content-addressing))) 48 | (digest<-bytes (bytes<- type value) digesting)) 49 | 50 | ;; : Digest <- String ?Digesting 51 | (def (digest<-string string (digesting (current-content-addressing))) 52 | (digest<-bytes (string->bytes string) digesting)) 53 | 54 | ;; : Digest <- String ?Digesting 55 | (def (digest<-file path (digesting (current-content-addressing))) 56 | ;; TODO: make it work efficiently on large files without loading the entire file into memory, 57 | ;; just into buffers of say 8KB or 1MB, or whatever works best. 58 | (digest<-bytes (read-file-u8vector path) digesting)) 59 | 60 | ;; trait for digestability in a given content-addressing context 61 | (define-type (Digestable @ [] .bytes<- .digesting) 62 | .digest<-: (lambda (v (digesting .digesting)) (digest<-bytes (.bytes<- v) digesting))) 63 | 64 | ;; Non-functor function 65 | (define-type (DigestWrapper^ @ []) 66 | .tap: (lambda (t) (Digesting-Digest (.@ t .digesting))) 67 | .ap^: (cut .call <> .digest<- <>) 68 | .unap^: invalid 69 | .marshal^: (lambda (t v port) (marshal (Digesting-Digest (.@ t .digesting)) v port)) 70 | .unmarshal^: (lambda (t port) (unmarshal (Digesting-Digest (.@ t .digesting)) port))) 71 | 72 | ;; CAVEAT EMPTOR: This trait statically but *lazily* captures 73 | ;; the dynamic current-content-addressing in this interface at time of first reference. 74 | ;; This allows you to define all your interfaces independently from which digest function will be used, 75 | ;; but a given poo interface should be used in one context only, they should be initialized together, 76 | ;; you may want to statically clone and override in some cases, etc. 77 | (define-type (CurrentDigesting @ [Digestable]) 78 | .digesting: (current-content-addressing)) 79 | 80 | ;; : Bytes <- Digest ?ContentAddressing 81 | (def (content-addressing-key digest (content-addressing (current-content-addressing))) 82 | (u8vector-append (ContentAddressing-key-prefix content-addressing) digest)) 83 | 84 | (define-type (ContentAddressable @ [] sexp .digesting .digest<- .<-bytes .bytes<-) 85 | ;; CAVEAT EMPTOR: The application developers must ensure there are no collisions 86 | ;; with respect to sexp for types stored in a given content-addressable context. 87 | .content-cache: (make-hash-table weak-values: #t) 88 | 89 | ;; @ <- Digest TX 90 | .<-digest: 91 | (lambda (digest tx) 92 | ;; TODO: figure out what are or aren't Gambit's guarantees regarding 93 | ;; concurrent access to a table. 94 | ;; Concurrency, reentrance, etc., may cause issues here, but a mutex doesn't seem composable. 95 | ;; Some kind of transactional memory may be required, at which point, 96 | ;; should the caching, decoding and transacting service be moved "upstream" 97 | ;; into the database thread and/or mutex? 98 | (hash-ensure-ref .content-cache digest 99 | (cut .<-bytes (db-get (content-addressing-key digest .digesting) tx)))) 100 | 101 | .make-persistent: 102 | (lambda (x tx) 103 | (def b (.bytes<- x)) 104 | (def d (digest<-bytes b .digesting)) 105 | (def k (content-addressing-key d .digesting)) 106 | (unless (db-key? k tx) 107 | (make-dependencies-persistent @ x tx) 108 | (db-put! k b tx)))) 109 | 110 | (defstruct DV ;; (forall T:Type Type) 111 | (type ;; : T:Type 112 | value ;; : (Lazy T) 113 | digest ;; : (Lazy Digest) 114 | persisted?)) ;; : Bool 115 | 116 | (def (value<-dv dv) (force (DV-value dv))) 117 | (defrule (dv t x) (let (t t) (DV t (lazy x) (lazy (digest<- t x)) #f))) 118 | (def (digest<-dv dv) (force (DV-digest dv))) 119 | (def (dv<-digest t d) (DV t (lazy (.call t .<-digest d)) (lazy d) #t)) 120 | 121 | ;; ContentAddressed 122 | (define-type (ContentAddressed. @ [ContentAddressable] T .digesting) 123 | Wrapper: {(:: @ [Wrapper.]) 124 | .ap: (lambda (v) (dv T v)) 125 | .unap: value<-dv} 126 | .validate: 127 | (lambda (dv) 128 | (unless (DV? dv) (raise-type-error "not a DV" dv)) 129 | (match (std/lazy#&lazy-e (DV-value dv)) 130 | (['resolved . v] 131 | (validate T v) 132 | (match (std/lazy#&lazy-e (DV-digest dv)) 133 | (['resolved . d] 134 | (unless (equal? d (digest<- T v .digesting)) (raise-type-error "digest does not match" dv)) 135 | dv) 136 | (_ dv))) 137 | (_ dv))) 138 | .Digest: (Digesting-Digest .digesting) 139 | .bytes<-: digest<-dv 140 | .<-bytes: (cut dv<-digest @ <>) 141 | .digest<-: .bytes<- ;; don't double-digest! 142 | .marshal: (lambda (dv port) (marshal .Digest (digest<-dv dv) port)) 143 | .unmarshal: (lambda (port) (.<-bytes (unmarshal .Digest port))) 144 | .make-persistent: 145 | (lambda (dv tx) 146 | (unless (DV-persisted? dv) 147 | (let* ((d (digest<-dv dv)) 148 | (k (content-addressing-key d .digesting))) 149 | (unless (db-key? k tx) 150 | (let (v (value<-dv dv)) 151 | (make-dependencies-persistent T v tx) 152 | (db-put! k (bytes<- T v) tx))))))) 153 | 154 | (def (ContentAddressed T) 155 | {(:: @ ContentAddressed.) T 156 | sexp: `(ContentAddressed ,(.@ T sexp))}) 157 | 158 | (def (digest<-marshal marshal (digesting (current-content-addressing))) 159 | (digest<-bytes (call-with-output-u8vector marshal) digesting)) 160 | 161 | (defrules digest-product () 162 | ((_ (digesting) (val type) ...) 163 | (digest<-marshal (lambda (port) (marshal-product port (val type) ...)) digesting)) 164 | ((d (val type) ...) 165 | (d ((current-content-addressing)) (val type) ...))) 166 | -------------------------------------------------------------------------------- /db-queue.ss: -------------------------------------------------------------------------------- 1 | ;; Persistent message queues on top of leveldb 2 | 3 | ;; TODO: accept Gerbil actor style rpc.shutdown messages 4 | 5 | (export 6 | DbQueue? DbQueue-send! DbQueue-restore 7 | DbCommittedQueue? DbCommittedQueue-send! DbCommittedQueue-restore) 8 | (import 9 | :clan/db/leveldb 10 | :std/misc/completion :std/misc/deque :std/misc/list :std/misc/number :std/sugar 11 | :clan/base :clan/concurrency 12 | :clan/poo/object :clan/poo/mop :clan/poo/io :clan/poo/number :clan/poo/type 13 | ./db) 14 | 15 | (def PairUIntUInt (Pair UInt UInt)) ;; used to represent start and length of queue 16 | 17 | ;; : Bytes <- Bytes UInt 18 | (def (db-indexed-key db-key index) 19 | (u8vector-append db-key (bytes<- UInt index))) 20 | 21 | ;; A DB Queue 22 | (defstruct DbQueue 23 | (mx ;; : Mutex 24 | key ;; : Bytes ;; db-key 25 | start ;; : UInt ;; Next index to dequeue 26 | length ;; : UInt ;; Number of items in the queue. end = start + length 27 | manager)) ;; : Thread ;; or should we have a condition variable instead? 28 | 29 | ;; Assumes we already have a lock of the queue object, and the tx is open 30 | ;; : <- DbQueue TX 31 | (def (%DbQueue-update q tx) 32 | (db-put! (DbQueue-key q) (bytes<- PairUIntUInt (cons (DbQueue-start q) (DbQueue-length q))) tx)) 33 | 34 | ;; Internal: wake up the manager of a queue that isn't empty anymore. 35 | ;; : <- DbQueue Any 36 | (def (%DbQueue-wakeup q tag) 37 | (thread-send (DbQueue-manager q) tag)) 38 | 39 | ;; Internal: wake up the manager of a queue that isn't empty anymore. 40 | ;; : <- DbQueue Any 41 | (def (%DbQueue-send! q msg tx) 42 | (db-put! (db-indexed-key (DbQueue-key q) (+ (DbQueue-start q) (post-increment! (DbQueue-length q)))) 43 | msg tx) 44 | (%DbQueue-update q tx)) 45 | 46 | ;; Push a message into a DbQueue 47 | ;; Assumes that tx is open 48 | ;; : <- DbQueue Bytes TX 49 | (def (DbQueue-send! q msg tx) 50 | (with-lock (DbQueue-mx q) 51 | (lambda () 52 | (when (%DbQueue-empty? q) (%DbQueue-wakeup q #t)) 53 | (%DbQueue-send! q msg tx)))) 54 | 55 | ;; Internal: wake up the manager of a queue that isn't empty anymore. 56 | ;; : <- DbQueue Any 57 | (def (%DbQueue-receive! q tx) 58 | (let* ((index (post-increment! (DbQueue-start q))) 59 | (db-key (db-indexed-key (DbQueue-key q) index))) 60 | (decrement! (DbQueue-length q)) 61 | (begin0 62 | (db-get db-key tx) 63 | (db-delete! db-key tx) ;; or should we keep it indefinitely? 64 | (%DbQueue-update q tx)))) 65 | 66 | ;; Atomically pop a message from a DbQueue, or return #f if empty. 67 | ;; Assumes that tx is open 68 | ;; : (OrFalse Bytes) <- DbQueue TX 69 | (def (DbQueue-receive! q tx) 70 | (with-lock (DbQueue-mx q) 71 | (lambda () 72 | (and (not (%DbQueue-empty? q)) (%DbQueue-receive! q tx))))) 73 | 74 | ;; Query whether a DbQueue is empty 75 | ;; Assumes we hold the lock on the queue 76 | ;; : Bool <- DbQueue 77 | (def (%DbQueue-empty? q) 78 | (zero? (DbQueue-length q))) 79 | 80 | ;; Get the state of a DbQueue from the database, given its db-key 81 | ;; : (Pair UInt UInt) <- Bytes TX 82 | (def (DbQueue-state db-key tx) 83 | (cond ((db-get db-key tx) => (cut <-bytes PairUIntUInt <>)) 84 | (else '(0 . 0)))) ;; owl of you 85 | 86 | ;; Restore a DbQueue from its persisted state, or start a new one if none is present. 87 | ;; : DbQueue <- Any Bytes (<- Bytes TX) 88 | (def (DbQueue-restore name db-key processor) 89 | (def q (match (with-tx (tx) (DbQueue-state db-key tx)) 90 | ([start . length] (make-DbQueue (make-mutex name) db-key start length #f)))) 91 | (def manager 92 | (spawn/name/logged 93 | name 94 | (lambda () 95 | (while #t 96 | (thread-receive) ;; wait to be woken up 97 | (let/cc break 98 | (while #t 99 | (with-tx (tx) 100 | (def msg (DbQueue-receive! q tx)) 101 | (unless msg (break)) 102 | (processor msg tx)))))))) 103 | (set! (DbQueue-manager q) manager) 104 | q) 105 | 106 | ;; DB Committed Queue: only dequeue things that were fully committed 107 | (defstruct (DbCommittedQueue DbQueue) 108 | (committed-end ;; : UInt ;; Next index to not dequeue yet 109 | pending)) ;; : (Dequeue (Tuple UInt Completion UInt)) ;; dequeue of batch-id, batch-completion, end 110 | 111 | (def (%DbCommittedQueue-update-pending q tx) ;; the end was increased, so add to pending 112 | (def c (DbTransaction-connection tx)) 113 | (def batch-id (DbConnection-batch-id c)) 114 | (def qp (DbCommittedQueue-pending q)) 115 | (if (and (not (deque-empty? qp)) (= batch-id (car (peek-front qp)))) 116 | (pop-front! qp) ;; only wakeup once per batch id 117 | (let (completion (DbConnection-batch-completion c)) 118 | (spawn (lambda () (completion-wait! completion) (%DbQueue-wakeup q batch-id))))) 119 | (push-front! qp (cons batch-id (+ (DbQueue-start q) (DbQueue-length q))))) 120 | 121 | ;; Push a message into a DbCommittedQueue 122 | ;; Assumes we hold the lock on the q and that tx is open 123 | ;; : <- DbQueue Bytes TX 124 | (def (DbCommittedQueue-send! q msg tx) 125 | (%DbQueue-send! q msg tx) 126 | (%DbQueue-update q tx) 127 | (%DbCommittedQueue-update-pending q tx)) 128 | 129 | ;; Pop a message from a DbCommittedQueue 130 | ;; Assumes we hold the lock on the q, that the q is not empty, and that tx is open 131 | ;; : Bytes <- DbCommittedQueue TX 132 | (def (DbCommittedQueue-receive! q tx) 133 | (with-lock (DbQueue-mx q) 134 | (lambda () 135 | (and (not (%DbCommittedQueue-empty? q)) (%DbQueue-receive! q tx))))) 136 | 137 | ;; Query whether a DbCommittedQueue is empty 138 | ;; Assumes we hold the lock on the queue 139 | ;; : Bool <- DbCommittedQueue 140 | (def (%DbCommittedQueue-empty? q) 141 | (>= (DbQueue-start q) (DbCommittedQueue-committed-end q))) 142 | 143 | ;; Restore a DbQueue from its persisted state, or start a new one if none is present. 144 | ;; : DbCommittedQueue <- Any Bytes (<- UInt Bytes TX) 145 | (def (DbCommittedQueue-restore name db-key processor) 146 | (def q (match (with-tx (tx) (DbQueue-state db-key tx)) 147 | ([start . length] 148 | (make-DbCommittedQueue 149 | (make-mutex name) db-key start length #f (+ start length) (make-deque))))) 150 | (def manager 151 | (spawn/name/logged 152 | name 153 | (lambda () 154 | (while #t 155 | (let (batch-id (thread-receive)) 156 | (with-lock (DbQueue-mx q) 157 | (fun (DbCommittedQueue-manager-confirm-batch-id) 158 | (def qp (DbCommittedQueue-pending q)) 159 | (while (<= (car (peek-back qp)) batch-id) 160 | (set! (DbCommittedQueue-committed-end q) (cdr (pop-back! qp))))))) 161 | (let/cc break 162 | (while #t 163 | (with-tx (tx) 164 | (def msg (DbCommittedQueue-receive! q tx)) 165 | (unless msg (break)) 166 | (processor msg tx)))))))) 167 | (set! (DbQueue-manager q) manager) 168 | q) 169 | -------------------------------------------------------------------------------- /db.ss: -------------------------------------------------------------------------------- 1 | ;; User-level transactions on top of a single synchronous leveldb writebatch. 2 | ;; 3 | ;; This file defines transactional access to a simple key-value store. 4 | ;; All current transactions go into a current batch, that at one point is 5 | ;; triggered for commit. At that point, it will wait for current transactions 6 | ;; to all be closed while blocking any new transaction from being open. 7 | ;; Thus, transactions must always be short or there may be long delays, 8 | ;; deadlocks, and/or memory overflow. Also, we have only roll-forward and 9 | ;; not roll-back of transactions, and so they must use suitable mechanisms 10 | ;; for mutual exclusion and validation before to write anything to the database. 11 | ;; 12 | ;; Multiple threads can read the database with db-key? and db-get, 13 | ;; seeing the state before the current transaction batch, 14 | ;; and contribute to the current transaction with db-put! and db-delete!. 15 | ;; Transactions ensure that all updates made within the transaction are 16 | ;; included in a same atomic batch. A transaction is scheduled for commit 17 | ;; with close-transaction, and you can wait for its completion with 18 | ;; commit-transaction. 19 | ;; 20 | ;; with-tx is a no-op if there is already a (current-db-transaction); if not, 21 | ;; it opens a transaction, executes code while this transaction is the 22 | ;; (current-db-transaction), commit it at the end, and waits for commit. 23 | ;; 24 | ;; This code was ported from my OCaml library legilogic_lib, 25 | ;; though I did refactor and enhance the code as I ported it. 26 | ;; Notably, in the OCaml variant, every function was defined twice, 27 | ;; as a client half sending a request and some server spaghetti code 28 | ;; processing it, with some data in between and plenty of promises 29 | ;; to communicate (like Gerbil std/misc/completion, just separating the 30 | ;; post! and wait! capabilities). Instead, in Gerbil, I use locking 31 | ;; so functions can be defined only once, at the cost of having to 32 | ;; explicitly use the fields of a struct to share state instead of 33 | ;; just nicely scoped variables. 34 | ;; 35 | ;; Also, I added a timer for deferred triggering of batches by transaction. 36 | ;; I thought I had implemented that in OCaml, but that wasn't currently 37 | ;; in the master branch of legicash-facts. This is made necessary in Gambit, 38 | ;; because Gambit is lacking the OCaml feature allowing to run a FFI function 39 | ;; (in this case, the leveldb in parallel with other OCaml functions). 40 | ;; This parallelism in OCaml naturally allowed the workers to synchronize with 41 | ;; the speed of the batch commits, but that won't work on Gambit, 42 | ;; until we debug the Gambit SMP support. 43 | ;; 44 | ;; Final difference from OCaml, we use parameters for dynamic binding of the 45 | ;; database context, where OCaml could only use global variables, static binding, 46 | ;; and/or an explicit reader monad. 47 | 48 | (export #t) 49 | (import 50 | :clan/db/leveldb 51 | :std/assert 52 | :std/misc/completion :std/misc/list :std/misc/number 53 | :std/sugar 54 | :clan/base :clan/concurrency :std/misc/path :clan/path-config) 55 | 56 | (defstruct DbConnection 57 | (name ;; name, a string, also path to the leveldb storage 58 | leveldb ;; leveldb handle 59 | mx txcounter 60 | blocked-transactions open-transactions pending-transactions hooks 61 | batch-id batch batch-completion manager timer 62 | ready? triggered?) 63 | constructor: :init!) 64 | (defmethod {:init! DbConnection} 65 | (lambda (self name leveldb) 66 | (def mx (make-mutex name)) ;; Mutex 67 | (def txcounter 0) ;; Nat 68 | (def hooks (make-hash-table)) ;; (Table (<-) <- Any) 69 | (def blocked-transactions []) ;; (List Transaction) 70 | (def open-transactions (make-hash-table)) ;; mutable (HashSet Transaction) 71 | (def pending-transactions []) ;; (List Transaction) 72 | (def batch-id 0) ;; Nat 73 | (def batch (leveldb-writebatch)) ;; leveldb-writebatch 74 | (def batch-completion (make-completion '(db-batch 0))) 75 | (def ready? #t) ;; Bool 76 | (def triggered? #f) ;; Bool 77 | (def manager (db-manager self)) ;; Thread 78 | (def timer #f) ;; (Or Thread '#f) 79 | (struct-instance-init! 80 | self name leveldb mx txcounter 81 | blocked-transactions open-transactions pending-transactions hooks 82 | batch-id batch batch-completion manager timer 83 | ready? triggered?))) 84 | 85 | (def current-db-connection (make-parameter #f)) 86 | (def (open-db-connection name (opts (leveldb-default-options))) 87 | (def path (ensure-absolute-path name persistent-directory)) 88 | (create-directory* (path-parent path)) 89 | (DbConnection name (leveldb-open path opts))) 90 | (def (open-db-connection! name (opts (leveldb-default-options))) 91 | (current-db-connection (open-db-connection name opts))) 92 | (def (close-db-connection! c) 93 | (leveldb-close (DbConnection-leveldb c)) 94 | (thread-send (DbConnection-manager c) #f)) 95 | (def (close-db-connection c) 96 | (with-db-lock (c) 97 | (register-commit-hook! 'close (lambda _ (close-db-connection! c)) c) 98 | (db-trigger! c)) 99 | (thread-join! (DbConnection-manager c))) 100 | (def (call-with-db-connection fun name (opts (leveldb-default-options))) 101 | (def c (open-db-connection name)) 102 | (try 103 | (parameterize ((current-db-connection c)) 104 | (fun c)) 105 | (finally (close-db-connection c)))) 106 | (defrule (with-db-connection (c name ...) body ...) 107 | (call-with-db-connection (lambda (c) body ...) name ...)) 108 | (def (ensure-db-connection name) 109 | (def c (current-db-connection)) 110 | (if c 111 | (assert! (equal? (DbConnection-name c) name)) 112 | (open-db-connection! name))) 113 | 114 | ;; Mark the current batch as triggered, because either some transaction must be committed, 115 | ;; or a timer has hit since content was added, or we're closing the database. 116 | ;; ASSUMES YOU'RE HOLDING THE DB-LOCK 117 | ;; : <- DbConnection 118 | (def (db-trigger! c) 119 | (if (and (DbConnection-ready? c) (zero? (hash-length (DbConnection-open-transactions c)))) 120 | (finalize-batch! c) 121 | (set! (DbConnection-triggered? c) #t))) 122 | 123 | ;; : Real 124 | (def deferred-db-trigger-interval-in-seconds .02) 125 | 126 | ;; ASSUMES YOU'RE HOLDING THE DB-LOCK 127 | ;; : <- DbConnection 128 | (def (deferred-db-trigger! c) 129 | (unless (DbConnection-timer c) 130 | (let (batch-id (DbConnection-batch-id c)) 131 | (set! (DbConnection-timer c) 132 | (spawn/name/logged 133 | ['timer batch-id] 134 | (lambda () (thread-sleep! deferred-db-trigger-interval-in-seconds) 135 | (with-db-lock (c) 136 | (when (equal? batch-id (DbConnection-batch-id c)) 137 | (db-trigger! c))))))))) 138 | 139 | (defrules with-db-lock () 140 | ((_ (conn) body ...) (with-lock (DbConnection-mx conn) (lambda () body ...))) 141 | ((_ () body ...) (with-db-lock (current-db-connection) body ...))) 142 | (def (call-with-db-lock fun (conn (current-db-connection))) 143 | (with-db-lock (conn) (fun conn))) 144 | 145 | ;; status: blocked open pending complete 146 | ;; When opening a transaction, it may be blocked at first so the previous batch may be completed, 147 | ;; but by the time it is returned to the user, it is in open status; 148 | ;; when it is closed, it becomes pending until its batch is committed, 149 | ;; at which point it becomes complete and any thread sync'ing on it will be awakened. 150 | (defstruct DbTransaction (connection txid status) transparent: #t) 151 | (def current-db-transaction (make-parameter #f)) 152 | (def (DbTransaction-completion tx) 153 | (def c (DbTransaction-connection tx)) 154 | (with-db-lock (c) 155 | (case (DbTransaction-status tx) 156 | ((open pending) 157 | (DbConnection-batch-completion c)) 158 | (else #f)))) 159 | 160 | ;; : <- (OrFalse Completion) 161 | (def (wait-completion completion) 162 | (when completion (completion-wait! completion))) 163 | 164 | ;; Open Transaction 165 | ;; TODO: assert that the transaction_counter never wraps around? 166 | ;; Or check and block further transactions when it does, before resetting the counter? *) 167 | ;; TODO: commenting out the ready && triggered helps detect / enact deadlocks when running 168 | ;; tests, by having only one active transaction at a time; but then the hold can and 169 | ;; should be released as soon as "the" transaction is complete, unless we're already both 170 | ;; ready && triggered for the next batch commit. Have an option for that? 171 | (def (open-transaction (c (current-db-connection))) 172 | (defvalues (transaction completion) 173 | (with-db-lock (c) 174 | (let* ((txid (post-increment! (DbConnection-txcounter c))) 175 | (blocked? (and (DbConnection-ready? c) (DbConnection-triggered? c))) 176 | (status (if blocked? 'blocked 'open)) 177 | (transaction (DbTransaction c txid status))) 178 | (if blocked? 179 | (push! transaction (DbConnection-blocked-transactions c)) 180 | (hash-put! (DbConnection-open-transactions c) txid transaction)) 181 | (values transaction (and blocked? (DbConnection-batch-completion c)))))) 182 | (wait-completion completion) ;; wait without holding the lock 183 | transaction) 184 | 185 | ;; For now, let's 186 | ;; * Disallow nested transaction / auto-transactions. We want a clear transaction owner, and 187 | ;; the type / signature of functions will ensure that there is always one. 188 | ;; * Return the result of the inner expression, after the transaction is closed but not committed. 189 | ;; If you need to synchronize on the transaction, be sure to return it or otherwise memorize it, 190 | ;; or use after-commit from within the body. 191 | (def (call-with-tx fun (c #f) wait: (wait #f)) 192 | (awhen (t (current-db-transaction)) 193 | (error "Cannot nest transactions" t)) 194 | (def tx (open-transaction (or c (current-db-connection)))) 195 | (try 196 | (parameterize ((current-db-transaction tx)) 197 | (fun tx)) 198 | (finally 199 | (close-transaction tx) 200 | (when wait (sync-transaction tx))))) 201 | (defrule (with-tx (tx dbc ...) body ...) 202 | (call-with-tx (lambda (tx) body ...) dbc ...)) 203 | (defrule (without-tx body ...) 204 | (parameterize ((current-db-transaction #f)) body ...)) 205 | 206 | (def (call-with-committed-tx fun (c #f)) 207 | (call-with-tx fun c wait: #t)) 208 | (defrule (with-committed-tx (tx dbc ...) body ...) 209 | (call-with-committed-tx (lambda (tx) body ...) dbc ...)) 210 | (defrule (after-commit (tx) body ...) 211 | (without-tx (spawn/name/logged 212 | ['after-commit (DbTransaction-txid tx)] 213 | (lambda () (completion-wait! (DbTransaction-completion tx)) body ...)))) 214 | 215 | ;; Mark a transaction as ready to be committed. 216 | ;; Return a completion that will be posted when the transaction is committed to disk. 217 | ;; The system must otherwise ensure that the action that follows this promise 218 | ;; will be restarted by a new instance of this program in case the process crashes after this commit, 219 | ;; or is otherwise some client's responsibility to restart if the program acts as a server. 220 | (def (close-transaction (tx (current-db-transaction))) 221 | (match tx 222 | ((DbTransaction c txid status) 223 | (with-db-lock (c) 224 | (case status 225 | ((blocked open) 226 | (set! (DbTransaction-status tx) 'pending) 227 | (hash-remove! (DbConnection-open-transactions c) txid) 228 | (push! tx (DbConnection-pending-transactions c)) 229 | (deferred-db-trigger! c) 230 | (DbConnection-batch-completion c)) 231 | ((pending) 232 | (DbConnection-batch-completion c)) 233 | (else #f)))) 234 | (else (error "close-transaction: not a transaction" tx)))) 235 | 236 | ;; Close a transaction, then wait for it to be committed. 237 | (def (commit-transaction (transaction (current-db-transaction))) 238 | (wait-completion (close-transaction transaction))) 239 | 240 | ;; Sync to a transaction being committed. 241 | ;; Thou Shalt Not sync with the end of a transaction from within another transaction, 242 | ;; or you may deadlock, since that other transaction might be part of the same batch. 243 | ;; Instead, thou shalt sync on it in a background thread, that will then run 244 | ;; the very same code as you would if you would resume the persistent activity, 245 | ;; and that code must be effectively idempotent. 246 | (def (sync-transaction (transaction (current-db-transaction))) 247 | (wait-completion (DbTransaction-completion transaction))) 248 | 249 | ;; Register post-commit finalizer actions to be run after this batch commits, 250 | ;; with the batch id as a parameter. 251 | ;; The hook is called synchronously, but it if you use asynchronous message passing, 252 | ;; it is possible that the hooks may be called out of order. 253 | ;; ASSUMES YOU'RE HOLDING THE DB-LOCK 254 | ;; Unit <- Any (<- Nat) DbConnection 255 | (def (register-commit-hook! name hook (c (current-db-connection))) 256 | (hash-put! (DbConnection-hooks c) name hook)) 257 | 258 | (def leveldb-sync-write-options (leveldb-write-options sync: #f)) 259 | 260 | (def (db-manager c) 261 | (spawn/name/logged 262 | ['db-manager (DbConnection-name c)] 263 | (fun (db-manager-1) 264 | (let loop () 265 | (match (thread-receive) 266 | ([batch-id batch batch-completion hooks pending-transactions] 267 | ;; TODO: run the leveldb-write in a different OS thread. 268 | (leveldb-write (DbConnection-leveldb c) batch leveldb-sync-write-options) 269 | (for-each (lambda (tx) (set! (DbTransaction-status tx) 'complete)) 270 | pending-transactions) 271 | (for-each (lambda (hook) (hook batch-id)) hooks) 272 | (completion-post! batch-completion batch-id) 273 | (with-db-lock (c) 274 | (if (and (DbConnection-triggered? c) (zero? (hash-length (DbConnection-open-transactions c)))) 275 | (finalize-batch! c) 276 | (set! (DbConnection-ready? c) #t))) 277 | (loop)) 278 | (#f (void)) 279 | (x (error "foo" x))))))) 280 | 281 | ;; Fork a system thread to handle the commit; 282 | ;; when it's done, wakeup the wait-on-batch-commit completion 283 | (def (finalize-batch! c) 284 | (def batch-id (DbConnection-batch-id c)) 285 | (def batch (DbConnection-batch c)) 286 | (def batch-completion (DbConnection-batch-completion c)) 287 | (def hooks (hash-values (DbConnection-hooks c))) 288 | (def blocked-transactions (DbConnection-blocked-transactions c)) 289 | (def pending-transactions (DbConnection-pending-transactions c)) 290 | (set! (DbConnection-batch-id c) (1+ batch-id)) 291 | (set! (DbConnection-batch c) (leveldb-writebatch)) 292 | (set! (DbConnection-batch-completion c) (make-completion `(db-batch , (DbConnection-batch-id c)))) 293 | (set! (DbConnection-pending-transactions c) []) 294 | (set! (DbConnection-blocked-transactions c) []) 295 | (set! (DbConnection-ready? c) #f) 296 | (set! (DbConnection-triggered? c) #f) 297 | (set! (DbConnection-timer c) #f) 298 | (for-each (lambda (tx) 299 | (set! (DbTransaction-status tx) 'open) 300 | (hash-put! (DbConnection-open-transactions c) (DbTransaction-txid tx) tx)) 301 | blocked-transactions) 302 | (thread-send (DbConnection-manager c) [batch-id batch batch-completion hooks pending-transactions])) 303 | 304 | ;; Get the batch id: not just for testing, 305 | ;; but also, within a transaction, to get the id to prepare a hook, 306 | ;; e.g. to send newly committed but previously unsent messages. 307 | (def (get-batch-id (c (current-db-connection))) 308 | (DbConnection-batch-id c)) 309 | 310 | (def (db-get key (tx (current-db-transaction)) (opts (leveldb-default-read-options))) 311 | (leveldb-get (DbConnection-leveldb (DbTransaction-connection tx)) key opts)) 312 | (def (db-key? key (tx (current-db-transaction)) (opts (leveldb-default-read-options))) 313 | (leveldb-key? (DbConnection-leveldb (DbTransaction-connection tx)) key opts)) 314 | 315 | (def (db-put! k v (tx (current-db-transaction))) 316 | (def c (DbTransaction-connection tx)) 317 | (with-db-lock (c) 318 | (leveldb-writebatch-put (DbConnection-batch c) k v))) 319 | (def (db-put-many! l (tx (current-db-transaction))) 320 | (def c (DbTransaction-connection tx)) 321 | (with-db-lock (c) 322 | (let (batch (DbConnection-batch c)) 323 | (for-each (match <> ([k . v] (leveldb-writebatch-put batch k v))) l)))) 324 | (def (db-delete! k (tx (current-db-transaction))) 325 | (def c (DbTransaction-connection tx)) 326 | (with-db-lock (c) 327 | (leveldb-writebatch-delete (DbConnection-batch c) k))) 328 | 329 | #;(trace! current-db-connection current-db-transaction 330 | open-db-connection open-db-connection! 331 | close-db-connection! close-db-connection call-with-db-connection 332 | db-trigger! call-with-db-lock 333 | open-transaction call-with-tx call-with-committed-tx close-transaction 334 | commit-transaction register-commit-hook! db-manager finalize-batch! 335 | get-batch-id db-get db-key? db-put! db-put-many! db-delete!) 336 | -------------------------------------------------------------------------------- /ebs.ss: -------------------------------------------------------------------------------- 1 | ;; Encrypted Byte Store 2 | ;; Thanks to Alipha and maroon from IRC libera.chat #crypto for help with the design. 3 | ;; TODO: Merge context with that from content-addressing.ss 4 | ;; TODO: Optionally chunk data over some maximum size, e.g. 64KB, 4KB or 1KB. 5 | ;; TODO: Optionally pad data under some minimum size. e.g. 32B, 1KB or 4KB. 6 | ;; TODO: Have a competent cryptographer review. 7 | ;; TODO: allow use of alternative digest function to compute content-address hash; 8 | ;; TODO: maybe use a hash prefix to distinguish digest functions? See IPLD. 9 | ;; TODO: for user capabilities, prepend a per-domain prefix to the intent-addressed data; 10 | ;; TODO: do not allow applications to see the reference counts for their data, or fail to update them. 11 | ;; TODO: do allow applications to share content-addressed data, 12 | ;; TODO: without detecting if anyone else is presently sharing or not. 13 | 14 | (import :std/crypto/libcrypto :std/crypto/cipher :std/crypto/etc 15 | :clan/crypto/keccak) 16 | 17 | (export #t) 18 | 19 | (defstruct EncryptionContext 20 | (encrypt ;; Bytes <- IV Bytes ;; symmetric encryption (cut encrypt (make-cipher e) masterkey <> <>) 21 | decrypt ;; Bytes <- IV Bytes ;; symmetric decryption (cut decrypt (make-cipher e) masterkey <> <>) 22 | iv-len ;; Integer ;; length of an IV (initialization vector) 23 | digest ;; Hash <- Bytes ;; compute hash from bytes 24 | derive-ca-key ;; Bytes <- Hash ;; derive db key from content hash 25 | derive-ca-iv ;; IV <- Hash ;; derive iv from content hash 26 | derive-ia-key)) ;; Bytes <- Hash ;; derive db key from intent hash 27 | 28 | (defmethod {:init! EncryptionContext} 29 | (lambda (self encrypt decrypt digest masterkey keysalt valuesalt ivlen) 30 | (struct-instance-init! self encrypt decrypt digest masterkey keysalt valuesalt ivlen))) 31 | 32 | (def (standard-encryption-context 33 | masterkey 34 | cipher: (cipher (let (cipher (EVP_aes_256_ctr)) (cut make-cipher cipher))) 35 | digest: (digest keccak256<-bytes) 36 | iv-len: (iv-len 16)) ;; length of IV for cipher 37 | (def (make-salt str) (digest (u8vector-append masterkey (string->bytes str)))) 38 | (def ca-key-salt (make-salt "content-addressed-key")) 39 | (def ca-iv-salt (make-salt "content-addressed-initialization-vector")) 40 | (def ia-key-salt (make-salt "intent-addressed-key")) 41 | (make-EncryptionContext 42 | (cut encrypt (cipher) masterkey <> <>) 43 | (cut decrypt (cipher) masterkey <> <>) 44 | iv-len 45 | digest 46 | (lambda (hash) (digest (u8vector-append ca-key-salt hash))) 47 | (lambda (hash) (subu8vector (digest (u8vector-append ca-iv-salt hash)) 0 iv-len)) 48 | (lambda (hash) (digest (u8vector-append ia-key-salt hash))))) 49 | 50 | (def (store-content-addressed-bytes kvs crypt-ctx bytes) 51 | (with ((EncryptionContext encrypt _ _ digest derive-ca-key derive-ca-iv _) crypt-ctx) 52 | (def hash (digest bytes)) 53 | (def key (derive-ca-key hash)) 54 | (def iv (derive-ca-iv hash)) 55 | (def value (encrypt iv bytes)) 56 | {write-key kvs key value} 57 | hash)) 58 | 59 | (def (load-content-addressed-bytes kvs crypt-ctx hash) 60 | (with ((EncryptionContext _ decrypt _ digest derive-ca-key derive-ca-iv _) crypt-ctx) 61 | (def key (derive-ca-key hash)) 62 | (def iv (derive-ca-iv hash)) 63 | {read-decode-check-key 64 | kvs key (cut decrypt iv <>) (lambda (bytes) (equal? hash (digest bytes)))})) 65 | 66 | ;; Store bytes at an intent identified by some hash or other u8vector 67 | (def (store-intent-addressed-bytes kvs crypt-ctx intent bytes) 68 | (with ((EncryptionContext encrypt _ iv-len _ _ _ derive-ia-key) crypt-ctx) 69 | (def key (derive-ia-key intent)) 70 | (def iv (random-bytes iv-len)) 71 | (def value (encrypt iv bytes)) 72 | {write-key kvs key (u8vector-append iv value)})) 73 | 74 | (def (load-intent-addressed-bytes kvs crypt-ctx intent (valid? true)) 75 | (with ((EncryptionContext _ decrypt iv-len _ _ _ derive-ia-key) crypt-ctx) 76 | (def key (derive-ia-key intent)) 77 | {read-decode-check-key 78 | kvs key (lambda (iv+ct) 79 | (def iv (subu8vector iv+ct 0 iv-len)) 80 | (def ciphertext (subu8vector iv+ct iv-len (u8vector-length iv+ct))) 81 | (decrypt iv ciphertext)) 82 | valid?})) 83 | -------------------------------------------------------------------------------- /gerbil.pkg: -------------------------------------------------------------------------------- 1 | ;; Data and Activity persistence layer for Gerbil 2 | (package: clan/persist 3 | depend: ("github.com/mighty-gerbils/gerbil-utils" 4 | "github.com/mighty-gerbils/gerbil-poo" 5 | "github.com/mighty-gerbils/gerbil-crypto" 6 | "github.com/mighty-gerbils/gerbil-leveldb")) 7 | -------------------------------------------------------------------------------- /kvs-leveldb.ss: -------------------------------------------------------------------------------- 1 | ;;;; Key Value Store Interface 2 | 3 | (import 4 | (only-in :std/error check-argument) 5 | (only-in :std/misc/completion make-completion) 6 | (only-in :std/misc/number pre-increment!) 7 | (only-in :std/misc/path path-parent ensure-absolute-path) 8 | (only-in :clan/db/leveldb leveldb-open leveldb-default-options leveldb-get 9 | leveldb-writebatch leveldb-write leveldb-write-options leveldb-sync-write-options 10 | leveldb-writebatch-clear leveldb-writebatch-put leveldb-writebatch-delete) 11 | (only-in :clan/path-config persistent-directory) 12 | (only-in :clan/persist/kvs Kvs Kvs-connection)) 13 | 14 | (export #t) 15 | 16 | (defstruct (KvsLeveldb Kvs) 17 | (batch-id batch batch-completion) 18 | constructor: :init!) 19 | 20 | (defmethod {:init! KvsLeveldb} 21 | (lambda (self path (opts (leveldb-default-options))) 22 | (def abspath (ensure-absolute-path path persistent-directory)) 23 | (create-directory* (path-parent abspath)) 24 | (struct-instance-init! self (leveldb-open abspath opts) 0 #f #f))) 25 | 26 | (defmethod {begin-transaction KvsLeveldb} 27 | (lambda (self) 28 | (check-argument (not (KvsLeveldb-batch self)) 29 | "KvsLevelDb without transaction already started" self) 30 | (def batch-id (pre-increment! (KvsLeveldb-batch-id self))) 31 | (set! (KvsLeveldb-batch self) (leveldb-writebatch)) 32 | (set! (KvsLeveldb-batch-completion self) (make-completion `(db-batch ,batch-id))))) 33 | 34 | (defmethod {abort-transaction KvsLeveldb} 35 | (lambda (self) 36 | (check-argument (KvsLeveldb-batch self) 37 | "KvsLevelDb with transaction already started" self) 38 | (leveldb-writebatch-clear (KvsLeveldb-batch self)) 39 | (set! (KvsLeveldb-batch self) #f) 40 | (set! (KvsLeveldb-batch-completion self) #f))) 41 | 42 | (def leveldb-sync-write-options (leveldb-write-options sync: #f)) 43 | 44 | (defmethod {commit-transaction KvsLeveldb} 45 | (lambda (self) 46 | (check-argument (KvsLeveldb-batch self) 47 | "KvsLevelDb with transaction already started" self) 48 | (leveldb-write (Kvs-connection self) (KvsLeveldb-batch self) leveldb-sync-write-options) 49 | (set! (KvsLeveldb-batch self) #f) 50 | (set! (KvsLeveldb-batch-completion self) #f))) 51 | 52 | (defmethod {read-key KvsLeveldb} 53 | (lambda (K k) 54 | (def v (leveldb-get (Kvs-connection K) k)) 55 | (values v (and v #t)))) 56 | 57 | (defmethod {write-key KvsLeveldb} 58 | (lambda (K k v) 59 | (def b (KvsLeveldb-batch K)) 60 | (leveldb-writebatch-put b k v))) 61 | 62 | (defmethod {delete-key KvsLeveldb} 63 | (lambda (K k) 64 | (leveldb-writebatch-delete (KvsLeveldb-batch K) k))) 65 | -------------------------------------------------------------------------------- /kvs-mux.ss: -------------------------------------------------------------------------------- 1 | ;; Handle concurrent user-level transactions atop a single serialized system transaction. 2 | ;; See the Concurrency Model section in the README. 3 | (export #t) 4 | (import 5 | :std/assert 6 | :std/misc/completion 7 | :std/misc/list 8 | :std/misc/number 9 | :std/misc/path 10 | :std/sugar 11 | :clan/base 12 | :clan/concurrency 13 | :clan/path-config 14 | :clan/persist/kvs 15 | :clan/persist/kvs-sqlite) 16 | 17 | (defstruct KvsMux ;; multiplexing users for a key value store 18 | (kvs ;; underlying key value store 19 | name mx txcounter 20 | blocked-transactions open-transactions pending-transactions hooks 21 | batch-id batch batch-completion manager timer 22 | ready? triggered?) 23 | constructor: :init!) 24 | (defmethod {:init! KvsMux} 25 | (lambda (self name (kvs #f)) 26 | (unless kvs 27 | (set! kvs (make-KvsSqlite (ensure-absolute-path name persistent-directory)))) 28 | (def mx (make-mutex name)) ;; Mutex 29 | (def txcounter 0) ;; Nat 30 | (def hooks (make-hash-table)) ;; (Table (<-) <- Any) 31 | (def blocked-transactions []) ;; (List Transaction) 32 | (def open-transactions (make-hash-table)) ;; mutable (HashSet Transaction) 33 | (def pending-transactions []) ;; (List Transaction) 34 | (def cache (make-hash-table)) ;; mutable (Table Bytes <- Bytes) 35 | (def batch-id 0) ;; Nat 36 | (def batch-completion (make-completion '(db-batch 0))) 37 | (def ready? #t) ;; Bool 38 | (def triggered? #f) ;; Bool 39 | (def manager true #;(db-manager self)) ;; Thread 40 | (def timer #f) ;; (Or Thread '#f) 41 | (struct-instance-init! 42 | self name kvs mx txcounter 43 | blocked-transactions open-transactions pending-transactions hooks 44 | cache batch-id batch-completion manager timer 45 | ready? triggered?))) 46 | 47 | (def (open-db-connection name . opts) (apply make-KvsMux name opts)) 48 | (def (open-db-connection! name . opts) (current-db-connection (apply open-db-connection name opts))) 49 | (defrules with-db-lock () 50 | ((_ (km) body ...) (with-lock (KvsMux-mx km) (lambda () body ...))) 51 | ((_ () body ...) (with-db-lock (current-db-connection) body ...))) 52 | (def (call-with-db-lock fun (db (current-db-connection))) 53 | (with-db-lock (db) (fun db))) 54 | 55 | (defmethod {close KvsMux} 56 | (lambda (self) 57 | (with-db-lock (self) 58 | (register-commit-hook! 59 | 'close 60 | (lambda _ 61 | {close (KvsMux-kvs self)} 62 | (thread-send (KvsMux-manager self) #f)) 63 | self) 64 | (db-trigger! self)) 65 | (thread-join! (KvsMux-manager self)))) 66 | 67 | ;; Register post-commit finalizer actions to be run after this batch commits, 68 | ;; with the batch id as a parameter. 69 | ;; The hook is called synchronously, but it if you use asynchronous message passing, 70 | ;; it is possible that the hooks may be called out of order. 71 | ;; ASSUMES YOU'RE HOLDING THE KVSMUX-LOCK 72 | ;; Unit <- Any (<- Nat) KvsMux 73 | (def (register-commit-hook! name hook (c (current-db-connection))) 74 | (hash-put! (KvsMux-hooks c) name hook)) 75 | 76 | ;; Mark the current batch as triggered, because either some transaction must be committed, 77 | ;; or a timer has hit since content was added, or we're closing the database. 78 | ;; ASSUMES YOU'RE HOLDING THE LOCK 79 | ;; : <- KvsMux 80 | (def (db-trigger! c) 81 | (if (and (KvsMux-ready? c) (zero? (hash-length (KvsMux-open-transactions c)))) 82 | (finalize-batch! c) 83 | (set! (KvsMux-triggered? c) #t))) 84 | 85 | ;; Fork a system thread to handle the commit; 86 | ;; when it's done, wakeup the wait-on-batch-commit completion 87 | (def (finalize-batch! c) 88 | (def batch-id (KvsMux-batch-id c)) 89 | (def batch (KvsMux-batch c)) 90 | (def batch-completion (KvsMux-batch-completion c)) 91 | (def hooks (hash-values (KvsMux-hooks c))) 92 | (def blocked-transactions (KvsMux-blocked-transactions c)) 93 | (def pending-transactions (KvsMux-pending-transactions c)) 94 | (set! (KvsMux-batch-id c) (1+ batch-id)) 95 | #;(set! (KvsMux-batch c) (leveldb-writebatch)) ;; open 96 | (set! (KvsMux-batch-completion c) (make-completion `(db-batch , (KvsMux-batch-id c)))) 97 | (set! (KvsMux-pending-transactions c) []) 98 | (set! (KvsMux-blocked-transactions c) []) 99 | (set! (KvsMux-ready? c) #f) 100 | (set! (KvsMux-triggered? c) #f) 101 | (set! (KvsMux-timer c) #f) 102 | (for-each (lambda (tx) 103 | (set! (KvsMuxTx-status tx) 'open) 104 | (hash-put! (KvsMux-open-transactions c) (KvsMuxTx-txid tx) tx)) 105 | blocked-transactions) 106 | (thread-send (KvsMux-manager c) [batch-id batch batch-completion hooks pending-transactions])) 107 | 108 | ;;(def (call-with-db fun name) 109 | ;; (def c (open-db-connection name)) 110 | ;; (try 111 | ;; (parameterize ((current-db-connection c)) 112 | ;; (fun c)) 113 | ;; (finally (close-db-connection c)))) 114 | ;;(defrule (with-db-connection (c name ...) body ...) 115 | ;; (call-with-db-connection (lambda (c) body ...) name ...)) 116 | ;;(def (ensure-db-connection name) 117 | ;; (def c (current-db-connection)) 118 | ;; (if c 119 | ;; (assert! (equal? (KvsMux-name c) name)) 120 | ;; (open-db-connection! name))) 121 | 122 | ;; 50-100 transactions per second is about what we expect on a typical disk. 123 | ;; : Real 124 | (def deferred-db-trigger-interval-in-seconds .02) 125 | 126 | ;; ASSUMES YOU'RE HOLDING THE LOCK 127 | ;; : <- KvsMux 128 | (def (deferred-db-trigger! c) 129 | (unless (KvsMux-timer c) 130 | (let (batch-id (KvsMux-batch-id c)) 131 | (set! (KvsMux-timer c) 132 | (spawn/name/logged 133 | ['timer batch-id] 134 | (lambda () (thread-sleep! deferred-db-trigger-interval-in-seconds) 135 | (with-db-lock (c) 136 | (when (equal? batch-id (KvsMux-batch-id c)) 137 | (db-trigger! c))))))))) 138 | 139 | ;; status: blocked open pending complete 140 | ;; When opening a transaction, it may be blocked at first so the previous batch may be completed, 141 | ;; but by the time it is returned to the user, it is in open status; 142 | ;; when it is closed, it becomes pending until its batch is committed, 143 | ;; at which point it becomes complete and any thread sync'ing on it will be awakened. 144 | (defstruct KvsMuxTx (km txid status) transparent: #t) 145 | (def (KvsMuxTx-completion tx) 146 | (def c (KvsMuxTx-km tx)) 147 | (with-db-lock (c) 148 | (case (KvsMuxTx-status tx) 149 | ((open pending) 150 | (KvsMux-batch-completion c)) 151 | (else #f)))) 152 | 153 | ;; : <- (OrFalse Completion) 154 | (def (wait-completion completion) 155 | (when completion (completion-wait! completion))) 156 | 157 | ;; Open Transaction 158 | ;; TODO: assert that the transaction_counter never wraps around? 159 | ;; Or check and block further transactions when it does, before resetting the counter? *) 160 | ;; TODO: commenting out the ready && triggered helps detect / enact deadlocks when running 161 | ;; tests, by having only one active transaction at a time; but then the hold can and 162 | ;; should be released as soon as "the" transaction is complete, unless we're already both 163 | ;; ready && triggered for the next batch commit. Have an option for that? 164 | (defmethod {begin-transaction KvsMux} 165 | (lambda (self) 166 | (defvalues (transaction completion) 167 | (with-db-lock (self) 168 | (let* ((txid (post-increment! (KvsMux-txcounter self))) 169 | (blocked? (and (KvsMux-ready? self) (KvsMux-triggered? self))) 170 | (status (if blocked? 'blocked 'open)) 171 | (transaction (KvsMuxTx self txid status))) 172 | (if blocked? 173 | (push! transaction (KvsMux-blocked-transactions self)) 174 | (hash-put! (KvsMux-open-transactions self) txid transaction)) 175 | (values transaction (and blocked? (KvsMux-batch-completion self)))))) 176 | (wait-completion completion) ;; wait without holding the lock 177 | transaction)) 178 | 179 | ;; For now, let's 180 | ;; * Disallow nested transaction / auto-transactions. We want a clear transaction owner, and 181 | ;; the type / signature of functions will ensure that there is always one. 182 | ;; * Return the result of the inner expression, after the transaction is closed but not committed. 183 | ;; If you need to synchronize on the transaction, be sure to return it or otherwise memorize it, 184 | ;; or use after-commit from within the body. 185 | (def (call-with-tx fun km wait: (wait #f)) 186 | (awhen (t (current-db-transaction)) 187 | (error "Cannot nest transactions" t)) 188 | (def tx {begin-transaction (or km (current-db-connection))}) 189 | (try 190 | (parameterize ((current-db-transaction tx)) 191 | (fun tx)) 192 | (finally 193 | (close-transaction tx) 194 | (when wait (sync-transaction tx))))) 195 | (defrule (with-tx (tx dbc ...) body ...) 196 | (call-with-tx (lambda (tx) body ...) dbc ...)) 197 | (defrule (without-tx body ...) 198 | (parameterize ((current-db-transaction #f)) body ...)) 199 | 200 | (def (call-with-committed-tx fun (c #f)) 201 | (call-with-tx fun c wait: #t)) 202 | (defrule (with-committed-tx (tx dbc ...) body ...) 203 | (call-with-committed-tx (lambda (tx) body ...) dbc ...)) 204 | (defrule (after-commit (tx) body ...) 205 | (without-tx (spawn/name/logged 206 | ['after-commit (KvsMuxTx-txid tx)] 207 | (lambda () (completion-wait! (KvsMuxTx-completion tx)) body ...)))) 208 | 209 | ;; Mark a transaction as ready to be committed. 210 | ;; Return a completion that will be posted when the transaction is committed to disk. 211 | ;; The system must otherwise ensure that the action that follows this promise 212 | ;; will be restarted by a new instance of this program in case the process crashes after this commit, 213 | ;; or is otherwise some client's responsibility to restart if the program acts as a server. 214 | (def (close-transaction (tx (current-db-transaction))) 215 | (match tx 216 | ((KvsMuxTx c txid status) 217 | (with-db-lock (c) 218 | (case status 219 | ((blocked open) 220 | (set! (KvsMuxTx-status tx) 'pending) 221 | (hash-remove! (KvsMux-open-transactions c) txid) 222 | (push! tx (KvsMux-pending-transactions c)) 223 | (deferred-db-trigger! c) 224 | (KvsMux-batch-completion c)) 225 | ((pending) 226 | (KvsMux-batch-completion c)) 227 | (else #f)))) 228 | (else (error "close-transaction: not a transaction" tx)))) 229 | 230 | ;; Close a transaction, then wait for it to be committed. 231 | (def (commit-transaction (transaction (current-db-transaction))) 232 | (wait-completion (close-transaction transaction))) 233 | 234 | ;; Sync to a transaction being committed. 235 | ;; Thou Shalt Not sync with the end of a transaction from within another transaction, 236 | ;; or you may deadlock, since that other transaction might be part of the same batch. 237 | ;; Instead, thou shalt sync on it in a background thread, that will then run 238 | ;; the very same code as you would if you would resume the persistent activity, 239 | ;; and that code must be effectively idempotent. 240 | (def (sync-transaction (transaction (current-db-transaction))) 241 | (wait-completion (KvsMuxTx-completion transaction))) 242 | 243 | (def (db-manager c) 244 | (spawn/name/logged 245 | ['db-manager (KvsMux-name c)] 246 | (fun (db-manager-1) 247 | (let loop () 248 | (match (thread-receive) 249 | ([batch-id batch batch-completion hooks pending-transactions] 250 | ;; TODO: run the leveldb-write in a different OS thread. 251 | #;(leveldb-write (KvsMux-kvs c) batch leveldb-sync-write-options) 252 | (for-each (lambda (tx) (set! (KvsMuxTx-status tx) 'complete)) 253 | pending-transactions) 254 | (for-each (lambda (hook) (hook batch-id)) hooks) 255 | (completion-post! batch-completion batch-id) 256 | (with-db-lock (c) 257 | (if (and (KvsMux-triggered? c) (zero? (hash-length (KvsMux-open-transactions c)))) 258 | (finalize-batch! c) 259 | (set! (KvsMux-ready? c) #t))) 260 | (loop)) 261 | (#f (void)) 262 | (x (error "foo" x))))))) 263 | 264 | ;; Get the batch id: not just for testing, 265 | ;; but also, within a transaction, to get the id to prepare a hook, 266 | ;; e.g. to send newly committed but previously unsent messages. 267 | (def (get-batch-id (c (current-db-connection))) 268 | (KvsMux-batch-id c)) 269 | -------------------------------------------------------------------------------- /kvs-postgres.ss: -------------------------------------------------------------------------------- 1 | ;;;; Key Value Store Interface for Postgres 2 | 3 | (import 4 | :std/db/dbi 5 | :std/db/postgresql 6 | :std/db/postgresql-driver 7 | :std/iter 8 | :std/misc/path 9 | :std/sugar 10 | :clan/path-config 11 | :clan/persist/kvs 12 | :clan/persist/kvs-sql) 13 | 14 | (defstruct (KvsPostgres KvsSql) 15 | (begin-tx-stmt commit-tx-stmt abort-tx-stmt 16 | read-stmt write-stmt delete-stmt) 17 | constructor: :init!) 18 | 19 | 20 | (defmethod {:init! KvsPostgres} 21 | (lambda (self . args) 22 | (def connection (apply sql-connect args)) 23 | (sql-eval connection (string-append 24 | "CREATE TABLE IF NOT EXISTS kvs ( " 25 | "key BLOB, " 26 | "value BLOB NOT NULL, " 27 | "PRIMARY KEY (key)) ;")) 28 | (struct-instance-init! 29 | self connection 30 | (sql-prepare connection "BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ WRITE") 31 | (sql-prepare connection "COMMIT TRANSACTION") 32 | (sql-prepare connection "ROLLBACK TRANSACTION") 33 | (sql-prepare connection "SELECT value FROM kvs WHERE key = ?") 34 | (sql-prepare connection "INSERT INTO kvs (key, value) VALUES (?, ?) ON CONFLICT DO UPDATE SET value = excluded.value") 35 | (sql-prepare connection "DELETE FROM kvs WHERE key = ?")))) 36 | -------------------------------------------------------------------------------- /kvs-sql.ss: -------------------------------------------------------------------------------- 1 | ;;;; Key Value Store for SQL in general (to be specialized by SQLite, PostgreSQL, etc.) 2 | 3 | (import 4 | :std/db/dbi 5 | :std/iter 6 | :std/misc/path 7 | :std/sugar 8 | :clan/path-config 9 | :clan/persist/kvs) 10 | 11 | (export #t) 12 | 13 | (defstruct (KvsSql Kvs) 14 | (begin-tx-stmt commit-tx-stmt abort-tx-stmt 15 | read-stmt write-stmt delete-stmt) 16 | constructor: :init!) 17 | 18 | (defmethod {:init! KvsSql} 19 | (lambda (self connection begin-tx-stmt commit-tx-stmt abort-tx-stmt read-stmt write-stmt delete-stmt) 20 | (struct-instance-init! self connection begin-tx-stmt commit-tx-stmt abort-tx-stmt read-stmt write-stmt delete-stmt))) 21 | 22 | (defmethod {begin-transaction KvsSql} (lambda (self) (sql-exec (KvsSql-begin-tx-stmt self)))) 23 | (defmethod {abort-transaction KvsSql} (lambda (self) (sql-exec (KvsSql-abort-tx-stmt self)))) 24 | (defmethod {commit-transaction KvsSql} (lambda (self) (sql-exec (KvsSql-commit-tx-stmt self)))) 25 | 26 | (defrule (with-statement (var stmt args ...) body ...) 27 | (let ((var stmt)) 28 | (try {bind var args ...} body ... 29 | (finally (sql-reset/clear stmt))))) 30 | 31 | (defmethod {read-key KvsSql} 32 | (lambda (K key) 33 | (with-statement (s (KvsSql-read-stmt K) key) 34 | (match {query-fetch s} 35 | ((eq? #!void) (values {query-row s} #t)) 36 | ((eq? iter-end) (values #f #f)))))) 37 | 38 | (defmethod {write-key KvsSql} 39 | (lambda (K k v) 40 | (with-statement (s (KvsSql-write-stmt K) k v) 41 | {exec s}))) 42 | 43 | (defmethod {delete-key KvsSql} 44 | (lambda (K k) 45 | (with-statement (s (KvsSql-delete-stmt K) k) 46 | {exec s}))) 47 | -------------------------------------------------------------------------------- /kvs-sqlite.ss: -------------------------------------------------------------------------------- 1 | ;;;; Key Value Store Interface for sqlite 2 | 3 | (import 4 | :std/db/dbi 5 | :std/db/sqlite 6 | :std/db/_sqlite 7 | :std/iter 8 | :std/misc/path 9 | :std/sugar 10 | :clan/path-config 11 | :clan/persist/kvs 12 | :clan/persist/kvs-sql) 13 | 14 | (export #t) 15 | 16 | (defstruct (KvsSqlite KvsSql) 17 | (begin-tx-stmt commit-tx-stmt abort-tx-stmt 18 | read-stmt write-stmt delete-stmt) 19 | constructor: :init!) 20 | 21 | (defmethod {:init! KvsSqlite} 22 | (lambda (self path (flags (fxior SQLITE_OPEN_READWRITE SQLITE_OPEN_CREATE))) 23 | (def abspath (ensure-absolute-path path persistent-directory)) 24 | (create-directory* (path-parent abspath)) 25 | (def connection (sqlite-open abspath flags)) 26 | (sql-eval connection (string-append 27 | "PRAGMA locking_mode = EXCLUSIVE ; " 28 | "PRAGMA synchronous = FULL ; ")) 29 | (sql-eval connection (string-append 30 | "CREATE TABLE IF NOT EXISTS kvs ( " 31 | "key BLOB PRIMARY KEY, " 32 | "value BLOB NOT NULL ) " 33 | "WITHOUT ROWID")) 34 | (struct-instance-init! 35 | self connection 36 | (sql-prepare connection "BEGIN IMMEDIATE TRANSACTION") 37 | (sql-prepare connection "COMMIT TRANSACTION") 38 | (sql-prepare connection "ROLLBACK TRANSACTION") 39 | (sql-prepare connection "SELECT value FROM kvs WHERE key = ?") 40 | (sql-prepare connection "INSERT INTO kvs (key, value) VALUES (?, ?) ON CONFLICT DO UPDATE SET value = excluded.value") 41 | (sql-prepare connection "DELETE FROM kvs WHERE key = ?")))) 42 | -------------------------------------------------------------------------------- /kvs.ss: -------------------------------------------------------------------------------- 1 | ;;;; Key Value Store Interface 2 | 3 | (import 4 | :std/error 5 | :std/db/dbi :std/db/sqlite :std/sugar :std/misc/list-builder 6 | :clan/base) 7 | 8 | (export #t) 9 | 10 | (def current-db-connection (make-parameter #f)) 11 | 12 | (def current-db-transaction (make-parameter #f)) 13 | 14 | (deferror-class DbError ()) 15 | 16 | (def (raise-db-error where message . irritants) 17 | (raise (make-DbError message where: where irritants: irritants))) 18 | 19 | (defstruct Kvs (connection) 20 | constructor: :init!) 21 | 22 | (defmethod {:init! Kvs} 23 | (lambda (self e) (struct-instance-init! self e))) 24 | 25 | (defmethod {close Kvs} 26 | (lambda (self) {close (Kvs-connection self)})) 27 | 28 | ;; NB: In the near future, a key value store backed by several remote servers may implement 29 | ;; this method by querying its multiple replicas and identifying whichever is correct. 30 | (defmethod {read-decode-check-key Kvs} 31 | (lambda (self key decode check?) 32 | (defvalues (bytes present?) {read-key self key}) 33 | (unless present? 34 | (raise-db-error 'read-decode-check-key "kvs key absent" key)) 35 | (def value (decode bytes)) 36 | (unless (check? value) 37 | (error 'kvs-data-tampering "Database was tampered with" self key)) 38 | value)) 39 | -------------------------------------------------------------------------------- /merkle-trie.ss: -------------------------------------------------------------------------------- 1 | (export #t) 2 | 3 | (import 4 | :clan/base 5 | :clan/poo/object :clan/poo/mop :clan/poo/type :clan/poo/brace :clan/poo/io 6 | :clan/poo/trie :clan/poo/number 7 | ./content-addressing) 8 | 9 | ;; TODO: support Value itself being digested! Pass around bytes, not values? 10 | ;; TODO: automatically work recursively on a descriptor for the "open recursion scheme" type functor 11 | ;; of which the data structure is a fixed-point. 12 | ;; TODO: support negative proofs. Specially detect empty digest? 13 | ;; TODO: support remember the current skip info in DigestedTrie, so you can properly simulate 14 | ;; .make-branch and .make-skip when re-digesting a trie with a leaf removed. 15 | 16 | (define-type (DigestedTrie. @ [Trie.] Key Height Value .digesting T Step .wrap) 17 | Digest: (Digesting-Digest .digesting) 18 | .validate: (.@ Digest .validate) 19 | .sexp<-: (.@ Digest .sexp<-) 20 | .marshal: (.@ Digest .marshal) 21 | .unmarshal: (.@ Digest .marshal) 22 | .bytes<-: (.@ Digest .bytes<-) 23 | .<-bytes: (.@ Digest .<-bytes) 24 | .json<-: (.@ Digest .json<-) 25 | .<-json: (.@ Digest .<-json) 26 | Wrapper: { 27 | .ap: (lambda (v) (digest<-bytes (.call T .bytes<- v) .digesting)) 28 | .unap: invalid .bind: invalid .map: invalid } 29 | Unstep: =>.+ {(:: @ [] .symmetric) 30 | .up: (.symmetric branch: (lambda (_ h l r) (.wrap (Branch h l r))) 31 | skip: (lambda (_ h l b c) (.wrap (Skip h l b c))))} 32 | Path: =>.+ {(:: @ [] .op) 33 | .up: (let (up (.@ Unstep .up)) (lambda (t path) (.op up t path)))}) 34 | 35 | (def (DigestedTrie Key Height Value .digesting) 36 | {(:: @ DigestedTrie.) Key Height Value .digesting 37 | sexp: `(DigestedTrie ,(.@ Key sexp) ,(.@ Height sexp) 38 | ,(.@ Value sexp) ,(Digesting-sexp .digesting))}) 39 | 40 | (define-type (MerkleTrie. @ [ContentAddressed. Trie.] 41 | Key Height Value .wrap .unwrap .refocus .zipper<- Path 42 | .digesting .digest<-) 43 | T: =>.+ { .walk-dependencies: 44 | (lambda (f t) (match t 45 | ((Empty) (void)) 46 | ((Leaf v) (f Value v)) 47 | ((Branch _ l r) (f @ l) (f @ r)) 48 | ((Skip _ _ _ c) (f @ c)))) } 49 | Digested: {(:: @D [DigestedTrie.]) Key Height Value .digesting} 50 | .proof<-: ;; : (Path Digest) 51 | (lambda (trie key) 52 | (match (.refocus ($Costep -1 key) (.zipper<- trie)) 53 | ([sub . up] (cons sub (.call Path .map .digest<- up))))) 54 | .validate-proof: 55 | (lambda (trie-digest sub up) 56 | (match (.unwrap sub) 57 | ((Leaf v) 58 | (validate Value v) 59 | (let (digest (car ((.@ Digested Path .up) (.call Digested .leaf v) up))) 60 | (unless (equal? trie-digest digest) 61 | (let (D (Digesting-Digest .digesting)) 62 | (raise-type-error "Digest doesn't match: " D trie-digest D digest up))))) 63 | ;; TODO: support negative proofs 64 | (_ (raise-type-error "No leaf" sub up))))) 65 | (def (MerkleTrie Key: (Key UInt) Height: (Height UInt) 66 | Value: (Value Any) Digesting: (.digesting keccak-addressing)) 67 | {(:: @ [MerkleTrie.]) Key Height Value .digesting 68 | sexp: `(MerkleTrie Key: ,(.@ Key sexp) Height: ,(.@ Height sexp) Value: ,(.@ Value sexp) 69 | Digesting: ,(Digesting-sexp .digesting))}) 70 | -------------------------------------------------------------------------------- /persist.md: -------------------------------------------------------------------------------- 1 | # Orthogonal Persistence, The Model 2 | 3 | _Orthogonal Persistence_: You bind variable `x` to 42 at your REPL, but then 4 | some “youths” steal your laptop while orcs raze your datacenter to the ground. 5 | So you move to a safer place, get a new laptop, and enter your passphrase in it; 6 | the system downloads encrypted backups from redundant datacenters on multiple 7 | other continents; after a few minutes, your interface is restored to the 8 | same state it was when you left it, and variable `x` is still bound to 42. 9 | Yet not one single instructions in any of your applications ever had 10 | to even mention anything about storage and retrieval. 11 | 12 | [This paper presents an original re-framing of Orthogonal Persistence, 13 | as refactored into a simple set of concepts in March 2024.] 14 | 15 | ## Orthogonal Persistence: Computations that Matter on Data that Matters 16 | 17 | «Designing a programming language without data persistence means 18 | “I only care about toy computations for people whose data doesn't matter.” 19 | Designing a database without a good programming language means 20 | “I only care about toy data not part of any computation that matters.”» 21 | 22 | ### Persistence by Default 23 | 24 | Orthogonal Persistence (of Processes, or however named activities), 25 | that John de Goes calls “Durable Execution”, is when 26 | ongoing processes persist (at least by default), 27 | such that if interrupted for any reason (memory exhaustion, 28 | hardware fault, power interruption, machine destroyed or stolen, etc.), 29 | they will automatically restart from last committed state. 30 | 31 | Orthogonal Persistence is to be opposed to the prevalent paradigm of 32 | Manual Persistence, wherein all data is transient by default, and 33 | developers have to explicitly write data to one or several 34 | disk files when they desire persistence, 35 | and further organize their actions into database transactions 36 | when they desire atomicity. 37 | Orthogonal Persistence essentially reverses the burden of persistence: 38 | all state changes are safely written to persistent storage by default, 39 | and escaping into transience is the edge case sometimes invoked for performance. 40 | 41 | Although most issues with Manual Persistence disappear with 42 | Orthogonal Persistence, some issues remain, though in a simplified way, 43 | reframed upside down into not having been Persistence issues after all, 44 | so much as essential logical issues that had been previously drowned 45 | among the cumbersome details of manual persistence: 46 | *persistence domains*, *atomicity*, *synchronization*, 47 | *publishing*, *resource exhaustion* and *schema upgrade*. 48 | 49 | ### Persistence Domains 50 | 51 | Orthogonal Persistence can use traditional disks, filesystems and databases 52 | as underlying storage as well as cloud storage. 53 | Each (partition of a) disk, (registered directory tree in a) filesystem, 54 | (set of tables in a) database or (prefixed area within a) cloud account 55 | is presented as a “persistence domain” to the user 56 | who wants to configure where and how his data will be persisted. 57 | 58 | In the Manual Persistence paradigm, developers talk about 59 | “Storage Backends”, and focus on physical devices and 60 | the many layers of drivers to access them, and 61 | how they are statically assembling them up into larger storage “volumes”. 62 | In the Orthogonal Persistence paradigm, developers prefer to talk about 63 | Persistence Domains as logical entities having a persistent identity, 64 | and how they are dynamically mapped down to various underlying substrates. 65 | This means a persistence domain can survive indefinitely 66 | through simple occasional reconfiguration, even though the 67 | underlying substrates necessarily have short finite shelf life 68 | of a few years at most each, which necessarily causes catastrophic 69 | transitions in the Manual Persistence paradigm. 70 | 71 | When mapping a persistence domain to a database or cloud service, 72 | transactions would of course be used for the sake of atomicity. 73 | On a filesystem or a raw device, atomicity would be implemented by 74 | batches of append-only writes followed by a `fdatasync`, 75 | then possibly a head-switching and another `fdatasync`, 76 | if the previous isn’t enough (or possibly as part of the next transaction, 77 | or with a protocol relying on multiple potential heads and checksums 78 | so the restore procedure can locate the latest head). 79 | To let the storage backend see the mutator actions in terms of transactions 80 | without the mutator being explicitly organized in such terms, 81 | Orthogonal Persistence uses *inversion of control*, whereby 82 | it maintains at any point in time one transaction per persistence domain. 83 | 84 | For the sake of liveness, the system can periodically 85 | commit these transactions and starts new transactions. 86 | For the sake of durability, replication can be achieved by federating 87 | several persistence domains using some private consensus mechanism. 88 | For the sake of privacy, encryption can be used. 89 | For the sake of integrity, cryptographic digests can be used. 90 | 91 | ### Atomicity 92 | 93 | Atomicity is the notion that some speculative effects must happen all together, 94 | or none at all must happen, and that users should never be able to observe 95 | the system in an “intermediate” state where some happened but not the others. 96 | For instance, accounting books must be balanced at all times, and 97 | when a transfer is attempted, it may fail or complete, but users should never 98 | observe accounts in a state where one was credited and the other not 99 | debited (yet), or the other way around. 100 | 101 | With respect to Persistence, the speculative aspect is about whether or not 102 | some computation was persisted or not. If it was persisted, then it is 103 | guaranteed to have taken place even in the case of the system crashing and 104 | restarting; if it wasn’t, then it is possible for the system to forget it. 105 | The crash may be fortuitous and unexpected (sudden power loss, theft of device), 106 | undesired but expected (laptop with empty battery, device failing of old age), 107 | or caused by Enemy action (random attack by a predator, or targeted attack by 108 | a personal or professional adversary). In any case, the system is supposed to 109 | restart in a state consistent with the effects either having happened or not, 110 | but not in a garbled state where some of them happened but not others, or out 111 | of order, or with some mixups in the details, or data corruption, or with 112 | the entire system in an unrecoverable state, etc. 113 | 114 | In the paradigm of Orthogonal Persistence, atomicity is managed with explicit 115 | _atomic sections_ (a.k.a. critical sections): 116 | code that produces intermediate states that should not be 117 | persisted is wrapped in such sections, quite similar to 118 | code sections that have disabled signals or interrupts, 119 | or that hold some mutual exclusion lock. 120 | 121 | Atomic sections must be short: long atomic sections will break liveness, 122 | i.e. may cause the system to become unresponsive. 123 | But there is otherwise no problem with nesting atomic sections: 124 | persistence remains disabled while executing within an atomic section, 125 | and is only reenabled when all nested atomic sections have been exited. 126 | Code that contains atomic sections can be freely composed with other 127 | code containing atomic sections, and you do not usually need even be aware 128 | of whether or not the code components you compose contain 129 | one or more atomic sections, or none at all. 130 | 131 | Based on disk latency, we may target say a millisecond as duration 132 | before which to commit the current transaction. 133 | When the timer is reached, the transaction is delayed 134 | until all current atomic sections are completed; 135 | and (possibly after a grace period) new atomic sections are blocked 136 | from even being started, until after the transaction is committed. 137 | 138 | Atomic sections can be contrasted with transactions, 139 | the way that atomicity is managed in manual persistence paradigm. 140 | With manual persistence, you often get no guarantee of atomicity: 141 | you have to hope nothing bad happens when the power eventually goes down. 142 | If you really care about atomicity, you use a database and organize 143 | your code into transactions within which you explicitly put the data changes 144 | that should be persisted. 145 | But unlike atomic sections, transactions are 146 | a low-level yet global concept that isn’t modular. 147 | See section below on *Modularity*. 148 | 149 | Note that atomicity is specific to a given persistence domain. 150 | Changes across multiple persistence domains are not atomic by default. 151 | In some cases, atomic changes across multiple persistence domains 152 | can be achieved, but they require that the domains be under common management, 153 | or at least to agree on some costly consensus protocol such as two-phase commit 154 | (see section [Composing Persistence Domains](#composing-persistence-domains)). 155 | 156 | ### Synchronization 157 | 158 | Synchronization is the ability to wait for some data to have been 159 | persisted for sure before to take action that depends on it 160 | (such as ensuring a transfer was complete before sending 161 | acknowledgements, or signing a follow-up transaction, etc.). 162 | 163 | In the Orthogonal Persistence paradigm, this is managed using explicit 164 | _memory barriers_: an instruction or procedure that pauses evaluation 165 | until after the changes so far are committed to (persistent) memory. 166 | Optionally, it may be combined with thread-spawning to register 167 | a callback function to be called after the current transaction commits, 168 | all from within an atomic section. 169 | 170 | Note that evaluation after a memory barrier can actually continue 171 | optimistically after the memory barrier. However, the side-effects it produces 172 | will not be observable by users unless and until after the changes are committed; 173 | the output side-effects will be queued, and the input side-effects may block 174 | execution until the memory barrier is passed and outputs dequeued. 175 | The optimistic evaluation after a memory barrier may also not acquire locks 176 | on resources still available before the memory barrier, only on new resources 177 | created in the same optimistic “generation”. 178 | 179 | By contrast, with manual persistence, synchronization requires 180 | one to explicit `COMMIT` a transaction (on a database), or 181 | to explicitly call `sync`, `fsync` or `fdatasync` (on a filesystem), 182 | in an operation that is outside of the transaction itself, 183 | typically running on a different machine, the client, 184 | rather than on the server. 185 | The client may try to make a complex computation made of several 186 | transactions, called a “saga”, but by definition, anything 187 | that happens outside the database is itself a process that doesn’t persist, 188 | that may somehow be restarted thanks to complex and onerous infrastructure 189 | on top of the operating system, but will have lost all its data and context. 190 | The client is furthermore written in a programming language that is 191 | completely different from that of the database, with its own evaluation model, 192 | which programmers usually prefer because programming languages that come 193 | with databases comparatively suck in too many ways to count. 194 | Moreover, all computation in a client thread is usually stopped 195 | while waiting for the transaction to be persisted, which adds a lot 196 | of latency and slowness to anything that uses the database, 197 | that can only be overcome by heroic efforts from developers. 198 | 199 | Synchronization and Atomicity go together: in Computer Science, 200 | every set of constructors or introduction rules come with 201 | a corresponding set of destructors or elimination rules. 202 | In other words, a concept matters only if you can make observations from it. 203 | Atomicity is about the grouping of some speculative effects together; 204 | synchronization is how you ensure that the speculation is settled… 205 | before you start new atomic groups of effects. 206 | 207 | Typically, before you send out a message that commits you 208 | economically or legally or in any meaningful way, 209 | you will want to make sure that any associated supporting document, 210 | serial number, random nonce, transaction log, etc., is persisted. 211 | And once you’re ready to send it out, you will retry sending it 212 | until you have committed an acknowledgement that it was received. 213 | Conversely, you’ll commit a message before sending an acknowledgement, 214 | and you’ll re-send the acknowledgement if you wake up within the window 215 | of time that it should be sent. 216 | 217 | Note how multiple processes or threads within the same persistence domain 218 | can interact in synchronous ways, with a shared underlying transaction 219 | and memory barriers, always going forward, never having to rollback, 220 | without any consistency problem. This is totally unlike the database 221 | transaction model, where processes can never know that they are not going 222 | to interfere with each other, with all but one having to be rolled back. 223 | A lot of the needless complexity of database servers can be eschewed. 224 | Usual mutexes and all the regular programming techniques of real 225 | programming languages that people actually use and understand can be used 226 | to deal with contention, including much simpler reimplementations of 227 | any of the mechanisms that databases provide at the wrong place. 228 | 229 | On the other hand, processes in distinct persistence domains must 230 | communicate via asynchronous messages and abstractions built atop such. 231 | Moreover, every asynchronous message sent to a persistent process must 232 | (1) be idempotent, and (2) can only be sent after a memory barrier. 233 | Indeed, due to transient failures and restarts, a given action may be taken 234 | many times after its triggering condition was committed, yet before 235 | a sufficient reaction was acknowledged. Thus, it should be possible 236 | to take these actions many times with no adverse effect. 237 | Meanwhile, messages sent to a transient process need be neither idempotent 238 | nor wait for a memory barrier; however, the entire interaction with a transient 239 | process must be reconstitutable from scratch at any point, since 240 | the transient process may go at any time and have to be replaced 241 | by a new one with empty state. 242 | 243 | ### Publishing 244 | 245 | Publishing is what users do when the changes they made have reached 246 | a stable, satisfactory point, or when they otherwise want to make 247 | a test branch or release of their code, 248 | a snapshot of their data for a legal or accounting report, 249 | a copy to share with a family member, friend or colleague, 250 | a regular backup for safekeeping, etc. 251 | 252 | In the Orthogonal Persistence paradigm, publishing is managed by explicitly 253 | _tagging_ a version of their data as suitable for sharing, 254 | and granting select others a capability to access the tagged version 255 | on a replica of their persistence domain. 256 | 257 | This is in contrast with the Manual Persistence paradigm, where publishing 258 | involves making a copy of a file or directory, or an archive of that directory 259 | (zip file, tarball, etc.). The many versions of these files are easily confused 260 | with each other, and their names and metadata often lack information, or 261 | carry incorrect metainformation that indicates the time of last copy rather than 262 | original publication. It is often hard to tell which copy came before or after 263 | which other, which was extracted from a branch with non-standard changes, 264 | which was already visited or is otherwise redundant, which is important to view, 265 | what are the changes, whether the copy has its data intact and complete, 266 | or what tampered with, corrupted or incomplete or garbled beyond the ability 267 | to restore any useful data, etc. 268 | 269 | Thus, in Orthogonal Persistence you never have to *save* a file, 270 | yet you still once in a while have to *name* or *tag* a version, 271 | as you would with e.g. `git tag` in a version control system. 272 | Your Continuous Integration (CI) system may also add metadata 273 | to indicate which versions passes various test suites, 274 | and whatever review and action flows involving humans happen 275 | would also track which version of which data corresponds to which 276 | step of which process. 277 | Once the persistence of the data has been made safe and 278 | decoupled from the mechanism of publication, instead of being 279 | the most expensive and unreliable step that trumps other concerns, 280 | it becomes more obvious what the actual needs of publication are, 281 | and how to address them in a the most adequate way. 282 | 283 | ### Resource Exhaustion 284 | 285 | Whether in the paradigm of Orthogonal Persistence or Manual Persistence, 286 | the underlying resources are ultimately the same, and finite. 287 | And eventually, they tarry. 288 | 289 | Now, inasmuch as Orthogonal Persistence is managed automatically, 290 | without direct understanding of the human user’s ultimate intent, 291 | and with less care and attention given by humans 292 | (which is the very purpose of it), 293 | it will generally be somewhat less efficient in its use of resources than 294 | a carefully curated manual persistent store would be 295 | (say by a factor 2x or so for a relatively simple algorithm). 296 | On the other hand, because Orthogonal Persistence makes more sense 297 | than Manual Persistence, it can be less wasteful in other ways, 298 | avoiding duplication data between multiple “uninstalled”, “installed”, 299 | “published” or “archived” versions of some program or document, 300 | or proliferation of now-redundant code for manual persistence, etc. 301 | Either way, resource exhaustion is just as serious an issue 302 | with Orthogonal Persistence as with Manual Persistence. 303 | 304 | With Manual Persistence, the issue can be handled by programs at risk of 305 | filling up disk space, but seldom is. Instead, users or administrators 306 | have to inspect disk space, clean up temporary areas, 307 | remove unneeded system packages, 308 | delete redundant copies and variants of programs and documents, etc., 309 | hopefully without deleting by mistake any essential information that wasn’t 310 | properly backed up already. 311 | 312 | With Orthogonal Persistence, persistence usually happens 313 | without user supervision, so instead users configure which processes run 314 | in which division of what persistence domain, and assign 315 | limits, alerts, shared or private soft or hard reserves, and 316 | emergency plans to each domain: 317 | every process and every domain is monitored, its resource usage accounted, 318 | so users or automated business agents 319 | can take proactive measures before they run out of bounds, and 320 | reactive measures after they do; 321 | processes are stopped when they run out of space, and 322 | can be resumed after space issues are solved; 323 | space is reserved so special debugging processes can run 324 | inside the scope of a stopped process, inspect and cleanup its data, 325 | garbage collect unneeded elements, and resume the process—or 326 | simply extract the useful data, restart a new process and 327 | shutter the one that ran out of space. 328 | 329 | ### Schema Upgrade 330 | 331 | The Schema is the set of all tables in a database, 332 | of all concrete object types in a persistent heap, 333 | of all file formats in a filesystem, etc., 334 | and the way they encode the data that matters to users. 335 | Schema Upgrade is what happens when the code for the application changes, 336 | and with it the way data is represented, yet the pre-existing data 337 | and the execution threads that process it are not dropped on the ground 338 | and forgotten or made to fail, their invariants broken, 339 | their variants stuck never to decrease again, but instead upgraded, 340 | made to follow the new invariants and variants, 341 | continuing the operations as if nothing had changed, 342 | or at least, not for the worse, only for the better. 343 | 344 | Certainly there are cases when the Right Thing to do is indeed 345 | to erase some obsolete data, and stop some stale processes; 346 | but then that same cleanup could have been done at any point, 347 | even without a schema upgrade, and independently from any other upgrade. 348 | But in general, most existing data, should be preserved, especially so 349 | if it concerns inventory, accounting, legal matters, 350 | incomplete orders, on-going operations, etc.—anything current 351 | about the human-meaningful activity being represented and operated. 352 | 353 | In the paradigm of Manual Persistence, any non-trivial Schema Upgrade is 354 | a catastrophic event, to be specially handled by some crack team, 355 | involving some ad hoc migration code, and often down time, voluntary or in-. 356 | To avoid such stress, developers often choose to restrict themselves to 357 | the “trivial” changes, only adding new tables and new fields initialized to NULL, 358 | wherein the complexity of the Schema only ever increases and edge cases accumulate, 359 | until the entire Schema is a mess that no one truly understands, 360 | filled with obsolete or broken data 361 | that poisons attempts at systematizing new processes. 362 | 363 | In the paradigm of Orthogonal Persistence, 364 | Schema Upgrade is a regular part of the development activity. 365 | Variables and fields being added include initial or default values or formulas; 366 | class redefinitions include migration methods; 367 | regression tests include data migration tests; etc. 368 | A whole lot of code can be automatically generated that 369 | without Orthogonal Persistence is usually written by hand—just by 370 | not forgetting the code history, and instead making schema evolution 371 | a regular part of the programming language. 372 | 373 | ### Model Independence 374 | 375 | Note how Orthogonal Persistence is not tied to any particular data model, 376 | or, for that matter, from any particular model for a query language, 377 | or for speculative potentially-conflicting concurrent transactions: 378 | You can use flat tables in the style of SQL or Datalog if you like, 379 | or a simple key-value store, a statically or dynamically typed object graph, 380 | and even a raw untyped memory space, or a mix of the above, 381 | or anything you like whatsoever, as fits whatever programming language you prefer to use. 382 | Persistence is Orthogonal to the Data Model, and the Data Model is Orthogonal to Persistence. 383 | 384 | It is quite an inefficient market that resulted in the ACID properties of Persistence 385 | being sold in a package-deal with an absurd data model, the Relational Data Model, 386 | that fits very few actual use cases; 387 | What more, it uses a horribly misdesigned query language (SQL), 388 | and there is no decent modification language, much less any attempt to standardize one; 389 | instead, databases provide you with a dubious model for speculative transactions, 390 | that allows them to improve their benchmarks, but only make things more complex for users. 391 | 392 | Instead, users should be able to use whichever data model and concurrency models 393 | fit their application, as specified in their regular programming language. 394 | If “tables everywhere” were really the paradigm in which 395 | it is most natural to think about a problem, programmers for this problem 396 | would naturally want to use a table-oriented language, such as APL. 397 | More likely, some of the data would be in tables, and other data would be 398 | in different data structures, indexed differently, just as in most programs in most languages. 399 | Whichever way, note that the appropriate language can *never* be SQL, 400 | because SQL is not a complete programming language, not being able to modify the data. 401 | The pathetic languages that database vendors typically come up with, 402 | made by people who are definitely not programming language experts 403 | or even actual amateurs, never make the cut, either. 404 | (An amateur by definition loves what he’s doing; 405 | those who misdesign these languages obviously have no love for this part of their job, 406 | since they’re not even trying to do it right, or learn the very basics of language design.) 407 | On the other hand, an appropriate language can conceivably have SQL embedded in it as a subset, 408 | or hopefully something roughly equivalent to SQL but better designed, 409 | such as C# with its LINQ extension. 410 | 411 | ## Why Orthogonal Persistence Matters 412 | 413 | ### Automation vs Manual Labor 414 | 415 | The opposition between Orthogonal Persistence and Manual Persistence 416 | is somewhat analogous to automatic memory management 417 | (with a garbage collector and/or declared ownership types) 418 | vs manual memory management (with malloc and free). 419 | 420 | In the prevalent paradigms of manual labor, 421 | one crucial aspect of software is managed through 422 | a lot of expensive and error-prone human labor. 423 | Humans must repetitively reproduce low-level usage patterns everywhere locally, 424 | yet with a lot of subtle and often hidden high-level global constraints 425 | that can’t fit in any human’s brain except in the simplest cases. 426 | Not only does this manual labor introduce exorbitant development costs, 427 | the mismatch between the laborers’ capabilities and the task at hand mean 428 | countless bugs, and an endless stream of critical security vulnerabilities. 429 | Down the line, users must waste enormous time in coping strategies—or 430 | fail to, with too often catastrophic consequences. 431 | 432 | In the paradigm of automation, the same aspect of software 433 | is managed automatically, through a strategy coded once and for all, 434 | that is correct by construction. 435 | The algorithm is applied relentlessly by a computer that never gets bored 436 | nor distracted, and will always enforce the global constraints 437 | that it never misses of confuses even in the subtlest of edge cases. 438 | Things Just Work for developers and users alike. 439 | What few bugs may exist in the algorithm are not application-specific, 440 | thus appear everywhere, can be detected early by anyone, fixed for everyone. 441 | 442 | There are many cases when the provided automatic strategy 443 | doesn’t perform as well as can a carefully implemented manual strategy. 444 | However, those who want extra performance can always get it, 445 | by improving the automation, adding performance annotations, 446 | writing code in a style that enables the required optimizations, 447 | or using an escape mechanism into manual management. 448 | At the very worst they will be back to the previous paradigm—but 449 | only for the small subdomain of cases for which they need utmost 450 | performance, that they know and care about and can afford to optimize, 451 | instead of everywhere, including the vast majority of aspects of the software 452 | they don’t have resources or interest to get correct much less optimize. 453 | 454 | ### Modularity 455 | 456 | Modularity is the ability to think about software in small chunks, locally, 457 | each independently from the rest of the software, 458 | except for a number of small delimited interfaces through which 459 | it interacts with that rest of the software. 460 | (Some Functional Programmers call that Compositionality; same difference.) 461 | 462 | Orthogonal Persistence has atomic sections, memory barriers and 463 | persistent processes, that are modular: 464 | they say “don't cut the computation here” which only depends on local knowledge and weak synchronization. 465 | Manual Persistence has transactions, commits and sagas, that aren’t: 466 | they say “cut the computation exactly here” which requires global knowledge and strong synchronization. 467 | 468 | - Atomic sections are modular, because you can call code or be called by code 469 | in a different module without even having to know whether or not 470 | it contains atomic sections. 471 | - You have to be careful when writing an atomic section, but it’s care 472 | you would need to put in anyway, and the developer who has to do it 473 | also “owns” all the data at stake and 474 | so has matching powers and responsibilities. 475 | Atomic sections only require local knowledge of the module 476 | in which they are written. 477 | - Similarly, memory barriers are modular because you can freely call 478 | functions in other modules without having to care whether or not 479 | they contain memory barriers. 480 | - When an activity of yours requires one or more memory barriers 481 | (possibly introduced by an indirect library module you don’t know about), 482 | it may be important to ensure that other activities will not concurrently 483 | observe the speculative data and assume it was persisted already. 484 | But this is easily fixed 485 | either by the other activity also using a memory barrier, 486 | or by using a mutual exclusion lock to ensure only one activity has access. 487 | - Mutual exclusion locks (mutexes) are modular once again because developers 488 | do not have to track down precisely which activity holds which locks 489 | (as long as a coherent lock discipline is enforced in general, 490 | which can be enforced with local knowledge). 491 | Indeed, mutual exclusion across memory barriers is possible precisely 492 | because the processes themselves are persistent, and so can be made 493 | to respect the invariants (coherent lock discipline) and variants 494 | (eventual release) of mutexes. By contrast, mutual exclusion across 495 | transactions is impossible, because clients do not persist but lose 496 | execution context, and therefore mutexes are *guaranteed* to 497 | eventually deadlock and leave the data they protect in a corrupted state. 498 | - Persistent processes are modular, because programmers can trust that 499 | every process will be robustly executed, its invariants preserved 500 | and its variants decreased, and thus need not be worried about any 501 | of these processes being stopped and lost in the middle of using some data 502 | other processes depend on, forever “temporarily” locked and corrupted, 503 | in a failure of safety and liveness. 504 | - Programmers don’t have to care about how to properly restart a entire zoo 505 | of transient processes in the correct dependency order to be confident that 506 | processes will resume correctly after a failure, correctly restoring all 507 | the required context that was previously correctly persisted, because 508 | the ecosystem is robust and already takes care of it for them. 509 | 510 | Manual Persistence is unmodular for the very same reasons, in reverse: 511 | - You cannot reason locally about transactions; 512 | you cannot compose smaller transactions into larger transactions 513 | or decompose larger ones into smaller ones; 514 | there is no good causal model within or across transactions; 515 | the entire transaction model is flat and unmodular. 516 | - Transactions are not modular because every function needs to know whether 517 | it’s already in a transaction or not, to be conscious of what global entry 518 | point in a completely different module owns the transaction. 519 | - Transactions are like always being in an giant atomic section that involves 520 | modules you don’t know about, handling data you don’t own yet must somehow 521 | respect, while those modules must also magically respect data they know 522 | nothing about from other modules. 523 | - The model of a persistent database with transient clients supposes that 524 | you already have a complete transient computation incapable of persistence, 525 | then builds a completely different model for persistence without computations, 526 | and try to make the two interact weakly, yet expect a robust result. 527 | - Some systems allow for “nested” transactions, but it’s really a lie because 528 | your code has to work against the least of the warranties of whether you’re 529 | already in a transaction or not and your code will be interrupted or not. 530 | - Nested transactions do not support the simplest and most important modular 531 | programming mechanism, having a function return a value, which cannot be 532 | protected by those nested transactions. 533 | - Transactions and commits offer no direct and safe way to schedule follow-up 534 | code to be executed after some effects are persisted. Any client-side 535 | follow-up code after it is guaranteed to not be guaranteed to run, 536 | and multi-transaction “sagas” that run on such clients 537 | are intrinsically unreliable. 538 | - Attempts to avoid sequences of many transactions 539 | by doing a lot of work in a single long transaction 540 | generate more data contention as the transaction gets longer, 541 | with less reliability, and more latency for everyone. 542 | Attempts to divide these transactions in many “batches” require a lot 543 | of engineering efforts to regain some performance without addressing 544 | the fundamental problem. The entire speculative model wherein clients 545 | try and retry many times is unreliable, and yes, unmodular. 546 | - One safe but indirect way to schedule follow-up code execution is to use 547 | database queues (expensive to emulate if not builtin) to commit events 548 | that some background daemon will dequeue and act upon in a follow-up 549 | transactions. But then you need to encode the entire context of the 550 | follow-up in each event, which is tantamous to manually implementing 551 | an ad-hoc version of process persistence, just for that follow-up. 552 | - Manual implementation of process persistence is not only tedious, 553 | but itself not modular, unless all modules are required to manually 554 | partake in a complex persistence protocol, at which point you’re 555 | systematically implementing process persistence by hand with humans 556 | as bad, slow and unreliable compilers. 557 | - The manual persistence protocol itself is unmodular, 558 | as persistence will make many details of module code part of its interface, 559 | and require lots of other modules to be modified whenever that code changes. 560 | - With or without follow-up internal events, Manual Persistence demands that 561 | either one process will be updated that handles all internal and external 562 | events, or multiple processes each for some kinds of events, that will each 563 | be supervised and managed and restarted in order without forgetting any. 564 | Either way, this requires a lot of advanced system administration and is 565 | extremely unmodular. 566 | 567 | All in all, Manual Persistence makes transactional applications a nightmare 568 | to design and maintain, and require a lot of coordination between developers 569 | of notionally independent modules, database administrators, network 570 | administrators, and a host of expensive infrastructure professionals. 571 | 572 | ### Atomic Section as Mutual Exclusion 573 | 574 | There is not just a clear analogy but a common higher pattern 575 | between atomicity of persistent changes and 576 | atomicity of changes in the context of concurrent activities. 577 | 578 | The widely accepted programming language solution for atomicity in the context of concurrency is 579 | *mutual exclusion* locks. 580 | The high-level primitive for them is some variant of a 581 | `with-lock` primitive to execute a simple thunk while holding a lock. 582 | The low-level primitive is an explicit lock object with which you manually 583 | do the above and carefully handle failure in the absence of exceptions. 584 | Either way, all your code is written in your usual programming language; 585 | most of the code is blissfully unaware of when locks are used; 586 | the few “critical sections” need be careful to respect a hierarchy of locks, 587 | but they are written in the same familiar language, and are pretty small. 588 | This primitive is thus modular: you can keep writing functions that don't have to care 589 | whether other functions ever use the primitive. 590 | The mutual exclusion mechanism is local and lightweight, efficient and scalable; 591 | it doesn’t force any global reorganization of your code, 592 | any specific data model, any specific control flow model. 593 | 594 | If concurrency had a “solution” for atomicity similar to transactions 595 | “solve” atomicity for databases, it would look like this: 596 | instead of local `with-lock` thunks, you must organize your code in 597 | global “transactions” thunks containing everything that affects application state 598 | without starting or ending inside a `with-lock` statement. 599 | That’s an inversion of control; 600 | to be done manually in languages without call/cc. 601 | Moreover, control flow outside transactions is considered unreliable, such that 602 | any control flow state required to ensure application invariants must be 603 | explicitly reified within the transaction (see Persisting Execution Context below). 604 | To add insult to injury, the parts done while holding locks as well as 605 | all changes that matter must be written as magic snippets 606 | in a completely different language 607 | that you must manually metaprogram with strings. 608 | Furthermore, if you keep your transactions too small, 609 | you will incur a lot overhead and kill performance, 610 | whereas long transactions will kill both concurrency and liveness. 611 | Finally, the data model for your transactions is completely different from 612 | the regular data model for your programming language. 613 | That’s what transactions for concurrency would be, and what transactions for databases are. 614 | 615 | If this design sounds completely crazy, that’s because it is. 616 | Yet that is what university professors haugthily teach, 617 | what billion-dollar data expert businesses sell, 618 | and what every else pays billions of dollars to buy and use. 619 | Analyzing the ins and outs of this madness would require its own separate essay. 620 | 621 | ### Persisting Execution Context 622 | 623 | In the dominating paradigm of Manual Persistence, it is relatively 624 | straightforward to restart a database client after it fails, 625 | though even this simple task leads to daunting complications with many 626 | bad surprises when reliability or scalability are desired. 627 | However, when doing so, all the execution context of the client is crucially 628 | lost at each random or not-so-random failure. 629 | Orthogonal Persistence preserves this context, and that is the main difference. 630 | How does Orthogonal Persistence work, and what would it take 631 | to achieve the same process persistence manually? 632 | 633 | Process persistence consists in persisting not just 634 | inactive, “dead”, data structures, but also 635 | the active “live” control structures of the processes that manipulate them: 636 | the virtual machine “registers” and the control stack for every thread, 637 | the shared data heap, the “file descriptors” or “handles” 638 | opened with the Operating System, the libraries loaded, 639 | any shared memory or resource held, any mutex at stake, etc. 640 | 641 | The *low-level* way to persist data is to use some Virtual Machine (VM) that 642 | will do it for you, such as [Golem](https://golem.cloud)’s WASM environment. 643 | The VM implementation knows where all your data is, and can make regular 644 | snapshots of the state of the virtual CPU and the associated memory, 645 | saving CPU registers and whichever memory pages have changed between two snapshots. 646 | You can only enjoy the hardware accelerations supported 647 | by the VM implementation, but there are a finite number of them, 648 | and worse comes to worst you could run the truly computation-intensive parts 649 | in a transient “coprocessor” process. 650 | In particular, making coherent snapshots when a process runs with multiple threads, 651 | possibly on multiple processors, can be especially tricky and costly, 652 | but it is ultimately possible. 653 | Note that while the virtual machine provides low-level persistence, 654 | you will want the higher-level language running on top of it to provide 655 | the ability to do schema upgrade: the language should support full schema dump and restore 656 | for the sake of restarting from a clean image, 657 | but maybe also dynamic class redefinition for the sake of faster schema upgrades. 658 | As for incremental updates between snapshots, 659 | they can be happen by following low-level sets of changes to the VM’s memory, 660 | or by following deterministic VM reactions to high-level recorded input 661 | (which is an approach known as “Prevalence” when done manually). 662 | 663 | The *high-level* way to persist data is to write your programs in some language 664 | that has a compiler that supports Orthogonal Persistence. 665 | The compiler will then make each piece of a program’s execution context into 666 | an explicit object that can be serialized and stored in an underlying database: 667 | Continuation-Passing Style will make stack frames into objects; 668 | lambda-lifting will make lexical scope into objects; 669 | various explicit monads will make the context of as many effects into objects; 670 | all stack-allocated and heap-allocated data must be tracked with suitable types, 671 | such that pointers to other data can be suitably saved and restored. 672 | Then, every low-level checkpointing transaction will 673 | add the new context objects to the database and delete the old ones from it. 674 | To detect which objects are old and can be deleted, and avert memory leaks, 675 | the compiler will implement the static or dynamic model of ownership or liveness 676 | of context objects in the programming language semantics. 677 | The high-level approach may or may not perform better than the low-level approach 678 | in persisting program; yet support for all the primitives and transformations described 679 | may be required anyway to deal with Schema Upgrade. 680 | 681 | A lot of the problems, solutions and workarounds will be the same whether 682 | the persistence implementation is higher-level (more work done by the compiler) 683 | or lower-level (more work done by the runtime environment). 684 | Ultimately, you can see a high-level solution as having the compiler generate 685 | a VM specialized for your program, instead of using a general-purpose 686 | low-level VM for all programs. 687 | There are costs and benefits to either approach with many tradeoffs, and 688 | the “best” solution for you might lie in the middle. 689 | 690 | Whether you choose a higher- or lower- level approach, 691 | good luck with doing it manually. Have some wonderful time with SQL. 692 | You’ll soon find that SQL’s rigid and flat (non-recursive) database schemas 693 | are fundamentally insufficient to store the state of (recursive) activities, 694 | even less so if also manually curated and you must either have thousands 695 | of typed tables (one per frame), or eschew the normalization and typechecking 696 | by the database. 697 | Except for the simplest of programs, you’ll find that it is undoable 698 | to implement manual persistence of processes without mistakes, 699 | what more with the limited resources available to you—except by automating it, 700 | at which point you have Orthogonal Persistence. 701 | What more, even small changes in your program can lead to large changes 702 | in the persistence model, that need to be correctly propagated everywhere; 703 | manual persistence is not modular at all. 704 | 705 | Finally, mind that running with Orthogonal Persistence requires no changes 706 | to local-only applications, but does require small changes to network 707 | applications that transact with remote servers. 708 | These will be local, modular, modifications, but modifications still. 709 | In particular, you will have to use the atomic section and memory barrier 710 | APIs to ensure your program correctly persists its effects 711 | when communicating with remote services. 712 | Also, when a persistent process wakes up after a pause, interrupt, failure, 713 | etc., it will reactivate all potential interrupted activitities, re-send 714 | all messages marked for sending that haven’t timed out yet, 715 | and poll all the information sources for messages it may have missed. 716 | Your network protocols will hopefully support a mode when the re-sent 717 | messages will be idempotent, and where the data sources can indeed be polled 718 | for messages received since the last one committed. 719 | 720 | ### Composing Persistence Domains 721 | 722 | Manually writing code to coherently persist data across several devices, 723 | filesystems, databases, storage services, is extremely hard, and 724 | will be fragile with respect to any change in the configuration. 725 | It will require cooperation from every module that manually persists any data, 726 | as well as special code to handle the outermost places that start and commit 727 | transactions. This is undoable in practice. 728 | At best, Manual Persistence will use some database service that is 729 | configured to have replicas, as part of a single protocol, 730 | with expensive system administrators to keep it all running. 731 | 732 | With Orthogonal Persistence, the underlying store is already abstracted away 733 | from every program for every user. The matter is then to be able to 734 | (1) define configurations that variously compose persistence domains 735 | that follow the protocol to create joins, shards, synchronous and 736 | asynchronous replicas, n-out-m consensus, encrypted stores, etc., and 737 | (2) seamlessly migrate data from one configuration to the other, 738 | without interrupting any of the programs running in that domain. 739 | 740 | This can be done in a generic fashion using domain combinators 741 | that work for all programs for all users, without stopping any activity, 742 | though maybe with increased latency and reduced speed during migration. 743 | With the proper combinators, you can even have your nice curated database still 744 | in one persistence domain, with more dynamic and less curated yet still persistent experiments 745 | in a separate persistence domain (though possibly on the same server, so you can still 746 | share transactions and not need an extra 2-Phase-Commit protocol). 747 | 748 | ## Long Range Issues with Persistence 749 | 750 | Orthogonal Persistence completely solves “short range” persistence issues: 751 | programmers do not have to explicitly open and close files and network connections, 752 | read and write, marshal and unmarshal, encode and decode data, 753 | organize their accesses in transactions, 754 | metaprogram a “database language” using strings, etc., 755 | not to mention handle all the associated errors, 756 | for their data to persist. 757 | Most of the code having to deal with persistence, 758 | which is a sizeable fraction of the code, disappears. 759 | Is automated away. Humans do not have to spend brain cycles about it anymore. 760 | 761 | But that means that mid- to long- range issues take the front stage: 762 | 763 | - Low-level languages, or “untrusted” low-level escapes from high-level languages, 764 | can *persistently* corrupt data, in ways that may only be detected long after the fact, 765 | after dancing a “fandango on core”. 766 | The need to survive low-level errors makes regular backups necessary, 767 | yet sometimes not sufficient. Scary. 768 | 769 | - Static languages will make Schema Upgrade hell by introducing much rigidity in the Schema, 770 | forcing expensive global transformations, 771 | introducing namespace issues to identify entities across schema upgrades, 772 | making incremental change harder and more expensive, etc. 773 | 774 | - Dynamic languages with suitable reflection primitives 775 | can solve the Schema Upgrade hell, 776 | but will lose in performance compared to low-level languages, 777 | and in safety compared to static languages. 778 | At least you can gain metaprogramming in exchange as a super-power, 779 | if you pick a Lisp instead of a blub language; yet most programmers fail to. 780 | 781 | - Partial code edits may corrupt the entire database by introducing 782 | broken inconsistent intermediate steps. 783 | Complete code edits may require omniscience of all code, libraries, etc., 784 | and more work than possible by one person in one session. 785 | Dealing with code edits therefore necessitates 786 | database versioning to be able to branch with code and revert, 787 | and virtualization so that edits do not jeopardize the entire universe, 788 | but only update a sub-universe with localized changes. 789 | Transactionality in code updates may enable processes to see a consistent version of the code. 790 | 791 | - Reflection is necessary to inspect processes, modify them, salvage the runaway ones. 792 | Yet some form of protection from unauthorized tampering is necessary. 793 | This calls for some kind of capability-based architecture, to contain the powers 794 | that would otherwise allow bad code to corrupt the entire system. 795 | 796 | - Forms of PCLSRing with respect to *user* invariants (not just “system” invariants) 797 | are necessary to cleanly kill processes, but also to stop, inspect, migrate, upgrade them. 798 | This requires both system support, compiler support for every language, and language support 799 | to enable users to define those invariants in the interaction “language” (formal or informal) 800 | of *their* users. 801 | 802 | - Schema Upgrade requires high-level language support, or else users may find the hard way that 803 | they can't keep their data without having to badly reinvent 804 | all of the mechanisms of manual persistence just for the sake of upgrading their Schema. 805 | 806 | - Inasmuch as persistent software must run on top of transient operating systems, 807 | all the transient details like hostname, pid, file descriptors, etc., 808 | must be virtualized away from the underlying transient operating system, 809 | since they might change from time to time as the process migrates 810 | to survive adversarial events. 811 | A "Persistent Operating System" might offer a Virtual Machine where they are replaced by 812 | some persistent handle that abstracts away the mapping between their persistent identity 813 | and an underlying transient implementation. 814 | A lower-level "Persistent Layer" might instead let the programmers (and their libraries) 815 | implement those abstractions themselves—but even then the transient hostname, pid and fds 816 | shall only be accessed from within a transient block that covers their lifetime, 817 | possibly as part of a dedicated transient thread (for e.g. file descriptors). 818 | 819 | All these concerns exist with Manual Persistence as well as with Orthogonal Persistence. 820 | But with Manual Persistence, the low-level concerns of getting any persistence at all 821 | create so much work and slows down development so much that these higher-level concerns 822 | appear secondary in comparison. 823 | By saving developers from those lowly concerns, 824 | Orthogonal Persistence elevates the struggle to write software, 825 | rather than eliminates it. 826 | 827 | ## Bibliography 828 | 829 | My [LambdaConf 2016 talk](https://www.youtube.com/watch?v=KsswTN2cCSc&t=250s) 830 | discusses Orthogonal Persistence, based on chapters 2 to 5 831 | of my blog [“Houyhnhnm Computing”](https://ngnghm.github.io) 832 | (pronounced “Hunam Computing”). 833 | 834 | [A Persistent System In Real Use: Experiences Of The First 13 Years](https://os.itec.kit.edu/65_2525.php), by Jochen Liedtke, IWOOS 1993. Even the processes are Persistent. See the 835 | [Website](https://6xq.net/eumel/), 836 | [Docs](https://github.com/PromyLOPh/eumel), 837 | [Code](https://github.com/PromyLOPh/eumel-src) and 838 | [Tools](https://github.com/PromyLOPh/eumel-tools) 839 | for the persistent system 840 | [EUMEL](https://archiveos.org/eumel/). 841 | TODO: Find code for the persistence work done on top of 842 | L3 (maybe included in or linked from Eumel repositories?) 843 | and L4 (see Charm?). 844 | 845 | Operating Systems that persist processes, such as 846 | [KeyKOS](https://archiveos.org/keykos/), 847 | [EROS](https://archiveos.org/eros/), 848 | [Coyotos](https://archiveos.org/coyotos/), 849 | [CapROS](https://archiveos.org/capros/), 850 | [Grasshopper](https://archiveos.org/grasshopper/), 851 | [Mungi](http://tunes.org/wiki/mungi.html), 852 | [Charm](https://archiveos.org/charm/), 853 | [BRiX](https://archiveos.org/brix/), 854 | [Argon](https://archiveos.org/argon/)... 855 | 856 | The Scottish School of Orthogonal Persistence, including systems such as 857 | [PS-Algol](https://en.wikipedia.org/wiki/PS-algol), 858 | [Napier88](https://en.wikipedia.org/wiki/Napier88), 859 | [Ten15](https://en.wikipedia.org/wiki/Ten15), and other 860 | historical British (mainly Scottish) persistent languages and systems. 861 | However, note that the otherwise interesting 2009 retrospective article 862 | “Orthogonal Persistence Revisited” by Alan Dearle, Graham N.C. Kirby and Ron Morrison 863 | does *not* mention persistence of continuations, stacks, execution contexts, processes, etc. 864 | Possibly because none of the systems mentioned had it. 865 | It also claims that PS-Algol was the first Persistent language, 866 | and fails to cite or credit Eumel whose language ELAN that may have been even earlier, 867 | and that did persist processes. 868 | 869 | [Acton](https://www.acton-lang.org/), 870 | a “fault tolerant distributed programming platform for building mission critical systems” 871 | based on a high-level language (active in 2024). 872 | 873 | Recent systems (as of 2024) offering “Durable Execution” or “Durable Computing”, 874 | to insist they cover processes, not just data: 875 | [Golem](https://Golem.cloud), 876 | a WASM-based execution platform to run highly reliable services 877 | with Orthogonal Persistence, which they call “Durable Execution”; 878 | in the same vein, [Temporal.io](https://temporal.io/), 879 | [restate](https://restate.dev), [DBOS](https://DBOS.dev)… 880 | they all rely on logging transaction, once in a while dumping a snapshot, 881 | and restoring state by replaying transactions from the snapshot. 882 | 883 | The Workshop on Persistent Object Systems, 884 | The Workshop on Database Programming Languages, 885 | old VLDB conferences around 2000, 886 | IWOOS, ICOODB, SOSP… 887 | gotta mine these conferences, and more. 888 | (Be sure to include some reading about “Pointer Swizzling”.) 889 | 890 | Recent work: 891 | [TreeSLS: A Whole-system Persistent Microkernel with Tree-structured State Checkpoint on NVM](https://dl.acm.org/doi/10.1145/3600006.3613160), SOSP 2023 892 | [Reducing Write Barrier Overheads for Orthogonal Persistence](https://dl.acm.org/doi/10.1145/3687997.3695646), SLE 2024 893 | 894 | Question: see how [Unison](https://www.unison-lang.org/), [Dark](https://darklang.com/) or 895 | [Val town](https://www.val.town/) and other “infrastructure included” languages 896 | do or don’t handle persistence. 897 | 898 | ## Coda: Friendly vs Unfriendly Persistence 899 | 900 | In today’s world (2024), all your data persists… on your enemies’ servers. 901 | The big corporations and bureaucracies that try to manipulate you 902 | know everything about you, and run AIs to analyze your behavior 903 | to manipulate you even more into buying their stuff and obeying their orders. 904 | Modern “apps”, that don’t have or need a “save” button anymore, 905 | may superficially look to end-users as if they had Orthogonal Persistence, 906 | but underneath everything uses Manual Persistence; 907 | corporations can afford thousands of developers, database experts and system administrators 908 | to make work it at scale, so as to spy on hundreds of millions of human cattle. 909 | Even when they are not officially allowed to use the information, 910 | they’ll use it to identify then target you, at which point they can use 911 | “parallel reconstruction”, entrapment, harassment or just plain illegal means 912 | to further oppress you. 913 | 914 | The only person who forgets everything is… you. 915 | You don’t have a good record trail of all your actions and transactions. 916 | What record you have, you cannot search. 917 | Every word said near the always-listening microphone 918 | of the position-tracking device known as “your phone” is recorded, 919 | and will be used against you, but you won’t be able to search 920 | for those words and be reminded of conversations that matter to you. 921 | If you liked a page, an article, a comment, a book, a movie, a game… 922 | the contents may disappear at any time, if they haven’t disappeared already. 923 | You may have paid for it, but the company on the other side may go out of business, 924 | may cancel the service you were using, or deactivate that particular item you liked. 925 | Worse, the contents may have been rewritten and sanitized 926 | to fit the ideology of the day, in an Orwellian move. 927 | With a bit of luck and a lot of effort, you might find some version of it on 928 | [archive.org](https://archive.org)… if it still exists the day you need it, 929 | its robots weren’t told not to archive that data, and 930 | the data wasn’t removed by legal actions or threats. 931 | Most of the games, demos and other programs you like or used to like, 932 | even if you still have access to a copy, will not run anymore, because 933 | they depend on a virtual machine that was obsoleted (e.g. Flash), 934 | or they relied on some remote service that either has disappeared already 935 | or will one day for sure eventually disappear. 936 | 937 | As a user, or even as an independent developer, you cannot afford 938 | to hire or become a database expert or a system administrator, much less both; 939 | very few individuals can afford to run by themselves a complete stack 940 | of all the software they might like to use, 941 | and none can afford to modify all the software they use that others wrote 942 | to suitably persist the data that matters into their database. 943 | Your only hope, our only hope as private citizens, is that there shall be 944 | a platform that automatically handles the persistence of every piece of data 945 | you see, for every program you use, in a way that *you* and other individuals 946 | can search and mine for patterns, while your communications with the rest of 947 | the world go through obfuscation channels so that *they* cannot. 948 | 949 | All your friendly processes, like Dory the fish, 950 | forget everything after a short while. 951 | All the enemy processes, like an elephant, remember everything. 952 | Help me change that for you and for everyone: 953 | Sponsor [@fare on GitHub](https://github.com/sponsors/fare). 954 | -------------------------------------------------------------------------------- /persist.ss: -------------------------------------------------------------------------------- 1 | ;; Persisting Data 2 | (export #t) 3 | (import 4 | (for-syntax :std/stxutil) 5 | :gerbil/gambit 6 | :std/assert :std/format 7 | :std/misc/completion :std/misc/hash 8 | :std/sugar :std/values 9 | :clan/base :clan/concurrency :clan/string 10 | :clan/poo/object :clan/poo/mop :clan/poo/io :clan/poo/type 11 | :clan/debug :clan/poo/debug 12 | ./db ./db-queue) 13 | 14 | (.defgeneric (walk-dependencies type f x) ;; Unit <- 'a:Type (Unit <- 'b:Type 'b) 'a 15 | slot: .walk-dependencies default: void) 16 | 17 | ;; Unit <- 'a:Type 'a TX 18 | (def (make-dependencies-persistent type x tx) 19 | (walk-dependencies type (cut make-persistent <> <> tx) x)) 20 | 21 | ;; Unit <- 'a:Type 'a TX 22 | (.defgeneric (make-persistent type x tx) 23 | slot: .make-persistent default: void) 24 | 25 | (define-type (Port @ Type.) .element?: port?) 26 | (define-type (Thread @ Type.) .element?: thread?) 27 | (define-type (Completion @ Type.) .element?: completion?) 28 | 29 | (define-type (TX @ Type.) .element?: DbTransaction?) 30 | 31 | ;; Persistent objects, whether passive data or activities. 32 | (define-type (Persistent. @ Type. 33 | ;; Prefix for keys in database. In a relational DB, that would be the name of the table. 34 | key-prefix ;; : u8vector 35 | ;; Type descriptor for keys (to be serialized as DB key) 36 | Key ;; : Type 37 | ;; Type descriptor for the persistent state 38 | State ;; : Type ;; states 39 | ;; Internal method to re-create the data object from 40 | ;; (1) the key, 41 | ;; (2) a function to persist the state, 42 | ;; (3) the initial state (whether a default state or one read from the database), 43 | ;; (4) a current transaction context in which the initial state was read. 44 | ;; The type must provide this method, but users won't use it directly: 45 | ;; they will call the read method, that will indirectly call make-activity with proper arguments. 46 | ;; : @ <- Key (Unit <- State TX) State TX 47 | .restore) 48 | restore: (validate (Fun @ <- Key (Fun Unit <- State TX) State TX) .restore) 49 | 50 | ;; Internal table of objects that have already been loaded from database. 51 | ;;loaded:: (Table @ <- Key) 52 | loaded: (make-hash-table) ;; weak-values: #t 53 | 54 | ;; Internal function that associates a key in the key-value store to a user-level key object of type Key. 55 | ;; : Bytes <- Key 56 | db-key<-: (lambda (key) (u8vector-append key-prefix (bytes<- Key key))) 57 | 58 | ;; Internal function that given (1) a db-key (as returned by the function above), 59 | ;; (2) a current state of type State, and a (3) current transaction context, 60 | ;; will save said current state in the current transaction. 61 | ;; Note that this modification will only be committed with the transaction, and 62 | ;; the activity will have to either synchronously commit-transaction if it owns the transaction, 63 | ;; or asynchronously call sync-transaction if it doesn't, 64 | ;; before it may assume the state being committed, 65 | ;; and start sending according messages to external systems and notifying the user it's done 66 | ;; (though if transactions have high latency, it might optimistically notify the user 67 | ;; that the change is underway). 68 | ;; : (Fun Unit <- Bytes State TX) 69 | saving: (lambda (db-key state tx) 70 | (make-dependencies-persistent State state tx) 71 | (db-put! db-key (bytes<- State state) tx)) 72 | ;; Internal function that given a key returns a default state to associate with that key 73 | ;; when no state is found in the database. 74 | ;; Not all activities have a default state, and the default method will just raise an error. 75 | ;; : (Fun State <- Key) 76 | make-default-state: (lambda (key) ;; override this method to provide a default state 77 | (error "Failed to load key" sexp key)) 78 | 79 | ;; Internal function that given (1) a db-key (as returned by the function above), 80 | ;; (2) a current state of type State, and (3) a current transaction context [TODO: no TX?] 81 | ;; will restore the object and register it as loaded. 82 | ;; Note that this will only be committed with the transaction, and the activity will have to 83 | ;; either synchronously commit-transaction if it owns the transaction, or asynchronously call 84 | ;; sync-transaction if it doesn't, before it may assume the state being committed. 85 | ;; : (Fun @ <- Key State TX) 86 | resume: (validate (Fun @ <- Key State TX) .resume) 87 | .resume: 88 | (lambda (key state tx) 89 | (def db-key (db-key<- key)) 90 | (when (hash-key? loaded db-key) 91 | (error "persistent activity already resumed" sexp key)) 92 | (def object (restore key (cut saving db-key <> <>) state tx)) 93 | (hash-put! loaded db-key object) 94 | object) 95 | 96 | ;; Internal function to resume an object from the database given a key and a transaction, 97 | ;; assuming the object wasn't loaded yet. 98 | ;; : (Fun @ <- Bytes Key TX) 99 | resume-from-db: 100 | (lambda (db-key key tx) 101 | (def state 102 | (cond 103 | ((db-get db-key tx) => (cut <-bytes State <>)) 104 | (else (make-default-state key)))) 105 | (resume key state tx)) 106 | 107 | ;; Function to create a new activity (1) associated to the given key, 108 | ;; (2) the state of which will be computed by the given initialization function 109 | ;; (that takes the saving function as argument), (3) in the context of the given transaction. 110 | ;; Note that any modification will only be committed with the transaction, and 111 | ;; the init function does not own the transaction and thus will have to call sync-transaction 112 | ;; or wait for some message by someone that does before it may assume the initial state is committed, 113 | ;; and start sending according messages to external systems. Similarly, the creating context 114 | ;; must eventually commit, but any part of it that wants to message based on the activity 115 | ;; has to sync-transaction to wait for it being saved. 116 | ;; Also, proper mutual exclusion must be used to ensure only one piece of code 117 | ;; may attempt create to create an activity with the given key at any point in time. 118 | make: (validate (Fun @ <- Key (Fun State <- (Fun Unit <- State TX) TX) TX) .make) 119 | .make: 120 | (lambda (key init tx) 121 | (def db-key (db-key<- key)) 122 | (when (db-key? db-key tx) 123 | (error "persistent activity already created" sexp (sexp<- Key key))) 124 | (def state (init (cut saving db-key <> <>) tx)) 125 | (resume key state tx))) 126 | 127 | ;; Persistent Data has no activity of its own, 128 | ;; and can be synchronously owned or asynchronously borrowed by persistent activities, 129 | ;; that will provide a transaction as a context to read of modify the data. 130 | ;; In case they may be borrowed, they must provide some mutual exclusion mechanism 131 | ;; that the borrowing activity will use to ensure data consistency. 132 | (define-type (PersistentData @ Persistent. 133 | Key loaded resume-from-db db-key<-) 134 | ;; Read the object from its key, given a context. 135 | ;; For activities, this is an internal function that should only be called via get. 136 | ;; For passive data, this is a function that borrowers may use after they ensure mutual exclusion. 137 | ;; For those kinds of objects where it makes sense, this may create a default activity. 138 | ;; Clients of this code must use proper mutual exclusion so there are no concurrent calls to get. 139 | ;; Get may indirectly call resume if the object is in the database, and make-default-state if not. 140 | get: (validate (Fun @ <- Key TX) .get) 141 | .get: 142 | (lambda (key tx) 143 | (def db-key (db-key<- key)) 144 | (or (hash-get loaded db-key) ;; use the db-key as key so we get the correct equality 145 | (resume-from-db db-key key tx)))) 146 | 147 | ;; Persistent activities compute independently from each other; 148 | ;; they may create transactions when they need to and borrow persistent data; 149 | ;; they may synchronize to I/O (including the DB) though outside transactions. 150 | ;; Activities communicate with each other using asynchronous messages. 151 | (define-type (PersistentActivity @ Persistent. 152 | Key loaded resume-from-db db-key<-) 153 | ;; Get the activity by its key. 154 | ;; No transaction is provided: the activity will make its own if needed. 155 | <-key: (validate (Fun @ <- Key) .<-key) 156 | .<-key: 157 | (lambda (key) 158 | (def db-key (db-key<- key)) 159 | (or (hash-get loaded db-key) ;; use the db-key as key so we get the correct equality 160 | (with-tx (tx) (resume-from-db db-key key tx))))) 161 | 162 | (defstruct persistent-cell (mx datum save!)) 163 | 164 | (def (call-with-persistent-cell cell f) 165 | (with-lock (persistent-cell-mx cell) 166 | (f (fun (with-cell accessor tx) 167 | (def (get-state) 168 | (assert! (eq? (DbTransaction-status tx) 'open)) 169 | (persistent-cell-datum cell)) 170 | (def (set-state! new-state) 171 | ((persistent-cell-save! cell) new-state tx) 172 | (set! (persistent-cell-datum cell) new-state)) 173 | (accessor get-state set-state!))))) 174 | 175 | ;; Persistent actor that has a persistent queue 176 | (define-type (PersistentQueueActor @ PersistentActivity 177 | Key State sexp <-key db-key<- 178 | ;; type of messages sent to the actor 179 | Message ;; : Type 180 | ;; function to process a message 181 | process) ;; : <- Message (State <-) (<- State) TX 182 | .restore: ;; Provide the interface function declared above. 183 | (lambda (key save! state tx) 184 | (def name [sexp (sexp<- Key key)]) 185 | (def (get-state) state) 186 | (def (set-state! new-state) (save! new-state tx) (set! state new-state)) 187 | (def (process-bytes msg tx) 188 | (def message (<-bytes Message msg)) 189 | ;;(DDT process: Key key State state Message message) 190 | (process message get-state set-state! tx)) 191 | (def qkey (u8vector-append (db-key<- key) #u8(81))) ;; 81 is ASCII for #\Q 192 | (def q (DbQueue-restore name qkey process)) 193 | (cons q get-state)) 194 | 195 | ;; Send a message to a persistent actor. 196 | ;; NB: to avoid redundant (de)serialization, use content-addressing of objects 197 | ;; to share cached values loaded from the database. 198 | ;; : <- Message TX 199 | send: 200 | (lambda (key message tx) 201 | (DbQueue-send! (car (<-key key)) (bytes<- Message message) tx)) 202 | 203 | ;; : State <- Key 204 | read: 205 | (lambda (k) ((cdr (<-key k))))) 206 | 207 | 208 | ;; Persistent actor that has a transaction at every request. 209 | ;; Two functions are called: f within the request, k outside of it, both in the context of the actor thread. 210 | ;; IMPORTANT: messages sent to the actor MUST be deterministically determined by other persistent data, 211 | ;; and idempotent in their effects; they must be re-sent until the desired effect is observed, 212 | ;; in case the process is halted before the message was fully processed. 213 | ;; Sometimes, you may have to pre-allocate a ticket/nonce/serial-number, save it, 214 | ;; so that you can feed the actor an idempotent message. 215 | (define-type (PersistentActor @ [Thread PersistentActivity] 216 | Key State <-key) 217 | .restore: ;; Provide the interface function declared above. 218 | (lambda (key save! state tx) 219 | (def name [sexp (sexp<- Key key)]) 220 | (def (get-state) state) 221 | (def (set-state! new-state) (save! new-state tx) (set! state new-state)) 222 | (def (process msg) 223 | ;;(DDT process: Any name State state Any msg) 224 | (match msg 225 | ([Transform: f k] 226 | (call/values (lambda () (with-tx (tx) (f get-state set-state! tx))) k)))) 227 | (def thread 228 | (without-tx 229 | (spawn/name/logged name (fun (make-persistent-actor) (while #t (process (thread-receive))))))) 230 | (set! (thread-specific thread) get-state) 231 | thread) 232 | 233 | ;; Run an asynchronous action (1) on the actor with given key, as given by 234 | ;; (2) a function that takes the current state as a parameter as well as 235 | ;; a function that sets the state to a new value, in (3) a given transaction context. 236 | ;; The function will be run asynchronously in the context of the actor, 237 | ;; and its result will be discarded. To return a value to the caller, 238 | ;; you must explicitly use a completion, or use the action method below, that does. 239 | ;; After all actions with a given tx are run, the sync method must be called. 240 | ;; If the tx is #f then a new transaction will be created in the actor's context 241 | ;; and synchronized; the action function may then use after-commit to send notifications. 242 | ;; : Unit <- Key (Unit <- State (<- State)) TX 243 | async-action: 244 | (lambda (key k) 245 | (thread-send (<-key key) [Transform: k void])) 246 | 247 | ;; Run a synchronous action (1) on the actor with given key, as given by 248 | ;; (2) a function that takes the current state as a parameter as well as 249 | ;; a function that sets the state to a new value, in (3) a given transaction context. 250 | ;; The function will be run asynchronously in the context of the actor, 251 | ;; while the caller waits synchronously for its result as transmitted via a completion. 252 | ;; After all actions with a given tx are run, the sync method must be called. 253 | ;; : (Fun 'a <- Key (Fun 'a <- State (Fun Unit <- State)) TX) 254 | action: 255 | (lambda (key f) 256 | (def c (make-completion)) 257 | (def (k . res) (completion-post! c (list->values res))) 258 | (thread-send (<-key key) [Transform: f k]) 259 | (completion-wait! c)) 260 | 261 | ;; Asynchronously notify (1) the actor with the given key that (2) work with the current tx is done; 262 | ;; the actor will must synchronize with that tx being committed before it starts processing requests 263 | ;; for other txs. 264 | ;; : Unit <- Key TX 265 | sync-action: 266 | (lambda (key f) 267 | (defvalues (res tx) 268 | (action key (lambda (get-state set-state! tx) (values (f get-state set-state! tx) tx)))) 269 | (sync-transaction tx) 270 | res) 271 | 272 | ;; State <- Key 273 | read: 274 | (lambda (key) 275 | ((thread-specific (<-key key))))) 276 | 277 | ;; TODO: handle mixin inheritance graph so we can make this a mixin rather than an alternative superclass 278 | (define-type (SavingDebug @ [] Key State key-prefix) 279 | saving: => 280 | (lambda (super) 281 | (fun (saving db-key state tx) 282 | ;;(def key (<-bytes Key (subu8vector db-key (bytes-length key-prefix) (bytes-length db-key)))) 283 | ;;(printf "SAVING ~s ~s => ~s\n" sexp (sexp<- Key key) (sexp<- State state)) 284 | (super db-key state tx))) 285 | .resume: => 286 | (lambda (super) 287 | (fun (resume key state tx) 288 | ;;(printf "RESUME ~s ~s => ~s\n" sexp (sexp<- Key key) (sexp<- State state)) 289 | (super key state tx)))) 290 | 291 | (define-type (DebugPersistentActivity @ [SavingDebug PersistentActivity])) 292 | 293 | (def (ensure-db-key key) 294 | (cond 295 | ((u8vector? key) key) 296 | ((string? key) (string->bytes key)) 297 | (else (error "Invalid db-key" key)))) 298 | 299 | (defstruct persistent-variable (mx type key value loaded?)) 300 | (def (get-persistent-variable pvar) 301 | (with ((persistent-variable mx type key _ _) pvar) 302 | (with-lock mx 303 | (lambda () 304 | (unless (persistent-variable-loaded? pvar) 305 | (let (bytes (with-tx (tx) (db-get key tx))) 306 | (when bytes 307 | (let (val (<-bytes type bytes)) 308 | (set! (persistent-variable-value pvar) val)))) 309 | (set! (persistent-variable-loaded? pvar) #t)) 310 | (persistent-variable-value pvar))))) 311 | (def (get-persistent-variable-set! pvar val) 312 | (with ((persistent-variable mx type key _ _) pvar) 313 | (with-lock mx 314 | (lambda () 315 | (with-tx (tx) (db-put! key (bytes<- type val) tx)) 316 | (set! (persistent-variable-loaded? pvar) #t) 317 | (set! (persistent-variable-value pvar) val))))) 318 | (def (%make-persistent-variable name type key initial-value) 319 | (make-persistent-variable (make-mutex 'name) type (ensure-db-key key) initial-value #f)) 320 | (defsyntax (define-persistent-variable stx) 321 | (syntax-case stx () 322 | ((d name type key initial-value) 323 | (with-syntax ((setter (stx-identifier #'d #'name "-set!"))) 324 | #'(begin 325 | (def pvar (%make-persistent-variable 'name type key initial-value)) 326 | (defrule (name) (get-persistent-variable pvar)) 327 | (defrule (setter val) (get-persistent-variable-set! pvar val))))))) 328 | -------------------------------------------------------------------------------- /pics/haddock1.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mighty-gerbils/gerbil-persist/30f4d46f4ef7bf425d267914691df896f6ce6681/pics/haddock1.webp -------------------------------------------------------------------------------- /pics/haddock2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mighty-gerbils/gerbil-persist/30f4d46f4ef7bf425d267914691df896f6ce6681/pics/haddock2.webp -------------------------------------------------------------------------------- /pics/haddock3.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mighty-gerbils/gerbil-persist/30f4d46f4ef7bf425d267914691df896f6ce6681/pics/haddock3.webp -------------------------------------------------------------------------------- /pics/haddock4.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mighty-gerbils/gerbil-persist/30f4d46f4ef7bf425d267914691df896f6ce6681/pics/haddock4.webp -------------------------------------------------------------------------------- /pics/haddock5.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mighty-gerbils/gerbil-persist/30f4d46f4ef7bf425d267914691df896f6ce6681/pics/haddock5.webp -------------------------------------------------------------------------------- /pics/haddock6.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mighty-gerbils/gerbil-persist/30f4d46f4ef7bf425d267914691df896f6ce6681/pics/haddock6.webp -------------------------------------------------------------------------------- /slides-2025-lambdaconf.rkt: -------------------------------------------------------------------------------- 1 | #lang at-exp racket @; -*- Scheme -*- 2 | #| 3 | Orthogonal Persistence, the Model 4 | 5 | Slides for presentation at LambdaConf 2025 in Estes Park, Colorado, 2025-05-12 6 | 7 | To compile it, use: 8 | mkdir -p run 9 | ln -s ../nada-poof/util ./ 10 | ln -s ../../nada-poof/resources run/ 11 | ln -s ../pics run/ 12 | racket slides-2025-lambdaconf.rkt > run/slides-2025-lambdaconf.html 13 | where nada-poof is a checkout of 14 | https://github.com/metareflection/poof 15 | 16 | To test interactively, try: 17 | racket -i -l scribble/reader -e "(use-at-readtable)" -l racket 18 | 19 | This document is available under the bugroff license. 20 | http://www.oocities.org/soho/cafe/5947/bugroff.html 21 | |# 22 | 23 | (require scribble/html 24 | "util/util.rkt" 25 | "util/coop.rkt" 26 | (rename-in "util/coop.rkt" (|@| $)) 27 | "util/protodoc.rkt" 28 | "util/reveal.rkt") 29 | 30 | (def (haddock x) 31 | (img src: (format "pics/haddock~a.webp" x) alt: (format "panel ~a" x) 32 | height: "250vmin" valign: 'top)) 33 | 34 | (def doc 35 | (docfix 36 | ($title "Orthogonal Persistence, the Model") 37 | ($kv 'slide 38 | (list 39 | @div[class: 'logo]{ 40 | @img[src: "resources/pic/mukn-name.svg" 41 | alt: "Mutual Knowledge Systems" 42 | width: "50%" 43 | valign: 'middle 44 | style: " 45 | vertical-align: middle; 46 | background-color: white; 47 | padding-left: .5em; 48 | padding-right: .5em; 49 | padding-top: .5em; 50 | padding-bottom: .5em; 51 | "]} 52 | @div[class: 'title 53 | style: "color: #55f; vertical-align: middle; text-align: center; font-size: 140%" 54 | @b{Orthogonal Persistence: @br the Model}] 55 | @(br clear: 'all) 56 | @p{@small{@(~)}} 57 | @C[style: "font-size: 66%"]{ 58 | François-René Rideau @(email "")} 59 | @C{@small{@Url{http://github.com/mighty-gerbils/gerbil-persist}}} 60 | @div[style: "font-size: 50%;" (~)] 61 | @C{@small{LambdaConf 2025-05-12}} 62 | @div[style: "font-size: 50%;" (~)] 63 | @table[style: "text-align: left; padding-left: 0; margin-left: 0; width: 100%; font-size: 50%;" 64 | (tr @td{@code{PgDn}: next} @td{@code{PgUp}: previous} @td{@code{↑ ↓ ← → ESC ⏎} 65 | @td{Touchscreen: swipe down until you must swipe right}})])) 66 | ($section "Prelude: Orthogonal Persistence?" 67 | $plan-slide 68 | ($slide "A Simple Story (1)" 69 | @;{ 70 | Generate a comic panel in the genre of Hergé. 71 | Captain Haddock, with a unicorn on his sweater, 72 | is sitting on a chair at a café terrace in San Theodoros. 73 | Haddock is on the bottom left of the panel; 74 | we see his back, he is turned slightly towards the right of the panel. 75 | He is typing on a laptop on the round café table, next to a glass of whisky. 76 | On the laptop, a white-on-black window on which is displayed "> foo = 42" 77 | and at the next line "> " aligned under the above ">", followed by a blinking underscore cursor. 78 | On the top left side of the panel, 79 | a robber in a grey hoodie is approaching subreptitiously; 80 | you can’t see the robber’s face, hidden under his hood. 81 | In the background, the view from the café is the beach of San Theodoros, 82 | with a large banner "San Theodoros" greeting people to the beach. 83 | There are two poor people on the beach. 84 | 85 | Generate a comic panel in the genre of Hergé. 86 | Captain Haddock, with a unicorn on his sweater, is on the bottom left of the panel; 87 | Haddock is at a café terrace, with a view on the beach of San Theodoros 88 | on the top left of the panel. 89 | We see his back, he is turned slightly towards the right of the panel. 90 | Haddock is half-standing instead of sitting, the round café table is wobbling in front of him, 91 | and the glass of whisky on it is falling. 92 | His laptop is now in the hands of the robber in a gray hoodie that hides his face, 93 | who stole it, and who is running away with it, away from Captain Haddock. 94 | The robber is on the right of the panel, heading away towards the top right of the panel. 95 | Captain Haddock, is cursing "Buccaneer!" and other censored curses. 96 | His left hand is on the table, and his right fist is raised. He has only two arms. 97 | The speech bubble points at Captain Haddock. 98 | In the background, the view from the café is the beach of San Theodoros, 99 | with a large banner "San Theodoros" greeting people to the beach. 100 | There are two poor people on the beach. 101 | 102 | Generate a comic panel in the genre of Hergé. 103 | We see Captain Haddock with a unicorn on his sweater, flabbergasted, 104 | three-quarter facing us, turned slightly towards the right of the panel, 105 | calling a customer support line. 106 | Haddock is holding a newspaper, the San Theodoros Times, that sports a headline: 107 | "Nuevo-Rican missile hits datacenter". 108 | The voice on the phone, with a speech bubble pointing at the listening part of the phone, 109 | tells him: 110 | "Yes, our engineers lost your data. Also everyone else’s. Also their lives." 111 | 112 | Generate a comic panel in the genre of Hergé. 113 | On the bottom left of the panel, Captain Haddock, with a unicorn on his sweater, 114 | is grumbling, sitting laptopless on a plane from San Theodoros back to Marlinspike. 115 | He is three quarter face, facing towards the right of the panel. 116 | On the porthole in front of him, you can see the wing of the plane he’s in. 117 | The picture of Haddock is itself but an insert in a background map 118 | that shows the plane itinerary. 119 | The map does not have label, but an arrow that covers the ocean part 120 | of the travel only, pointing towards Europe, point from (not towards) Paraguay. 121 | 122 | Generate a comic panel in the genre of Hergé. 123 | At the top left of the panel is the grand portico of Marlinspike Hall, 124 | with a staircase going down from it towards the right. 125 | At the bottom right of the panel is the bottom of the staircase, at street level. 126 | A computer technician is on the bottom right of the panel, seen from his back, 127 | at the bottom of the staircase. 128 | Captain Haddock, with a unicorn on his sweater, 129 | is on the top of left of the panel facing right and walking down the staircase, 130 | and whisky is ejected from the glass in his hand as as he suddenly opens his arms wide and high 131 | to effusively greet the technician. 132 | The technician, to the right of the panel, turned towards captain Haddock, 133 | carries in his hands a white box with a picture of a laptop on it. 134 | He offers the box to Captain Haddock. 135 | A speech bubble from Haddock says: "Ah my savior!" 136 | Another speech bubble from the technician declares: "It’s preinstalled, just enter your credentials". 137 | 138 | Nestor the Marlinskpike butler clad in black tailcoat, white shirt, black bowtie, 139 | no mustache, balding grayhaired, is standing next to Haddock on the staircase nearer the top of the panel. 140 | Nestor is splashed by the whisky on his face. 141 | Nestor is startled and tips the platter he was carrying. 142 | The bottle of whisky on that platter falls. 143 | 144 | Generate a comic panel in the genre of Hergé. 145 | Captain Haddock, with a unicorn on his sweater, 146 | was sitting on a chair at a terrace in his garden at Marlinspike Hall. 147 | He is on the bottom left of the panel; 148 | we see his back, he is turned slightly towards the right of the panel. 149 | On the table he has a laptop, 150 | with white-on-black window on which is display "> foo" 151 | and at the next line "42" aligned under the above ">", 152 | and yet on the next line, a prompt "> " aligned as well, followed by a blinking underscore cursor. 153 | Haddock says contentedly: "Yes! All definitions are still there." 154 | The speech bubble points at Haddock. 155 | On the top left side of the panel, 156 | Nestor the Marlinskpike butler is bringing the captain a bottle of whisky and a glass on a platter. 157 | ;} 158 | @(make-table (map (lambda (x) (list (map haddock x))) '((1 2 3))))) 159 | ($slide "A Simple Story (2)" 160 | @(make-table (map (lambda (x) (list (map haddock x))) '((4 5 6))))) 161 | @;{ 162 | @L{At a REPL on your laptop, you define @code{foo}} 163 | @L{You involuntarily donate your laptop to a “youth”} 164 | @L{Orcs raze your data center} 165 | @L{Back home, install new laptop, enter credentials} 166 | @L{Your session is as before, @code{foo} still defined} 167 | @C{… yet not a single LoC involved backing up data} ;; Actually, most code is forbidden to even try to back up data. 168 | ;} 169 | ($slide "Persistence" 170 | @L{Persistence is @em{when your data survives}} 171 | @L{Failure model: @em{what} does it survive?} ;; Compare with the notion of Attack Model wrt Security claims 172 | @L{Unexpected: power loss, accident, @em{hack}, theft, fire, war} 173 | @L{Expected: hardware and software end-of-life}) 174 | ($slide "Orthogonal" 175 | @L{Orthogonal: independent from what the program specifies} 176 | @L{The system, not the application, persists data} 177 | @L{System programmers do it once for all of everyone’s data} 178 | @L{Rather than reinvented differently by every application}) 179 | ($slide "Old Tech Made New Again" 180 | @L{OODB 1980s-now: Lisp, Smalltalk, C++, Java…} ;; Continuations don’t persist! 181 | @L{1980s-1990s: EUMEL, Grasshopper, EROS…} 182 | @L{Funding dried 2000s-2010s} ;; A few do persist continuations. Grasshopper. Funding dried. 183 | @L{Today “Durable Execution”. Renewed research.} 184 | @L{Me: From my good old HP28 to ORMs to this vision}) ;; All the old technology with a new twist to make it usable by regular people 185 | ($slide "Thesis (Technical)" 186 | @L{Orthogonal Persistence (OP) is the Right Thing™} 187 | @L{Persisting Processes simplifies everything else} 188 | @L{Database Transactions are the wrong Abstraction} 189 | @L{OP completely solves many problems, elevates the rest}) 190 | ($slide "Thesis (Socio-Economical)" 191 | @L{Orthogonal Persistence is Economically Inevitable} 192 | @L{OP will drastically change Software Architecture} 193 | @L{OP will disrupt existing Business Models} 194 | @L{OP will improve Division of Labor} 195 | @C{… a Great Business Opportunity TODAY})) 196 | ($section "Today: Fractal Transience" 197 | $plan-slide 198 | ($slide "Disappointing Users’ Natural Assumption" 199 | @L{My mom’s book database} 200 | @L{Who never lost data to no/bad/lost backups?} 201 | @L{C-x C-s in my fingers} ;; persistence automated... in wetware... but not if computer stolen 202 | @L{“If I didn’t mean for the computer to remember it, @br I wouldn’t type it”} 203 | @C{… But don’t modern Apps autopersist data?}) 204 | ($slide "Vendor Lock-In Persistence" 205 | @L{Easy case only: won’t sync across devices, accounts…} 206 | @L{Product Cancellation, Change of Ownership, Bankruptcy} 207 | @L{Forced Code Upgrade or Lack Thereof} 208 | @L{Data Hostage: Pay Bills & Pray Vendor Healthy}) 209 | ($slide "Developers: Hard Mode" 210 | @L{Computers, processes, etc., start empty} 211 | @L{Explicitly save and restore all data} 212 | @L{Storage Zoo: FS, DB, Web, DVCS…} 213 | @L{Fake it the hard way, and leave mess to Admin}) 214 | ($slide "Hard Problems" 215 | @L{Managing Many Out-of-Sync Copies} 216 | @L{Fragile Backups} 217 | @L{Schema Upgrades} 218 | @L{Merging or Splitting Data}) 219 | ($slide "What If...?" 220 | @L{Persistence was the default} 221 | @L{No more vendor lock-in} 222 | @L{Developers don’t have to care... except specialists} 223 | @L{Hard problems are made easier somehow?})) 224 | ($section "Orthogonal Persistence for Users" 225 | $plan-slide 226 | ($slide "It’s Just There" 227 | @L{Data “just” persists - Running Processes, too} 228 | @L{Code runs in ambient “domain” - persistent VM/container} 229 | @L{Interaction (e.g. Editing Document), not “Apps”} 230 | @L{Caveat: Subscribe to Backup Storage Provider}) ;; Admin role, can be delegated to accountable party 231 | ($slide "Example Domains" 232 | @L{@code{ 233 | (me (work (client1 client2 client3 hr)) @bri 234 | @~ (home (accounting kids cooking diy)) @bri 235 | @~ (secret (cia kgb mossad)) @bri 236 | @~ (love (wife lover1 lover2)))}}) 237 | ($slide "Domains" 238 | @L{User Creates Sub-Domains} 239 | @L{Origin Domain Validates Copy Out / Share Out} 240 | @L{Destination Domain Validates Paste In / Share In} 241 | @L{Resource Accounting, Backup Configuration}) 242 | ($slide "Activities" 243 | @L{Edit Document, Interact at will} 244 | @L{Always Saved, auto retention of old version} 245 | @L{Not always publishable} 246 | @L{Share some versions with some people, one way or both})) 247 | ($section "Orthogonal Persistence for Developers" 248 | $plan-slide 249 | ($slide "Just Do It" 250 | @L{Variables and @em{Processes} “just” persist, can be shared} ;; not "Unix" processes... 251 | @L{No more save and restore, copy and send} 252 | @L{Eliminate 30%-70% of all code — just for variables} 253 | @L{Processes may no longer fail mid-invariant}) 254 | ($slide "Capabilities" 255 | @L{Photo App: save... over net... download extensions} ;; also camera, etc. 256 | @L{Transient System: App can compromise everything} 257 | @L{Persistent System: App can only edit given picture} 258 | @L{Wider Reflective Capability Architecture}) 259 | ($slide "Security Improvement" 260 | @L{Narrower target on which to focus limited resources} 261 | @L{Fewer things to get wrong} 262 | @L{Smaller attack surface} 263 | @L{Flag whoever reaches for forbidden capabilities as hostile} 264 | @L{... whether code is written by humans or AI!}) 265 | ;; Knife at home being normal vs abnormal and threat detection (food pre-cut) 266 | ;; Fire at home being normal vs abnormal and autosprinkler (candles, cigarettes) 267 | ($slide "Full Abstraction" 268 | @L{Strong Separation of concerns} ;; blind quantifiers, parametric polymorphism 269 | @L{App developer not allowed to see how data is persisted} 270 | @L{Persistence Implementer not allowed to see data} 271 | @L{User or Admin can configure service and providers}) 272 | ($slide "Backup Configuration" 273 | @L{All data encrypted with use-once salt} ;; regular disk encryption: two-time pad 274 | @L{Add/Remove Provider... independent worldwide replicas} 275 | @L{Management layer handles RAID replication} 276 | @L{Financial layer plans (un)subscriptions})) 277 | ($section "Programming Model for Fractal Transience" 278 | $plan-slide 279 | ($slide "Transient Programming Languages Sucks" 280 | @L{PL: Everything is Transient—especially processes} 281 | @L{Complex storage protocols} 282 | @L{Atomicity super hard} 283 | @L{Persistence relies on wetware conventions}) 284 | ($slide "Databases are Bad (1)" 285 | @L{Atomicity, but very bad Package Deal} 286 | @L{Terrible programming languages} ;; pgsql, oracle sql, etc. 287 | @L{Horrible Data Model: Everything is a Table, except…} ;; Queues, Procedures. Very few want to program with tables. They use Spreadsheets or APL. Sometimes only one table. Store JSON. 288 | @L{Horrible Evaluation Model: client/server split, speculation…}) ;; split between "client" and "server". Speculation to increase server number but 289 | ($slide "Databases are Bad (2)" 290 | @L{No eff* way to persist processes or user-defined code} 291 | @L{Databases themselves don’t persist} ;; identity tied to a physical/logical machine, a software vendor, etc. 292 | @L{Databases don’t (de)compose} ;; federating databases: ouch. Adding backends: ouch. 293 | @L{Rigid Capabilities, Rigid Schema}) 294 | ($slide "System Myopia" 295 | @C{A programming language without good data persistence is @br 296 | for toy computations about irrelevant data.} 297 | @C{A database without a good programming language is @br 298 | for toy data not part of any relevant computation.})) 299 | ($section "Programming Model for Orthogonal Persistence" 300 | $plan-slide 301 | ($slide "Transactions are Bad" 302 | @L{Require Global Knowledge} 303 | @L{Don’t Compose} 304 | @L{Always too small or too big} 305 | @L{Low Level Concept, no one wants to use} 306 | @L{... Necessary because processes don’t persist}) 307 | ($slide "Anti-Transactions are Good" ;; Critically requires processes to persist! 308 | @L{“Don’t transact here”, a.k.a. Critical Section} 309 | @L{Section itself small, code that uses it large} 310 | @L{Local Knowledge Only, Composable} 311 | @L{High-Level Concept, already used everywhere} 312 | @L{... Possible because processes do persist}) 313 | ($slide "Don’t Commit, Synchronize" 314 | @L{Local constraint, easily automated} 315 | @L{Always persist transaction before to send} 316 | @L{Just like memory barriers in modern processors} 317 | @L{High-Level Concept, already used everywhere}) 318 | ($slide "Domains are Good" 319 | @L{Works with your data model, your execution model, your PL} 320 | @L{Identity not tied to a disk, machine, vendor, etc.} ;; missing concept: reified handling of identity. Address, URL 321 | @L{Dynamically add or remove backends} 322 | @L{Dynamically join (sub)Domains in consensus})) 323 | ($section "Decoupling Issues from Persistence" 324 | $plan-slide 325 | ($slide "Never Save and Copy, Publish and Share" 326 | @L{Never Save, Tag Stable Versions} 327 | @L{Never Copy and Modify, Fork Versions} ;; entire workspace in git 328 | @L{Never Copy and Send, Publish and Share} 329 | @L{Never Forget, Cache and Remember}) ;; preserve identity not just content 330 | ($slide "Never get OOM, manage quotas" 331 | @L{Stop processes & escape to metasystem} 332 | @L{Adjust and Restart: increase quota, GC, fix bug…} ;; either way, user-level PCLSRing 333 | @L{Meta: Predict usage, anticipate increase} ;; unlike today, don’t lose coupling of live code and data 334 | @L{Remember correlation of live code and data}) ;; unlike today 335 | ($slide "Never Migrate, Upgrade and Reconfigure" 336 | @L{All Schema, Backends will eventually get obsolete} 337 | @L{Schema Upgrade support in PL, testing} 338 | @L{Reconfigure backends, no lock-in to hw, sw, vendor} 339 | @L{Normal operations, not million-dollar crises}) 340 | ($slide "Never Corrupt, Maintain" 341 | @L{No Corruption-prone yet Static language} 342 | @L{Low-level or transient code in isolated VMs within Domain} 343 | @L{Live systems to cultivate, not dead programs to throw away} 344 | @L{Debug using reflection within version forks})) 345 | ($section "Conclusion: Consequences of Orthogonal Persistence" 346 | $plan-slide 347 | ($slide "Thesis (Recap)" 348 | @L{Orthogonal Persistence (OP) is the Right Thing™} 349 | @L{Persisting Continuations simplifies everything else} 350 | @L{Transactions are the wrong abstraction} 351 | @L{OP completely solves many problems, elevates the rest}) 352 | ($slide "Different Software Architecture" 353 | @L{Reflection, Capabilities, Versioning, Upgrade, Consensus…} ;; Different APIs 354 | @L{Pervasive changes throughout all system} ;; No save, no copy 355 | @L{Change mindset from PL to System Paradigm} 356 | @L{Run legacy code in managed VMs}) ;; Legacy 357 | ($slide "Economic Implications: New Markets" 358 | @L{Documents and Modules, not Apps} 359 | @L{Local Code, not Spying Advertising App Servers} 360 | @L{Competing Blind Storage, not Vertical Monopolies} 361 | @L{Admins answer to Users, not to App Vendors}) 362 | ($slide "Social Implications: Better Division of Labor" 363 | @L{Users: Empowered, Refocused—Can Delegate Admin} 364 | @L{Developers: Less Repetition, More Specialization} 365 | @L{Providers: Breaking Vertical Monopolies} 366 | @L{Defenders: Better Control on Smaller Attack Surfaces}) 367 | ($slide "Orthogonal Persistence’s Time Has Come!" 368 | @L{Whoever makes it usable first will redefine the industry} 369 | @L{Model: @Url{https://github.com/mighty-gerbils/gerbil-persist}} 370 | @L{X: @Url{https://x.com/ngnghm} @br Blog: @Url{https://ngnghm.github.io}} 371 | @C{Opportunities: @code{}})))) ;; Investment & Partnership Opportunities, soon I hope job opportunities 372 | 373 | (reveal-doc doc) 374 | 375 | 376 | #| 377 | MV: 378 | Make it clear in the story it’s for users, not just for developers 379 | Show examples of persisting data with ChatGPT generated code in Python vs in TScheme 380 | Using AI in Python without strongbox is a huge security risk 381 | Using AI in TScheme with strongbox is A-OK 382 | 383 | Focus 100% on business logic 384 | 385 | MV: on every slide, have the first part (3/4) accessible to everybody, including end-users. 386 | MV: only last 1/4 of the slide for the programmer. 387 | ... 388 | and what does the programmer have to do to support it? 389 | ... 390 | Traditional system... 391 | vs 392 | Our system... 393 | 394 | We can be the Wright Brothers (or Henry Ford) of Orthogonal Persistence. 395 | They were not the only ones trying to get fly , but they 396 | --- the first ones to get it off the ground for real. 397 | We are the times when SOMEONE will get it right. 398 | You can be the ones who get the magic ticket. 399 | Whoever gets it right will have a lot of control on the future. 400 | 401 | DN: No libertarian jokes 402 | 403 | DN: What kind of thign is this? 404 | - OP is an old context 405 | - New take on OP 406 | 407 | DN: Backups are the hardest thing to incentivize: 408 | it’s the kind of encrypted data that users generate and nobody, not even themselves, usually need. 409 | Requires protocol wherein the client regularly polls the data. 410 | F: decentralized is better if possible, but not necessary for persistence 411 | 412 | DN: long-standing "persistent" issue that memory, background storage and network have very different speed and latency. 413 | Some chinese companies have developed locally persistent memory as fast as RAM. 414 | This and very fast network means you can "just" persist everything all the time in a low-level way 415 | without improving the architecture. 416 | 417 | DN: General intelligence comes from three things together: minds, literacy and markets. 418 | Local persistence is sort of solved. However, over time, entropy increases, lots of tabs are opened. 419 | Restarting is nearly necessary. Blank slates are necessary. Life and death are necessary for evolution. 420 | F: Good problem to have. Problem elevated / revealed by solving 421 | 422 | JE: Make the claim about different software architecture way earlier, business model. 423 | 424 | JE: Idea of domain central to me, but what is it? Nail it down clearer. 425 | Filesystem directory? Database? Unix user? VM? 426 | 427 | JE: "Databases are bad" -- central argument. Gonna get some pushback. Be more hedged and humble. 428 | They have evolved on specialization. Deployment. 429 | Challenge: we have to beat databases at what they do. Acknowledge problems. 430 | 431 | JE: Critical sections vs transactions, etc. What happens to read consistency? Probably too technical for this talk. 432 | 433 | JE: Be humble about what transactions are good at. Research problem. Theoretical solution. 434 | 435 | JE: More impact if I back off from things that will trigger people. 436 | 437 | JE: What is the business model for the new software architecture? 438 | 439 | JE: What is the vulnerability model of SaaS products? Users are not in control. 440 | 441 | JE: "Fund me" => my DMs are open. 442 | 443 | JE: replace lisp (me (work ...)) with a tree diagram 444 | 445 | EA: Fewer slides. 446 | 447 | EA: Why orthogonal? 448 | 449 | Persistence becomes a system service. Just like memory management before... 450 | 451 | |# 452 | -------------------------------------------------------------------------------- /t/content-addressing-test.ss: -------------------------------------------------------------------------------- 1 | (export content-addressing-test) 2 | 3 | (import 4 | :std/sugar :std/test :std/text/hex 5 | ../content-addressing) 6 | 7 | (def content-addressing-test 8 | (test-suite "test suite for persist/content-addressing" 9 | (test-case "digest<-file" 10 | (check-equal? (hex-encode (digest<-file "/dev/null")) "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470")))) 11 | -------------------------------------------------------------------------------- /t/db-test.ss: -------------------------------------------------------------------------------- 1 | (export db-test) 2 | 3 | (import 4 | :gerbil/gambit 5 | :std/assert :std/format 6 | :std/misc/bytes :std/misc/list :std/misc/number :std/misc/repr 7 | :std/sugar :std/test 8 | :clan/concurrency :clan/path-config 9 | ../db) 10 | 11 | (def db-test 12 | (test-suite "test for persist/db" 13 | (test-case "open close db twice" 14 | (prn 1) 15 | (def c (open-db-connection "testdb")) 16 | (close-db-connection! c) 17 | (prn 2) 18 | (def c2 (open-db-connection "testdb")) 19 | (close-db-connection! c2)) 20 | (test-case "test-trivial-testdb-put-get" 21 | (def test-val (random-integer 1000000000)) 22 | (def key (string->bytes "test-key1")) 23 | (with-db-connection (c-path "testdb") 24 | (with-committed-tx (tx) (db-put! key (uint->u8vector test-val) tx)) 25 | (check-equal? (u8vector->uint (with-tx (tx) (db-get key tx))) test-val) 26 | (with-committed-tx (tx) (db-put! key (uint->u8vector (1+ test-val)) tx)) 27 | (check-equal? (u8vector->uint (with-tx (tx) (db-get key tx))) (1+ test-val)))) 28 | (test-case "test-trivial-testdb-commits" 29 | (def test-base (* 65536 (random-integer 65536))) 30 | (def n-workers 200) 31 | (def failures []) 32 | (def (test-commit i) 33 | (def key (uint->u8vector (+ i 65536))) 34 | (def val (string->bytes (number->string (+ test-base i)))) 35 | (try 36 | (match (current-db-transaction) 37 | ((DbTransaction connection txid status) 38 | (printf "~s\n" [LEAKED-TX-PARAMETER: i connection txid status]) 39 | (exit 1) 40 | (push! [LEAKED-TX-PARAMETER: i connection txid status] failures)) 41 | (#f (void))) 42 | ;; NB: use assertions instead of check, because we're running in a thread. 43 | (assert! (equal? (current-db-transaction) #f)) 44 | (with-committed-tx (tx) (db-put! key val tx)) 45 | (assert! (equal? (with-tx (tx) (db-get key tx)) val)) 46 | (catch (e) 47 | (printf "IN TEST ~d: ~a\n" i (error-message e)) 48 | (push! [i (error-message e)] failures)))) 49 | (defvalues (initial-batch-id final-batch-id) 50 | (with-db-connection (c "testdb") 51 | (def initial-batch-id (get-batch-id c)) 52 | (parallel-map test-commit (iota n-workers 1)) 53 | (def final-batch-id (get-batch-id c)) 54 | (values initial-batch-id final-batch-id))) 55 | (printf "initial batch: ~d\nfinal batch: ~a\n" initial-batch-id final-batch-id) 56 | (for-each (cut printf "~s\n" <>) failures) 57 | (check (- final-batch-id initial-batch-id) ? (cut <= 1 <> 3)) 58 | (check failures ? null?)))) 59 | -------------------------------------------------------------------------------- /t/ebs-test.ss: -------------------------------------------------------------------------------- 1 | (export ebs-test) 2 | 3 | (import 4 | :std/sugar :std/test :std/text/hex 5 | :clan/base :clan/path-config 6 | ../kvs ../kvs-sqlite ../ebs) 7 | 8 | (def ebs-test 9 | (test-suite "test suite for ebs (encrypted byte store)" 10 | (test-case "Test encrypted sqlite kvs" 11 | (def masterkey (string->bytes "Hello, World!\n+-0123456789ABCDEF")) 12 | (def dbpath (transient-path "t/kvs-sqlite-test.db")) 13 | (ignore-errors (delete-file dbpath)) 14 | (def K (make-KvsSqlite dbpath)) 15 | (def C (standard-encryption-context masterkey)) 16 | (def data (string->bytes "This is a test. I repeat. This is a test.")) 17 | (def intent (string->bytes "let there be light")) 18 | (def h ((EncryptionContext-digest C) data)) 19 | {begin-transaction K} 20 | (check-exception (load-content-addressed-bytes K C h) DbError?) 21 | (check-equal? (store-content-addressed-bytes K C data) h) 22 | {commit-transaction K} 23 | {begin-transaction K} 24 | (check-equal? (load-content-addressed-bytes K C h) data) 25 | (check-exception (load-intent-addressed-bytes K C intent) DbError?) 26 | (check-equal? (store-intent-addressed-bytes K C intent data) (void)) 27 | {commit-transaction K} 28 | {begin-transaction K} 29 | (check-equal? (load-intent-addressed-bytes K C intent) data) 30 | {commit-transaction K} 31 | (void)))) 32 | -------------------------------------------------------------------------------- /t/kvs-leveldb-test.ss: -------------------------------------------------------------------------------- 1 | (export kvs-leveldb-test) 2 | 3 | (import 4 | :std/test 5 | :std/misc/process 6 | :clan/base :clan/path-config 7 | :clan/persist/kvs-leveldb 8 | ./kvs-test) 9 | 10 | (def kvs-leveldb-test 11 | (test-suite "test suite for persist/kvs-leveldb" 12 | (test-case "Test leveldb" 13 | (def dbpath (transient-path "t/kvs-leveldb-test.db")) 14 | (run-process/batch ["rm" "-rf" dbpath]) 15 | (test-kvs (make-KvsLeveldb dbpath))))) 16 | -------------------------------------------------------------------------------- /t/kvs-mux-test.ss: -------------------------------------------------------------------------------- 1 | (export kvs-mux-test) 2 | 3 | (import 4 | :gerbil/gambit 5 | :std/assert :std/format 6 | :std/misc/list :std/misc/number :std/misc/repr 7 | :std/sugar :std/test 8 | :clan/concurrency :clan/path-config 9 | ../kvs ../kvs-sqlite ../kvs-mux) 10 | 11 | (def kvs-mux-test 12 | (test-suite "test for persist/db" 13 | #| 14 | (test-case "open close db twice" 15 | (prn 1) 16 | (def c (open-db-connection "testdb")) 17 | (close-db-connection! c) 18 | (prn 2) 19 | (def c2 (open-db-connection "testdb")) 20 | (close-db-connection! c2)) 21 | (test-case "test-trivial-testdb-put-get" 22 | (def test-val (random-integer 1000000000)) 23 | (def key (string->bytes "test-key1")) 24 | (with-db-connection (c-path "testdb") 25 | (with-committed-tx (tx) (db-put! key (bytes<-nat test-val) tx)) 26 | (check-equal? (nat<-bytes (with-tx (tx) (db-get key tx))) test-val) 27 | (with-committed-tx (tx) (db-put! key (bytes<-nat (1+ test-val)) tx)) 28 | (check-equal? (nat<-bytes (with-tx (tx) (db-get key tx))) (1+ test-val)))) 29 | (test-case "test-trivial-testdb-commits" 30 | (def test-base (* 65536 (random-integer 65536))) 31 | (def n-workers 200) 32 | (def failures []) 33 | (def (test-commit i) 34 | (def key (bytes<-nat (+ i 65536))) 35 | (def val (string->bytes (number->string (+ test-base i)))) 36 | (try 37 | (match (current-db-transaction) 38 | ((DbTransaction connection txid status completion) 39 | (printf "~s\n" [LEAKED-TX-PARAMETER: i connection txid status]) 40 | (exit 1) 41 | (push! [LEAKED-TX-PARAMETER: i connection txid status] failures)) 42 | (#f (void))) 43 | ;; NB: use assertions instead of check, because we're running in a thread. 44 | (assert! (equal? (current-db-transaction) #f)) 45 | (with-committed-tx (tx) (db-put! key val tx)) 46 | ;;(write [FOO: i key val (db-get key) (nat<-bytes key) (bytes->string val) (bytes->string (db-get key))])(newline) 47 | (assert! (equal? (with-tx (tx) (db-get key tx)) val)) 48 | (catch (e) 49 | (printf "IN TEST ~d: ~a\n" i (error-message e)) 50 | (push! [i (error-message e)] failures)))) 51 | (defvalues (initial-batch-id final-batch-id) 52 | (with-db-connection (c "testdb") 53 | (def initial-batch-id (get-batch-id c)) 54 | (parallel-map test-commit (iota n-workers 1)) 55 | (def final-batch-id (get-batch-id c)) 56 | (values initial-batch-id final-batch-id))) 57 | (printf "initial batch: ~d\nfinal batch: ~a\n" initial-batch-id final-batch-id) 58 | (for-each (cut printf "~s\n" <>) failures) 59 | (check (- final-batch-id initial-batch-id) ? (cut <= 1 <> 3)) 60 | (check failures ? null?)) 61 | |# 62 | (void))) 63 | -------------------------------------------------------------------------------- /t/kvs-sqlite-test.ss: -------------------------------------------------------------------------------- 1 | (export kvs-sqlite-test) 2 | 3 | (import 4 | :std/sugar 5 | :std/test 6 | :clan/base 7 | :clan/path-config 8 | :clan/persist/kvs-sqlite 9 | ./kvs-test) 10 | 11 | (def kvs-sqlite-test 12 | (test-suite "test suite for persist/kvs-sqlite" 13 | (test-case "Test sqlite" 14 | (def dbpath (transient-path "t/kvs-sqlite-test.db")) 15 | (ignore-errors (delete-file dbpath)) 16 | (test-kvs (make-KvsSqlite dbpath))))) 17 | -------------------------------------------------------------------------------- /t/kvs-test.ss: -------------------------------------------------------------------------------- 1 | (export kvs-test test-kvs) 2 | 3 | (import 4 | :std/test 5 | :clan/persist/kvs) 6 | 7 | ;; Test a Key Value Store interface 8 | (def (test-kvs K) 9 | (def k1 #u8(1)) 10 | (def k2 #u8(2 3)) 11 | (def v1 (string->bytes "Hello, World!")) 12 | (def v2 (string->bytes "FOO")) 13 | {begin-transaction K} 14 | (check-equal? (values->list {read-key K k1}) [#f #f]) 15 | {write-key K k1 v1} 16 | {commit-transaction K} 17 | {begin-transaction K} 18 | (check-equal? (values->list {read-key K k1}) [v1 #t]) 19 | {write-key K k1 v2} 20 | {commit-transaction K} 21 | {begin-transaction K} 22 | (check-equal? (values->list {read-key K k1}) [v2 #t]) 23 | {delete-key K k1} 24 | {commit-transaction K} 25 | (check-equal? (values->list {read-key K k1}) [#f #f]) 26 | (void)) 27 | 28 | (def kvs-test 29 | (test-suite "test suite for persist/kvs-sqlite" 30 | (void))) 31 | -------------------------------------------------------------------------------- /t/merkle-trie-test.ss: -------------------------------------------------------------------------------- 1 | (export #t) 2 | 3 | (import 4 | :gerbil/gambit 5 | :std/format :std/misc/list :std/misc/number :std/misc/repr 6 | :std/sugar :std/test :std/text/hex 7 | :clan/assert 8 | :clan/concurrency :clan/path-config 9 | :clan/testing 10 | :clan/poo/object :clan/poo/type :clan/poo/number :clan/poo/trie 11 | :clan/poo/t/table-testing 12 | ../db ../content-addressing ../merkle-trie) 13 | 14 | (def T (MerkleTrie Value: String)) 15 | 16 | (def-table-test-accessors T) 17 | 18 | (def trie-100 (<-alist al-100-decimal)) 19 | (def trie-10-12-57 (<-l '(10 12 57))) 20 | (def trie-4 (<-l '(1 2 3 4))) 21 | 22 | (def (check-proof t k v) 23 | (match (F .proof<- t k) 24 | ([sub . up] 25 | (assert-equal! (F .unwrap sub) (Leaf v)) 26 | (F .validate-proof (F .digest<- t) sub up)) 27 | (_ (error "foo")))) 28 | 29 | (def (merkle-tests T) 30 | (table-test-case T "merkle trie tests" 31 | (test-case "simple proof consistent 1" 32 | (check-proof (<-l '(0 1)) 0 "0")) 33 | (test-case "simple proof consistent 100" 34 | (check-proof (F .singleton 100 "100") 100 "100")) 35 | (def good-merkle-path 36 | ($Path ($Costep -1 42) 37 | (map (lambda (x) (BranchStep (hex-decode x))) 38 | ["7c79d94b79bf44bc3a8d1f9c1d6f887fad01a94aeaf95060f994ac6f29f2fbd4" 39 | "c024a35e4802d3a7e43d7c6e87f687417fc8a9ef1a3a664952151381e58942b1" 40 | "8399f332007d0fbacbc5b9eea7fff98295de897e8fd176981ff289e47eef7a5f" 41 | "d9b180853f5efbd8632128050ffdadf762e0304740507bbb801a04702353f858" 42 | "7b5bbad9ce47c55a0f88dd235c7bf23b7ebc341e0ba59106d2c247a60b9b60f6" 43 | "afb6fd06436bc357dfee6f4b33196f2240a685a876e22aa87eec79830560fdf7" 44 | "80a28f0ced76060709881876dfc255d2e06f6d0a0cb33c5677c1e0402079900d"]))) 45 | (def bad-merkle-path 46 | (match good-merkle-path 47 | (($Path C [s1 s2 s3 s4 s5 s6 s7]) ($Path C [s1 s2 s5 s4 s3 s6 s7])))) ;; swap steps 3 and 5 48 | (test-case "proof" 49 | (check-equal? (cdr (F .proof<- trie-100 42)) good-merkle-path)) 50 | (test-case "simple-proof-consistent" 51 | (check-proof (F .singleton 0 "0") 0 "0")) 52 | (test-case "simple-proof-consistent-4" 53 | (check-proof (F .singleton 4 "4") 4 "4")) 54 | (test-case "simple-proof-consistent-10" 55 | (check-proof trie-10-12-57 57 "57")) 56 | (test-case "simple-proof-consistent-24" 57 | (check-proof trie-4 2 "2")) 58 | (test-case "proof-inconsistent" 59 | (check-exception (F .validate-proof (F .digest<- trie-100) (F .leaf 42) good-merkle-path) true)) 60 | (test-case "proof-inconsistent" 61 | (check-exception (F .validate-proof (F .digest<- trie-100) (F .leaf 42) bad-merkle-path) true)))) 62 | 63 | (def merkle-trie-test 64 | (test-suite "integration test for persist/merkle-trie" 65 | (init-test-random-source!) 66 | (with-db-connection (c "testdb") 67 | (table-tests T) 68 | (merkle-tests T)))) 69 | -------------------------------------------------------------------------------- /t/persist-test.ss: -------------------------------------------------------------------------------- 1 | (export persist-test) 2 | 3 | (import 4 | :std/test 5 | :clan/poo/type 6 | :clan/poo/number 7 | ../db ../persist) 8 | 9 | (def persist-test 10 | (test-suite "test suite for persist/persist" 11 | (test-case "define-persistent-variable" 12 | (define-persistent-variable my-string String "my-string" "foo") 13 | (define-persistent-variable my-num UInt256 "my-num" 42) 14 | (with-db-connection (c "testdb") 15 | ;; Delete the persistent variables, in case the testdb is dirty 16 | (with-tx (tx) 17 | (for-each (lambda (k) (db-delete! (ensure-db-key k))) 18 | ["my-string" "my-num"])) 19 | (check (my-string) => "foo") 20 | (set! (my-string) "bar") 21 | (check (my-string) => "bar") 22 | (check (my-num) => 42) 23 | (set! (my-num) 69) 24 | (check (my-num) => 69)) 25 | (with-db-connection (c "testdb") 26 | (check (my-string) => "bar") 27 | (set! (my-string) "baz") 28 | (check (my-string) => "baz") 29 | (check (my-num) => 69) 30 | (set! (my-num) 100) 31 | (check (my-num) => 100)) 32 | ;; Reset for next tests 33 | (with-db-connection (c "testdb") 34 | (set! (my-string) "foo") 35 | (set! (my-num) 42))))) 36 | -------------------------------------------------------------------------------- /unit-tests.ss: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gxi 2 | ;; To run tests, use: ./unit-tests.ss 3 | ;; You can even run tests without first building with ./build.ss ! 4 | (import :clan/testing) 5 | (init-test-environment!) 6 | (import :clan/persist/version) 7 | --------------------------------------------------------------------------------