├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── docs ├── attachments │ ├── sedimentree-1.png │ ├── sedimentree-2.png │ ├── sedimentree-3.png │ ├── sedimentree-4.png │ └── sedimentree-5.png ├── protocol.md └── sedimentree.md ├── src ├── blob.rs ├── commit.rs ├── effects.rs ├── hex.rs ├── io.rs ├── leb128.rs ├── lib.rs ├── messages.rs ├── messages │ ├── decode.rs │ ├── encode.rs │ ├── encoding_types.rs │ └── stream.rs ├── notification_handler.rs ├── parse.rs ├── reachability.rs ├── request_handlers.rs ├── riblt.rs ├── sedimentree.rs ├── sedimentree │ ├── commit_dag.rs │ └── storage.rs ├── snapshots.rs ├── storage_key.rs ├── stories.rs ├── subscriptions.rs └── sync_docs.rs └── tests └── smoke.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "beelay-core" 3 | authors = ["Alex Good "] 4 | edition = "2021" 5 | license = "Apache-2.0" 6 | repository = "https://github.com/automerge/beelay" 7 | rust-version = "1.80.1" 8 | version = "0.1.0-alpha.1" 9 | description = "A new sync protocol for Automerge" 10 | 11 | [dependencies] 12 | blake3 = "1.5.4" 13 | bs58 = { version = "0.5.1", features = ["check"] } 14 | futures = "0.3.30" 15 | num = { version = "0.4.3", features = ["num-bigint"] } 16 | rand = "0.8.5" 17 | serde = { version = "1.0.210", features = ["derive"] } 18 | tracing = "0.1.40" 19 | 20 | [dev-dependencies] 21 | arbitrary = { version = "1.3.2", features = ["derive"] } 22 | bolero = { version = "0.11.1", features = ["arbitrary"] } 23 | tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Beelay 2 | 3 | This repository contains an experimental implementation of a new sync protocol for Automerge. In fact, because one of the goals of the project is to produce a sync protocol which allows end-to-end encryption of the document data this protocol will work for any data structure which likes being represented as a hash-linked DAG of changes. 
4 | 5 | See [protocol.md]( docs/protocol.md ) for a more detailed description of how it works. 6 | 7 | ## Status 8 | 9 | This is very much a work in progress, expect things to be very broken and to change significantly before we're done. 10 | -------------------------------------------------------------------------------- /docs/attachments/sedimentree-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automerge/beelay/c817e9e627940fa193b7731a16e70792dda93b1e/docs/attachments/sedimentree-1.png -------------------------------------------------------------------------------- /docs/attachments/sedimentree-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automerge/beelay/c817e9e627940fa193b7731a16e70792dda93b1e/docs/attachments/sedimentree-2.png -------------------------------------------------------------------------------- /docs/attachments/sedimentree-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automerge/beelay/c817e9e627940fa193b7731a16e70792dda93b1e/docs/attachments/sedimentree-3.png -------------------------------------------------------------------------------- /docs/attachments/sedimentree-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automerge/beelay/c817e9e627940fa193b7731a16e70792dda93b1e/docs/attachments/sedimentree-4.png -------------------------------------------------------------------------------- /docs/attachments/sedimentree-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automerge/beelay/c817e9e627940fa193b7731a16e70792dda93b1e/docs/attachments/sedimentree-5.png -------------------------------------------------------------------------------- /docs/protocol.md: -------------------------------------------------------------------------------- 1 | # Beelay - A new sync protocol for Automerge 2 | 3 | This document describes a new sync protocol - tentatively named "Beelay" - designed for efficiently synchronizing collections of Automerge documents. 4 | 5 | ## Introduction and motivation 6 | 7 | An Automerge document can be thought of as like git for JSON. Each document is a data structure which can be materialized as a JSON object, but modifications to the document are stored within the document as a commit DAG rather like Git. This means that Automerge documents can be edited concurrently and merged together automatically later, providing a substrate for collaboration without central peers. 8 | 9 | One common requirement for collaborative software is real-time communication between concurrently editing peers. For this purpose Automerge includes a sync protocol which enables peers to send deltas rather than sending the entire document on every change - for things like per-keystroke editing this is essential. This sync protocol works but production use has exposed a few limitations: firstly, that running sync servers is expensive and compromises security and secondly, that applications frequently want to synchronize many documents. 10 | 11 | ### Sync Servers 12 | 13 | Due to the structure of networks on the internet, and due to requirements for data availability, it is often necessary to have a central server which acts as a relay and store-and-forward peer for sync messages. 
Unlike peers running on behalf of a single user these peers must synchronize many documents at once. The existing sync protocol requires that the entire document be in memory in order to run the sync protocol which limits the scale these servers can affordably reach. 14 | 15 | In addition, one of the appealing things about Automerge is that you don't need to trust a central server. Introducing a sync server somewhat compromises this feature - you must trust the server operator not to look at your data and to keep it secure. 16 | 17 | ### Collections of Documents 18 | 19 | An Automerge document is a "unit of sharing". You can't share subcomponents of a document with other people - or view the history of just one part of the document. This is a useful property because it makes the behavior of a document predictable in the face of concurrent changes but it is also a limitation because many useful applications involve sharing subsets of a users data with different people. 20 | 21 | To work around this limitation applications often create many Automerge documents and link them together via a location independent url. This introduces challenges for the sync protocol. We have users who have thousands of documents - synchronizing just the small number of documents which have changed in one session is very inefficient with the current scheme - which must load every document into memory and create a separate sync session for each one. 22 | 23 | ### Requirements 24 | 25 | These two sets of problems are technically unrelated, what brings them together for our purposes is that they can both be solved by a new sync protocol. Specifically what we require is a sync protocol which 26 | 27 | * Does not impose O(n) memory requirements on sync servers where n is the number of documents being synchronized 28 | * Allows for sync servers to operate over encrypted data to reduce the trust users have to place in sync servers 29 | * Provides a mechanism for efficiently determining what documents in a collection of documents have changed 30 | 31 | ## Design 32 | 33 | Beelay is an RPC based protocol which operates between two peers. We make no assumptions about the ordering of messages on the channel connecting these two peers. In typical interactions one peer will be a "sync server" and one will be a user agent within some application, although the protocol doesn't require this topology. 34 | 35 | To begin synchronizing a document with a remote peer the local Beelay peer first requests that the remote peer create a "snapshot" representing the current state of the given document and every document transitively reachable (via links) from it. The remote peer creates this snapshot and returns an identifier the local peer can use to perform [RIBLT-sync] with the snapshot. Once this sync is complete the local peer knows which documents in the collection are out of sync. 36 | 37 | At this point the local peer runs [ sedimentree sync ]( ./sedimentree.md ) for each out of sync document. Once this is complete the local peer knows that it is at least up to date with the state of the collection at the time of the snapshot. Finally the local peer can [listen] to any changes to documents in the snapshot which have occurred since the snapshot was created. Thus the local peer can stay in sync with live updates. 38 | 39 | It must be possible to run this synchronization protocol over encrypted data which means that we cannot directly examine the contents of the documents being synchronzed in order to extract the links between them. 
Instead, peers which have the clear text of the documents synchronize the links between documents to a "reachability index" on the server. This index is a very simple CRDT which is also synchronized via [sedimentree sync]. 40 | 41 | ### RIBLT Sync 42 | 43 | RIBLT sync refers to "Rateless Invertible Bloom Lookup Tables" as presented in [Practical Rateless Set Reconciliation](https://arxiv.org/abs/2402.02668). This scheme allows for a set of items to be reconciled between two peers with a bandwidth overhead which is proportional to the size of the set difference and with a very low number of round trips. 44 | 45 | ### Sedimentree Sync 46 | 47 | Sedimentree sync is a scheme we have designed for synchronizing commit DAGs such as those which make up an Automerge document. The important features of sedimentree sync are that it allows for compressing runs of operations in the commit DAG _and ommitting their change hashes_ from the compressed runs. This is crucial as it allows us to keep a very granular commit history without using enormous amounts of storage and/or bandwidth. See [sedimentree.md] for more details. 48 | 49 | ## Messages 50 | 51 | Beelay messages are encoded in a binary format which we describe here. With the following additional notation: 52 | 53 | ``` 54 | byte = %x00-FF 55 | ; any octet 56 | bytes = 1*byte 57 | ; any sequence of octets 58 | uleb128 = 1*8( %x00-7F ) / ( %x80-FF bytes ) 59 | ; unsigned LEB128 encoding 60 | leb128 = 1*8( %x00-7F ) / ( %x80-FF bytes ) 61 | ; signed LEB128 encoding 62 | ``` 63 | 64 | With the exception of notifications sent in response to listen requests, every message is either a request or a response. 65 | 66 | ``` 67 | message = message_type ((request_id (request / response)) / notification) 68 | message_type = %d00 ; request 69 | / %d01 ; response 70 | / %d02 ; notification 71 | request_id = 16(byte) 72 | 73 | request = create_snapshot_request 74 | / snapshot_symbols_request 75 | / fetch_sedimentree_request 76 | / fetch_blob_part_request 77 | / upload_commits_request 78 | / upload_blob_request 79 | / listen 80 | response = create_snapshot 81 | / snapshot_symbols_response 82 | / fetch_sedimentree_response 83 | / fetch_blob_part_response 84 | / upload_commits_response 85 | / upload_blob_response 86 | / listen 87 | ``` 88 | 89 | ### Create Snapshot 90 | 91 | The create snapshot request contains just the document ID of the root document to create a snapshot from. 92 | 93 | ``` 94 | create_snapshot_request = %d04 ; request type 95 | uleb128 ; length of root document ID 96 | bytes ; root document ID 97 | ``` 98 | 99 | The response to a create snapshot request contains the snapshot ID and the first "coded symbols" from the RIBLT sync for the snapshot in question. The receiver should attempt to peel these symbols and if they find they still need more, then use the `snapshot_symbols` request to request more symbols. 100 | 101 | ``` 102 | create_snapshot_response = %d04 ; response type 103 | 16(byte) ; snapshot ID 104 | uleb128 ; number of coded symbols 105 | *coded_symbol 106 | 107 | coded_symbol = 16(byte) ; first part of symbol which is a document ID when peeled 108 | 32(byte) ; second part of symbol, which is the hash of the heads of the 109 | ; document when peeled 110 | ``` 111 | 112 | 113 | ### Snapshot Symbols 114 | 115 | Used to request additional RIBLT coded symbols for an existing snapshot. 
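The `uleb128` fields that appear throughout these message definitions (including the symbol count in the response below) are standard unsigned LEB128. As a reference point, a minimal encoder looks like the following; this is a sketch for illustration only, and the crate's own implementation lives in `src/leb128.rs` and may differ in detail.

```rust
// Encode `value` as unsigned LEB128: seven bits per byte, least-significant
// group first, with the high bit set on every byte except the last.
fn encode_uleb128(buf: &mut Vec<u8>, mut value: u64) {
    loop {
        let mut byte = (value & 0x7f) as u8;
        value >>= 7;
        if value != 0 {
            byte |= 0x80; // more bytes follow
        }
        buf.push(byte);
        if value == 0 {
            break;
        }
    }
}
```

For example, encoding 300 produces the two bytes `0xAC 0x02`, whereas values below 128 occupy a single byte.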
116 | 117 | ``` 118 | snapshot_symbols_request = %d05 ; request type 119 | 16(byte) ; snapshot ID 120 | 121 | snapshot_symbols_response = %d05 ; response type 122 | uleb128 ; number of coded symbols 123 | *coded_symbol 124 | ``` 125 | 126 | ### Fetch Sedimentree 127 | 128 | Request to fetch the Sedimentree (both content and reachability index) for a specific document. 129 | 130 | ``` 131 | fetch_sedimentree_request = %d01 ; request type 132 | uleb128 ; length of document ID 133 | bytes ; document ID 134 | 135 | fetch_sedimentree_response = %d02 ; response type 136 | sedimentree_data 137 | 138 | sedimentree_data = %d00 ; not found 139 | / (%d01 ; found 140 | content_summary 141 | index_summary) 142 | 143 | content_summary = uleb128 ; number of bundles 144 | *sedimentree_bundle 145 | 146 | index_summary = uleb128 ; number of bundles 147 | *sedimentree_bundle 148 | 149 | sedimentree_bundle = bytes ; bundle data 150 | ``` 151 | 152 | ### Fetch Blob Part 153 | 154 | Request to fetch a portion of a blob by its hash. 155 | 156 | ``` 157 | fetch_blob_part_request = %d02 ; request type 158 | 32(byte) ; blob hash 159 | uleb128 ; offset 160 | uleb128 ; length 161 | 162 | fetch_blob_part_response = %d03 ; response type 163 | uleb128 ; length of blob part 164 | bytes ; blob part data 165 | ``` 166 | 167 | ### Upload Commits 168 | 169 | Request to upload commits for a document. 170 | 171 | ``` 172 | upload_commits_request = %d00 ; request type 173 | uleb128 ; length of document ID 174 | bytes ; document ID 175 | commit_category ; single byte indicating commit category 176 | uleb128 ; number of upload items 177 | *upload_item 178 | 179 | upload_commits_response = %d01 ; response type 180 | ; empty response indicating success 181 | 182 | upload_item = blob_ref tree_part 183 | 184 | blob_ref = %d00 32(byte) ; blob hash 185 | / (%d01 ; inline blob 186 | uleb128 ; blob length 187 | bytes) ; blob data 188 | 189 | tree_part = %d00 ; stratum 190 | (%d00 / (%d01 32(byte))) ; optional start commit hash 191 | 32(byte) ; end commit hash 192 | uleb128 ; number of checkpoints 193 | *32(byte) ; checkpoint hashes 194 | / (%d01 ; commit 195 | 32(byte) ; commit hash 196 | uleb128 ; number of parents 197 | *32(byte)) ; parent hashes 198 | ``` 199 | 200 | ### Upload Blob 201 | 202 | Request to upload a complete blob. 203 | 204 | ``` 205 | upload_blob_request = %d03 ; request type 206 | uleb128 ; blob length 207 | bytes ; blob data 208 | 209 | upload_blob_response = %d01 ; response type 210 | ; empty response indicating success 211 | ``` 212 | 213 | ### Listen 214 | 215 | Request to listen for updates to a snapshot. Any changes to the documents reachable from the snapshot which have been discovered by the server since the snapshot was created will be delivered as "notification" messages, which are the only kind of message sent without an explicit request. 216 | 217 | ``` 218 | listen_request = %d06 ; request type 219 | 16(byte) ; snapshot ID 220 | 221 | listen_response = %d06 ; response type 222 | ; empty response indicating successful subscription 223 | 224 | notification = peer_id 225 | doc_id 226 | upload_item 227 | 228 | peer_id = uleb128 ; length of peer ID 229 | bytes ; peer ID bytes 230 | 231 | doc_id = uleb128 ; length of document ID 232 | bytes ; document ID bytes 233 | ``` 234 | 235 | ### Notifications 236 | 237 | Notifications are sent to a peer when a change is discovered to a document which is transitively reachable from a snapshot the peer issued a `listen` request to. 
238 | 239 | 240 | ``` 241 | notification = from_peer 242 | document_id 243 | upload_item 244 | 245 | from_peer = uleb128 ; length of peer ID bytes 246 | bytes ; peer ID 247 | 248 | document_id = uleb128 ; length of document ID bytes 249 | bytes ; document ID 250 | 251 | upload_item = blob_ref tree_part 252 | 253 | blob_ref = %d00 32(byte) ; blob hash reference 254 | / (%d01 ; inline blob 255 | uleb128 ; blob length 256 | bytes) ; blob data 257 | 258 | tree_part = %d00 ; stratum 259 | (%d00 / (%d01 32(byte))) ; optional start commit hash 260 | 32(byte) ; end commit hash 261 | uleb128 ; number of checkpoints 262 | *32(byte) ; checkpoint hashes 263 | / (%d01 ; commit 264 | 32(byte) ; commit hash 265 | uleb128 ; number of parents 266 | *32(byte)) ; parent hashes 267 | ``` 268 | 269 | 270 | ### Error Response 271 | 272 | Any request can result in an error response: 273 | 274 | ``` 275 | error_response = %d00 ; error response type 276 | uleb128 ; length of error message 277 | bytes ; UTF-8 encoded error message 278 | ``` 279 | 280 | -------------------------------------------------------------------------------- /docs/sedimentree.md: -------------------------------------------------------------------------------- 1 | # Sedimentree Sync 2 | 3 | A method for storing and synchronising Automerge documents (or any causal commit DAG based data structure). 4 | 5 | ## Context and Motivation 6 | 7 | ### Commit graphs and metadata compression 8 | 9 | Automerge documents retain their entire editing history in a hash linked commit graph, much like Git. We'll be seeing a lot of commit graphs, I'll draw them like this: 10 | 11 | ```mermaid 12 | graph LR 13 | A --> B 14 | A --> C 15 | ``` 16 | 17 | The letters represent commit hashes and the arrows point from parents to children. 18 | 19 | In Git, commits are a snapshot of the filesystem and are made relatively infrequently. In Automerge, commits are sets of operations which should be applied to the document and are much more granular. We create an operation for every keystroke when editing text and we frequently create a commit for each operation. 20 | 21 | An Automerge document containing the string "hello" might well end up with a commit graph like this: 22 | 23 | ```mermaid 24 | graph LR 25 | A[insert 'h'] --> B 26 | B[insert 'e'] --> C 27 | C[insert 'l'] --> D 28 | D[insert 'l'] --> E 29 | E[insert 'o'] 30 | ``` 31 | 32 | The operations themselves are not as simple as this graph suggests. Instead of 'insert {x}' the operation is more like 'insert {x} following operation {y}', where `y` is the ID of a previous insert operation. For this to work every operation has to have an ID associated with it. 33 | 34 | A straightforward encoding of this structure adds a lot of metadata overhead to the underlying data. Fortunately we are able to compress most of this away. The details are quite fiddly but the important point is that we take advantage of the structure of chains of commits which are created one after another. 35 | 36 | The need to compress metadata has led to two different transport formats for Automerge documents. We either send individual commits (referred to in the Automerge API as "changes") or we encode the entire document. Which format we use depends on how we are synchronising and storing changes. 37 | 38 | ### The Current Sync Protocol 39 | 40 | The current sync protocol is a two-party protocol which uses the commit graph of the document on each peer to determine what each end is missing. 
Roughly speaking, each peer sends the heads of its commit graph and then the other end responds with any known descendants. Bloom filters are used to summarise the current state of each peer and so the protocol may require multiple iterations due to bloom filter false positives. 41 | 42 | Generally speaking the sync protocol operates in terms of commits: each end sends commits the other end may be missing. There is one important optimisation: when a peer determines that the remote end has no data at all (i.e. during initial synchronisation) then the peer sends the entire document as a single compressed chunk. 43 | 44 | There are lots of details to this protocol, but the important points are: 45 | 46 | * Running the protocol requires having the entire commit graph in memory - in order to perform ancestry calculations on it 47 | * The protocol is iterated, so it is impossible to know up front how much work there is to do before you are synchronised 48 | * Except in initial synchronisation we incur the metadata overhead of the commit graph because we are sending individual commits 49 | 50 | ### The problem 51 | 52 | The largest problem we currently have with the sync protocol is the memory-intensive nature of running a sync server. The ongoing work on runtime memory compression will ameliorate this issue, but another related problem looms. The Beehive project is working on implementing end to end encryption (bee-to-bee encryption) for Automerge which will require that sync servers do not have access to the plaintext of the commits in the commit graph - but this seems like it will make metadata compression much more complicated. 53 | 54 | Recall that in the current sync protocol if a peer is performing initial synchronisation then we send them the entire compressed document in one go. This is crucial to avoid the bandwidth costs of the metadata overhead. Currently the sync server is able to produce this compressed document on the fly because it has the plaintext of the document available. In an end to end encrypted world all the sync server has is the commit graph; the actual contents of the commits are encrypted: 55 | 56 | ```mermaid 57 | flowchart LR 58 | A[A\n encrypted chunk] --> B 59 | A --> C 60 | B[B\n encrypted chunk] 61 | C[C\n encrypted chunk] 62 | ``` 63 | 64 | There is now no way for the sync server to produce a compressed document for initial sync. One way around this is to have plaintext nodes upload compressed documents to the sync server every so often. This raises questions: when should a plaintext node perform compression and how should sync servers decide which compressed chunk to send? 65 | 66 | There is another problem with the commit-graph-plus-compressed-upload approach - it doesn't solve the metadata overhead problem for federated sync servers. Federating sync servers which both operate over ciphertext will be forced to download all of the commit DAG because otherwise they have no way of knowing whether they have all the content. Granted, sync servers are likely to have fast connections and capacious storage, but I still think it's extremely undesirable for sync servers to have such unpredictable performance characteristics. 
67 | 68 | ## Design Goals 69 | 70 | * Predictable performance on both plaintext and ciphertext nodes 71 | * Low metadata overhead for initial sync 72 | * Low latency for staying-in-sync (real-time collaboration) 73 | * Support for progress bars and pause/resume of large synchronisation tasks 74 | * Stateless RPC-style API (to facilitate horizontal scaling) 75 | * Minimally expressive storage requirements (i.e. don't require transactions from the storage layer) 76 | 77 | ## Sedimentrees 78 | 79 | ### Overview 80 | 81 | A sedimentree is a data structure which recursively compresses ranges of a commit graph in such a way that older commits (those close to the root of the graph) are compressed in larger chunks than newer commits. We can imagine the older chunks as being "underneath" newer chunks, like the strata of a sedimentary rock. 82 | 83 | This: 84 | 85 | ```mermaid 86 | flowchart LR 87 | A --> B 88 | B --> C 89 | A --> D 90 | D --> E 91 | E --> F 92 | F --> G 93 | E --> J 94 | G --> H 95 | J --> I 96 | H --> I 97 | ``` 98 | 99 | Becomes 100 | 101 | ![](./attachments/sedimentree-1.png) 102 | 103 | Where each rectangle is a chunk which contains all the chunks directly above it. Importantly, only the lowermost stratum is needed, so this diagram can be simplified to: 104 | 105 | ![](./attachments/sedimentree-2.png) 106 | 107 | As implied by these diagrams, the sedimentree data structure first organises commits into a linear order and then compacts ranges of that order. We are able to do this in such a way that peers with overlapping but different commit graphs will agree on the boundaries and contents of each stratum, and the contents of each stratum will contain commits ordered such that their metadata compresses well. 108 | 109 | ## Terminology 110 | 111 | A "commit" refers to the abstract idea of a node in the DAG which has a payload, a hash, and a set of parents identified by hash. A range of commits which has been compressed is referred to as a "stratum". A stratum has a start and end hash and zero or more interior "checkpoint" hashes - on which more later. If a commit is stored outside of a stratum it is a "loose commit". The payloads of both strata and loose commits are stored separately from the metadata about those objects as a "blob" - which is a content-addressed binary array. 112 | 113 | ![](attachments/sedimentree-3.png) 114 | 115 | Each stratum has a "level". Strata with higher levels are further down in the sedimentree - composed of larger ranges of the commit graph. The first stratum level is level 1. 116 | 117 | A stratum which contains the data from some strata or loose commits above it is said to "support" the smaller strata. A sedimentree can be simplified by recursively removing all the strata or loose commits which are supported by strata below them; such a simplified sedimentree is called "minimal". 118 | 119 | ## Constructing a Sedimentree 120 | 121 | To construct a sedimentree we need these things: 122 | 123 | * A way to organise the commit DAG into a linear order 124 | * A way to choose the stratum boundaries 125 | * A way to recognise whenever one stratum supports another 126 | 127 | All of these mechanisms need to produce the same results for the shared components of the history on peers with divergent commit graphs. Furthermore, we would like chains of commits to be more likely to end up in the same stratum, as this allows us to achieve better compression. 
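To make the terminology above concrete, a stratum and a loose commit can be pictured roughly as the following records. This is an illustrative sketch only: `CommitHash` and `BlobMeta` are real types in this crate, but the structs and field names here are invented for exposition, and the actual representations (for example `CommitBundle` in `src/commit.rs` and the types in `src/sedimentree.rs`) may be shaped differently.

```rust
// Illustrative shapes for the objects described in the Terminology section.
// Payloads live in content-addressed blobs, so only metadata appears here.
struct Stratum {
    start: Option<CommitHash>,    // optional, matching the wire format in protocol.md
    end: CommitHash,              // boundary commit that closes the stratum
    checkpoints: Vec<CommitHash>, // interior level-1 boundaries, never discarded
    payload: BlobMeta,            // hash and size of the compressed run of commits
}

struct LooseCommit {
    hash: CommitHash,
    parents: Vec<CommitHash>,     // parents never change, which keeps the ordering stable
    payload: BlobMeta,            // hash and size of the commit's contents
}
```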
128 | 129 | Here are the ingredients we are going to use: 130 | 131 | * Order the graph via a reverse depth first traversal of the reversed change graph. I.e. start from the heads of the graph and traverse from commit to the parents of the commit 132 | * Choose stratum boundaries based on the number of leading zeros in the hash of each commit. A commit with two leading zeros is a level 1 stratum 133 | * Every stratum regardless of level retains checkpoint hashes - the hashes of the level 1 boundaries of which it is composed - which allows us to always tell if one stratum supports another by checking if the lower stratum contains the boundaries of the higher stratum in its checkpoints 134 | 135 | ### Reverse Depth First Traversal 136 | 137 | Given that we need a traversal which places runs of sequential operations in the same bundle to take advantage of RLE compression, it might seem natural to perform a depth first traversal starting from the root of the commit graph. For example, given 138 | 139 | ```mermaid 140 | graph LR 141 | a --> b 142 | a --> c 143 | c --> e 144 | ``` 145 | A depth first traversal would be `a,b,c,e`. Let's say that `a`, `b` and `e` are chunk boundaries. Then we have chunks of `a,b` and `a,c,e`. 146 | 147 | The problem with this is that concurrent changes can modify the traversal. Imagine we receive some new changes which make the DAG look like this: 148 | 149 | ```mermaid 150 | graph LR 151 | a --> b 152 | a --> c 153 | c --> d 154 | c --> e 155 | ``` 156 | Now the traversal is `a,b,c,d,e`. This is a problem because it means that the contents of a chunk could change. Our chunks have changed to `a,b` and `a,c,d,e`. This is disastrous: two peers with the same chunk boundaries disagree on the contents of the second chunk. 157 | 158 | Note, however, that the parents of a commit never change. This means that if we reverse the direction of the arrows and use a depth first traversal of the graph starting from the _ends_ of the chunk, then we have a stable ordering. For example, with the arrows reversed the first DAG becomes: 159 | 160 | ```mermaid 161 | graph LR 162 | b --> a 163 | e --> c 164 | c --> a 165 | ``` 166 | 167 | The depth first traversal of this graph, starting from `b`, is `b,a` and from `e` is `e,c,a`. Then, after the concurrent change we have: 168 | 169 | ```mermaid 170 | graph LR 171 | b --> a 172 | d --> c 173 | e --> c 174 | c --> a 175 | ``` 176 | 177 | Starting from `b` we still have `b,a` and from `e` we still have `e,c,a`, but now we also have `d` as a loose commit. 178 | 179 | ### Chunk Boundaries 180 | 181 | We want a way to divide up the linear order into chunks in such a way that everyone agrees on the chunk boundaries. We also want to be able to do this recursively, so that we choose the boundaries for lower strata consistently. We can do this by interpreting the hash of each commit as a number and using the number of trailing zeros in that number as the level of the chunk boundary. 182 | 183 | For example, if we have a commit with hash `0xbce71a3b59784f0d507fd66abeb8d95e6bb2f2d606ff159ae01f8c719b2e0000` then we can say that this is the boundary of a level 4 stratum due to the four trailing zeros. Because hashes are distributed uniformly (or else we have other problems), the chance of any particular character in some hash being `0` is $\frac{1}{10}$ and so the chance of having $n$ trailing zeros is $10^{-n}$, which means that we will have a hash boundary approximately every $10^{n}$ changes. 
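As a rough illustration of this rule, the level of a candidate boundary can be computed by counting the trailing zero digits of the hash read as a number in a chosen base. This is only a sketch: the function below and its use of the `num` crate's `BigUint` are illustrative, not necessarily how `src/sedimentree.rs` computes levels.

```rust
use num::BigUint;

// Count the trailing zero digits of a 32-byte hash when it is interpreted as a
// big-endian integer in `base`. Under the scheme described above, a commit whose
// count is non-zero is a chunk boundary, and the count gives its stratum level.
fn trailing_zero_digits(hash: &[u8; 32], base: u32) -> u32 {
    let mut n = BigUint::from_bytes_be(&hash[..]);
    let base = BigUint::from(base);
    let zero = BigUint::from(0u8);
    let mut count = 0;
    while n != zero && &n % &base == zero {
        n = &n / &base;
        count += 1;
    }
    count
}
```

Raising the base makes boundaries rarer, which is the knob the next paragraph refers to.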
184 | 185 | We are not forced to stick with base 10: we can interpret the hash as a number in some base $b$, and then the number of trailing zeros in that base will give us a boundary approximately every $b^{n}$ changes. 186 | 187 | ### Supporting Stratum And Checkpoint Commits 188 | 189 | A stratum $x$ supports another stratum $y$ whenever $x$ contains all the commits in $y$. It is important for sedimentree sync to be able to determine whether one stratum supports another in order to be able to determine the minimal sedimentree. 190 | 191 | To this point I have talked about the boundaries of a stratum, but a start and an end hash are not enough to determine whether one stratum supports another without additional information. Consider this sedimentree: 192 | 193 | ![](attachments/sedimentree-4.png) 194 | 195 | In this example the ghosted out boxes represent commits (in the case of square boxes with a letter) and strata (in the case of rectangles) which were used to derive the non-ghosted strata but which we don't have access to (maybe we never had them, maybe we discarded them). All we know is that we have some strata, one which starts at `A` and ends at `F` (the larger one), one which starts at `A` and ends at `C`, and one which starts at `G` and ends at `I`. How can we know which of the smaller strata the large one supports? 196 | 197 | To solve this we add the concept of "checkpoint commits". A checkpoint commit is a commit hash which would be the boundary of the smallest stratum in the system. For example, if we are producing strata for every commit that begins with two zeros, then every commit hash which begins with two zeros is a checkpoint commit. We never discard checkpoint commits, which means that a stratum is now defined by its start and end hash _and_ the checkpoint commits in its interior. 198 | 199 | ![](attachments/sedimentree-5.png) 200 | 201 | With checkpoint commits we can always determine the supporting relationship. All stratum boundaries are on checkpoint commits, so if stratum $x$ supports stratum $y$ then the start and end hashes of $y$ will be somewhere in the set (start hash of x, end hash of x, checkpoints of x). 202 | ### Loose Commits 203 | 204 | The reverse depth first traversal ordering allows us to group commits into strata. But what do we do about commits for which we don't yet have a stratum boundary? Consider this DAG (with arrows from parents to children): 205 | 206 | ```mermaid 207 | graph LR 208 | a --> b 209 | b --> c 210 | a --> d 211 | ``` 212 | 213 | Let's say we have stratum boundaries `a,c`. Then we have one chunk which is `c,b,a`, but `d` doesn't belong in any stratum yet because there are no stratum boundaries which are children of it. This means we must store and transmit the commit as is. However, the commit on its own is not enough because we also need to be able to determine if, given some stratum $x$, the commit is supported by the stratum so that we can discard loose commits when we receive strata which cover them. 214 | 215 | As with strata, just knowing the hash of a commit isn't enough to know whether it is supported by some stratum, so for loose commits we must ensure that we always retain all the commits linking the original commit back to any stratum boundaries which are its parents. 216 | 217 | ## Syncing a Sedimentree 218 | 219 | Sedimentree sync starts by requesting from a remote peer a "summary" of the minimal sedimentree according to that remote. 
Having received this summary we will know that there are certain ranges of the tree which we do not have and how large the data representing that section is. At this point we can decide to recurse into the missing structure to see if there are smaller sections of it to download at the cost of an extra round trip, or just download the whole missing part, accepting that we might download some data we already have. Once we have completed this process we know exactly which blobs we need to download from the remote in order to be in sync and we also know what blobs we need to upload in order for them to be in sync. 220 | 221 | #### Sedimentree Summaries 222 | 223 | The summary contains the boundaries of the strata in the tree as well as any loose commits. Importantly the summary _does not_ contain the internal checkpoint hashes of any of the strata or any of the actual data in the strata or commits. 224 | 225 | Omitting the checkpoint hashes is necessary because otherwise we would have to transmit all the checkpoint hashes in the document. If we use the first two leading zeros in base 10 as our strata boundary then this would mean we're sending approximately 1% of the hashes in the document every time you sync. A hash is 32 bytes and it is quite normal for a document to contain hundreds of thousands of changes, for large documents this would mean sending multiple megabytes of data just to get the sedimentree. 226 | 227 | For loose commits we can also omit all but the end of the commit chain and a count from the summary. 228 | 229 | -------------------------------------------------------------------------------- /src/blob.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use crate::{leb128, parse}; 4 | 5 | #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, Hash)] 6 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 7 | pub struct BlobMeta { 8 | hash: BlobHash, 9 | size_bytes: u64, 10 | } 11 | 12 | impl BlobMeta { 13 | pub(crate) fn new(contents: &[u8]) -> Self { 14 | let hash = BlobHash::hash_of(contents); 15 | let size_bytes = contents.len() as u64; 16 | Self { hash, size_bytes } 17 | } 18 | 19 | pub(crate) fn parse( 20 | input: parse::Input<'_>, 21 | ) -> Result<(parse::Input<'_>, BlobMeta), parse::ParseError> { 22 | input.with_context("BlobMeta", |input| { 23 | let (input, hash) = BlobHash::parse(input)?; 24 | let (input, size_bytes) = leb128::parse(input)?; 25 | Ok((input, BlobMeta { hash, size_bytes })) 26 | }) 27 | } 28 | 29 | pub(crate) fn encode(&self, buf: &mut Vec) { 30 | self.hash.encode(buf); 31 | leb128::encode_uleb128(buf, self.size_bytes); 32 | } 33 | 34 | pub fn hash(&self) -> BlobHash { 35 | self.hash 36 | } 37 | 38 | pub fn size_bytes(&self) -> u64 { 39 | self.size_bytes 40 | } 41 | } 42 | 43 | #[derive(Clone, Copy, PartialEq, Eq, serde::Serialize, Hash)] 44 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 45 | pub struct BlobHash([u8; 32]); 46 | 47 | impl std::fmt::Debug for BlobHash { 48 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 49 | write!(f, "BlobHash({})", crate::hex::encode(&self.0)) 50 | } 51 | } 52 | 53 | impl BlobHash { 54 | pub(crate) fn hash_of(data: &[u8]) -> Self { 55 | let hash = blake3::hash(data); 56 | let mut bytes = [0; 32]; 57 | bytes.copy_from_slice(hash.as_bytes()); 58 | Self(bytes) 59 | } 60 | 61 | pub(crate) fn parse( 62 | input: parse::Input<'_>, 63 | ) -> Result<(parse::Input<'_>, BlobHash), parse::ParseError> { 64 | 
input.with_context("BlobHash", |input| { 65 | let (input, hash_bytes) = parse::arr::<32>(input)?; 66 | Ok((input, BlobHash::from(hash_bytes))) 67 | }) 68 | } 69 | 70 | pub(crate) fn encode(&self, buf: &mut Vec) { 71 | buf.extend_from_slice(&self.0); 72 | } 73 | } 74 | 75 | impl From<[u8; 32]> for BlobHash { 76 | fn from(bytes: [u8; 32]) -> Self { 77 | Self(bytes) 78 | } 79 | } 80 | 81 | impl std::fmt::Display for BlobHash { 82 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 83 | crate::hex::encode(&self.0).fmt(f) 84 | } 85 | } 86 | 87 | impl FromStr for BlobHash { 88 | type Err = error::InvalidBlobHash; 89 | 90 | fn from_str(s: &str) -> Result { 91 | let bytes = crate::hex::decode(s).map_err(error::InvalidBlobHash::InvalidHex)?; 92 | if bytes.len() != 32 { 93 | return Err(error::InvalidBlobHash::InvalidLength); 94 | } 95 | let mut hash = [0; 32]; 96 | hash.copy_from_slice(&bytes); 97 | Ok(BlobHash(hash)) 98 | } 99 | } 100 | 101 | mod error { 102 | use crate::parse; 103 | 104 | pub enum InvalidBlobHash { 105 | NotEnoughInput, 106 | InvalidHex(crate::hex::FromHexError), 107 | InvalidLength, 108 | } 109 | 110 | impl From for InvalidBlobHash { 111 | fn from(_value: parse::NotEnoughInput) -> Self { 112 | Self::NotEnoughInput 113 | } 114 | } 115 | 116 | impl std::fmt::Display for InvalidBlobHash { 117 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 118 | match self { 119 | Self::NotEnoughInput => write!(f, "Not enough input"), 120 | Self::InvalidHex(err) => write!(f, "Invalid hex: {}", err), 121 | Self::InvalidLength => write!(f, "Invalid length"), 122 | } 123 | } 124 | } 125 | 126 | impl std::fmt::Debug for InvalidBlobHash { 127 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 128 | std::fmt::Display::fmt(self, f) 129 | } 130 | } 131 | 132 | impl std::error::Error for InvalidBlobHash {} 133 | 134 | pub enum InvalidBlobMeta { 135 | NotEnoughInput, 136 | InvalidBlobHash(InvalidBlobHash), 137 | } 138 | 139 | impl From for InvalidBlobMeta { 140 | fn from(_value: parse::NotEnoughInput) -> Self { 141 | Self::NotEnoughInput 142 | } 143 | } 144 | 145 | impl From for InvalidBlobMeta { 146 | fn from(value: InvalidBlobHash) -> Self { 147 | Self::InvalidBlobHash(value) 148 | } 149 | } 150 | 151 | impl std::fmt::Display for InvalidBlobMeta { 152 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 153 | match self { 154 | Self::NotEnoughInput => write!(f, "Not enough input"), 155 | Self::InvalidBlobHash(e) => write!(f, "Invalid blob hash: {}", e), 156 | } 157 | } 158 | } 159 | 160 | impl std::fmt::Debug for InvalidBlobMeta { 161 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 162 | std::fmt::Display::fmt(self, f) 163 | } 164 | } 165 | 166 | impl std::error::Error for InvalidBlobMeta {} 167 | } 168 | -------------------------------------------------------------------------------- /src/commit.rs: -------------------------------------------------------------------------------- 1 | use crate::{hex, parse}; 2 | 3 | pub use error::InvalidCommitHash; 4 | 5 | #[derive(Clone, Copy, Eq, Hash, PartialEq, Ord, PartialOrd, serde::Serialize)] 6 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 7 | pub struct CommitHash([u8; 32]); 8 | 9 | impl CommitHash { 10 | pub fn as_bytes(&self) -> [u8; 32] { 11 | self.0 12 | } 13 | 14 | pub(crate) fn parse( 15 | input: parse::Input<'_>, 16 | ) -> Result<(parse::Input<'_>, CommitHash), parse::ParseError> { 17 | input.with_context("CommitHash", |input| { 18 | let (input, 
hash_bytes) = parse::arr::<32>(input)?; 19 | Ok((input, CommitHash::from(hash_bytes))) 20 | }) 21 | } 22 | 23 | pub(crate) fn encode(&self, buf: &mut Vec) { 24 | buf.extend_from_slice(&self.0); 25 | } 26 | } 27 | 28 | impl std::fmt::Display for CommitHash { 29 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 30 | hex::encode(&self.0).fmt(f) 31 | } 32 | } 33 | 34 | impl std::fmt::Debug for CommitHash { 35 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 36 | std::fmt::Display::fmt(self, f) 37 | } 38 | } 39 | 40 | impl From<[u8; 32]> for CommitHash { 41 | fn from(value: [u8; 32]) -> Self { 42 | CommitHash(value) 43 | } 44 | } 45 | 46 | impl<'a> From<&'a [u8; 32]> for CommitHash { 47 | fn from(value: &'a [u8; 32]) -> Self { 48 | CommitHash(value.clone()) 49 | } 50 | } 51 | 52 | impl std::str::FromStr for CommitHash { 53 | type Err = hex::FromHexError; 54 | 55 | fn from_str(s: &str) -> Result { 56 | let bytes = hex::decode(s)?; 57 | if bytes.len() == 32 { 58 | let mut id = [0; 32]; 59 | id.copy_from_slice(&bytes); 60 | Ok(CommitHash(id)) 61 | } else { 62 | Err(hex::FromHexError::InvalidStringLength) 63 | } 64 | } 65 | } 66 | 67 | impl<'a> TryFrom<&'a [u8]> for CommitHash { 68 | type Error = error::InvalidCommitHash; 69 | 70 | fn try_from(value: &'a [u8]) -> Result { 71 | if value.len() == 32 { 72 | let mut id = [0; 32]; 73 | id.copy_from_slice(value); 74 | Ok(CommitHash(id)) 75 | } else { 76 | Err(error::InvalidCommitHash(value.len())) 77 | } 78 | } 79 | } 80 | 81 | #[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize)] 82 | pub struct Commit { 83 | parents: Vec, 84 | contents: Vec, 85 | hash: CommitHash, 86 | } 87 | 88 | #[cfg(test)] 89 | impl<'a> arbitrary::Arbitrary<'a> for Commit { 90 | fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { 91 | let parents = Vec::::arbitrary(u)?; 92 | let contents = Vec::::arbitrary(u)?; 93 | let hash = [u8::arbitrary(u)?; 32]; 94 | Ok(Commit::new(parents, contents, hash.into())) 95 | } 96 | } 97 | 98 | impl Commit { 99 | pub fn new(parents: Vec, contents: Vec, hash: CommitHash) -> Self { 100 | Commit { 101 | parents, 102 | hash, 103 | contents, 104 | } 105 | } 106 | 107 | pub fn parents(&self) -> &[CommitHash] { 108 | &self.parents 109 | } 110 | 111 | pub fn contents(&self) -> &[u8] { 112 | &self.contents 113 | } 114 | 115 | pub fn hash(&self) -> CommitHash { 116 | self.hash 117 | } 118 | } 119 | 120 | #[derive(Debug, Clone, PartialEq, Eq)] 121 | pub struct CommitBundle { 122 | bundled_commits: Vec, 123 | start: Option, 124 | end: CommitHash, 125 | checkpoints: Vec, 126 | } 127 | 128 | impl CommitBundle { 129 | pub fn builder() -> BundleBuilder { 130 | BundleBuilder::new() 131 | } 132 | 133 | pub fn bundled_commits(&self) -> &[u8] { 134 | &self.bundled_commits 135 | } 136 | 137 | pub fn start(&self) -> Option { 138 | self.start 139 | } 140 | 141 | pub fn end(&self) -> CommitHash { 142 | self.end 143 | } 144 | 145 | pub fn checkpoints(&self) -> &[CommitHash] { 146 | &self.checkpoints 147 | } 148 | } 149 | 150 | pub struct Set(T); 151 | pub struct UnSet; 152 | 153 | pub struct BundleBuilder { 154 | start: Start, 155 | end: End, 156 | commits: Commits, 157 | checkpoints: Vec, 158 | } 159 | 160 | impl BundleBuilder { 161 | fn new() -> Self { 162 | BundleBuilder { 163 | start: UnSet, 164 | end: UnSet, 165 | commits: UnSet, 166 | checkpoints: vec![], 167 | } 168 | } 169 | } 170 | 171 | impl BundleBuilder { 172 | pub fn start(self, start: Option) -> BundleBuilder>, U, V> { 173 | BundleBuilder { 174 | 
start: Set(start), 175 | end: self.end, 176 | commits: self.commits, 177 | checkpoints: self.checkpoints, 178 | } 179 | } 180 | 181 | pub fn end(self, end: CommitHash) -> BundleBuilder, V> { 182 | BundleBuilder { 183 | start: self.start, 184 | end: Set(end), 185 | commits: self.commits, 186 | checkpoints: self.checkpoints, 187 | } 188 | } 189 | 190 | pub fn bundled_commits(self, commits: Vec) -> BundleBuilder>> { 191 | BundleBuilder { 192 | start: self.start, 193 | end: self.end, 194 | commits: Set(commits), 195 | checkpoints: self.checkpoints, 196 | } 197 | } 198 | 199 | pub fn checkpoints(self, checkpoints: Vec) -> Self { 200 | BundleBuilder { 201 | start: self.start, 202 | end: self.end, 203 | commits: self.commits, 204 | checkpoints, 205 | } 206 | } 207 | } 208 | 209 | impl BundleBuilder>, Set, Set>> { 210 | pub fn build(self) -> CommitBundle { 211 | CommitBundle { 212 | start: self.start.0, 213 | end: self.end.0, 214 | bundled_commits: self.commits.0, 215 | checkpoints: self.checkpoints, 216 | } 217 | } 218 | } 219 | 220 | #[derive(Debug, Clone, PartialEq, Eq)] 221 | pub enum CommitOrBundle { 222 | Commit(Commit), 223 | Bundle(CommitBundle), 224 | } 225 | 226 | mod error { 227 | pub struct InvalidCommitHash(pub(super) usize); 228 | 229 | impl std::fmt::Display for InvalidCommitHash { 230 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 231 | write!(f, "Invalid length {} for commit hash, expected 32", self.0) 232 | } 233 | } 234 | 235 | impl std::fmt::Debug for InvalidCommitHash { 236 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 237 | std::fmt::Display::fmt(self, f) 238 | } 239 | } 240 | 241 | impl std::error::Error for InvalidCommitHash {} 242 | } 243 | -------------------------------------------------------------------------------- /src/effects.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | borrow::BorrowMut, 3 | cell::{Ref, RefCell, RefMut}, 4 | collections::{HashMap, HashSet}, 5 | future::Future, 6 | rc::Rc, 7 | sync::Arc, 8 | task::{self, Waker}, 9 | }; 10 | 11 | use crate::{ 12 | io::{IoResult, IoResultPayload, IoTask}, 13 | messages::{FetchedSedimentree, Notification, UploadItem}, 14 | riblt::{self, doc_and_heads::CodedDocAndHeadsSymbol}, 15 | snapshots::{self}, 16 | subscriptions, BlobHash, CommitCategory, DocEvent, DocumentId, IoTaskId, PeerId, Request, 17 | RequestId, Response, SnapshotId, StorageKey, Task, 18 | }; 19 | 20 | pub(crate) struct State { 21 | pub(crate) io: Io, 22 | our_peer_id: PeerId, 23 | snapshots: HashMap, 24 | log: subscriptions::Log, 25 | subscriptions: subscriptions::Subscriptions, 26 | rng: R, 27 | } 28 | 29 | impl State { 30 | pub(crate) fn new(rng: R, our_peer_id: PeerId) -> Self { 31 | Self { 32 | our_peer_id: our_peer_id.clone(), 33 | io: Io { 34 | load_range: JobTracker::new(), 35 | load: JobTracker::new(), 36 | put: JobTracker::new(), 37 | delete: JobTracker::new(), 38 | requests: JobTracker::new(), 39 | asks: JobTracker::new(), 40 | wakers: Rc::new(RefCell::new(HashMap::new())), 41 | emitted_doc_events: Vec::new(), 42 | pending_puts: HashMap::new(), 43 | }, 44 | log: subscriptions::Log::new(), 45 | subscriptions: subscriptions::Subscriptions::new(our_peer_id), 46 | snapshots: HashMap::new(), 47 | rng, 48 | } 49 | } 50 | 51 | pub(crate) fn log(&mut self) -> &mut subscriptions::Log { 52 | &mut self.log 53 | } 54 | 55 | pub(crate) fn new_notifications(&mut self) -> HashMap> { 56 | self.subscriptions.new_events(&self.log) 57 | } 58 | 59 | fn task_fut 
<T, F: FnOnce(&mut Io) -> Rc<RefCell<Option<T>>>>(
60 | this: Rc<RefCell<Self>>,
61 | task: Task,
62 | f: F,
63 | ) -> TaskFuture<T> {
64 | let state = RefCell::borrow_mut(&this);
65 | let mut io = RefMut::map(state, |s| &mut s.io);
66 | let result = f(&mut *io);
67 | let wakers = io.wakers.clone();
68 | TaskFuture {
69 | result,
70 | wakers,
71 | task,
72 | }
73 | }
74 | }
75 | 
76 | pub(crate) struct Io {
77 | load_range: JobTracker<IoTaskId, StorageKey, HashMap<StorageKey, Vec<u8>>>,
78 | load: JobTracker<IoTaskId, StorageKey, Option<Vec<u8>>>,
79 | put: JobTracker<IoTaskId, (StorageKey, Vec<u8>), ()>,
80 | delete: JobTracker<IoTaskId, StorageKey, ()>,
81 | requests: JobTracker<RequestId, OutgoingRequest, IncomingResponse>,
82 | asks: JobTracker<IoTaskId, DocumentId, HashSet<PeerId>>,
83 | emitted_doc_events: Vec<DocEvent>,
84 | // We don't actually use wakers at all, we keep track of the top level task
85 | // to wake up when a job completes in each JobTracker. However, the
86 | // contract of the `Future` trait is that when a task is due to be woken up
87 | // then the runtime will call its waker. This is used by combinators like
88 | // `future::join_all` which manage a set of futures. These combinators
89 | // will pass their own waker to the futures they manage and then only poll
90 | // the managed futures when the waker they passed in is woken. This means
91 | // that we need to hold on to the wakers for each task and wake them even
92 | // though we don't use this mechanism ourselves.
93 | wakers: Rc<RefCell<HashMap<Task, Vec<Waker>>>>,
94 | pending_puts: HashMap<IoTaskId, (StorageKey, Vec<u8>)>,
95 | }
96 | 
97 | impl Io {
98 | pub(crate) fn io_complete(&mut self, result: IoResult) -> Vec<Task> {
99 | let id = result.id();
100 | let completed_tasks = match result.take_payload() {
101 | IoResultPayload::Load(payload) => self.load.complete_job(id, payload),
102 | IoResultPayload::Put => {
103 | self.pending_puts.remove(&id);
104 | self.put.complete_job(id, ())
105 | }
106 | IoResultPayload::Delete => self.delete.complete_job(id, ()),
107 | IoResultPayload::LoadRange(payload) => self.load_range.complete_job(id, payload),
108 | IoResultPayload::Ask(peers) => self.asks.complete_job(id, peers),
109 | };
110 | self.process_completed_tasks(&completed_tasks);
111 | 
112 | completed_tasks
113 | }
114 | 
115 | pub(crate) fn response_received(&mut self, response: IncomingResponse) -> Vec<Task> {
116 | let completed_tasks = self.requests.complete_job(response.id, response);
117 | self.process_completed_tasks(&completed_tasks);
118 | completed_tasks
119 | }
120 | 
121 | fn process_completed_tasks(&mut self, completed_tasks: &[Task]) {
122 | let mut wakers_by_taskid = RefCell::borrow_mut(&mut self.wakers);
123 | for initiator in completed_tasks.iter() {
124 | if let Some(mut wakers) = wakers_by_taskid.remove(initiator) {
125 | for waker in wakers.drain(..)
{ 126 | waker.wake(); 127 | } 128 | } 129 | } 130 | } 131 | 132 | pub(crate) fn pop_new_tasks(&mut self) -> Vec { 133 | let mut result = Vec::new(); 134 | 135 | result.extend( 136 | self.load 137 | .pop_new_jobs() 138 | .into_iter() 139 | .map(|(task_id, key)| IoTask::load(task_id, key)), 140 | ); 141 | result.extend( 142 | self.load_range 143 | .pop_new_jobs() 144 | .into_iter() 145 | .map(|(task_id, prefix)| IoTask::load_range(task_id, prefix)), 146 | ); 147 | result.extend( 148 | self.delete 149 | .pop_new_jobs() 150 | .into_iter() 151 | .map(|(task_id, key)| IoTask::delete(task_id, key)), 152 | ); 153 | result.extend( 154 | self.put 155 | .pop_new_jobs() 156 | .into_iter() 157 | .map(|(task_id, (key, data))| IoTask::put(task_id, key, data)), 158 | ); 159 | result.extend( 160 | self.asks 161 | .pop_new_jobs() 162 | .into_iter() 163 | .map(|(task_id, doc_id)| IoTask::ask(task_id, doc_id)), 164 | ); 165 | result 166 | } 167 | 168 | pub(crate) fn pop_new_requests(&mut self) -> Vec<(RequestId, OutgoingRequest)> { 169 | self.requests.pop_new_jobs() 170 | } 171 | 172 | pub(crate) fn pop_new_notifications(&mut self) -> Vec { 173 | std::mem::take(&mut self.emitted_doc_events) 174 | } 175 | } 176 | 177 | pub(super) struct OutgoingRequest { 178 | pub(super) target: PeerId, 179 | pub(super) request: Request, 180 | } 181 | 182 | pub(crate) struct IncomingResponse { 183 | pub(super) id: RequestId, 184 | pub(super) response: Response, 185 | } 186 | 187 | pub(crate) struct JobTracker { 188 | new: Vec<(Descriptor, Payload)>, 189 | running: HashMap>>>, 190 | initiators_by_job: HashMap>, 191 | } 192 | 193 | impl 194 | JobTracker 195 | { 196 | pub(crate) fn new() -> Self { 197 | Self { 198 | new: Vec::new(), 199 | running: HashMap::new(), 200 | initiators_by_job: HashMap::new(), 201 | } 202 | } 203 | 204 | pub(crate) fn run( 205 | &mut self, 206 | initiator: Task, 207 | descriptor: Descriptor, 208 | payload: Payload, 209 | ) -> Rc>> { 210 | if self.running.contains_key(&descriptor) { 211 | self.initiators_by_job 212 | .entry(descriptor.clone()) 213 | .or_default() 214 | .insert(initiator); 215 | return self.running.get(&descriptor).unwrap().clone(); 216 | } else { 217 | let result = Rc::new(RefCell::new(None)); 218 | self.new.push((descriptor.clone(), payload)); 219 | self.running.insert(descriptor.clone(), result.clone()); 220 | self.initiators_by_job 221 | .entry(descriptor.clone()) 222 | .or_default() 223 | .insert(initiator); 224 | result 225 | } 226 | } 227 | 228 | pub(crate) fn pop_new_jobs(&mut self) -> Vec<(Descriptor, Payload)> { 229 | std::mem::take(&mut self.new) 230 | } 231 | 232 | pub(crate) fn complete_job(&mut self, descriptor: Descriptor, result: Result) -> Vec { 233 | if let Some(mut running) = self.running.remove(&descriptor) { 234 | running.borrow_mut().replace(Some(result)); 235 | } else { 236 | #[cfg(debug_assertions)] 237 | panic!("job not found"); 238 | 239 | #[cfg(not(debug_assertions))] 240 | tracing::warn!("job not found"); 241 | }; 242 | 243 | if let Some(initiators) = self.initiators_by_job.remove(&descriptor) { 244 | initiators.into_iter().collect() 245 | } else { 246 | #[cfg(debug_assertions)] 247 | panic!("initiators not found for job"); 248 | #[cfg(not(debug_assertions))] 249 | { 250 | tracing::warn!("initiators for job not found"); 251 | return Vec::new(); 252 | } 253 | } 254 | } 255 | } 256 | 257 | pub(crate) struct TaskEffects { 258 | task: Task, 259 | state: Rc>>, 260 | } 261 | 262 | impl std::clone::Clone for TaskEffects { 263 | fn clone(&self) -> Self { 264 | Self { 
265 | task: self.task, 266 | state: self.state.clone(), 267 | } 268 | } 269 | } 270 | 271 | impl TaskEffects { 272 | pub(crate) fn new>(task: I, state: Rc>>) -> Self { 273 | Self { 274 | task: task.into(), 275 | state, 276 | } 277 | } 278 | 279 | pub(crate) fn load(&self, key: StorageKey) -> impl Future>> { 280 | let task_id = IoTaskId::new(); 281 | State::task_fut(self.state.clone(), self.task, |io| { 282 | io.load.run(self.task, task_id, key) 283 | }) 284 | } 285 | 286 | pub(crate) fn load_range( 287 | &self, 288 | prefix: StorageKey, 289 | ) -> impl Future>> + 'static { 290 | let task_id = IoTaskId::new(); 291 | let cached = RefCell::borrow(&self.state) 292 | .io 293 | .pending_puts 294 | .values() 295 | .filter_map({ 296 | let prefix = prefix.clone(); 297 | move |(key, value)| { 298 | if prefix.is_prefix_of(key) { 299 | Some((key.clone(), value.clone())) 300 | } else { 301 | None 302 | } 303 | } 304 | }) 305 | .collect::>(); 306 | tracing::trace!(?prefix, "loading range"); 307 | let load = State::task_fut(self.state.clone(), self.task, move |io| { 308 | io.load_range.run(self.task, task_id, prefix) 309 | }); 310 | async move { 311 | let stored = load.await; 312 | stored.into_iter().chain(cached).collect() 313 | } 314 | } 315 | 316 | pub(crate) fn put(&self, key: StorageKey, value: Vec) -> impl Future { 317 | tracing::trace!(?key, num_bytes = value.len(), "putting"); 318 | let task_id = IoTaskId::new(); 319 | RefCell::borrow_mut(&self.state) 320 | .io 321 | .pending_puts 322 | .insert(task_id, (key.clone(), value.clone())); 323 | let fut = State::task_fut(self.state.clone(), self.task, |io| { 324 | io.put.run(self.task, task_id, (key, value)) 325 | }); 326 | fut 327 | } 328 | 329 | #[allow(dead_code)] 330 | pub(crate) fn delete(&self, key: StorageKey) -> impl Future { 331 | let task_id = IoTaskId::new(); 332 | let fut = State::task_fut(self.state.clone(), self.task, |io| { 333 | io.delete.run(self.task, task_id, key) 334 | }); 335 | async move { 336 | fut.await; 337 | } 338 | } 339 | 340 | fn request(&self, from: PeerId, request: Request) -> impl Future { 341 | let request_id = RequestId::new(&mut *self.rng()); 342 | let request = OutgoingRequest { 343 | target: from, 344 | request, 345 | }; 346 | State::task_fut(self.state.clone(), self.task, |io| { 347 | io.requests.run(self.task, request_id, request) 348 | }) 349 | } 350 | 351 | pub(crate) fn upload_commits( 352 | &self, 353 | to_peer: PeerId, 354 | dag: DocumentId, 355 | data: Vec, 356 | category: CommitCategory, 357 | ) -> impl Future> { 358 | let request = Request::UploadCommits { 359 | doc: dag, 360 | data, 361 | category, 362 | }; 363 | let task = self.request(to_peer, request); 364 | async move { 365 | let response = task.await; 366 | match response.response { 367 | crate::Response::UploadCommits => Ok(()), 368 | crate::Response::Error(err) => Err(RpcError::ErrorReported(err)), 369 | _ => Err(RpcError::IncorrectResponseType), 370 | } 371 | } 372 | } 373 | 374 | pub(crate) fn fetch_blob_part( 375 | &self, 376 | from_peer: PeerId, 377 | blob: BlobHash, 378 | start: u64, 379 | length: u64, 380 | ) -> impl Future, RpcError>> { 381 | let request = Request::FetchBlobPart { 382 | blob, 383 | offset: start, 384 | length, 385 | }; 386 | let task = self.request(from_peer, request); 387 | async move { 388 | let response = task.await; 389 | match response.response { 390 | crate::Response::FetchBlobPart(data) => Ok(data), 391 | crate::Response::Error(err) => Err(RpcError::ErrorReported(err)), 392 | _ => Err(RpcError::IncorrectResponseType), 
393 | } 394 | } 395 | } 396 | 397 | pub(crate) fn fetch_sedimentrees( 398 | &self, 399 | from_peer: PeerId, 400 | doc: DocumentId, 401 | ) -> impl Future> { 402 | let request = Request::FetchSedimentree(doc); 403 | let task = self.request(from_peer, request); 404 | async move { 405 | let response = task.await; 406 | match response.response { 407 | crate::Response::FetchSedimentree(result) => Ok(result), 408 | crate::Response::Error(err) => Err(RpcError::ErrorReported(err)), 409 | _ => Err(RpcError::IncorrectResponseType), 410 | } 411 | } 412 | } 413 | 414 | pub(crate) fn create_snapshot( 415 | &self, 416 | on_peer: PeerId, 417 | root_doc: DocumentId, 418 | ) -> impl Future< 419 | Output = Result< 420 | ( 421 | SnapshotId, 422 | Vec, 423 | ), 424 | RpcError, 425 | >, 426 | > { 427 | let request = Request::CreateSnapshot { root_doc }; 428 | let task = self.request(on_peer, request); 429 | async move { 430 | let response = task.await; 431 | match response.response { 432 | crate::Response::CreateSnapshot { 433 | snapshot_id, 434 | first_symbols, 435 | } => Ok((snapshot_id, first_symbols)), 436 | crate::Response::Error(err) => Err(RpcError::ErrorReported(err)), 437 | _ => Err(RpcError::IncorrectResponseType), 438 | } 439 | } 440 | } 441 | 442 | pub(crate) fn fetch_snapshot_symbols( 443 | &self, 444 | from_peer: PeerId, 445 | snapshot_id: SnapshotId, 446 | ) -> impl Future, RpcError>> { 447 | let request = Request::SnapshotSymbols { snapshot_id }; 448 | let task = self.request(from_peer, request); 449 | async move { 450 | let response = task.await; 451 | match response.response { 452 | crate::Response::SnapshotSymbols(symbols) => Ok(symbols), 453 | crate::Response::Error(err) => Err(RpcError::ErrorReported(err)), 454 | _ => Err(RpcError::IncorrectResponseType), 455 | } 456 | } 457 | } 458 | 459 | pub(crate) fn listen( 460 | &self, 461 | to_peer: PeerId, 462 | on_snapshot: SnapshotId, 463 | ) -> impl Future> { 464 | let request = Request::Listen(on_snapshot); 465 | let task = self.request(to_peer, request); 466 | async move { 467 | let response = task.await; 468 | match response.response { 469 | crate::Response::Listen => Ok(()), 470 | crate::Response::Error(err) => Err(RpcError::ErrorReported(err)), 471 | _ => Err(RpcError::IncorrectResponseType), 472 | } 473 | } 474 | } 475 | 476 | pub(crate) fn snapshots_mut<'a>( 477 | &'a mut self, 478 | ) -> RefMut< 479 | 'a, 480 | HashMap, 481 | > { 482 | let state = RefCell::borrow_mut(&self.state); 483 | RefMut::map(state, |s| &mut s.snapshots) 484 | } 485 | 486 | pub(crate) fn snapshots<'a>( 487 | &'a self, 488 | ) -> Ref<'a, HashMap> 489 | { 490 | let state = RefCell::borrow(&self.state); 491 | Ref::map(state, |s| &s.snapshots) 492 | } 493 | 494 | pub(crate) fn log<'a>(&'a mut self) -> RefMut<'a, subscriptions::Log> { 495 | let state = RefCell::borrow_mut(&self.state); 496 | RefMut::map(state, |s| &mut s.log) 497 | } 498 | 499 | pub(crate) fn subscriptions<'a>(&'a mut self) -> RefMut<'a, subscriptions::Subscriptions> { 500 | let state = RefCell::borrow_mut(&self.state); 501 | RefMut::map(state, |s| &mut s.subscriptions) 502 | } 503 | 504 | pub(crate) fn rng(&self) -> std::cell::RefMut<'_, R> { 505 | let state = RefCell::borrow_mut(&self.state); 506 | RefMut::map(state, |j| &mut j.rng) 507 | } 508 | 509 | pub(crate) fn our_peer_id(&self) -> std::cell::Ref<'_, PeerId> { 510 | let state = RefCell::borrow(&self.state); 511 | std::cell::Ref::map(state, |s: &State| &s.our_peer_id) 512 | } 513 | 514 | pub(crate) fn who_should_i_ask( 515 | &self, 516 | 
about_doc: DocumentId, 517 | ) -> impl Future> { 518 | let task_id = IoTaskId::new(); 519 | State::task_fut(self.state.clone(), self.task, |io| { 520 | io.asks.run(self.task, task_id, about_doc) 521 | }) 522 | } 523 | 524 | pub(crate) fn emit_doc_event(&self, evt: DocEvent) { 525 | let mut state = RefCell::borrow_mut(&self.state); 526 | state.io.emitted_doc_events.push(evt); 527 | } 528 | } 529 | 530 | pub(crate) enum RpcError { 531 | ErrorReported(String), 532 | IncorrectResponseType, 533 | } 534 | 535 | impl std::fmt::Display for RpcError { 536 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 537 | match self { 538 | RpcError::ErrorReported(err) => write!(f, "{}", err), 539 | RpcError::IncorrectResponseType => write!(f, "Incorrect response type"), 540 | } 541 | } 542 | } 543 | 544 | impl std::fmt::Debug for RpcError { 545 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 546 | write!(f, "{}", self) 547 | } 548 | } 549 | 550 | impl std::error::Error for RpcError {} 551 | 552 | struct TaskFuture { 553 | task: Task, 554 | result: Rc>>, 555 | wakers: Rc>>>, 556 | } 557 | 558 | impl Future for TaskFuture { 559 | type Output = T; 560 | 561 | fn poll( 562 | mut self: std::pin::Pin<&mut Self>, 563 | cx: &mut task::Context<'_>, 564 | ) -> task::Poll { 565 | let result = self.result.borrow_mut(); 566 | if let Some(result) = result.take() { 567 | task::Poll::Ready(result) 568 | } else { 569 | let mut wakers_by_task = RefCell::borrow_mut(&self.wakers); 570 | let wakers = wakers_by_task 571 | .entry(self.task) 572 | .or_insert_with(|| Vec::new()); 573 | wakers.push(cx.waker().clone()); 574 | task::Poll::Pending 575 | } 576 | } 577 | } 578 | 579 | pub(super) struct NoopWaker; 580 | 581 | impl task::Wake for NoopWaker { 582 | fn wake(self: Arc) {} 583 | } 584 | -------------------------------------------------------------------------------- /src/hex.rs: -------------------------------------------------------------------------------- 1 | pub(crate) fn encode(data: &[u8]) -> String { 2 | let mut result = String::with_capacity(data.len() * 2); 3 | for byte in data { 4 | result.push_str(&format!("{:02x}", byte)); 5 | } 6 | result 7 | } 8 | 9 | pub(crate) fn decode>(s: S) -> Result, FromHexError> { 10 | let s = s.as_ref(); 11 | if s.len() % 2 != 0 { 12 | return Err(FromHexError::InvalidStringLength); 13 | } 14 | 15 | let s = s.as_bytes(); 16 | 17 | s.chunks(2) 18 | .enumerate() 19 | .map(|(i, pair)| Ok(val(pair[0], 2 * i)? 
<< 4 | val(pair[1], 2 * i + 1)?)) 20 | .collect() 21 | } 22 | 23 | fn val(c: u8, idx: usize) -> Result { 24 | match c { 25 | b'A'..=b'F' => Ok(c - b'A' + 10), 26 | b'a'..=b'f' => Ok(c - b'a' + 10), 27 | b'0'..=b'9' => Ok(c - b'0'), 28 | _ => Err(FromHexError::InvalidHexCharacter(c as char, idx)), 29 | } 30 | } 31 | 32 | pub enum FromHexError { 33 | InvalidHexCharacter(char, usize), 34 | InvalidStringLength, 35 | } 36 | 37 | impl std::fmt::Debug for FromHexError { 38 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 39 | match self { 40 | FromHexError::InvalidHexCharacter(c, idx) => { 41 | write!(f, "Invalid hex character '{}' at index {}", c, idx) 42 | } 43 | FromHexError::InvalidStringLength => { 44 | write!(f, "Invalid string length") 45 | } 46 | } 47 | } 48 | } 49 | 50 | impl std::fmt::Display for FromHexError { 51 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 52 | std::fmt::Debug::fmt(self, f) 53 | } 54 | } 55 | 56 | impl std::error::Error for FromHexError {} 57 | 58 | #[cfg(test)] 59 | mod tests { 60 | #[test] 61 | fn hex_encoding_roundtrip() { 62 | bolero::check!() 63 | .with_arbitrary::>() 64 | .for_each(|bytes| { 65 | let encoded = super::encode(bytes); 66 | let decoded = super::decode(encoded).unwrap(); 67 | assert_eq!(bytes, &decoded); 68 | }); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/io.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::{HashMap, HashSet}, 3 | sync::atomic::{AtomicU64, Ordering}, 4 | }; 5 | 6 | use crate::{DocumentId, PeerId, StorageKey}; 7 | 8 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 9 | pub struct IoTaskId(u64); 10 | 11 | static LAST_IO_TASK_ID: AtomicU64 = AtomicU64::new(0); 12 | 13 | impl IoTaskId { 14 | pub(crate) fn new() -> IoTaskId { 15 | IoTaskId(LAST_IO_TASK_ID.fetch_add(1, Ordering::Relaxed)) 16 | } 17 | 18 | pub fn serialize(&self) -> String { 19 | self.0.to_string() 20 | } 21 | } 22 | 23 | impl std::str::FromStr for IoTaskId { 24 | type Err = std::num::ParseIntError; 25 | 26 | fn from_str(s: &str) -> Result { 27 | Ok(Self(s.parse()?)) 28 | } 29 | } 30 | 31 | #[derive(Debug)] 32 | pub struct IoTask { 33 | id: IoTaskId, 34 | action: IoAction, 35 | } 36 | 37 | impl IoTask { 38 | pub(crate) fn load(id: IoTaskId, key: StorageKey) -> IoTask { 39 | IoTask { 40 | id, 41 | action: IoAction::Load { key }, 42 | } 43 | } 44 | 45 | pub(crate) fn load_range(id: IoTaskId, prefix: StorageKey) -> IoTask { 46 | IoTask { 47 | id, 48 | action: IoAction::LoadRange { prefix }, 49 | } 50 | } 51 | 52 | pub(crate) fn put(id: IoTaskId, key: StorageKey, data: Vec) -> IoTask { 53 | IoTask { 54 | id, 55 | action: IoAction::Put { key, data }, 56 | } 57 | } 58 | 59 | pub(crate) fn delete(id: IoTaskId, key: StorageKey) -> IoTask { 60 | IoTask { 61 | id, 62 | action: IoAction::Delete { key }, 63 | } 64 | } 65 | 66 | pub(crate) fn ask(id: IoTaskId, doc: DocumentId) -> IoTask { 67 | IoTask { 68 | id, 69 | action: IoAction::Ask { about: doc }, 70 | } 71 | } 72 | 73 | pub fn action(&self) -> &IoAction { 74 | &self.action 75 | } 76 | 77 | pub fn take_action(self) -> IoAction { 78 | self.action 79 | } 80 | 81 | pub fn id(&self) -> IoTaskId { 82 | self.id 83 | } 84 | } 85 | 86 | #[derive(Debug)] 87 | pub enum IoAction { 88 | Load { key: StorageKey }, 89 | LoadRange { prefix: StorageKey }, 90 | Put { key: StorageKey, data: Vec }, 91 | Delete { key: StorageKey }, 92 | Ask { about: DocumentId }, 93 | } 94 | 
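// --- Illustrative sketch (editorial aside, not part of io.rs) --------------
// The `IoAction` enum above is the whole of Beelay's storage/IO surface: the
// host application receives `IoTask`s, performs the requested action, and
// feeds an `IoResult` (defined just below) back in. The following is a
// minimal sketch of such a driver. The `Storage` trait and `handle_io_task`
// are hypothetical names invented for illustration, and it is assumed that
// `IoTask`, `IoAction`, `IoResult`, `StorageKey` and `PeerId` are re-exported
// from the crate root; only those types come from this crate.

use std::collections::{HashMap, HashSet};

use beelay_core::{IoAction, IoResult, IoTask, PeerId, StorageKey};

/// Hypothetical host-side storage interface (an assumption, not part of beelay-core).
trait Storage {
    fn load(&self, key: &StorageKey) -> Option<Vec<u8>>;
    fn load_range(&self, prefix: &StorageKey) -> HashMap<StorageKey, Vec<u8>>;
    fn put(&mut self, key: StorageKey, data: Vec<u8>);
    fn delete(&mut self, key: &StorageKey);
}

/// Service a single task and produce the result to hand back to Beelay.
fn handle_io_task<S: Storage>(
    task: IoTask,
    store: &mut S,
    known_peers: &HashSet<PeerId>,
) -> IoResult {
    let id = task.id();
    match task.take_action() {
        IoAction::Load { key } => IoResult::load(id, store.load(&key)),
        IoAction::LoadRange { prefix } => IoResult::load_range(id, store.load_range(&prefix)),
        IoAction::Put { key, data } => {
            store.put(key, data);
            IoResult::put(id)
        }
        IoAction::Delete { key } => {
            store.delete(&key);
            IoResult::delete(id)
        }
        // `Ask` wants to know which peers might hold the given document; a real
        // host would consult its own routing information, here we just answer
        // with a fixed peer set.
        IoAction::Ask { about: _ } => IoResult::ask(id, known_peers.clone()),
    }
}
// --- End of illustrative sketch ---------------------------------------------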
95 | pub struct IoResult { 96 | id: IoTaskId, 97 | payload: IoResultPayload, 98 | } 99 | 100 | impl std::fmt::Debug for IoResult { 101 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 102 | let payload_desc = match &self.payload { 103 | IoResultPayload::Load(payload) => format!( 104 | "Load({})", 105 | payload 106 | .as_ref() 107 | .map(|b| format!("{} bytes", b.len())) 108 | .unwrap_or_else(|| "None".to_string()) 109 | ), 110 | IoResultPayload::LoadRange(payload) => format!("LoadRange({} keys)", payload.len()), 111 | IoResultPayload::Put => "Put".to_string(), 112 | IoResultPayload::Delete => "Delete".to_string(), 113 | IoResultPayload::Ask(peers) => format!("Ask({} peers)", peers.len()), 114 | }; 115 | f.debug_struct("IoResult") 116 | .field("id", &self.id) 117 | .field("payload", &payload_desc) 118 | .finish() 119 | } 120 | } 121 | 122 | impl IoResult { 123 | pub fn load(id: IoTaskId, payload: Option>) -> IoResult { 124 | IoResult { 125 | id, 126 | payload: IoResultPayload::Load(payload), 127 | } 128 | } 129 | 130 | pub fn load_range(id: IoTaskId, payload: HashMap>) -> IoResult { 131 | IoResult { 132 | id, 133 | payload: IoResultPayload::LoadRange(payload), 134 | } 135 | } 136 | 137 | pub fn put(id: IoTaskId) -> IoResult { 138 | IoResult { 139 | id, 140 | payload: IoResultPayload::Put, 141 | } 142 | } 143 | 144 | pub fn delete(id: IoTaskId) -> IoResult { 145 | IoResult { 146 | id, 147 | payload: IoResultPayload::Delete, 148 | } 149 | } 150 | 151 | pub fn ask(id: IoTaskId, peers: HashSet) -> IoResult { 152 | IoResult { 153 | id, 154 | payload: IoResultPayload::Ask(peers), 155 | } 156 | } 157 | 158 | pub(crate) fn take_payload(self) -> IoResultPayload { 159 | self.payload 160 | } 161 | 162 | pub fn id(&self) -> IoTaskId { 163 | self.id 164 | } 165 | } 166 | 167 | pub(crate) enum IoResultPayload { 168 | Load(Option>), 169 | LoadRange(HashMap>), 170 | Put, 171 | Delete, 172 | Ask(HashSet), 173 | } 174 | -------------------------------------------------------------------------------- /src/leb128.rs: -------------------------------------------------------------------------------- 1 | use crate::parse; 2 | 3 | pub(crate) fn encode_uleb128(buf: &mut Vec, mut val: u64) -> usize { 4 | let mut bytes_written = 0; 5 | loop { 6 | let mut byte = low_bits_of_u64(val); 7 | val >>= 7; 8 | if val != 0 { 9 | // More bytes to come, so set the continuation bit. 
10 | byte |= CONTINUATION_BIT; 11 | } 12 | 13 | buf.push(byte); 14 | bytes_written += 1; 15 | 16 | if val == 0 { 17 | return bytes_written; 18 | } 19 | } 20 | } 21 | 22 | pub(crate) fn parse(input: parse::Input<'_>) -> Result<(parse::Input<'_>, u64), parse::ParseError> { 23 | let mut res = 0; 24 | let mut shift = 0; 25 | let mut input = input; 26 | 27 | loop { 28 | let (i, byte) = parse::u8(input)?; 29 | input = i; 30 | res |= ((byte & 0x7F) as u64) << shift; 31 | shift += 7; 32 | 33 | if (byte & 0x80) == 0 { 34 | if shift > 64 && byte > 1 { 35 | return Err(input.error("LEB128 value too large")); 36 | } else if shift > 7 && byte == 0 { 37 | return Err(input.error("LEB128 value is overlong")); 38 | } 39 | return Ok((input, res)); 40 | } else if shift > 64 { 41 | return Err(input.error("LEB128 value too large")); 42 | } 43 | } 44 | } 45 | 46 | const CONTINUATION_BIT: u8 = 1 << 7; 47 | 48 | #[inline] 49 | fn low_bits_of_byte(byte: u8) -> u8 { 50 | byte & !CONTINUATION_BIT 51 | } 52 | 53 | #[inline] 54 | fn low_bits_of_u64(val: u64) -> u8 { 55 | let byte = val & (u8::MAX as u64); 56 | low_bits_of_byte(byte as u8) 57 | } 58 | 59 | pub(crate) mod signed { 60 | use crate::parse; 61 | 62 | pub fn encode(buf: &mut Vec, mut val: i64) { 63 | loop { 64 | let mut byte = val as u8; 65 | // Keep the sign bit for testing 66 | val >>= 6; 67 | let done = val == 0 || val == -1; 68 | if done { 69 | byte &= !super::CONTINUATION_BIT; 70 | } else { 71 | // Remove the sign bit 72 | val >>= 1; 73 | // More bytes to come, so set the continuation bit. 74 | byte |= super::CONTINUATION_BIT; 75 | } 76 | 77 | buf.push(byte); 78 | 79 | if done { 80 | return; 81 | } 82 | } 83 | } 84 | 85 | pub(crate) fn parse( 86 | input: parse::Input<'_>, 87 | ) -> Result<(parse::Input<'_>, i64), parse::ParseError> { 88 | let mut res = 0; 89 | let mut shift = 0; 90 | 91 | let mut input = input; 92 | let mut prev = 0; 93 | loop { 94 | let (i, byte) = parse::u8(input)?; 95 | input = i; 96 | res |= ((byte & 0x7F) as i64) << shift; 97 | shift += 7; 98 | 99 | if (byte & 0x80) == 0 { 100 | if shift > 64 && byte != 0 && byte != 0x7f { 101 | // the 10th byte (if present) must contain only the sign-extended sign bit 102 | return Err(input.error("LEB128 value too large")); 103 | } else if shift > 7 104 | && ((byte == 0 && prev & 0x40 == 0) || (byte == 0x7f && prev & 0x40 > 0)) 105 | { 106 | // overlong if the sign bit of penultimate byte has been extended 107 | return Err(input.error("LEB128 value is overlong")); 108 | } else if shift < 64 && byte & 0x40 > 0 { 109 | // sign extend negative numbers 110 | res |= -1 << shift; 111 | } 112 | return Ok((input, res)); 113 | } else if shift > 64 { 114 | return Err(input.error("LEB128 value too large")); 115 | } 116 | prev = byte; 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/messages.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | leb128::encode_uleb128, parse, riblt::doc_and_heads::CodedDocAndHeadsSymbol, 3 | sedimentree::SedimentreeSummary, BlobHash, CommitCategory, CommitHash, DocumentId, PeerId, 4 | RequestId, SnapshotId, 5 | }; 6 | 7 | mod decode; 8 | mod encode; 9 | mod encoding_types; 10 | pub use decode::DecodeError; 11 | pub mod stream; 12 | 13 | #[derive(Debug)] 14 | pub struct Envelope { 15 | pub(crate) sender: PeerId, 16 | pub(crate) recipient: PeerId, 17 | pub(crate) payload: Payload, 18 | } 19 | 20 | impl Envelope { 21 | pub fn new(sender: PeerId, recipient: PeerId, 
payload: Payload) -> Self { 22 | Self { 23 | sender, 24 | recipient, 25 | payload, 26 | } 27 | } 28 | 29 | pub fn sender(&self) -> &PeerId { 30 | &self.sender 31 | } 32 | 33 | pub fn recipient(&self) -> &PeerId { 34 | &self.recipient 35 | } 36 | 37 | pub fn payload(&self) -> &Payload { 38 | &self.payload 39 | } 40 | 41 | pub(crate) fn take_payload(self) -> Payload { 42 | self.payload 43 | } 44 | } 45 | 46 | // A wrapper around the message enum so we can keep Message private 47 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 48 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 49 | pub struct Payload(Message); 50 | 51 | impl Payload { 52 | pub(crate) fn new(message: Message) -> Self { 53 | Self(message) 54 | } 55 | 56 | pub fn encode(&self) -> Vec { 57 | encode::encode(self) 58 | } 59 | 60 | pub(crate) fn into_message(self) -> Message { 61 | self.0 62 | } 63 | } 64 | 65 | impl<'a> TryFrom<&'a [u8]> for Payload { 66 | type Error = decode::DecodeError; 67 | 68 | fn try_from(bytes: &'a [u8]) -> Result { 69 | let (msg, _) = decode::decode(bytes)?; 70 | Ok(msg) 71 | } 72 | } 73 | 74 | #[derive(Clone, PartialEq, Eq, serde::Serialize)] 75 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 76 | pub(crate) enum Message { 77 | Request(RequestId, Request), 78 | Response(RequestId, Response), 79 | Notification(Notification), 80 | } 81 | 82 | impl std::fmt::Debug for Message { 83 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 84 | match self { 85 | Message::Request(id, req) => write!(f, "Request(id={}, {})", id, req), 86 | Message::Response(id, resp) => write!(f, "Response(id={}, {})", id, resp), 87 | Message::Notification(notification) => write!(f, "Notification({})", notification), 88 | } 89 | } 90 | } 91 | 92 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 93 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 94 | pub(crate) enum Response { 95 | Error(String), 96 | UploadCommits, 97 | FetchSedimentree(FetchedSedimentree), 98 | FetchBlobPart(Vec), 99 | CreateSnapshot { 100 | snapshot_id: SnapshotId, 101 | first_symbols: Vec, 102 | }, 103 | SnapshotSymbols(Vec), 104 | Listen, 105 | } 106 | 107 | impl std::fmt::Display for Response { 108 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 109 | match self { 110 | Response::Error(desc) => write!(f, "Error({})", desc), 111 | Response::UploadCommits => write!(f, "UploadCommits"), 112 | Response::FetchSedimentree(r) => write!(f, "FetchSedimentree({:?})", r), 113 | Response::FetchBlobPart(_) => write!(f, "FetchBlobPart"), 114 | Response::CreateSnapshot { 115 | snapshot_id, 116 | first_symbols, 117 | } => { 118 | write!( 119 | f, 120 | "CreateSnapshot(snapshot_id: {:?}, first_symbols: ({} symbols))", 121 | snapshot_id, 122 | first_symbols.len() 123 | ) 124 | } 125 | Response::SnapshotSymbols(symbols) => { 126 | write!(f, "SnapshotSymbols({} symbols)", symbols.len()) 127 | } 128 | Response::Listen => write!(f, "Listen"), 129 | } 130 | } 131 | } 132 | 133 | #[derive(Debug, Clone, PartialEq, Eq)] 134 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 135 | #[derive(serde::Serialize)] 136 | pub(crate) enum FetchedSedimentree { 137 | NotFound, 138 | Found(ContentAndIndex), 139 | } 140 | 141 | impl FetchedSedimentree { 142 | pub(crate) fn parse( 143 | input: parse::Input<'_>, 144 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 145 | input.with_context("FetchedSedimentree", |input| { 146 | let (input, tag) = parse::u8(input)?; 147 | match tag { 148 | 0 => Ok((input, 
FetchedSedimentree::NotFound)), 149 | 1 => { 150 | let (input, content_bundles) = SedimentreeSummary::parse(input)?; 151 | let (input, index_bundles) = SedimentreeSummary::parse(input)?; 152 | Ok(( 153 | input, 154 | FetchedSedimentree::Found(ContentAndIndex { 155 | index: index_bundles, 156 | content: content_bundles, 157 | }), 158 | )) 159 | } 160 | _ => Err(input.error("unknown tag")), 161 | } 162 | }) 163 | } 164 | 165 | pub(crate) fn encode(&self, out: &mut Vec) { 166 | match self { 167 | FetchedSedimentree::NotFound => { 168 | out.push(0); 169 | } 170 | FetchedSedimentree::Found(ContentAndIndex { content, index }) => { 171 | out.push(1); 172 | content.encode(out); 173 | index.encode(out); 174 | } 175 | } 176 | } 177 | } 178 | 179 | #[derive(Debug, Clone, PartialEq, Eq)] 180 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 181 | #[derive(serde::Serialize)] 182 | pub(crate) struct ContentAndIndex { 183 | pub(crate) content: SedimentreeSummary, 184 | pub(crate) index: SedimentreeSummary, 185 | } 186 | 187 | #[derive(Debug, Clone, PartialEq, Eq)] 188 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 189 | #[derive(serde::Serialize)] 190 | pub(crate) enum Request { 191 | UploadBlob(Vec), 192 | UploadCommits { 193 | doc: DocumentId, 194 | data: Vec, 195 | category: CommitCategory, 196 | }, 197 | FetchSedimentree(DocumentId), 198 | FetchBlobPart { 199 | blob: crate::BlobHash, 200 | offset: u64, 201 | length: u64, 202 | }, 203 | CreateSnapshot { 204 | root_doc: DocumentId, 205 | }, 206 | SnapshotSymbols { 207 | snapshot_id: SnapshotId, 208 | }, 209 | Listen(SnapshotId), 210 | } 211 | 212 | impl std::fmt::Display for Request { 213 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 214 | match self { 215 | Request::UploadBlob(blob) => write!(f, "UploadBlob({} bytes)", blob.len()), 216 | Request::UploadCommits { .. 
} => write!(f, "UploadCommits"), 217 | Request::FetchSedimentree(doc_id) => write!(f, "FetchSedimentree({})", doc_id), 218 | Request::FetchBlobPart { 219 | blob, 220 | offset, 221 | length, 222 | } => write!(f, "FetchBlobPart({:?}, {}, {})", blob, offset, length), 223 | Request::CreateSnapshot { root_doc } => { 224 | write!(f, "CreateSnapshot({})", root_doc) 225 | } 226 | Request::SnapshotSymbols { snapshot_id } => { 227 | write!(f, "SnapshotSymbols({})", snapshot_id) 228 | } 229 | Request::Listen(snapshot_id) => write!(f, "Listen({})", snapshot_id), 230 | } 231 | } 232 | } 233 | 234 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 235 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 236 | pub struct UploadItem { 237 | pub(crate) blob: BlobRef, 238 | pub(crate) tree_part: TreePart, 239 | } 240 | 241 | impl UploadItem { 242 | pub(crate) fn parse( 243 | input: parse::Input<'_>, 244 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 245 | let (input, blob) = BlobRef::parse(input)?; 246 | let (input, tree_part) = TreePart::parse(input)?; 247 | Ok((input, UploadItem { blob, tree_part })) 248 | } 249 | 250 | pub(crate) fn encode(&self, buf: &mut Vec) { 251 | self.blob.encode(buf); 252 | self.tree_part.encode(buf); 253 | } 254 | } 255 | 256 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 257 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 258 | pub enum TreePart { 259 | Stratum { 260 | start: Option, 261 | end: CommitHash, 262 | checkpoints: Vec, 263 | }, 264 | Commit { 265 | hash: CommitHash, 266 | parents: Vec, 267 | }, 268 | } 269 | 270 | impl TreePart { 271 | pub(crate) fn parse( 272 | input: parse::Input<'_>, 273 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 274 | input.with_context("TreePart", |input| { 275 | let (input, tag) = parse::u8(input)?; 276 | match tag { 277 | 0 => { 278 | let (input, start) = parse::maybe(input, CommitHash::parse)?; 279 | let (input, end) = CommitHash::parse(input)?; 280 | let (input, checkpoints) = parse::many(input, CommitHash::parse)?; 281 | Ok(( 282 | input, 283 | Self::Stratum { 284 | start, 285 | end, 286 | checkpoints, 287 | }, 288 | )) 289 | } 290 | 1 => { 291 | let (input, hash) = CommitHash::parse(input)?; 292 | let (input, parents) = parse::many(input, CommitHash::parse)?; 293 | Ok((input, Self::Commit { hash, parents })) 294 | } 295 | other => Err(input.error(format!("invalid tag: {}", other))), 296 | } 297 | }) 298 | } 299 | 300 | pub(crate) fn encode(&self, buf: &mut Vec) { 301 | match self { 302 | TreePart::Stratum { 303 | start, 304 | end, 305 | checkpoints, 306 | } => { 307 | buf.push(0); 308 | if let Some(start) = start { 309 | buf.push(1); 310 | start.encode(buf); 311 | } else { 312 | buf.push(0); 313 | } 314 | end.encode(buf); 315 | encode_uleb128(buf, checkpoints.len() as u64); 316 | for checkpoint in checkpoints { 317 | checkpoint.encode(buf); 318 | } 319 | } 320 | TreePart::Commit { hash, parents } => { 321 | buf.push(1); 322 | hash.encode(buf); 323 | encode_uleb128(buf, parents.len() as u64); 324 | for parent in parents { 325 | parent.encode(buf); 326 | } 327 | } 328 | } 329 | } 330 | } 331 | 332 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 333 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 334 | pub enum BlobRef { 335 | Blob(BlobHash), 336 | Inline(Vec), 337 | } 338 | 339 | impl BlobRef { 340 | pub(crate) fn parse( 341 | input: parse::Input<'_>, 342 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 343 | input.with_context("BlobRef", |input| { 344 | let (input, 
tag) = parse::u8(input)?; 345 | match tag { 346 | 0 => { 347 | let (input, hash) = BlobHash::parse(input)?; 348 | Ok((input, BlobRef::Blob(hash))) 349 | } 350 | 1 => { 351 | let (input, data) = parse::slice(input)?; 352 | Ok((input, BlobRef::Inline(data.to_vec()))) 353 | } 354 | other => Err(input.error(format!("invalid tag: {}", other))), 355 | } 356 | }) 357 | } 358 | 359 | pub(crate) fn encode(&self, out: &mut Vec) { 360 | match self { 361 | BlobRef::Blob(hash) => { 362 | out.push(0); 363 | hash.encode(out); 364 | } 365 | BlobRef::Inline(data) => { 366 | out.push(1); 367 | encode_uleb128(out, data.len() as u64); 368 | out.extend(data); 369 | } 370 | } 371 | } 372 | } 373 | 374 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 375 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 376 | pub struct Notification { 377 | pub(crate) from_peer: PeerId, 378 | pub(crate) doc: DocumentId, 379 | pub(crate) data: UploadItem, 380 | } 381 | 382 | impl std::fmt::Display for Notification { 383 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 384 | match &self.data.tree_part { 385 | TreePart::Commit { .. } => { 386 | write!(f, "Notification(doc = {}, type=new commit)", &self.doc) 387 | } 388 | TreePart::Stratum { .. } => { 389 | write!(f, "Notification(doc = {}, type=new stratum)", &self.doc) 390 | } 391 | } 392 | } 393 | } 394 | 395 | impl Notification { 396 | fn parse(input: parse::Input<'_>) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 397 | input.with_context("Notification", |input| { 398 | let (input, from_peer) = PeerId::parse(input)?; 399 | let (input, doc_id) = DocumentId::parse(input)?; 400 | let (input, data) = UploadItem::parse(input)?; 401 | Ok(( 402 | input, 403 | Self { 404 | from_peer, 405 | doc: doc_id, 406 | data, 407 | }, 408 | )) 409 | }) 410 | } 411 | 412 | fn encode(&self, out: &mut Vec) { 413 | self.from_peer.encode(out); 414 | self.doc.encode(out); 415 | self.data.encode(out); 416 | } 417 | } 418 | 419 | #[cfg(test)] 420 | mod tests { 421 | 422 | #[test] 423 | fn message_encoding_roundtrip() { 424 | bolero::check!() 425 | .with_arbitrary::() 426 | .for_each(|msg| { 427 | let encoded = super::encode::encode(msg); 428 | let (decoded, len) = super::decode::decode(&encoded).unwrap(); 429 | assert_eq!(len, encoded.len()); 430 | assert_eq!(msg, &decoded); 431 | }); 432 | } 433 | } 434 | -------------------------------------------------------------------------------- /src/messages/decode.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | parse, riblt::doc_and_heads::CodedDocAndHeadsSymbol, BlobHash, Commit, CommitCategory, 3 | CommitHash, DocumentId, Payload, RequestId, SnapshotId, 4 | }; 5 | 6 | use super::{ 7 | encoding_types::{MessageType, RequestType, ResponseType}, 8 | FetchedSedimentree, Message, Notification, UploadItem, 9 | }; 10 | 11 | pub use error::DecodeError; 12 | 13 | pub(super) fn decode(bytes: &[u8]) -> Result<(Payload, usize), DecodeError> { 14 | let input = parse::Input::new(bytes); 15 | let (input, payload) = parse_payload(input)?; 16 | Ok((payload, input.offset())) 17 | } 18 | 19 | pub(crate) fn parse_payload( 20 | input: parse::Input<'_>, 21 | ) -> Result<(parse::Input<'_>, Payload), parse::ParseError> { 22 | input.with_context("payload", |input| { 23 | let (input, message_type) = MessageType::parse(input)?; 24 | let (input, message) = match message_type { 25 | MessageType::Request => { 26 | input.with_context("request payload", |input| parse_request(input)) 27 | } 28 | 
MessageType::Response => { 29 | input.with_context("response payload", |input| parse_response(input)) 30 | } 31 | MessageType::Notification => input.with_context("notification payload", |input| { 32 | let (input, notification) = Notification::parse(input)?; 33 | Ok((input, Message::Notification(notification))) 34 | }), 35 | }?; 36 | let payload = Payload::new(message); 37 | Ok((input, payload)) 38 | }) 39 | } 40 | 41 | fn parse_request( 42 | input: parse::Input<'_>, 43 | ) -> Result<(parse::Input<'_>, Message), parse::ParseError> { 44 | let (input, request_id) = RequestId::parse(input)?; 45 | let (input, req_type) = RequestType::parse(input)?; 46 | match req_type { 47 | RequestType::UploadCommits => input.with_context("UploadCommits", |input| { 48 | let (input, dag) = DocumentId::parse(input)?; 49 | let (input, category) = CommitCategory::parse(input)?; 50 | let (input, data) = parse::many(input, UploadItem::parse)?; 51 | Ok(( 52 | input, 53 | Message::Request( 54 | request_id, 55 | super::Request::UploadCommits { 56 | doc: dag, 57 | data, 58 | category, 59 | }, 60 | ), 61 | )) 62 | }), 63 | RequestType::FetchMinimalBundles => input.with_context("FetchMinimalBundles", |input| { 64 | let (input, dag_id) = DocumentId::parse(input)?; 65 | Ok(( 66 | input, 67 | Message::Request(request_id, super::Request::FetchSedimentree(dag_id)), 68 | )) 69 | }), 70 | RequestType::FetchBlobPart => input.with_context("FetchBlobPart", |input| { 71 | let (input, blob) = BlobHash::parse(input)?; 72 | let (input, offset) = crate::leb128::parse(input)?; 73 | let (input, length) = crate::leb128::parse(input)?; 74 | Ok(( 75 | input, 76 | Message::Request( 77 | request_id, 78 | super::Request::FetchBlobPart { 79 | blob, 80 | offset, 81 | length, 82 | }, 83 | ), 84 | )) 85 | }), 86 | RequestType::UploadBlob => input.with_context("UploadBlob", |input| { 87 | let (input, data) = parse::slice(input)?; 88 | Ok(( 89 | input, 90 | Message::Request(request_id, super::Request::UploadBlob(data.to_vec())), 91 | )) 92 | }), 93 | RequestType::CreateSnapshot => input.with_context("CreateSnapshot", |input| { 94 | let (input, root_doc) = DocumentId::parse(input)?; 95 | Ok(( 96 | input, 97 | Message::Request(request_id, super::Request::CreateSnapshot { root_doc }), 98 | )) 99 | }), 100 | RequestType::SnapshotSymbols => input.with_context("SnapshotSymbols", |input| { 101 | let (input, snapshot_id) = SnapshotId::parse(input)?; 102 | Ok(( 103 | input, 104 | Message::Request(request_id, super::Request::SnapshotSymbols { snapshot_id }), 105 | )) 106 | }), 107 | RequestType::Listen => input.with_context("Listen", |input| { 108 | let (input, snapshot_id) = SnapshotId::parse(input)?; 109 | Ok(( 110 | input, 111 | Message::Request(request_id, super::Request::Listen(snapshot_id)), 112 | )) 113 | }), 114 | } 115 | } 116 | 117 | fn parse_response( 118 | input: parse::Input<'_>, 119 | ) -> Result<(parse::Input<'_>, Message), parse::ParseError> { 120 | let (input, request_id) = RequestId::parse(input)?; 121 | let (input, resp_type) = ResponseType::parse(input)?; 122 | let (input, resp) = match resp_type { 123 | ResponseType::UploadCommits => Ok((input, super::Response::UploadCommits)), 124 | ResponseType::FetchSedimentree => input.with_context("FetchSedimentree", |input| { 125 | FetchedSedimentree::parse(input) 126 | .map(|(input, fetched)| (input, super::Response::FetchSedimentree(fetched))) 127 | }), 128 | ResponseType::FetchBlobPart => input.with_context("FetchBlobPart", |input| { 129 | let (input, data) = parse::slice(input)?; 130 | Ok((input, 
super::Response::FetchBlobPart(data.to_vec()))) 131 | }), 132 | ResponseType::Err => input.with_context("Err", |input| { 133 | let (input, desc) = parse::str(input)?; 134 | Ok((input, super::Response::Error(desc.to_string()))) 135 | }), 136 | ResponseType::CreateSnapshot => input.with_context("CreateSnapshot", |input| { 137 | let (input, snapshot_id) = SnapshotId::parse(input)?; 138 | let (input, first_symbols) = parse::many(input, CodedDocAndHeadsSymbol::parse)?; 139 | Ok(( 140 | input, 141 | super::Response::CreateSnapshot { 142 | snapshot_id, 143 | first_symbols, 144 | }, 145 | )) 146 | }), 147 | ResponseType::SnapshotSymbols => input.with_context("SnapshotSymbols", |input| { 148 | let (input, symbols) = parse::many(input, CodedDocAndHeadsSymbol::parse)?; 149 | Ok((input, super::Response::SnapshotSymbols(symbols))) 150 | }), 151 | ResponseType::Listen => Ok((input, super::Response::Listen)), 152 | }?; 153 | Ok((input, Message::Response(request_id, resp))) 154 | } 155 | 156 | fn parse_commit(input: parse::Input) -> Result<(parse::Input<'_>, Commit), parse::ParseError> { 157 | input.with_context("Commit", |input| { 158 | let (input, parents) = parse::many(input, CommitHash::parse)?; 159 | let (input, hash) = CommitHash::parse(input)?; 160 | let (input, content) = parse::slice(input)?; 161 | Ok((input, Commit::new(parents, content.to_vec(), hash))) 162 | }) 163 | } 164 | 165 | mod error { 166 | use crate::parse; 167 | 168 | pub enum DecodeError { 169 | NotEnoughInput, 170 | Invalid(String), 171 | } 172 | 173 | impl From for DecodeError { 174 | fn from(err: parse::ParseError) -> Self { 175 | match err { 176 | parse::ParseError::NotEnoughInput => Self::NotEnoughInput, 177 | parse::ParseError::Other { .. } => Self::Invalid(err.to_string()), 178 | } 179 | } 180 | } 181 | 182 | impl std::error::Error for DecodeError {} 183 | 184 | impl std::fmt::Display for DecodeError { 185 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 186 | match self { 187 | Self::NotEnoughInput => write!(f, "Not enough input"), 188 | Self::Invalid(err) => write!(f, "Invalid input: {}", err), 189 | } 190 | } 191 | } 192 | 193 | impl std::fmt::Debug for DecodeError { 194 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 195 | match self { 196 | Self::NotEnoughInput => write!(f, "NotEnoughInput"), 197 | Self::Invalid(err) => write!(f, "Invalid({})", err), 198 | } 199 | } 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/messages/encode.rs: -------------------------------------------------------------------------------- 1 | use crate::{leb128::encode_uleb128, messages::Request, RequestId, Response}; 2 | 3 | use super::{ 4 | encoding_types::{MessageType, RequestType, ResponseType}, 5 | Message, 6 | }; 7 | 8 | pub(super) fn encode(payload: &super::Payload) -> Vec { 9 | let mut buf = Vec::new(); 10 | match &payload.0 { 11 | Message::Request(id, req) => encode_request(&mut buf, *id, &req), 12 | Message::Response(id, res) => encode_response(&mut buf, *id, &res), 13 | Message::Notification(notification) => { 14 | buf.push(MessageType::Notification.into()); 15 | notification.encode(&mut buf); 16 | } 17 | } 18 | buf 19 | } 20 | 21 | fn encode_request(buf: &mut Vec, id: RequestId, req: &Request) { 22 | buf.push(MessageType::Request.into()); 23 | buf.extend_from_slice(id.as_bytes()); 24 | 25 | match req { 26 | Request::UploadBlob(blob) => { 27 | buf.push(RequestType::UploadBlob.into()); 28 | encode_uleb128(buf, blob.len() as u64); 29 | 
buf.extend_from_slice(blob); 30 | } 31 | Request::UploadCommits { 32 | doc, 33 | data, 34 | category, 35 | } => { 36 | buf.push(RequestType::UploadCommits.into()); 37 | doc.encode(buf); 38 | category.encode(buf); 39 | encode_uleb128(buf, data.len() as u64); 40 | for datum in data { 41 | datum.encode(buf); 42 | } 43 | } 44 | Request::FetchSedimentree(doc_id) => { 45 | buf.push(RequestType::FetchMinimalBundles.into()); 46 | doc_id.encode(buf); 47 | } 48 | Request::FetchBlobPart { 49 | blob, 50 | offset, 51 | length, 52 | } => { 53 | buf.push(RequestType::FetchBlobPart.into()); 54 | blob.encode(buf); 55 | encode_uleb128(buf, *offset); 56 | encode_uleb128(buf, *length); 57 | } 58 | Request::CreateSnapshot { root_doc } => { 59 | buf.push(RequestType::CreateSnapshot.into()); 60 | root_doc.encode(buf); 61 | } 62 | Request::SnapshotSymbols { snapshot_id } => { 63 | buf.push(RequestType::SnapshotSymbols.into()); 64 | snapshot_id.encode(buf); 65 | } 66 | Request::Listen(snapshot_id) => { 67 | buf.push(RequestType::Listen.into()); 68 | snapshot_id.encode(buf); 69 | } 70 | } 71 | } 72 | 73 | fn encode_response(buf: &mut Vec, id: RequestId, resp: &Response) { 74 | buf.push(MessageType::Response.into()); 75 | buf.extend_from_slice(id.as_bytes()); 76 | 77 | match &resp { 78 | Response::UploadCommits => { 79 | buf.push(ResponseType::UploadCommits.into()); 80 | } 81 | Response::FetchSedimentree(fetched) => { 82 | buf.push(ResponseType::FetchSedimentree.into()); 83 | fetched.encode(buf); 84 | } 85 | Response::FetchBlobPart(data) => { 86 | buf.push(ResponseType::FetchBlobPart.into()); 87 | encode_uleb128(buf, data.len() as u64); 88 | buf.extend_from_slice(data); 89 | } 90 | Response::CreateSnapshot { 91 | snapshot_id, 92 | first_symbols, 93 | } => { 94 | buf.push(ResponseType::CreateSnapshot.into()); 95 | buf.extend_from_slice(snapshot_id.as_bytes()); 96 | encode_uleb128(buf, first_symbols.len() as u64); 97 | for symbol in first_symbols { 98 | symbol.encode(buf); 99 | } 100 | } 101 | Response::SnapshotSymbols(symbols) => { 102 | buf.push(ResponseType::SnapshotSymbols.into()); 103 | encode_uleb128(buf, symbols.len() as u64); 104 | for symbol in symbols { 105 | symbol.encode(buf); 106 | } 107 | } 108 | Response::Error(desc) => { 109 | buf.push(ResponseType::Err.into()); 110 | encode_uleb128(buf, desc.len() as u64); 111 | buf.extend_from_slice(desc.as_bytes()); 112 | } 113 | Response::Listen => { 114 | buf.push(ResponseType::Listen.into()); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/messages/encoding_types.rs: -------------------------------------------------------------------------------- 1 | use crate::parse; 2 | 3 | pub(super) enum MessageType { 4 | Request, 5 | Response, 6 | Notification, 7 | } 8 | 9 | impl MessageType { 10 | pub(super) fn parse( 11 | input: parse::Input<'_>, 12 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 13 | input.with_context("MessageDirection", |input| { 14 | let (input, byte) = parse::u8(input)?; 15 | let msg_type = MessageType::try_from(byte) 16 | .map_err(|e| input.error(format!("invalid message type: {:?}", e)))?; 17 | Ok((input, msg_type)) 18 | }) 19 | } 20 | } 21 | 22 | impl TryFrom for MessageType { 23 | type Error = error::InvalidMessageDirection; 24 | 25 | fn try_from(value: u8) -> Result { 26 | match value { 27 | 0 => Ok(Self::Request), 28 | 1 => Ok(Self::Response), 29 | 3 => Ok(Self::Notification), 30 | other => Err(error::InvalidMessageDirection(other)), 31 | } 32 | } 33 | } 34 | 35 | 
impl From for u8 { 36 | fn from(msg_type: MessageType) -> u8 { 37 | match msg_type { 38 | MessageType::Request => 0, 39 | MessageType::Response => 1, 40 | MessageType::Notification => 3, 41 | } 42 | } 43 | } 44 | 45 | #[derive(Clone, Copy, Debug, PartialEq)] 46 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 47 | pub(super) enum RequestType { 48 | UploadCommits, 49 | UploadBlob, 50 | FetchMinimalBundles, 51 | FetchBlobPart, 52 | CreateSnapshot, 53 | SnapshotSymbols, 54 | Listen, 55 | } 56 | 57 | impl RequestType { 58 | pub(super) fn parse( 59 | input: parse::Input<'_>, 60 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 61 | input.with_context("RequestType", |input| { 62 | let (input, byte) = parse::u8(input)?; 63 | let req_type = RequestType::try_from(byte) 64 | .map_err(|e| input.error(format!("invalid request type: {}", e)))?; 65 | Ok((input, req_type)) 66 | }) 67 | } 68 | } 69 | 70 | impl TryFrom for RequestType { 71 | type Error = error::InvalidRequestType; 72 | 73 | fn try_from(value: u8) -> Result { 74 | match value { 75 | 0 => Ok(Self::UploadCommits), 76 | 1 => Ok(Self::FetchMinimalBundles), 77 | 2 => Ok(Self::FetchBlobPart), 78 | 3 => Ok(Self::UploadBlob), 79 | 4 => Ok(Self::CreateSnapshot), 80 | 5 => Ok(Self::SnapshotSymbols), 81 | 6 => Ok(Self::Listen), 82 | _ => Err(error::InvalidRequestType(value)), 83 | } 84 | } 85 | } 86 | 87 | impl From for u8 { 88 | fn from(req: RequestType) -> u8 { 89 | match req { 90 | RequestType::UploadCommits => 0, 91 | RequestType::FetchMinimalBundles => 1, 92 | RequestType::FetchBlobPart => 2, 93 | RequestType::UploadBlob => 3, 94 | RequestType::CreateSnapshot => 4, 95 | RequestType::SnapshotSymbols => 5, 96 | RequestType::Listen => 6, 97 | } 98 | } 99 | } 100 | 101 | #[derive(Clone, Copy, Debug, PartialEq)] 102 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 103 | pub(super) enum ResponseType { 104 | Err, 105 | UploadCommits, 106 | FetchSedimentree, 107 | FetchBlobPart, 108 | CreateSnapshot, 109 | SnapshotSymbols, 110 | Listen, 111 | } 112 | 113 | impl ResponseType { 114 | pub(super) fn parse( 115 | input: parse::Input<'_>, 116 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 117 | input.with_context("ResponseType", |input| { 118 | let (input, byte) = parse::u8(input)?; 119 | let req_type = ResponseType::try_from(byte) 120 | .map_err(|e| input.error(format!("invalid request type: {:?}", e)))?; 121 | Ok((input, req_type)) 122 | }) 123 | } 124 | } 125 | 126 | impl TryFrom for ResponseType { 127 | type Error = error::InvalidResponseType; 128 | 129 | fn try_from(value: u8) -> Result { 130 | match value { 131 | 0 => Ok(Self::Err), 132 | 1 => Ok(Self::UploadCommits), 133 | 2 => Ok(Self::FetchSedimentree), 134 | 3 => Ok(Self::FetchBlobPart), 135 | 4 => Ok(Self::CreateSnapshot), 136 | 5 => Ok(Self::SnapshotSymbols), 137 | 6 => Ok(Self::Listen), 138 | _ => Err(error::InvalidResponseType(value)), 139 | } 140 | } 141 | } 142 | 143 | impl From for u8 { 144 | fn from(resp: ResponseType) -> Self { 145 | match resp { 146 | ResponseType::Err => 0, 147 | ResponseType::UploadCommits => 1, 148 | ResponseType::FetchSedimentree => 2, 149 | ResponseType::FetchBlobPart => 3, 150 | ResponseType::CreateSnapshot => 4, 151 | ResponseType::SnapshotSymbols => 5, 152 | ResponseType::Listen => 6, 153 | } 154 | } 155 | } 156 | 157 | mod error { 158 | pub struct InvalidMessageDirection(pub(super) u8); 159 | 160 | impl std::fmt::Display for InvalidMessageDirection { 161 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 162 | 
write!(f, "invalid message direction: {}", self.0) 163 | } 164 | } 165 | 166 | impl std::fmt::Debug for InvalidMessageDirection { 167 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 168 | write!(f, "InvalidMessageDirection({})", self.0) 169 | } 170 | } 171 | 172 | impl std::error::Error for InvalidMessageDirection {} 173 | 174 | pub struct InvalidRequestType(pub(super) u8); 175 | 176 | impl std::fmt::Display for InvalidRequestType { 177 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 178 | write!(f, "invalid request type: {}", self.0) 179 | } 180 | } 181 | 182 | impl std::fmt::Debug for InvalidRequestType { 183 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 184 | write!(f, "InvalidRequestType({})", self.0) 185 | } 186 | } 187 | 188 | impl std::error::Error for InvalidRequestType {} 189 | 190 | pub struct InvalidResponseType(pub(super) u8); 191 | 192 | impl std::fmt::Display for InvalidResponseType { 193 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 194 | write!(f, "invalid response type: {}", self.0) 195 | } 196 | } 197 | 198 | impl std::fmt::Debug for InvalidResponseType { 199 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 200 | write!(f, "InvalidResponseType({})", self.0) 201 | } 202 | } 203 | 204 | impl std::error::Error for InvalidResponseType {} 205 | } 206 | -------------------------------------------------------------------------------- /src/messages/stream.rs: -------------------------------------------------------------------------------- 1 | //! This module implements a simple handshake protocol for point-to-point connections 2 | //! 3 | //! The underlying messages of the beelay protocol which are passed to 4 | //! [`crate::Beelay::handle_event`] require that you already know the peer ID of the sender of the 5 | //! message. In many cases (e.g. a new TCP connection) you don't know anything about the other peer 6 | //! except that they have connected and so a simple handshake protocol is needed in which the two 7 | //! peers exchange their peer IDs before they can start sending messages. This module implements 8 | //! such a protocol. 9 | //! 10 | //! To use this module you first create a [`Connecting`] object using either [`Connecting::accept`] 11 | //! if you are the party being connected to or [`Connecting::connect`] if you are the party 12 | //! initiating the connection. Then you loop, calling [`Connecting::receive`] with any message the 13 | //! other end has sent. Each call to [`Connecting::receive`] will return a [`Step`] which tells you 14 | //! whether the handshake is complete and if so, what the peer IDs of the two parties are. 15 | //! 16 | //! Once the handshake is complete you will have a [`Connected`] object, which you can use to 17 | //! transform incoming [`Message`]s into [`crate::Envelope`]s which can be passed to 18 | //! [`crate::Beelay::handle_event`] and to transform outgoing [`crate::Envelope`]s into 19 | //! [`Message`]s which can be sent to the other party. 20 | //! 21 | //! # Example 22 | //! 23 | //! In the following example we make use of a pretend network which we model like this: 24 | //! 25 | //! ```rust 26 | //! fn receive_message() -> Vec { 27 | //! vec![] 28 | //! } 29 | //! 30 | //! fn send_message(msg: Vec) { 31 | //! } 32 | //! ``` 33 | //! 34 | //! ```rust,no_run 35 | //! use beelay_core::messages::stream::{Connecting, Connected, Step, Message}; 36 | //! use beelay_core::{Beelay, Envelope, Event, PeerId}; 37 | //! 
# fn receive_message() -> Vec { 38 | //! # vec![] 39 | //! # } 40 | //! # fn send_message(msg: Vec) { 41 | //! # } 42 | //! 43 | //! fn accept_connection(our_peer_id: PeerId) { 44 | //! let step = Connecting::accept(our_peer_id); 45 | //! let connected = handshake(step); 46 | //! run(connected); 47 | //! } 48 | //! 49 | //! fn connect_to_peer(our_peer_id: PeerId) { 50 | //! let step = Connecting::connect(our_peer_id); 51 | //! let connected = handshake(step); 52 | //! run(connected); 53 | //! } 54 | //! 55 | //! fn handshake(mut step: Step) -> Connected { 56 | //! loop { 57 | //! match step { 58 | //! Step::Continue(state, msg) => { 59 | //! if let Some(msg) = msg { 60 | //! send_message(msg.encode()); 61 | //! } 62 | //! let next_msg = receive_message(); 63 | //! step = state.receive(Message::decode(&next_msg).unwrap()).unwrap(); 64 | //! }, 65 | //! Step::Done(connected, msg) => { 66 | //! if let Some(msg) = msg { 67 | //! send_message(msg.encode()); 68 | //! } 69 | //! break connected; 70 | //! } 71 | //! } 72 | //! } 73 | //! } 74 | //! 75 | //! fn run(connected: Connected) { 76 | //! // Now we can start sending and receiving messages 77 | //! 78 | //! // We can translate incoming messages into an envelope to give to Beelay 79 | //! let incoming = receive_message(); 80 | //! let msg = Message::decode(&incoming).unwrap(); 81 | //! let envelope = connected.receive(msg).unwrap(); 82 | //! let beelay: Beelay:: = todo!(); 83 | //! beelay.handle_event(Event::receive(envelope)); 84 | //! println!("Received message from {}: {:?}", envelope.sender(), envelope.payload()); 85 | //! 86 | //! // A message somehow generated by an instance of Beelay in our application 87 | //! let envelope: Envelope = todo!(); 88 | //! let msg = connected.send(envelope); 89 | //! send_message(msg.encode()); 90 | //! } 91 | //! 
``` 92 | use crate::{leb128::encode_uleb128, parse, Envelope, Payload, PeerId}; 93 | pub use error::{DecodeError, Error}; 94 | 95 | #[derive(Debug, PartialEq, Eq)] 96 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 97 | pub struct Message(MessageInner); 98 | 99 | impl Message { 100 | pub fn encode(&self) -> Vec { 101 | let msg_type = match &self.0 { 102 | MessageInner::HelloDearServer(_) => 0, 103 | MessageInner::WhyHelloDearClient(_) => 1, 104 | MessageInner::Data(_) => 2, 105 | }; 106 | let mut bytes = vec![msg_type]; 107 | match &self.0 { 108 | MessageInner::HelloDearServer(peer_id) => { 109 | encode_uleb128(&mut bytes, peer_id.as_bytes().len() as u64); 110 | bytes.extend_from_slice(peer_id.as_bytes()); 111 | } 112 | MessageInner::WhyHelloDearClient(peer_id) => { 113 | encode_uleb128(&mut bytes, peer_id.as_bytes().len() as u64); 114 | bytes.extend_from_slice(peer_id.as_bytes()); 115 | } 116 | MessageInner::Data(payload) => bytes.extend_from_slice(&payload.encode()), 117 | } 118 | bytes 119 | } 120 | 121 | pub fn decode(data: &[u8]) -> Result { 122 | let input = parse::Input::new(data); 123 | let (input, msg_type) = parse::u8(input)?; 124 | match msg_type { 125 | 0 => { 126 | let (_input, peer_id_str) = parse::str(input)?; 127 | let peer_id = PeerId::from(peer_id_str.to_string()); 128 | Ok(Message(MessageInner::HelloDearServer(peer_id))) 129 | } 130 | 1 => { 131 | let (_input, peer_id_str) = parse::str(input)?; 132 | let peer_id = PeerId::from(peer_id_str.to_string()); 133 | Ok(Message(MessageInner::WhyHelloDearClient(peer_id))) 134 | } 135 | 2 => { 136 | let (_input, payload) = crate::messages::decode::parse_payload(input)?; 137 | Ok(Message(MessageInner::Data(payload))) 138 | } 139 | _ => Err(DecodeError::Invalid("invalid message type".to_string())), 140 | } 141 | } 142 | } 143 | 144 | #[derive(Debug, PartialEq, Eq)] 145 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 146 | enum MessageInner { 147 | HelloDearServer(PeerId), 148 | WhyHelloDearClient(PeerId), 149 | Data(Payload), 150 | } 151 | 152 | /// The initial state of the handshake protocol. 153 | pub struct Connecting(PeerId); 154 | 155 | /// A step in the handshakeprotocol 156 | pub enum Step { 157 | /// Continue with the handshake. If the optional message is `Some` then it should be sent to 158 | /// the other end before waiting to receive another message. 159 | Continue(Connecting, Option), 160 | /// The handshake is complete. The `Connected` object contains the peer IDs of the two parties 161 | /// and if the optional message is `Some` then it should be sent to the other end. 162 | Done(Connected, Option), 163 | } 164 | 165 | impl Connecting { 166 | /// A handshake for accepting a connection. This will wait for the other end to send the first 167 | /// message 168 | /// 169 | /// # Arguments 170 | /// * `us` - The peer ID of the party accepting the connection 171 | pub fn accept(us: PeerId) -> Step { 172 | Step::Continue(Connecting(us), None) 173 | } 174 | 175 | /// A handshake for initiating a connection, this will send the first message. 176 | /// 177 | /// # Arguments 178 | /// * `us` - The peer ID of the party initiating the connection 179 | pub fn connect(us: PeerId) -> Step { 180 | Step::Continue( 181 | Connecting(us.clone()), 182 | Some(Message(MessageInner::HelloDearServer(us))), 183 | ) 184 | } 185 | 186 | /// Receive a message from the other end. 
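///
/// If the other end sent `HelloDearServer` the handshake completes and we reply with a
/// `WhyHelloDearClient` message; if they sent `WhyHelloDearClient` the handshake is
/// complete and no reply is needed. Receiving a data message before the handshake has
/// finished yields `Error::UnexpectedMessage`.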
187 | pub fn receive(self, msg: Message) -> Result { 188 | match msg.0 { 189 | MessageInner::HelloDearServer(their_peer_id) => Ok(Step::Done( 190 | Connected { 191 | our_peer_id: self.0.clone(), 192 | their_peer_id, 193 | }, 194 | Some(Message(MessageInner::WhyHelloDearClient(self.0))), 195 | )), 196 | MessageInner::WhyHelloDearClient(their_peer_id) => Ok(Step::Done( 197 | Connected { 198 | our_peer_id: self.0, 199 | their_peer_id, 200 | }, 201 | None, 202 | )), 203 | _ => Err(Error::UnexpectedMessage), 204 | } 205 | } 206 | } 207 | 208 | /// The connected state of the handshake protocol 209 | #[derive(Clone)] 210 | pub struct Connected { 211 | our_peer_id: PeerId, 212 | their_peer_id: PeerId, 213 | } 214 | 215 | impl Connected { 216 | pub fn their_peer_id(&self) -> &PeerId { 217 | &self.their_peer_id 218 | } 219 | 220 | /// Receive a message from the other end and transform it into an envelope 221 | pub fn receive(&self, msg: Message) -> Result { 222 | match msg.0 { 223 | MessageInner::Data(payload) => Ok(Envelope { 224 | sender: self.their_peer_id.clone(), 225 | recipient: self.our_peer_id.clone(), 226 | payload, 227 | }), 228 | _ => Err(Error::UnexpectedMessage), 229 | } 230 | } 231 | 232 | /// Transform an envelope into a message which can be sent to the other end 233 | pub fn send(&self, env: Envelope) -> Message { 234 | Message(MessageInner::Data(env.take_payload())) 235 | } 236 | } 237 | 238 | mod error { 239 | use crate::parse; 240 | 241 | pub enum Error { 242 | UnexpectedMessage, 243 | } 244 | 245 | impl std::fmt::Display for Error { 246 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 247 | match self { 248 | Error::UnexpectedMessage => write!(f, "unexpected message"), 249 | } 250 | } 251 | } 252 | 253 | impl std::fmt::Debug for Error { 254 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 255 | std::fmt::Display::fmt(self, f) 256 | } 257 | } 258 | 259 | impl std::error::Error for Error {} 260 | 261 | pub enum DecodeError { 262 | NotEnoughInput, 263 | Invalid(String), 264 | } 265 | 266 | impl From for DecodeError { 267 | fn from(err: parse::ParseError) -> Self { 268 | match err { 269 | parse::ParseError::NotEnoughInput => DecodeError::NotEnoughInput, 270 | parse::ParseError::Other { context, error } => { 271 | DecodeError::Invalid(format!("{:?}: {}", context, error)) 272 | } 273 | } 274 | } 275 | } 276 | 277 | impl std::fmt::Display for DecodeError { 278 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 279 | match self { 280 | DecodeError::NotEnoughInput => write!(f, "not enough input"), 281 | DecodeError::Invalid(msg) => write!(f, "invalid input: {}", msg), 282 | } 283 | } 284 | } 285 | 286 | impl std::fmt::Debug for DecodeError { 287 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 288 | std::fmt::Display::fmt(self, f) 289 | } 290 | } 291 | 292 | impl std::error::Error for DecodeError {} 293 | } 294 | 295 | #[cfg(test)] 296 | mod tests { 297 | 298 | #[test] 299 | fn handshake_message_encoding_roundtrip() { 300 | bolero::check!() 301 | .with_arbitrary::() 302 | .for_each(|msg| { 303 | let encoded = msg.encode(); 304 | let decoded = super::Message::decode(&encoded).unwrap(); 305 | assert_eq!(msg, &decoded); 306 | }); 307 | } 308 | } 309 | -------------------------------------------------------------------------------- /src/notification_handler.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicU64, Ordering}; 2 | 3 | use crate::{ 4 | 
blob::BlobMeta, 5 | effects::TaskEffects, 6 | messages::{BlobRef, Notification, TreePart, UploadItem}, 7 | sedimentree::{self, LooseCommit}, 8 | Commit, CommitBundle, CommitCategory, CommitOrBundle, DocEvent, StorageKey, 9 | }; 10 | 11 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 12 | pub(crate) struct HandlerId(u64); 13 | 14 | static LAST_HANDLER_ID: AtomicU64 = AtomicU64::new(0); 15 | 16 | impl HandlerId { 17 | pub(crate) fn new() -> HandlerId { 18 | HandlerId(LAST_HANDLER_ID.fetch_add(1, Ordering::Relaxed)) 19 | } 20 | } 21 | 22 | pub(crate) async fn handle(mut effects: TaskEffects, notification: Notification) { 23 | tracing::debug!(?notification, "received notification"); 24 | effects.log().remote_notification(¬ification); 25 | let Notification { 26 | from_peer, 27 | doc, 28 | data, 29 | } = notification; 30 | let UploadItem { blob, tree_part } = data; 31 | let BlobRef::Inline(blob_data) = blob else { 32 | panic!("blob refs in notifications not yet supported"); 33 | }; 34 | let data = match &tree_part { 35 | TreePart::Commit { hash, parents } => { 36 | CommitOrBundle::Commit(Commit::new(parents.clone(), blob_data.to_vec(), *hash)) 37 | } 38 | TreePart::Stratum { 39 | start, 40 | end, 41 | checkpoints, 42 | } => CommitOrBundle::Bundle( 43 | CommitBundle::builder() 44 | .start(*start) 45 | .end(*end) 46 | .bundled_commits(blob_data.to_vec()) 47 | .checkpoints(checkpoints.clone()) 48 | .build(), 49 | ), 50 | }; 51 | let blob = BlobMeta::new(&blob_data); 52 | effects 53 | .put(StorageKey::blob(blob.hash()), blob_data.clone()) 54 | .await; 55 | let path = StorageKey::sedimentree_root(&doc, CommitCategory::Content); 56 | match tree_part { 57 | TreePart::Commit { hash, parents } => { 58 | let loose = LooseCommit::new(hash, parents, blob); 59 | sedimentree::storage::write_loose_commit(effects.clone(), path, &loose).await; 60 | } 61 | TreePart::Stratum { 62 | start, 63 | end, 64 | checkpoints, 65 | } => { 66 | let bundle = CommitBundle::builder() 67 | .start(start) 68 | .end(end) 69 | .bundled_commits(blob_data) 70 | .checkpoints(checkpoints) 71 | .build(); 72 | sedimentree::storage::write_bundle(effects.clone(), path, bundle).await; 73 | } 74 | } 75 | effects.emit_doc_event(DocEvent { 76 | peer: from_peer, 77 | doc, 78 | data: data.clone(), 79 | }); 80 | } 81 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | pub(crate) use error::{NotEnoughInput, ParseError}; 2 | 3 | #[derive(Clone)] 4 | pub(super) struct Input<'a> { 5 | context: Vec, 6 | data: &'a [u8], 7 | offset: usize, 8 | } 9 | 10 | impl<'a> Input<'a> { 11 | pub(super) fn new(data: &'a [u8]) -> Self { 12 | Self { 13 | data, 14 | offset: 0, 15 | context: Vec::new(), 16 | } 17 | } 18 | 19 | fn read(self, len: usize) -> Option<(Self, &'a [u8])> { 20 | if len > self.data.len() { 21 | return None; 22 | } 23 | let (read, rest) = self.data.split_at(len); 24 | Some(( 25 | Self { 26 | data: rest, 27 | context: self.context, 28 | offset: self.offset + len, 29 | }, 30 | read, 31 | )) 32 | } 33 | 34 | pub(crate) fn with_context< 35 | S: AsRef, 36 | T, 37 | F: for<'b> Fn(Input<'b>) -> Result<(Input<'b>, T), error::ParseError>, 38 | >( 39 | mut self, 40 | context: S, 41 | f: F, 42 | ) -> Result<(Input<'a>, T), error::ParseError> { 43 | self.context.push(context.as_ref().to_string()); 44 | let (mut input, result) = f(self)?; 45 | input.context.pop(); 46 | Ok((input, result)) 47 | } 48 | 49 | pub(crate) fn 
error>(&self, msg: S) -> error::ParseError { 50 | error::ParseError::Other { 51 | context: self.context.clone(), 52 | error: msg.as_ref().to_string(), 53 | } 54 | } 55 | 56 | pub(crate) fn offset(&self) -> usize { 57 | self.offset 58 | } 59 | 60 | pub(crate) fn is_empty(&self) -> bool { 61 | self.offset >= self.data.len() 62 | } 63 | } 64 | 65 | pub(super) fn u8(input: Input<'_>) -> Result<(Input<'_>, u8), error::ParseError> { 66 | if let Some((input, data)) = input.read(1) { 67 | Ok((input, data[0])) 68 | } else { 69 | Err(error::ParseError::NotEnoughInput) 70 | } 71 | } 72 | 73 | #[allow(dead_code)] 74 | pub(super) fn bool(input: Input<'_>) -> Result<(Input<'_>, bool), error::ParseError> { 75 | let (input, data) = u8(input)?; 76 | Ok((input, data != 0)) 77 | } 78 | 79 | pub(super) fn slice(input: Input<'_>) -> Result<(Input<'_>, &'_ [u8]), error::ParseError> { 80 | let (input, len) = input.with_context("slice length", crate::leb128::parse)?; 81 | let (input, data) = input 82 | .read(len as usize) 83 | .ok_or(error::ParseError::NotEnoughInput)?; 84 | // .ok_or::(error::NotEnoughInput.into())?; 85 | Ok((input, data)) 86 | } 87 | 88 | pub(super) fn str(input: Input<'_>) -> Result<(Input<'_>, &'_ str), error::ParseError> { 89 | let (input, data) = slice(input)?; 90 | let result = 91 | std::str::from_utf8(data).map_err(|e| input.error(format!("invalid string: {}", e)))?; 92 | Ok((input, result)) 93 | } 94 | 95 | pub(super) fn many Fn(Input<'b>) -> Result<(Input<'b>, T), error::ParseError>, T>( 96 | input: Input<'_>, 97 | f: F, 98 | ) -> Result<(Input<'_>, Vec), error::ParseError> { 99 | let mut res = Vec::new(); 100 | let (mut input, count) = input.with_context("number of items", crate::leb128::parse)?; 101 | 102 | for elem in 0..count { 103 | let (i, v) = input.with_context(format!("element {}", elem), &f)?; 104 | // let (i, v) = f(input)?; 105 | input = i; 106 | res.push(v); 107 | } 108 | 109 | Ok((input, res)) 110 | } 111 | 112 | pub(crate) fn maybe Fn(Input<'b>) -> Result<(Input<'b>, R), error::ParseError>>( 113 | input: Input<'_>, 114 | f: F, 115 | ) -> Result<(Input<'_>, Option), error::ParseError> { 116 | let (input, has_value) = bool(input)?; 117 | if has_value { 118 | let (input, value) = f(input)?; 119 | Ok((input, Some(value))) 120 | } else { 121 | Ok((input, None)) 122 | } 123 | } 124 | 125 | pub(super) fn arr( 126 | input: Input<'_>, 127 | ) -> Result<(Input<'_>, [u8; N]), error::ParseError> { 128 | let mut res = [0; N]; 129 | 130 | let (input, bytes) = input.read(N).ok_or(error::ParseError::NotEnoughInput)?; 131 | 132 | res.copy_from_slice(bytes); 133 | Ok((input, res)) 134 | } 135 | 136 | pub(super) mod error { 137 | pub struct NotEnoughInput; 138 | 139 | impl std::fmt::Display for NotEnoughInput { 140 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 141 | write!(f, "not enough input") 142 | } 143 | } 144 | 145 | impl std::fmt::Debug for NotEnoughInput { 146 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 147 | std::fmt::Display::fmt(self, f) 148 | } 149 | } 150 | 151 | impl std::error::Error for NotEnoughInput {} 152 | 153 | pub enum ParseError { 154 | NotEnoughInput, 155 | Other { context: Vec, error: String }, 156 | } 157 | 158 | impl std::fmt::Display for ParseError { 159 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 160 | match self { 161 | ParseError::NotEnoughInput => write!(f, "not enough input"), 162 | ParseError::Other { context, error } => { 163 | write!(f, "error: {}", error)?; 164 | for ctx in 
context { 165 | write!(f, "\n in {}", ctx)?; 166 | } 167 | Ok(()) 168 | } 169 | } 170 | } 171 | } 172 | 173 | impl std::fmt::Debug for ParseError { 174 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 175 | std::fmt::Display::fmt(self, f) 176 | } 177 | } 178 | 179 | impl std::error::Error for ParseError {} 180 | } 181 | -------------------------------------------------------------------------------- /src/reachability.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use futures::StreamExt; 4 | 5 | use crate::{ 6 | effects::TaskEffects, 7 | parse, 8 | sedimentree::{self, MinimalTreeHash, Sedimentree}, 9 | CommitCategory, CommitHash, CommitOrBundle, DocumentId, StorageKey, 10 | }; 11 | 12 | #[derive(Default, Debug)] 13 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 14 | pub(crate) struct ReachabilityIndex { 15 | reachable: Vec, 16 | } 17 | 18 | impl ReachabilityIndex { 19 | pub(crate) fn new() -> ReachabilityIndex { 20 | ReachabilityIndex { 21 | reachable: Vec::new(), 22 | } 23 | } 24 | 25 | pub(crate) async fn from_tree( 26 | effects: TaskEffects, 27 | tree: Sedimentree, 28 | ) -> Self { 29 | let items = sedimentree::storage::data(effects.clone(), tree) 30 | .collect::>() 31 | .await; 32 | let altogether = items 33 | .into_iter() 34 | .map(|i| match i { 35 | CommitOrBundle::Commit(c) => c.contents().to_vec(), 36 | CommitOrBundle::Bundle(b) => b.bundled_commits().to_vec(), 37 | }) 38 | .flatten() 39 | .collect::>(); 40 | let mut result = ReachabilityIndex::new(); 41 | let mut input = parse::Input::new(&altogether); 42 | while !input.is_empty() { 43 | let (new_input, entry) = ReachabilityIndexEntry::parse(input).unwrap(); 44 | input = new_input; 45 | result += entry; 46 | } 47 | result 48 | } 49 | 50 | pub(crate) async fn load(effects: TaskEffects, doc_id: &DocumentId) -> Self { 51 | let tree = sedimentree::storage::load( 52 | effects.clone(), 53 | StorageKey::sedimentree_root(doc_id, CommitCategory::Index), 54 | ) 55 | .await 56 | .unwrap_or_default(); 57 | Self::from_tree(effects, tree).await 58 | } 59 | 60 | pub(crate) fn has_link(&self, to: &DocumentId) -> bool { 61 | self.reachable.contains(to) 62 | } 63 | } 64 | 65 | impl IntoIterator for ReachabilityIndex { 66 | type Item = DocumentId; 67 | type IntoIter = std::vec::IntoIter; 68 | 69 | fn into_iter(self) -> Self::IntoIter { 70 | self.reachable.into_iter() 71 | } 72 | } 73 | 74 | impl<'a> IntoIterator for &'a ReachabilityIndex { 75 | type Item = &'a DocumentId; 76 | type IntoIter = std::slice::Iter<'a, DocumentId>; 77 | 78 | fn into_iter(self) -> Self::IntoIter { 79 | self.reachable.iter() 80 | } 81 | } 82 | 83 | impl std::ops::AddAssign for ReachabilityIndex { 84 | fn add_assign(&mut self, other: ReachabilityIndex) { 85 | self.reachable.extend(other.reachable); 86 | } 87 | } 88 | 89 | impl std::ops::AddAssign for ReachabilityIndex { 90 | fn add_assign(&mut self, other: ReachabilityIndexEntry) { 91 | self.reachable.push(other.0); 92 | } 93 | } 94 | 95 | #[derive(Debug, PartialEq, Eq)] 96 | pub(crate) struct ReachabilityIndexEntry(crate::DocumentId); 97 | 98 | #[cfg(test)] 99 | impl<'a> arbitrary::Arbitrary<'a> for ReachabilityIndexEntry { 100 | fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { 101 | let doc_id = DocumentId::arbitrary(u)?; 102 | Ok(ReachabilityIndexEntry::new(doc_id)) 103 | } 104 | } 105 | 106 | impl ReachabilityIndexEntry { 107 | pub(crate) fn new(document_id: DocumentId) -> Self { 108 | 
ReachabilityIndexEntry(document_id) 109 | } 110 | 111 | pub(crate) fn parse( 112 | input: parse::Input<'_>, 113 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 114 | let (input, doc_id) = DocumentId::parse(input)?; 115 | Ok((input, ReachabilityIndexEntry::new(doc_id))) 116 | } 117 | 118 | pub(crate) fn encode(&self) -> Vec { 119 | self.0.as_bytes().to_vec() 120 | } 121 | 122 | pub(crate) fn hash(&self) -> CommitHash { 123 | let data = self.encode(); 124 | <[u8; 32]>::from(blake3::hash(&data)).into() 125 | } 126 | } 127 | 128 | pub(crate) async fn load_reachable_docs( 129 | effects: TaskEffects, 130 | root: DocumentId, 131 | ) -> HashMap { 132 | let mut to_process = vec![root]; 133 | let mut result = HashMap::new(); 134 | 135 | while let Some(doc) = to_process.pop() { 136 | let index = ReachabilityIndex::load(effects.clone(), &doc).await; 137 | for doc in index.into_iter() { 138 | to_process.push(doc); 139 | } 140 | if let Some(tree) = sedimentree::storage::load( 141 | effects.clone(), 142 | StorageKey::sedimentree_root(&doc, CommitCategory::Content), 143 | ) 144 | .await 145 | { 146 | result.insert(doc, tree.minimal_hash()); 147 | } 148 | } 149 | 150 | result 151 | } 152 | -------------------------------------------------------------------------------- /src/request_handlers.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | blob::BlobMeta, 3 | messages::{BlobRef, ContentAndIndex, FetchedSedimentree, TreePart, UploadItem}, 4 | riblt::{self, doc_and_heads::CodedDocAndHeadsSymbol}, 5 | sedimentree::{self, LooseCommit}, 6 | snapshots, 7 | subscriptions::Subscription, 8 | sync_docs, CommitBundle, CommitCategory, DocumentId, OutgoingResponse, PeerId, RequestId, 9 | Response, StorageKey, 10 | }; 11 | 12 | pub(super) async fn handle_request( 13 | mut effects: crate::effects::TaskEffects, 14 | from: PeerId, 15 | req_id: RequestId, 16 | request: crate::Request, 17 | ) -> Option { 18 | let response = match request { 19 | crate::Request::UploadCommits { 20 | doc, 21 | data, 22 | category, 23 | } => { 24 | upload_commits(effects, from.clone(), doc, data, category).await; 25 | Response::UploadCommits 26 | } 27 | crate::Request::FetchSedimentree(doc_id) => { 28 | let trees = fetch_sedimentree(effects, doc_id).await; 29 | Response::FetchSedimentree(trees) 30 | } 31 | crate::Request::FetchBlobPart { 32 | blob, 33 | offset, 34 | length, 35 | } => match effects.load(StorageKey::blob(blob)).await { 36 | None => Response::Error("no such blob".to_string()), 37 | Some(data) => { 38 | let offset = offset as usize; 39 | let length = length as usize; 40 | Response::FetchBlobPart(data[offset..offset + length].to_vec()) 41 | } 42 | }, 43 | crate::Request::UploadBlob(_vec) => todo!(), 44 | crate::Request::CreateSnapshot { root_doc } => { 45 | let (snapshot_id, first_symbols) = 46 | create_snapshot(effects, from.clone(), root_doc).await; 47 | Response::CreateSnapshot { 48 | snapshot_id, 49 | first_symbols, 50 | } 51 | } 52 | crate::Request::SnapshotSymbols { snapshot_id } => { 53 | if let Some((_, encoder)) = effects.snapshots_mut().get_mut(&snapshot_id) { 54 | Response::SnapshotSymbols(encoder.next_n_symbols(100)) 55 | } else { 56 | Response::Error("no such snapshot".to_string()) 57 | } 58 | } 59 | crate::Request::Listen(snapshot_id) => { 60 | let sub = effects 61 | .snapshots_mut() 62 | .get(&snapshot_id) 63 | .map(|(s, _)| Subscription::new(&from, s)); 64 | let remote_snapshots = effects 65 | .snapshots() 66 | .get(&snapshot_id) 67 | .map(|(s, _)| { 
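// Gather the snapshots that upstream peers created when this snapshot was
// assembled, so the listen request can be forwarded to each of them below.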
68 | s.remote_snapshots() 69 | .iter() 70 | .map(|(p, s)| (p.clone(), s.clone())) 71 | .collect::>() 72 | }) 73 | .unwrap_or_default(); 74 | let do_listen = remote_snapshots.into_iter().map(|(remote_peer, remote_snapshot)| { 75 | tracing::trace!(source_remote_peer=%from, target_remote_peer=%remote_peer, %remote_snapshot, "forwarding listen request"); 76 | effects.listen(remote_peer, remote_snapshot) 77 | }); 78 | futures::future::join_all(do_listen).await; 79 | if let Some(sub) = sub { 80 | effects.subscriptions().add(sub); 81 | Response::Listen 82 | } else { 83 | Response::Error(format!("no such snapshot")) 84 | } 85 | } 86 | }; 87 | Some(OutgoingResponse { 88 | target: from, 89 | id: req_id, 90 | response, 91 | }) 92 | } 93 | 94 | async fn fetch_sedimentree( 95 | effects: crate::effects::TaskEffects, 96 | doc_id: DocumentId, 97 | ) -> FetchedSedimentree { 98 | let content_root = StorageKey::sedimentree_root(&doc_id, CommitCategory::Content); 99 | let reachability_root = StorageKey::sedimentree_root(&doc_id, CommitCategory::Index); 100 | 101 | let content = crate::sedimentree::storage::load(effects.clone(), content_root); 102 | let index = crate::sedimentree::storage::load(effects, reachability_root); 103 | let (content, index) = futures::future::join(content, index).await; 104 | match (content, index) { 105 | (None, _) => FetchedSedimentree::NotFound, 106 | (Some(content), index) => FetchedSedimentree::Found(ContentAndIndex { 107 | content: content.minimize().summarize(), 108 | index: index.map(|i| i.minimize().summarize()).unwrap_or_default(), 109 | }), 110 | } 111 | } 112 | 113 | #[tracing::instrument(skip(effects))] 114 | async fn upload_commits( 115 | effects: crate::effects::TaskEffects, 116 | from_peer: PeerId, 117 | doc: DocumentId, 118 | data: Vec, 119 | content: CommitCategory, 120 | ) { 121 | tracing::trace!("handling upload"); 122 | let tasks = data.into_iter().map(|d| { 123 | let mut effects = effects.clone(); 124 | let from_peer = from_peer.clone(); 125 | async move { 126 | let (blob, data) = match d.blob.clone() { 127 | BlobRef::Blob(b) => { 128 | let data = effects.load(StorageKey::blob(b)).await; 129 | let Some(data) = data else { 130 | // TODO: return an error 131 | panic!("no such blob") 132 | }; 133 | (BlobMeta::new(&data), data) 134 | } 135 | BlobRef::Inline(contents) => { 136 | let blob = BlobMeta::new(&contents); 137 | effects 138 | .put(StorageKey::blob(blob.hash()), contents.clone()) 139 | .await; 140 | (blob, contents) 141 | } 142 | }; 143 | effects 144 | .log() 145 | .new_commit(doc.clone(), from_peer, d.clone(), content); 146 | match d.tree_part { 147 | TreePart::Commit { hash, parents } => { 148 | let commit = LooseCommit::new(hash, parents, blob); 149 | sedimentree::storage::write_loose_commit( 150 | effects.clone(), 151 | StorageKey::sedimentree_root(&doc, content), 152 | &commit, 153 | ) 154 | .await; 155 | } 156 | TreePart::Stratum { 157 | start, 158 | end, 159 | checkpoints, 160 | } => { 161 | let bundle = CommitBundle::builder() 162 | .start(start) 163 | .end(end) 164 | .checkpoints(checkpoints) 165 | .bundled_commits(data) 166 | .build(); 167 | sedimentree::storage::write_bundle( 168 | effects.clone(), 169 | StorageKey::sedimentree_root(&doc, content), 170 | bundle, 171 | ) 172 | .await; 173 | } 174 | } 175 | } 176 | }); 177 | futures::future::join_all(tasks).await; 178 | } 179 | 180 | async fn create_snapshot( 181 | mut effects: crate::effects::TaskEffects, 182 | requestor: PeerId, 183 | root_doc: DocumentId, 184 | ) -> (snapshots::SnapshotId, Vec) { 
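// How a snapshot is created: load our local snapshot of the docs reachable from
// `root_doc`, ask any other peers we know about to sync the root doc first
// (recording the snapshot IDs they hand back), then wrap the resulting
// (DocumentId, MinimalTreeHash) set in a RIBLT encoder so the requester can
// reconcile it against their own copy symbol by symbol.
//
// Sketch of the requesting side (editor's illustration, not part of this crate;
// assumes the peer keeps asking for `SnapshotSymbols` batches until its decoder
// converges):
//
//     let mut decoder = riblt::Decoder::<DocAndHeadsSymbol>::new();
//     for doc_and_hash in our_own_symbols {        // local (doc, tree hash) pairs
//         decoder.add_symbol(&doc_and_hash);
//     }
//     for coded in first_symbols {                 // from the CreateSnapshot response
//         decoder.add_coded_symbol(&coded.into_coded());
//     }
//     decoder.try_decode().unwrap();
//     // while !decoder.decoded(), request more via Request::SnapshotSymbols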
185 | let mut snapshot = snapshots::Snapshot::load(effects.clone(), root_doc).await; 186 | 187 | let mut peers_to_ask = effects.who_should_i_ask(root_doc.clone()).await; 188 | peers_to_ask.remove(&requestor); 189 | if !peers_to_ask.is_empty() { 190 | tracing::trace!(?peers_to_ask, "asking remote peers"); 191 | let syncing = peers_to_ask.into_iter().map(|p| async { 192 | let result = sync_docs::sync_root_doc(effects.clone(), &snapshot, p.clone()).await; 193 | (p, result) 194 | }); 195 | let forwarded = futures::future::join_all(syncing).await; 196 | snapshot = snapshots::Snapshot::load(effects.clone(), root_doc).await; 197 | for (peer, sync_result) in forwarded { 198 | snapshot.add_remote(peer, sync_result.remote_snapshot); 199 | } 200 | tracing::trace!(we_have_doc=%snapshot.we_have_doc(), "finished requesting missing doc from peers"); 201 | } else { 202 | tracing::trace!("no peers to ask"); 203 | } 204 | 205 | let snapshot_id = snapshot.id(); 206 | let mut encoder = riblt::doc_and_heads::Encoder::new(&snapshot); 207 | let first_symbols = encoder.next_n_symbols(10); 208 | effects 209 | .snapshots_mut() 210 | .insert(snapshot_id, (snapshot, encoder)); 211 | (snapshot_id, first_symbols) 212 | } 213 | -------------------------------------------------------------------------------- /src/riblt.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | 3 | pub(crate) trait Symbol { 4 | fn zero() -> Self; 5 | fn xor(&self, other: &Self) -> Self; 6 | fn hash(&self) -> u64; 7 | } 8 | 9 | #[derive(Clone, Copy)] 10 | pub(crate) enum Direction { 11 | ADD = 1, 12 | REMOVE = -1, 13 | } 14 | 15 | #[derive(Clone, Copy)] 16 | pub(crate) enum Error { 17 | InvalidDegree = 1, 18 | InvalidSize = 2, 19 | DecodeFailed = 3, 20 | } 21 | 22 | impl std::fmt::Debug for Error { 23 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 24 | match self { 25 | Error::InvalidDegree => f.write_str("InvalidDegree"), 26 | Error::InvalidSize => f.write_str("InvalidSize"), 27 | Error::DecodeFailed => f.write_str("DecodeFailed"), 28 | } 29 | } 30 | } 31 | 32 | impl std::fmt::Display for Error { 33 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 34 | std::fmt::Debug::fmt(&self, f) 35 | } 36 | } 37 | 38 | impl std::error::Error for Error {} 39 | 40 | #[derive(Clone, Copy)] 41 | pub(crate) struct SymbolMapping { 42 | source_idx: u64, 43 | coded_idx: u64, 44 | } 45 | 46 | #[derive(Clone, Copy)] 47 | pub(crate) struct RandomMapping { 48 | prng: u64, 49 | last_idx: u64, 50 | } 51 | 52 | #[derive(Clone, Copy)] 53 | pub(crate) struct HashedSymbol { 54 | symbol: T, 55 | hash: u64, 56 | } 57 | 58 | impl HashedSymbol { 59 | pub(crate) fn symbol(&self) -> T { 60 | self.symbol 61 | } 62 | } 63 | 64 | #[derive(Clone, Copy)] 65 | pub(crate) struct CodedSymbol { 66 | pub(crate) symbol: T, 67 | pub(crate) hash: u64, 68 | pub(crate) count: i64, 69 | } 70 | 71 | #[derive(Clone)] 72 | pub(crate) struct Encoder { 73 | symbols: Vec>, 74 | mappings: Vec, 75 | queue: Vec, 76 | next_idx: u64, 77 | } 78 | 79 | #[derive(Clone)] 80 | pub(crate) struct Decoder { 81 | coded: Vec>, 82 | local: Encoder, 83 | remote: Encoder, 84 | window: Encoder, 85 | decodable: Vec, 86 | num_decoded: u64, 87 | } 88 | 89 | impl RandomMapping { 90 | pub(crate) fn next_index(&mut self) -> u64 { 91 | let r = self.prng.wrapping_mul(0xda942042e4dd58b5); 92 | self.prng = r; 93 | self.last_idx = self.last_idx.wrapping_add( 94 | (((self.last_idx as f64) + 1.5) 95 | * (((1i64 << 32) as f64) / 
f64::sqrt((r as f64) + 1.0) - 1.0)) 96 | .ceil() as u64, 97 | ); 98 | return self.last_idx; 99 | } 100 | } 101 | 102 | impl CodedSymbol { 103 | pub(crate) fn apply(&mut self, sym: &HashedSymbol, direction: Direction) { 104 | self.symbol = self.symbol.xor(&sym.symbol); 105 | self.hash ^= sym.hash; 106 | self.count += direction as i64; 107 | } 108 | } 109 | 110 | impl Encoder { 111 | pub(crate) fn new() -> Self { 112 | return Encoder:: { 113 | symbols: Vec::>::new(), 114 | mappings: Vec::::new(), 115 | queue: Vec::::new(), 116 | next_idx: 0, 117 | }; 118 | } 119 | 120 | pub(crate) fn reset(&mut self) { 121 | self.symbols.clear(); 122 | self.mappings.clear(); 123 | self.queue.clear(); 124 | self.next_idx = 0; 125 | } 126 | 127 | pub(crate) fn add_hashed_symbol_with_mapping( 128 | &mut self, 129 | sym: &HashedSymbol, 130 | mapp: &RandomMapping, 131 | ) { 132 | self.symbols.push(*sym); 133 | self.mappings.push(*mapp); 134 | 135 | self.queue.push(SymbolMapping { 136 | source_idx: (self.symbols.len() as u64) - 1, 137 | coded_idx: mapp.last_idx, 138 | }); 139 | 140 | // Fix tail 141 | // 142 | let mut cur: usize = self.queue.len() - 1; 143 | while cur > 0 { 144 | let parent = (cur - 1) / 2; 145 | if cur == parent || self.queue[parent].coded_idx <= self.queue[cur].coded_idx { 146 | break; 147 | } 148 | self.queue.swap(parent, cur); 149 | cur = parent; 150 | } 151 | } 152 | 153 | pub(crate) fn add_hashed_symbol(&mut self, sym: &HashedSymbol) { 154 | self.add_hashed_symbol_with_mapping( 155 | sym, 156 | &RandomMapping { 157 | prng: sym.hash, 158 | last_idx: 0, 159 | }, 160 | ); 161 | } 162 | 163 | pub(crate) fn add_symbol(&mut self, sym: &T) { 164 | self.add_hashed_symbol(&HashedSymbol:: { 165 | symbol: *sym, 166 | hash: sym.hash(), 167 | }); 168 | } 169 | 170 | pub(crate) fn apply_window( 171 | &mut self, 172 | sym: &CodedSymbol, 173 | direction: Direction, 174 | ) -> CodedSymbol { 175 | let mut next_sym = *sym; 176 | 177 | if self.queue.is_empty() { 178 | self.next_idx += 1; 179 | return next_sym; 180 | } 181 | 182 | while self.queue[0].coded_idx == self.next_idx { 183 | next_sym.apply(&self.symbols[self.queue[0].source_idx as usize], direction); 184 | self.queue[0].coded_idx = self.mappings[self.queue[0].source_idx as usize].next_index(); 185 | 186 | // Fix head 187 | // 188 | let mut cur: usize = 0; 189 | loop { 190 | let mut child = cur * 2 + 1; 191 | if child >= self.queue.len() { 192 | break; 193 | } 194 | let right_child = child + 1; 195 | if right_child < self.queue.len() 196 | && self.queue[right_child].coded_idx < self.queue[child].coded_idx 197 | { 198 | child = right_child; 199 | } 200 | if self.queue[cur].coded_idx <= self.queue[child].coded_idx { 201 | break; 202 | } 203 | self.queue.swap(cur, child); 204 | cur = child; 205 | } 206 | } 207 | 208 | self.next_idx += 1; 209 | return next_sym; 210 | } 211 | 212 | pub(crate) fn produce_next_coded_symbol(&mut self) -> CodedSymbol { 213 | return self.apply_window( 214 | &CodedSymbol:: { 215 | symbol: T::zero(), 216 | hash: 0, 217 | count: 0, 218 | }, 219 | Direction::ADD, 220 | ); 221 | } 222 | } 223 | 224 | impl Decoder { 225 | pub(crate) fn new() -> Self { 226 | return Decoder:: { 227 | coded: Vec::>::new(), 228 | local: Encoder::::new(), 229 | remote: Encoder::::new(), 230 | window: Encoder::::new(), 231 | decodable: Vec::::new(), 232 | num_decoded: 0, 233 | }; 234 | } 235 | 236 | pub(crate) fn reset(&mut self) { 237 | self.coded.clear(); 238 | self.local.reset(); 239 | self.remote.reset(); 240 | self.window.reset(); 241 | 
self.decodable.clear(); 242 | self.num_decoded = 0; 243 | } 244 | 245 | pub(crate) fn add_symbol(&mut self, sym: &T) { 246 | self.window.add_hashed_symbol(&HashedSymbol:: { 247 | symbol: *sym, 248 | hash: sym.hash(), 249 | }); 250 | } 251 | 252 | pub(crate) fn add_coded_symbol(&mut self, sym: &CodedSymbol) { 253 | let mut next_sym = self.window.apply_window(sym, Direction::REMOVE); 254 | next_sym = self.remote.apply_window(&next_sym, Direction::REMOVE); 255 | next_sym = self.local.apply_window(&next_sym, Direction::ADD); 256 | 257 | self.coded.push(next_sym); 258 | 259 | if ((next_sym.count == 1 || next_sym.count == -1) 260 | && (next_sym.hash == next_sym.symbol.hash())) 261 | || (next_sym.count == 0 && next_sym.hash == 0) 262 | { 263 | self.decodable.push((self.coded.len() as i64) - 1); 264 | } 265 | } 266 | 267 | fn apply_new_symbol(&mut self, sym: &HashedSymbol, direction: Direction) -> RandomMapping { 268 | let mut mapp = RandomMapping { 269 | prng: sym.hash, 270 | last_idx: 0, 271 | }; 272 | 273 | while mapp.last_idx < (self.coded.len() as u64) { 274 | let n = mapp.last_idx as usize; 275 | self.coded[n].apply(&sym, direction); 276 | 277 | if (self.coded[n].count == -1 || self.coded[n].count == 1) 278 | && self.coded[n].hash == self.coded[n].symbol.hash() 279 | { 280 | self.decodable.push(n as i64); 281 | } 282 | 283 | mapp.next_index(); 284 | } 285 | 286 | return mapp; 287 | } 288 | 289 | pub(crate) fn try_decode(&mut self) -> Result<(), Error> { 290 | let mut didx: usize = 0; 291 | 292 | // self.decodable.len() will increase in apply_new_symbol 293 | // 294 | while didx < self.decodable.len() { 295 | let cidx = self.decodable[didx] as usize; 296 | let sym = self.coded[cidx]; 297 | 298 | match sym.count { 299 | 1 => { 300 | let new_sym = HashedSymbol:: { 301 | symbol: T::zero().xor(&sym.symbol), 302 | hash: sym.hash, 303 | }; 304 | 305 | let mapp = self.apply_new_symbol(&new_sym, Direction::REMOVE); 306 | self.remote.add_hashed_symbol_with_mapping(&new_sym, &mapp); 307 | self.num_decoded += 1; 308 | } 309 | 310 | -1 => { 311 | let new_sym = HashedSymbol:: { 312 | symbol: T::zero().xor(&sym.symbol), 313 | hash: sym.hash, 314 | }; 315 | 316 | let mapp = self.apply_new_symbol(&new_sym, Direction::ADD); 317 | self.local.add_hashed_symbol_with_mapping(&new_sym, &mapp); 318 | self.num_decoded += 1; 319 | } 320 | 321 | 0 => { 322 | self.num_decoded += 1; 323 | } 324 | 325 | _ => { 326 | return Err(Error::InvalidDegree); 327 | } 328 | } 329 | 330 | didx += 1; 331 | } 332 | 333 | self.decodable.clear(); 334 | 335 | return Ok(()); 336 | } 337 | 338 | pub(crate) fn decoded(&self) -> bool { 339 | return self.num_decoded == (self.coded.len() as u64); 340 | } 341 | 342 | pub(crate) fn get_remote_symbols(&self) -> Vec> { 343 | return self.remote.symbols.clone(); 344 | } 345 | 346 | pub(crate) fn get_local_symbols(&self) -> Vec> { 347 | return self.local.symbols.clone(); 348 | } 349 | } 350 | 351 | pub mod doc_and_heads { 352 | use std::hash::{Hash, Hasher}; 353 | 354 | use crate::{leb128, parse, sedimentree::MinimalTreeHash, DocumentId}; 355 | 356 | #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize)] 357 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 358 | pub(crate) struct DocAndHeadsSymbol { 359 | part1: [u8; 16], 360 | part2: [u8; 32], 361 | } 362 | 363 | impl DocAndHeadsSymbol { 364 | pub(crate) fn new(doc: &DocumentId, hash: &MinimalTreeHash) -> Self { 365 | Self { 366 | part1: doc.as_bytes().clone(), 367 | part2: hash.as_bytes().clone(), 368 | } 369 | } 370 | pub(crate) fn 
decode(self) -> (DocumentId, MinimalTreeHash) { 371 | ( 372 | DocumentId::from(self.part1), 373 | MinimalTreeHash::from(self.part2), 374 | ) 375 | } 376 | } 377 | 378 | impl super::Symbol for DocAndHeadsSymbol { 379 | fn zero() -> Self { 380 | Self { 381 | part1: [0; 16], 382 | part2: [0; 32], 383 | } 384 | } 385 | 386 | fn xor(&self, other: &Self) -> Self { 387 | Self { 388 | part1: std::array::from_fn(|i| self.part1[i] ^ other.part1[i]), 389 | part2: std::array::from_fn(|i| self.part2[i] ^ other.part2[i]), 390 | } 391 | } 392 | 393 | fn hash(&self) -> u64 { 394 | let mut hasher = std::collections::hash_map::DefaultHasher::new(); 395 | self.part1.hash(&mut hasher); 396 | self.part2.hash(&mut hasher); 397 | hasher.finish() 398 | } 399 | } 400 | 401 | impl DocAndHeadsSymbol { 402 | pub(crate) fn parse( 403 | input: parse::Input<'_>, 404 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 405 | input.with_context("RibltSymbol", |input| { 406 | let (input, part1) = parse::arr::<16>(input)?; 407 | let (input, part2) = parse::arr::<32>(input)?; 408 | Ok((input, Self { part1, part2 })) 409 | }) 410 | } 411 | 412 | pub(crate) fn encode(&self, out: &mut Vec) { 413 | out.extend(&self.part1); 414 | out.extend(&self.part2); 415 | } 416 | } 417 | 418 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] 419 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 420 | pub(crate) struct CodedDocAndHeadsSymbol { 421 | symbol: DocAndHeadsSymbol, 422 | hash: u64, 423 | count: i64, 424 | } 425 | 426 | impl CodedDocAndHeadsSymbol { 427 | pub(crate) fn parse( 428 | input: parse::Input<'_>, 429 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 430 | let (input, symbol) = DocAndHeadsSymbol::parse(input)?; 431 | let (input, hash_bytes) = parse::arr::<8>(input)?; 432 | let hash = u64::from_be_bytes(hash_bytes); 433 | let (input, count) = leb128::signed::parse(input)?; 434 | Ok(( 435 | input, 436 | Self { 437 | symbol, 438 | hash, 439 | count, 440 | }, 441 | )) 442 | } 443 | 444 | pub(crate) fn encode(&self, out: &mut Vec) { 445 | self.symbol.encode(out); 446 | out.extend(self.hash.to_be_bytes()); 447 | leb128::signed::encode(out, self.count); 448 | } 449 | 450 | pub(crate) fn into_coded(&self) -> super::CodedSymbol { 451 | super::CodedSymbol { 452 | symbol: self.symbol, 453 | count: self.count, 454 | hash: self.hash, 455 | } 456 | } 457 | } 458 | 459 | pub(crate) struct Encoder { 460 | riblt: super::Encoder, 461 | } 462 | 463 | impl Encoder { 464 | pub(crate) fn new(snapshot: &crate::snapshots::Snapshot) -> Self { 465 | let mut enc = super::Encoder::new(); 466 | for (doc, heads) in snapshot.our_docs_2() { 467 | enc.add_symbol(&DocAndHeadsSymbol::new(&doc, &heads)); 468 | } 469 | Encoder { riblt: enc } 470 | } 471 | 472 | pub(crate) fn next_n_symbols(&mut self, n: u64) -> Vec { 473 | let mut result = vec![]; 474 | for _ in 0..n { 475 | let symbol = self.riblt.produce_next_coded_symbol(); 476 | result.push(CodedDocAndHeadsSymbol { 477 | symbol: symbol.symbol, 478 | hash: symbol.hash, 479 | count: symbol.count, 480 | }); 481 | } 482 | result 483 | } 484 | } 485 | } 486 | -------------------------------------------------------------------------------- /src/sedimentree/storage.rs: -------------------------------------------------------------------------------- 1 | use futures::StreamExt; 2 | 3 | use crate::{ 4 | blob::BlobMeta, effects::TaskEffects, parse, Commit, CommitBundle, CommitHash, CommitOrBundle, 5 | StorageKey, 6 | }; 7 | 8 | use super::{Diff, LooseCommit, Sedimentree, Stratum}; 9 | 10 | 
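// Layout used by this module (see `strata_path` and `commit_path` below): for a
// sedimentree rooted at `path`, each stratum is written under
// `path/strata/<start or ROOT>-<end>` and each loose commit under
// `path/loose_commits/<commit hash>`, while the commit and bundle bytes themselves
// live in the blob store under `blobs/<blob hash>`.
//
// Illustrative use (editor's sketch, assuming a `TaskEffects` handle and a
// document ID are in scope):
//
//     let root = StorageKey::sedimentree_root(&doc_id, crate::CommitCategory::Content);
//     if let Some(tree) = load(effects.clone(), root).await {
//         let items = data(effects.clone(), tree);
//         futures::pin_mut!(items);
//         while let Some(item) = items.next().await {
//             // item is a CommitOrBundle with its blob contents already loaded
//         }
//     }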
pub(crate) async fn load( 11 | effects: TaskEffects, 12 | path: StorageKey, 13 | ) -> Option { 14 | let strata = { 15 | let effects = effects.clone(); 16 | let path = path.with_subcomponent("strata"); 17 | async move { 18 | let raw = effects.load_range(path).await; 19 | if raw.is_empty() { 20 | return None; 21 | } 22 | let mut result = Vec::new(); 23 | for (key, bytes) in raw { 24 | match Stratum::parse(parse::Input::new(&bytes)) { 25 | Ok((input, stratum)) => { 26 | if !input.is_empty() { 27 | tracing::warn!(%key, "leftoever input when parsing stratum"); 28 | } 29 | result.push(stratum); 30 | } 31 | Err(e) => { 32 | tracing::warn!(err=?e, %key, "error loading stratum") 33 | } 34 | } 35 | } 36 | Some(result) 37 | } 38 | }; 39 | let commits = async move { 40 | let raw = effects 41 | .load_range(path.with_subcomponent("loose_commits")) 42 | .await; 43 | if raw.is_empty() { 44 | return None; 45 | } 46 | let mut result = Vec::new(); 47 | for (key, bytes) in raw { 48 | tracing::trace!(%key, "loading loose commit"); 49 | match LooseCommit::parse(parse::Input::new(&bytes)) { 50 | Ok((input, commit)) => { 51 | if !input.is_empty() { 52 | tracing::warn!(%key, "leftoever input when parsing loose commit"); 53 | } 54 | result.push(commit); 55 | } 56 | Err(e) => { 57 | tracing::warn!(err=?e, %key, "error loading loose commit"); 58 | } 59 | } 60 | } 61 | Some(result) 62 | }; 63 | let (stratum, commits) = futures::future::join(strata, commits).await; 64 | match (stratum, commits) { 65 | (None, None) => None, 66 | (maybe_stratum, maybe_commits) => Some(Sedimentree::new( 67 | maybe_stratum.unwrap_or_default(), 68 | maybe_commits.unwrap_or_default(), 69 | )), 70 | } 71 | } 72 | 73 | pub(crate) async fn update( 74 | effects: TaskEffects, 75 | path: StorageKey, 76 | original: Option<&Sedimentree>, 77 | new: &Sedimentree, 78 | ) { 79 | let (new_strata, new_commits) = original 80 | .map(|o| { 81 | let Diff { 82 | left_missing_strata: _deleted_strata, 83 | left_missing_commits: _deleted_commits, 84 | right_missing_strata: new_strata, 85 | right_missing_commits: new_commits, 86 | } = o.diff(new); 87 | (new_strata, new_commits) 88 | }) 89 | .unwrap_or_else(|| (new.strata.iter().collect(), new.commits.iter().collect())); 90 | 91 | let save_strata = { 92 | let effects = effects.clone(); 93 | let path = path.clone(); 94 | new_strata.into_iter().map(move |s| { 95 | let effects = effects.clone(); 96 | let path = path.clone(); 97 | async move { 98 | let key = strata_path(&path, s); 99 | let mut data = Vec::new(); 100 | s.encode(&mut data); 101 | effects.put(key, data).await; 102 | } 103 | }) 104 | }; 105 | 106 | let save_commits = new_commits.into_iter().map(move |c| { 107 | let effects = effects.clone(); 108 | let path = path.clone(); 109 | async move { 110 | let key = commit_path(&path, &c.hash()); 111 | let mut data = Vec::new(); 112 | c.encode(&mut data); 113 | effects.put(key, data).await; 114 | } 115 | }); 116 | 117 | futures::future::join( 118 | futures::future::join_all(save_strata), 119 | futures::future::join_all(save_commits), 120 | ) 121 | .await; 122 | } 123 | 124 | pub(crate) fn data( 125 | effects: TaskEffects, 126 | tree: Sedimentree, 127 | ) -> impl futures::Stream { 128 | let items = tree.into_items().map(|item| { 129 | let effects = effects.clone(); 130 | async move { 131 | match item { 132 | super::CommitOrStratum::Commit(c) => { 133 | let Some(data) = effects.load(StorageKey::blob(c.blob().hash())).await else { 134 | return None; 135 | }; 136 | Some(CommitOrBundle::Commit(Commit::new( 137 | 
c.parents().to_vec(), 138 | data, 139 | c.hash(), 140 | ))) 141 | } 142 | super::CommitOrStratum::Stratum(s) => { 143 | let data = effects.load(StorageKey::blob(s.meta().blob().hash())).await; 144 | let Some(data) = data else { return None }; 145 | Some(CommitOrBundle::Bundle( 146 | CommitBundle::builder() 147 | .start(s.start()) 148 | .end(s.end()) 149 | .bundled_commits(data) 150 | .checkpoints(s.checkpoints().to_vec()) 151 | .build(), 152 | )) 153 | } 154 | } 155 | } 156 | }); 157 | futures::stream::FuturesUnordered::from_iter(items).filter_map(|f| futures::future::ready(f)) 158 | } 159 | 160 | pub(crate) async fn write_loose_commit( 161 | effects: TaskEffects, 162 | path: StorageKey, 163 | commit: &LooseCommit, 164 | ) { 165 | tracing::trace!(commit_has=?commit.hash(), "writing loose commit"); 166 | let key = commit_path(&path, &commit.hash()); 167 | let mut data = Vec::new(); 168 | commit.encode(&mut data); 169 | effects.put(key, data).await; 170 | } 171 | 172 | pub(crate) async fn load_loose_commit( 173 | effects: TaskEffects, 174 | root: StorageKey, 175 | commit_hash: CommitHash, 176 | ) -> Option { 177 | let commit_path = commit_path(&root, &commit_hash); 178 | let Some(data) = effects.load(commit_path).await else { 179 | return None; 180 | }; 181 | let (_, result) = LooseCommit::parse(parse::Input::new(&data)).unwrap(); 182 | Some(result) 183 | } 184 | 185 | pub(crate) async fn write_bundle( 186 | effects: TaskEffects, 187 | path: StorageKey, 188 | bundle: CommitBundle, 189 | ) { 190 | let blob = BlobMeta::new(bundle.bundled_commits()); 191 | effects 192 | .put( 193 | StorageKey::blob(blob.hash()), 194 | bundle.bundled_commits().to_vec(), 195 | ) 196 | .await; 197 | let stratum = Stratum::new( 198 | bundle.start(), 199 | bundle.end(), 200 | bundle.checkpoints().to_vec(), 201 | blob, 202 | ); 203 | let key = strata_path(&path, &stratum); 204 | let mut stratum_bytes = Vec::new(); 205 | stratum.encode(&mut stratum_bytes); 206 | effects.put(key, stratum_bytes).await; 207 | } 208 | 209 | fn strata_path(prefix: &StorageKey, s: &Stratum) -> StorageKey { 210 | let stratum_name = format!( 211 | "{}-{}", 212 | s.start() 213 | .map(|s| s.to_string()) 214 | .unwrap_or_else(|| "ROOT".to_string()), 215 | s.end() 216 | ); 217 | prefix 218 | .with_subcomponent("strata") 219 | .with_subcomponent(stratum_name) 220 | } 221 | 222 | fn commit_path(prefix: &StorageKey, c: &CommitHash) -> StorageKey { 223 | prefix 224 | .with_subcomponent("loose_commits") 225 | .with_subcomponent(c.to_string()) 226 | } 227 | -------------------------------------------------------------------------------- /src/snapshots.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::{HashMap, HashSet}, 3 | hash::Hash, 4 | }; 5 | 6 | use crate::{ 7 | effects::TaskEffects, hex, parse, reachability, sedimentree::MinimalTreeHash, CommitCategory, 8 | DocumentId, PeerId, StorageKey, 9 | }; 10 | 11 | #[derive(Copy, Clone, PartialEq, Eq, serde::Serialize, Hash)] 12 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 13 | pub struct SnapshotId([u8; 16]); 14 | 15 | impl std::str::FromStr for SnapshotId { 16 | type Err = error::BadSnapshotId; 17 | 18 | fn from_str(s: &str) -> Result { 19 | let bytes = hex::decode(s).map_err(error::BadSnapshotId::InvalidHex)?; 20 | if bytes.len() == 16 { 21 | let mut id = [0; 16]; 22 | id.copy_from_slice(&bytes); 23 | Ok(Self(id)) 24 | } else { 25 | Err(error::BadSnapshotId::InvalidLength) 26 | } 27 | } 28 | } 29 | 30 | impl std::fmt::Display for 
SnapshotId { 31 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 32 | hex::encode(&self.0).fmt(f) 33 | } 34 | } 35 | 36 | impl std::fmt::Debug for SnapshotId { 37 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 38 | std::fmt::Display::fmt(self, f) 39 | } 40 | } 41 | 42 | impl SnapshotId { 43 | pub(crate) fn parse( 44 | input: parse::Input<'_>, 45 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 46 | let (input, id) = parse::arr::<16>(input)?; 47 | Ok((input, Self(id))) 48 | } 49 | 50 | pub(crate) fn as_bytes(&self) -> &[u8; 16] { 51 | &self.0 52 | } 53 | 54 | pub(crate) fn encode(&self, buf: &mut Vec) { 55 | buf.extend_from_slice(&self.0); 56 | } 57 | 58 | pub(crate) fn random(rng: &mut R) -> Self { 59 | let mut id = [0; 16]; 60 | rng.fill_bytes(&mut id); 61 | Self(id) 62 | } 63 | } 64 | 65 | pub(crate) struct Snapshot { 66 | root_doc: DocumentId, 67 | id: SnapshotId, 68 | we_have_doc: bool, 69 | local: HashMap, 70 | local_log_offset: usize, 71 | remote_snapshots: HashMap, 72 | } 73 | 74 | impl Snapshot { 75 | pub(crate) async fn load( 76 | mut effects: TaskEffects, 77 | root_doc: DocumentId, 78 | ) -> Self { 79 | let id = SnapshotId::random(&mut *effects.rng()); 80 | let we_have_doc = !effects 81 | .load_range(StorageKey::sedimentree_root( 82 | &root_doc, 83 | CommitCategory::Content, 84 | )) 85 | .await 86 | .is_empty(); 87 | let docs_to_hashes = if we_have_doc { 88 | reachability::load_reachable_docs(effects.clone(), root_doc).await 89 | } else { 90 | HashMap::new() 91 | }; 92 | Self { 93 | id, 94 | root_doc, 95 | we_have_doc, 96 | local: docs_to_hashes, 97 | local_log_offset: effects.log().offset(), 98 | remote_snapshots: HashMap::new(), 99 | } 100 | } 101 | 102 | pub(crate) fn id(&self) -> SnapshotId { 103 | self.id 104 | } 105 | 106 | pub(crate) fn root_doc(&self) -> &DocumentId { 107 | &self.root_doc 108 | } 109 | 110 | pub(crate) fn local_log_offset(&self) -> usize { 111 | self.local_log_offset 112 | } 113 | 114 | pub(crate) fn we_have_doc(&self) -> bool { 115 | self.we_have_doc 116 | } 117 | 118 | pub(crate) fn our_docs(&self) -> HashSet { 119 | self.local.keys().cloned().collect() 120 | } 121 | 122 | pub(crate) fn our_docs_2(&self) -> &HashMap { 123 | &self.local 124 | } 125 | 126 | pub(crate) fn add_remote(&mut self, peer: PeerId, snapshot: SnapshotId) { 127 | self.remote_snapshots.insert(peer, snapshot); 128 | } 129 | 130 | pub(crate) fn remote_snapshots(&self) -> &HashMap { 131 | &self.remote_snapshots 132 | } 133 | } 134 | 135 | mod error { 136 | use crate::hex; 137 | 138 | pub enum BadSnapshotId { 139 | InvalidHex(hex::FromHexError), 140 | InvalidLength, 141 | } 142 | 143 | impl std::fmt::Display for BadSnapshotId { 144 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 145 | match self { 146 | Self::InvalidHex(e) => write!(f, "invalid hex: {:?}", e), 147 | Self::InvalidLength => write!(f, "invalid length"), 148 | } 149 | } 150 | } 151 | 152 | impl std::fmt::Debug for BadSnapshotId { 153 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 154 | std::fmt::Display::fmt(self, f) 155 | } 156 | } 157 | 158 | impl std::error::Error for BadSnapshotId {} 159 | } 160 | -------------------------------------------------------------------------------- /src/storage_key.rs: -------------------------------------------------------------------------------- 1 | // Storage layout: 2 | // 3 | // /commits// 4 | 5 | use crate::{CommitCategory, DocumentId}; 6 | 7 | #[derive(Clone, Eq, PartialEq, Hash, 
PartialOrd, Ord)] 8 | pub struct StorageKey { 9 | namespace: Namespace, 10 | remaining: Vec, 11 | } 12 | 13 | #[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] 14 | pub enum Namespace { 15 | Dags, 16 | Sedimentrees, 17 | Blobs, 18 | Other(String), 19 | } 20 | 21 | impl AsRef for Namespace { 22 | fn as_ref(&self) -> &str { 23 | match self { 24 | Namespace::Dags => "dags", 25 | Namespace::Sedimentrees => "sedimentrees", 26 | Namespace::Blobs => "blobs", 27 | Namespace::Other(name) => name, 28 | } 29 | } 30 | } 31 | 32 | impl std::fmt::Display for Namespace { 33 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 34 | match self { 35 | Namespace::Dags => write!(f, "dags"), 36 | Namespace::Blobs => write!(f, "blobs"), 37 | Namespace::Sedimentrees => write!(f, "sedimentrees"), 38 | Namespace::Other(name) => write!(f, "{}", name), 39 | } 40 | } 41 | } 42 | 43 | impl std::fmt::Display for StorageKey { 44 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 45 | write!(f, "{}", self.namespace)?; 46 | for part in &self.remaining { 47 | write!(f, "/{}", part)?; 48 | } 49 | Ok(()) 50 | } 51 | } 52 | 53 | impl std::fmt::Debug for StorageKey { 54 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 55 | write!(f, "{}", self) 56 | } 57 | } 58 | 59 | impl StorageKey { 60 | pub fn blob(blob: crate::BlobHash) -> Self { 61 | StorageKey { 62 | namespace: Namespace::Blobs, 63 | remaining: vec![blob.to_string()], 64 | } 65 | } 66 | 67 | pub fn sedimentree_root(doc: &DocumentId, category: CommitCategory) -> StorageKey { 68 | StorageKey { 69 | namespace: Namespace::Sedimentrees, 70 | remaining: vec![doc.to_string(), category.to_string()], 71 | } 72 | } 73 | 74 | pub fn is_prefix_of(&self, other: &StorageKey) -> bool { 75 | self.namespace == other.namespace 76 | && self 77 | .remaining 78 | .iter() 79 | .zip(other.remaining.iter()) 80 | .all(|(a, b)| a == b) 81 | } 82 | 83 | pub fn namespace(&self) -> &str { 84 | self.namespace.as_ref() 85 | } 86 | 87 | pub fn remaining(&self) -> &[String] { 88 | &self.remaining 89 | } 90 | 91 | pub fn components(&self) -> impl Iterator { 92 | std::iter::once(self.namespace.as_ref()).chain(self.remaining.iter().map(|s| s.as_str())) 93 | } 94 | 95 | pub fn name(&self) -> Option<&str> { 96 | self.remaining.last().map(|s| s.as_str()) 97 | } 98 | 99 | pub fn with_subcomponent>(&self, subcomponent: S) -> StorageKey { 100 | let mut remaining = self.remaining.clone(); 101 | remaining.push(subcomponent.as_ref().to_string()); 102 | StorageKey { 103 | namespace: self.namespace.clone(), 104 | remaining, 105 | } 106 | } 107 | } 108 | 109 | impl TryFrom> for StorageKey { 110 | type Error = Error; 111 | 112 | fn try_from(value: Vec) -> Result { 113 | if value.iter().any(|part| part.contains('/')) { 114 | return Err(Error::ContainedSlashes); 115 | } 116 | if value.is_empty() { 117 | return Err(Error::Empty); 118 | } 119 | let namespace = match value[0].as_str() { 120 | "dags" => Namespace::Dags, 121 | other => Namespace::Other(other.to_string()), 122 | }; 123 | Ok(StorageKey { 124 | namespace, 125 | remaining: value[2..].to_vec(), 126 | }) 127 | } 128 | } 129 | 130 | pub enum Error { 131 | Empty, 132 | ContainedSlashes, 133 | } 134 | 135 | impl std::fmt::Display for Error { 136 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 137 | match self { 138 | Self::Empty => write!( 139 | f, 140 | "attempted to create a storage key from an empty list of strings" 141 | ), 142 | Self::ContainedSlashes => write!(f, "storage key 
components cannot contain slashes"), 143 | } 144 | } 145 | } 146 | 147 | impl std::fmt::Debug for Error { 148 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 149 | std::fmt::Display::fmt(self, f) 150 | } 151 | } 152 | 153 | impl std::error::Error for Error {} 154 | -------------------------------------------------------------------------------- /src/stories.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicU64, Ordering}; 2 | 3 | use futures::{future::LocalBoxFuture, FutureExt}; 4 | 5 | use crate::{ 6 | blob::BlobMeta, 7 | effects::TaskEffects, 8 | messages::{BlobRef, TreePart, UploadItem}, 9 | reachability::{ReachabilityIndex, ReachabilityIndexEntry}, 10 | sedimentree::{self, LooseCommit}, 11 | snapshots, sync_docs, AddLink, BundleSpec, Commit, CommitBundle, CommitCategory, 12 | CommitOrBundle, DocumentId, PeerId, StorageKey, Story, SyncDocResult, 13 | }; 14 | 15 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 16 | pub struct StoryId(u64); 17 | 18 | static LAST_STORY_ID: AtomicU64 = AtomicU64::new(0); 19 | 20 | impl StoryId { 21 | pub(crate) fn new() -> Self { 22 | Self(LAST_STORY_ID.fetch_add(1, Ordering::Relaxed)) 23 | } 24 | 25 | pub fn serialize(&self) -> String { 26 | self.0.to_string() 27 | } 28 | } 29 | 30 | impl std::str::FromStr for StoryId { 31 | type Err = std::num::ParseIntError; 32 | 33 | fn from_str(s: &str) -> Result { 34 | Ok(Self(s.parse()?)) 35 | } 36 | } 37 | 38 | #[derive(Debug)] 39 | pub enum StoryResult { 40 | SyncDoc(SyncDocResult), 41 | AddCommits(Vec), 42 | AddLink, 43 | AddBundle, 44 | CreateDoc(DocumentId), 45 | LoadDoc(Option>), 46 | Listen, 47 | } 48 | 49 | pub(super) fn handle_story<'a, R: rand::Rng + 'static>( 50 | mut effects: crate::effects::TaskEffects, 51 | story: super::Story, 52 | ) -> LocalBoxFuture<'static, StoryResult> { 53 | match story { 54 | Story::SyncDoc { 55 | root_id, 56 | peer: with_peer, 57 | } => { 58 | async move { StoryResult::SyncDoc(sync_linked_docs(effects, root_id, with_peer).await) } 59 | .boxed_local() 60 | } 61 | Story::AddCommits { 62 | doc_id: dag_id, 63 | commits, 64 | } => async move { 65 | let result = add_commits(effects, dag_id, commits).await; 66 | StoryResult::AddCommits(result) 67 | } 68 | .boxed_local(), 69 | Story::LoadDoc { doc_id } => async move { 70 | StoryResult::LoadDoc( 71 | load_doc_commits(&mut effects, &doc_id, CommitCategory::Content).await, 72 | ) 73 | } 74 | .boxed_local(), 75 | Story::CreateDoc => { 76 | async move { StoryResult::CreateDoc(create_doc(effects).await) }.boxed_local() 77 | } 78 | Story::AddLink(add) => async move { 79 | add_link(effects, add).await; 80 | tracing::trace!("add link complete"); 81 | StoryResult::AddLink 82 | } 83 | .boxed_local(), 84 | Story::AddBundle { doc_id, bundle } => async move { 85 | add_bundle(effects, doc_id, bundle).await; 86 | StoryResult::AddBundle 87 | } 88 | .boxed_local(), 89 | Story::Listen { 90 | peer_id, 91 | snapshot_id, 92 | } => async move { 93 | if let Err(e) = effects.listen(peer_id, snapshot_id).await { 94 | tracing::error!(err=?e, "error listening to peer"); 95 | } 96 | StoryResult::Listen 97 | } 98 | .boxed_local(), 99 | } 100 | } 101 | 102 | pub(crate) async fn sync_linked_docs( 103 | effects: crate::effects::TaskEffects, 104 | root: DocumentId, 105 | remote_peer: PeerId, 106 | ) -> SyncDocResult { 107 | let our_snapshot = snapshots::Snapshot::load(effects.clone(), root.clone()).await; 108 | sync_docs::sync_root_doc(effects, &our_snapshot, remote_peer).await 
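// The result reports whether the root doc was found anywhere, plus the snapshot
// IDs created on each side (`local_snapshot` / `remote_snapshot`), which a caller
// can hold on to when it later issues a `Listen`.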
109 | } 110 | 111 | #[tracing::instrument(skip(effects, commits))] 112 | async fn add_commits( 113 | effects: crate::effects::TaskEffects, 114 | doc_id: DocumentId, 115 | commits: Vec, 116 | ) -> Vec { 117 | // TODO: This function should return an error if we are missing a chain from 118 | // each commit back to the last bundle boundary. 119 | 120 | let has_commit_boundary = commits 121 | .iter() 122 | .any(|c| sedimentree::Level::from(c.hash()) <= sedimentree::TOP_STRATA_LEVEL); 123 | 124 | let save_tasks = commits.into_iter().map(|commit| { 125 | let mut effects = effects.clone(); 126 | async move { 127 | tracing::debug!(commit = %commit.hash(), "adding commit"); 128 | 129 | let tree_path = StorageKey::sedimentree_root(&doc_id, CommitCategory::Content); 130 | 131 | let blob = BlobMeta::new(commit.contents()); 132 | let key = StorageKey::blob(blob.hash()); 133 | let have_commit = sedimentree::storage::load_loose_commit( 134 | effects.clone(), 135 | tree_path.clone(), 136 | commit.hash(), 137 | ) 138 | .await 139 | .is_some(); 140 | if have_commit { 141 | tracing::debug!(hash=%commit.hash(), "commit already exists in storage"); 142 | return; 143 | } 144 | effects.put(key, commit.contents().to_vec()).await; 145 | 146 | let loose = 147 | sedimentree::LooseCommit::new(commit.hash(), commit.parents().to_vec(), blob); 148 | sedimentree::storage::write_loose_commit(effects.clone(), tree_path, &loose).await; 149 | let item = UploadItem { 150 | blob: BlobRef::Inline(commit.contents().to_vec()), 151 | tree_part: TreePart::Commit { 152 | hash: commit.hash(), 153 | parents: commit.parents().to_vec(), 154 | }, 155 | }; 156 | let our_peer_id = effects.our_peer_id().clone(); 157 | effects 158 | .log() 159 | .new_commit(doc_id, our_peer_id, item.clone(), CommitCategory::Content); 160 | } 161 | }); 162 | let _ = futures::future::join_all(save_tasks).await; 163 | 164 | // If any of the commits might be a bundle boundary, load the sedimentree 165 | // and see if any new bundles are needed 166 | if has_commit_boundary { 167 | tracing::trace!("has commit boundary"); 168 | let tree = sedimentree::storage::load( 169 | effects.clone(), 170 | StorageKey::sedimentree_root(&doc_id, CommitCategory::Content), 171 | ) 172 | .await; 173 | if let Some(tree) = tree { 174 | tree.missing_bundles(doc_id) 175 | } else { 176 | Vec::new() 177 | } 178 | } else { 179 | Vec::new() 180 | } 181 | } 182 | 183 | #[tracing::instrument(skip(effects, link), fields(from=%link.from, to=%link.to))] 184 | async fn add_link(effects: crate::effects::TaskEffects, link: AddLink) { 185 | tracing::trace!("adding link"); 186 | let index_tree = sedimentree::storage::load( 187 | effects.clone(), 188 | StorageKey::sedimentree_root(&link.from, CommitCategory::Index), 189 | ) 190 | .await 191 | .unwrap_or_default(); 192 | let heads = index_tree.heads(); 193 | let index = ReachabilityIndex::from_tree(effects.clone(), index_tree).await; 194 | if index.has_link(&link.to) { 195 | tracing::trace!("link already exists"); 196 | return; 197 | } 198 | 199 | let new_entry = ReachabilityIndexEntry::new(link.to); 200 | 201 | let encoded = new_entry.encode(); 202 | let blob = BlobMeta::new(&encoded); 203 | effects 204 | .put(StorageKey::blob(blob.hash()), encoded.clone()) 205 | .await; 206 | 207 | let commit = LooseCommit::new(new_entry.hash(), heads, blob); 208 | sedimentree::storage::write_loose_commit( 209 | effects.clone(), 210 | StorageKey::sedimentree_root(&link.from, CommitCategory::Index), 211 | &commit, 212 | ) 213 | .await; 214 | } 215 | 216 | 
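// A link from doc A to doc B is itself stored as data: `add_link` appends a
// LooseCommit to A's Index sedimentree whose blob is the encoded
// ReachabilityIndexEntry for B and whose parents are the index tree's current
// heads. `ReachabilityIndex::from_tree` recovers the full link set by
// concatenating and re-parsing those entries, and `load_reachable_docs` walks
// that index when a snapshot is created.
//
// Illustrative sketch (editor's addition; assumes `AddLink { from, to }` can be
// constructed by the caller):
//
//     // Link a child document under a root so that syncing the root also syncs the child.
//     let story = Story::AddLink(AddLink { from: root_doc, to: child_doc });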
#[tracing::instrument(skip(effects))] 217 | async fn create_doc(effects: crate::effects::TaskEffects) -> DocumentId { 218 | let doc_id = DocumentId::random(&mut *effects.rng()); 219 | tracing::trace!(?doc_id, "creating doc"); 220 | doc_id 221 | } 222 | 223 | #[tracing::instrument(skip(effects, content))] 224 | async fn load_doc_commits( 225 | effects: &mut crate::effects::TaskEffects, 226 | doc_id: &DocumentId, 227 | content: CommitCategory, 228 | ) -> Option> { 229 | let Some(tree) = sedimentree::storage::load( 230 | effects.clone(), 231 | StorageKey::sedimentree_root(doc_id, content), 232 | ) 233 | .await 234 | .map(|t| t.minimize()) else { 235 | return None; 236 | }; 237 | let bundles = tree.strata().map(|s| { 238 | let effects = effects.clone(); 239 | async move { 240 | let blob = effects 241 | .load(StorageKey::blob(s.meta().blob().hash())) 242 | .await 243 | .unwrap(); 244 | let bundle = CommitBundle::builder() 245 | .start(s.start()) 246 | .end(s.end()) 247 | .checkpoints(s.checkpoints().to_vec()) 248 | .bundled_commits(blob) 249 | .build(); 250 | CommitOrBundle::Bundle(bundle) 251 | } 252 | }); 253 | let commits = tree.loose_commits().map(|c| { 254 | let effects = effects.clone(); 255 | async move { 256 | let blob = effects 257 | .load(StorageKey::blob(c.blob().hash())) 258 | .await 259 | .unwrap(); 260 | let commit = Commit::new(c.parents().to_vec(), blob, c.hash()); 261 | CommitOrBundle::Commit(commit) 262 | } 263 | }); 264 | let (mut bundles, commits) = futures::future::join( 265 | futures::future::join_all(bundles), 266 | futures::future::join_all(commits), 267 | ) 268 | .await; 269 | bundles.extend(commits); 270 | Some(bundles) 271 | } 272 | 273 | async fn add_bundle( 274 | effects: TaskEffects, 275 | doc_id: DocumentId, 276 | bundle: CommitBundle, 277 | ) { 278 | sedimentree::storage::write_bundle( 279 | effects, 280 | StorageKey::sedimentree_root(&doc_id, CommitCategory::Content), 281 | bundle, 282 | ) 283 | .await; 284 | } 285 | -------------------------------------------------------------------------------- /src/subscriptions.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | 3 | use crate::{ 4 | messages::{Notification, UploadItem}, 5 | parse, 6 | snapshots::Snapshot, 7 | CommitCategory, DocumentId, PeerId, 8 | }; 9 | 10 | #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, Hash)] 11 | #[cfg_attr(test, derive(arbitrary::Arbitrary))] 12 | pub struct SubscriptionId([u8; 16]); 13 | 14 | impl SubscriptionId { 15 | pub(crate) fn random(rng: &mut R) -> Self { 16 | let mut id = [0; 16]; 17 | rng.fill_bytes(&mut id); 18 | Self(id) 19 | } 20 | 21 | pub(crate) fn parse( 22 | input: parse::Input<'_>, 23 | ) -> Result<(parse::Input<'_>, Self), parse::ParseError> { 24 | let (input, id) = parse::arr::<16>(input)?; 25 | Ok((input, Self(id))) 26 | } 27 | 28 | pub(crate) fn as_bytes(&self) -> &[u8; 16] { 29 | &self.0 30 | } 31 | } 32 | 33 | pub(crate) struct DocEvent { 34 | doc: DocumentId, 35 | from_peer: PeerId, 36 | contents: UploadItem, 37 | category: CommitCategory, 38 | } 39 | 40 | pub(crate) struct Log(Vec); 41 | 42 | impl Log { 43 | pub(crate) fn new() -> Self { 44 | Self(Vec::new()) 45 | } 46 | 47 | pub(crate) fn offset(&self) -> usize { 48 | self.0.len() 49 | } 50 | 51 | pub(crate) fn remote_notification(&mut self, notification: &Notification) { 52 | self.0.push(DocEvent { 53 | doc: notification.doc, 54 | from_peer: notification.from_peer.clone(), 55 | contents: 
notification.data.clone(), 56 | category: CommitCategory::Content, 57 | }) 58 | } 59 | 60 | pub(crate) fn new_commit( 61 | &mut self, 62 | doc: DocumentId, 63 | from_peer: PeerId, 64 | item: UploadItem, 65 | category: CommitCategory, 66 | ) { 67 | self.0.push(DocEvent { 68 | doc, 69 | from_peer, 70 | contents: item, 71 | category, 72 | }) 73 | } 74 | } 75 | 76 | #[derive(Debug)] 77 | pub(crate) struct Subscription { 78 | offset: usize, 79 | peer: PeerId, 80 | docs: HashSet, 81 | } 82 | 83 | impl Subscription { 84 | pub(crate) fn new(for_peer: &PeerId, starting_from: &Snapshot) -> Self { 85 | let mut docs = starting_from.our_docs().clone(); 86 | docs.insert(starting_from.root_doc().clone()); 87 | tracing::trace!(?for_peer, start_docs=?docs, "Creating subscription"); 88 | Subscription { 89 | offset: starting_from.local_log_offset(), 90 | peer: for_peer.clone(), 91 | docs, 92 | } 93 | } 94 | } 95 | 96 | pub(crate) struct Subscriptions { 97 | our_peer_id: PeerId, 98 | subscriptions: Vec, 99 | } 100 | 101 | impl Subscriptions { 102 | pub(crate) fn new(our_peer_id: PeerId) -> Self { 103 | Self { 104 | our_peer_id, 105 | subscriptions: Vec::new(), 106 | } 107 | } 108 | 109 | pub(crate) fn add(&mut self, sub: Subscription) { 110 | self.subscriptions.push(sub) 111 | } 112 | 113 | pub(crate) fn new_events(&mut self, log: &Log) -> HashMap> { 114 | let mut result = HashMap::new(); 115 | for sub in &mut self.subscriptions { 116 | let events: &mut Vec = result.entry(sub.peer.clone()).or_default(); 117 | for event in &log.0[sub.offset..] { 118 | if sub.docs.contains(&event.doc) 119 | && event.from_peer != sub.peer 120 | && event.category == CommitCategory::Content 121 | { 122 | events.push(Notification { 123 | from_peer: self.our_peer_id.clone(), 124 | doc: event.doc.clone(), 125 | data: event.contents.clone(), 126 | }) 127 | } 128 | } 129 | sub.offset = log.offset(); 130 | } 131 | result 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/sync_docs.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | use futures::{pin_mut, StreamExt}; 4 | 5 | use crate::{ 6 | blob::BlobMeta, 7 | effects::TaskEffects, 8 | messages::{BlobRef, ContentAndIndex, FetchedSedimentree, TreePart, UploadItem}, 9 | parse, 10 | riblt::{self, doc_and_heads::DocAndHeadsSymbol}, 11 | sedimentree::{self, LooseCommit, RemoteDiff, Stratum}, 12 | snapshots, CommitCategory, DocumentId, PeerId, StorageKey, SyncDocResult, 13 | }; 14 | 15 | #[tracing::instrument(skip(effects, our_snapshot))] 16 | pub(crate) async fn sync_root_doc( 17 | effects: crate::effects::TaskEffects, 18 | our_snapshot: &snapshots::Snapshot, 19 | remote_peer: PeerId, 20 | ) -> SyncDocResult { 21 | tracing::trace!("beginning root doc sync"); 22 | 23 | let OutOfSync { 24 | their_differing, 25 | our_differing, 26 | their_snapshot, 27 | } = find_out_of_sync_docs(effects.clone(), &our_snapshot, remote_peer.clone()).await; 28 | 29 | tracing::trace!(?our_differing, ?their_differing, we_have_doc=%our_snapshot.we_have_doc(), "syncing differing docs"); 30 | 31 | let found = our_snapshot.we_have_doc() || !their_differing.is_empty(); 32 | 33 | let syncing = our_differing 34 | .union(&their_differing) 35 | .into_iter() 36 | .cloned() 37 | .map(|d| sync_doc(effects.clone(), remote_peer.clone(), d)); 38 | futures::future::join_all(syncing).await; 39 | 40 | SyncDocResult { 41 | found, 42 | local_snapshot: our_snapshot.id(), 43 | remote_snapshot: 
their_snapshot, 44 | differing_docs: our_differing.union(&their_differing).cloned().collect(), 45 | } 46 | } 47 | 48 | struct OutOfSync { 49 | their_differing: HashSet, 50 | our_differing: HashSet, 51 | their_snapshot: crate::SnapshotId, 52 | } 53 | 54 | async fn find_out_of_sync_docs( 55 | effects: TaskEffects, 56 | local_snapshot: &crate::snapshots::Snapshot, 57 | peer: PeerId, 58 | ) -> OutOfSync { 59 | // Make a remote snapshot and stream symbols from it until we have decoded 60 | let (snapshot_id, first_symbols) = effects 61 | .create_snapshot(peer.clone(), local_snapshot.root_doc().clone()) 62 | .await 63 | .unwrap(); 64 | let mut local_riblt = riblt::Decoder::::new(); 65 | for (doc_id, heads) in local_snapshot.our_docs_2().iter() { 66 | local_riblt.add_symbol(&DocAndHeadsSymbol::new(doc_id, heads)); 67 | } 68 | let symbols = futures::stream::iter(first_symbols).chain( 69 | futures::stream::unfold(effects, move |effects| { 70 | let effects = effects.clone(); 71 | let snapshot_id = snapshot_id.clone(); 72 | let peer = peer.clone(); 73 | async move { 74 | let symbols = effects 75 | .fetch_snapshot_symbols(peer, snapshot_id) 76 | .await 77 | .unwrap(); 78 | Some((futures::stream::iter(symbols), effects)) 79 | } 80 | }) 81 | .flatten(), 82 | ); 83 | pin_mut!(symbols); 84 | while let Some(symbol) = symbols.next().await { 85 | local_riblt.add_coded_symbol(&symbol.into_coded()); 86 | local_riblt.try_decode().unwrap(); 87 | if local_riblt.decoded() { 88 | break; 89 | } 90 | } 91 | let remote_differing_docs = local_riblt 92 | .get_remote_symbols() 93 | .into_iter() 94 | .map(|s| s.symbol().decode().0); 95 | let local_differing_docs = local_riblt 96 | .get_local_symbols() 97 | .into_iter() 98 | .map(|s| s.symbol().decode().0); 99 | OutOfSync { 100 | their_differing: remote_differing_docs.collect(), 101 | our_differing: local_differing_docs.collect(), 102 | their_snapshot: snapshot_id, 103 | } 104 | } 105 | 106 | async fn sync_doc( 107 | effects: crate::effects::TaskEffects, 108 | peer: PeerId, 109 | doc: DocumentId, 110 | ) { 111 | tracing::trace!(peer=%peer, %doc, "syncing doc"); 112 | let content_root = StorageKey::sedimentree_root(&doc, CommitCategory::Content); 113 | let our_content = sedimentree::storage::load(effects.clone(), content_root.clone()).await; 114 | 115 | let index_root = StorageKey::sedimentree_root(&doc, CommitCategory::Index); 116 | let our_index = sedimentree::storage::load(effects.clone(), index_root.clone()).await; 117 | 118 | let (their_index, their_content) = 119 | match effects.fetch_sedimentrees(peer.clone(), doc).await.unwrap() { 120 | FetchedSedimentree::Found(ContentAndIndex { content, index }) => { 121 | (Some(index), Some(content)) 122 | } 123 | FetchedSedimentree::NotFound => (None, None), 124 | }; 125 | 126 | let sync_content = sync_sedimentree( 127 | effects.clone(), 128 | peer.clone(), 129 | doc.clone(), 130 | CommitCategory::Content, 131 | our_content, 132 | their_content, 133 | ); 134 | let sync_index = sync_sedimentree( 135 | effects.clone(), 136 | peer.clone(), 137 | doc.clone(), 138 | CommitCategory::Index, 139 | our_index, 140 | their_index, 141 | ); 142 | futures::future::join(sync_content, sync_index).await; 143 | } 144 | 145 | async fn sync_sedimentree( 146 | effects: TaskEffects, 147 | with_peer: PeerId, 148 | doc: DocumentId, 149 | category: CommitCategory, 150 | local: Option, 151 | remote: Option, 152 | ) { 153 | let RemoteDiff { 154 | remote_strata, 155 | remote_commits, 156 | local_strata, 157 | local_commits, 158 | } = match (&local, &remote) 
{ 159 | (Some(local), Some(remote)) => local.diff_remote(&remote), 160 | (None, Some(remote)) => remote.into_remote_diff(), 161 | (Some(local), None) => local.into_local_diff(), 162 | (None, None) => return, 163 | }; 164 | 165 | let root = StorageKey::sedimentree_root(&doc, category); 166 | 167 | let download = async { 168 | let effects = effects.clone(); 169 | let peer = with_peer.clone(); 170 | let download_strata = remote_strata.into_iter().map(|s| { 171 | let effects = effects.clone(); 172 | let peer = peer.clone(); 173 | async move { 174 | let blob = fetch_blob(effects.clone(), peer.clone(), *s.blob()) 175 | .await 176 | .unwrap(); 177 | let (_, stratum) = Stratum::parse(parse::Input::new(&blob)).unwrap(); 178 | stratum 179 | } 180 | }); 181 | let download_commits = remote_commits.into_iter().map(|c| { 182 | let effects = effects.clone(); 183 | let peer = peer.clone(); 184 | async move { 185 | fetch_blob(effects.clone(), peer.clone(), *c.blob()) 186 | .await 187 | .unwrap(); 188 | let commit = LooseCommit::new(c.hash(), c.parents().to_vec(), *c.blob()); 189 | commit 190 | } 191 | }); 192 | let (downloaded_strata, downloaded_commits) = futures::future::join( 193 | futures::future::join_all(download_strata), 194 | futures::future::join_all(download_commits), 195 | ) 196 | .await; 197 | let mut updated = local.clone().unwrap_or_default(); 198 | for stratum in downloaded_strata { 199 | updated.add_stratum(stratum); 200 | } 201 | for commit in downloaded_commits { 202 | updated.add_commit(commit); 203 | } 204 | sedimentree::storage::update(effects, root, local.as_ref(), &updated.minimize()).await; 205 | }; 206 | 207 | let upload = async { 208 | let effects = effects.clone(); 209 | let peer = with_peer.clone(); 210 | enum StratumOrCommit<'a> { 211 | Commit(sedimentree::LooseCommit), 212 | Stratum(&'a sedimentree::Stratum), 213 | } 214 | let to_upload = local_commits 215 | .into_iter() 216 | .cloned() 217 | .map(|c| StratumOrCommit::Commit(c)) 218 | .chain( 219 | local_strata 220 | .into_iter() 221 | .map(|s| StratumOrCommit::Stratum(s)), 222 | ) 223 | .map(|item| async { 224 | match item { 225 | StratumOrCommit::Commit(c) => { 226 | let blob = effects 227 | .load(StorageKey::blob(c.blob().hash())) 228 | .await 229 | .unwrap(); 230 | UploadItem { 231 | blob: BlobRef::Inline(blob), 232 | tree_part: TreePart::Commit { 233 | hash: c.hash(), 234 | parents: c.parents().to_vec(), 235 | }, 236 | } 237 | } 238 | StratumOrCommit::Stratum(s) => { 239 | let blob = effects 240 | .load(StorageKey::blob(s.meta().blob().hash())) 241 | .await 242 | .unwrap(); 243 | UploadItem { 244 | blob: BlobRef::Inline(blob), 245 | tree_part: TreePart::Stratum { 246 | start: s.start(), 247 | end: s.end(), 248 | checkpoints: s.checkpoints().to_vec(), 249 | }, 250 | } 251 | } 252 | } 253 | }); 254 | let to_upload = futures::future::join_all(to_upload).await; 255 | effects 256 | .upload_commits(peer, doc, to_upload, category) 257 | .await 258 | .unwrap(); 259 | }; 260 | 261 | futures::future::join(download, upload).await; 262 | } 263 | 264 | async fn fetch_blob( 265 | effects: TaskEffects, 266 | from_peer: PeerId, 267 | blob: BlobMeta, 268 | ) -> Result, crate::effects::RpcError> { 269 | let data = effects 270 | .fetch_blob_part(from_peer, blob.hash(), 0, blob.size_bytes()) 271 | .await?; 272 | effects 273 | .put(StorageKey::blob(blob.hash()), data.clone()) 274 | .await; 275 | Ok(data) 276 | } 277 | --------------------------------------------------------------------------------
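For orientation, the following is a minimal, self-contained sketch of the reconciliation step that `find_out_of_sync_docs` and `sync_root_doc` implement in src/sync_docs.rs above. It models only the outcome, using plain `HashSet`s in place of the streaming RIBLT decoder, so it is illustrative rather than a description of the wire protocol: the real exchange never ships full document lists, it streams coded symbols from the remote snapshot until the local decoder resolves the symmetric difference, and each symbol encodes a `(DocumentId, heads)` pair, so a document held by both peers with diverging heads lands in both differing sets. The names `DocId`, `OutOfSyncModel`, and `model_out_of_sync` are hypothetical and not part of the crate.

    // Illustrative sketch only. It reproduces the *outcome* of the RIBLT-based
    // reconciliation using plain sets: documents only the remote holds end up in
    // `their_differing`, documents only we hold end up in `our_differing`, and
    // `sync_root_doc` then syncs the union of the two. The real code never
    // materialises the remote set; it decodes it from streamed coded symbols.
    use std::collections::HashSet;

    // Hypothetical stand-in for the crate's `DocumentId`.
    type DocId = u64;

    struct OutOfSyncModel {
        their_differing: HashSet<DocId>, // doc states only the remote has
        our_differing: HashSet<DocId>,   // doc states only we have
    }

    fn model_out_of_sync(ours: &HashSet<DocId>, theirs: &HashSet<DocId>) -> OutOfSyncModel {
        OutOfSyncModel {
            their_differing: theirs.difference(ours).copied().collect(),
            our_differing: ours.difference(theirs).copied().collect(),
        }
    }

    fn main() {
        let ours: HashSet<DocId> = [1, 2, 3].into_iter().collect();
        let theirs: HashSet<DocId> = [2, 3, 4].into_iter().collect();
        let diff = model_out_of_sync(&ours, &theirs);

        // As in `sync_root_doc`, every document in either differing set gets synced.
        let to_sync: HashSet<DocId> = diff
            .our_differing
            .union(&diff.their_differing)
            .copied()
            .collect();
        assert_eq!(to_sync, [1u64, 4].into_iter().collect::<HashSet<_>>());
        println!("docs to sync: {to_sync:?}");
    }

The streaming design keeps the number of symbols exchanged proportional to the size of the difference rather than to the total number of documents, which is why `find_out_of_sync_docs` keeps fetching snapshot symbols only until `local_riblt.decoded()` returns true.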