├── package.json ├── test └── test-basics.js ├── index.js └── README.md /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "car-transaction", 3 | "version": "1.0.1", 4 | "description": "", 5 | "type": "module", 6 | "main": "index.js", 7 | "scripts": { 8 | "test": "echo \"Error: no test specified\" && exit 1" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "ISC", 13 | "devDependencies": { 14 | "standard": "^17.0.0" 15 | }, 16 | "dependencies": { 17 | "@ipld/car": "^5.1.0", 18 | "@ipld/dag-cbor": "^9.0.0", 19 | "multiformats": "^11.0.1" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/test-basics.js: -------------------------------------------------------------------------------- 1 | import Transaction from '../index.js' 2 | 3 | const test = async () => { 4 | // start a basic transaction 5 | const t = Transaction.create() 6 | 7 | const subCID = await t.write({ some: 'data' }) 8 | await t.write({ sub: subCID }) 9 | const buffer = await t.commit() 10 | 11 | // read a transaction 12 | const { root, get } = await Transaction.load(buffer) 13 | // root is a cid 14 | const { sub } = await get(root) 15 | const { some } = await get(sub) 16 | // get retrieves the block and decodes it 17 | if (some !== 'data') throw new Error('data error') 18 | } 19 | 20 | test() 21 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | import { CarReader, CarBufferWriter as CBW } from '@ipld/car' 2 | import { bytes as byteslib } from 'multiformats' 3 | import { decode as digest } from 'multiformats/hashes/digest' 4 | import * as dagcbor from '@ipld/dag-cbor' 5 | import * as raw from 'multiformats/codecs/raw' 6 | import { sha256 } from 'multiformats/hashes/sha2' 7 | import * as Block from 'multiformats/block' 8 | 9 | const { isBinary } = byteslib 10 | 11 | const encode = value => { 12 | if (isBinary(value)) { 13 | return Block.encode({ value, hasher: sha256, codec: raw }) 14 | } 15 | return Block.encode({ value, hasher: sha256, codec: dagcbor }) 16 | } 17 | 18 | const decode = ({ bytes, cid }) => { 19 | let hasher, codec 20 | const { code } = cid 21 | const hashcode = cid.multihash.code || digest(cid.multihash).code 22 | 23 | if (hashcode === 0x12) { 24 | hasher = sha256 25 | } else { 26 | throw new Error('Unsupported hash function: ' + hashcode) 27 | } 28 | 29 | if (code === 0x71) { 30 | codec = dagcbor 31 | } else if (code === 0x55) { 32 | codec = raw 33 | } else { 34 | throw new Error('Unsupported codec: ' + code) 35 | } 36 | 37 | return Block.decode({ bytes, cid, codec, hasher }) 38 | } 39 | 40 | class Transaction { 41 | constructor () { 42 | this.blocks = [] 43 | } 44 | 45 | static create () { 46 | return new this() 47 | } 48 | 49 | static async load (buffer) { 50 | const reader = await CarReader.fromBytes(buffer) 51 | const [ root ] = await reader.getRoots() 52 | const get = cid => reader.get(cid).then(block => decode(block)).then(({ value }) => value ) 53 | return { root, get } 54 | } 55 | 56 | async write (obj) { 57 | const block = await encode(obj) 58 | this.last = block 59 | this.blocks.push(block) 60 | return block.cid 61 | } 62 | 63 | async commit () { 64 | const cid = this.last.cid 65 | let size = 0 66 | let headerSize = CBW.headerLength({ roots: [cid] }) 67 | size += headerSize 68 | for (const block of this.blocks) { 69 | size += CBW.blockLength(block) 70 | } 71 | const buffer = new Uint8Array(size) 72 | const writer = await CBW.createWriter(buffer, { headerSize }) 73 | writer.addRoot(cid) 74 | for (const block of this.blocks) { 75 | writer.write(block) 76 | } 77 | await writer.close() 78 | return writer.bytes 79 | } 80 | } 81 | 82 | export default Transaction 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # car-transaction 2 | 3 | IPLD transaction as CAR buffer that can be used as a database transaction. 4 | 5 | ## Usage 6 | 7 | ```js 8 | import Transaction from 'car-transaction' 9 | 10 | const run = async () => { 11 | // start a basic transaction 12 | const t = Transaction.create() 13 | 14 | const subCID = await t.write({ some: 'data' }) 15 | await t.write({ sub: subCID }) 16 | const buffer = await t.commit() 17 | 18 | // read a transaction 19 | // the last write is always the root 20 | const { root, get } = await Transaction.load(buffer) 21 | // root is a cid 22 | const { sub } = await get(root) 23 | const { some } = await get(sub) 24 | // get retrieves the block and decodes it 25 | if (some !== 'data') throw new Error('data error') 26 | } 27 | 28 | run() 29 | ``` 30 | 31 | # Guide to IPLD-over-ObjectStores (S3, R2, etc) 32 | 33 | This is how we build a decentralized web of all web3 application 34 | data on top of widely available and competitively priced 35 | cloud object stores. 36 | 37 | IPLD is the data structure layer beneath IPFS. It works in IPFS 38 | protocols and outside them, on disc, in memory, etc. 39 | 40 | So you can build these little merkle trees with the library above that 41 | * are encoded in `dag-cbor`, a fairly efficient format, 42 | * and all the hash addressed web3 and blockchain stuff works and interops, 43 | * and it's as easy as working with JSON, 44 | * with inline binary, 45 | * and you have these hash links that allow you to make little trees, 46 | * which is how you can get de-duplication and diffing properties of git, cause it's all merkle graphs, 47 | * and all the cool graph things you can do with graph databases, 48 | * and you can also link to data in git, bittorrent, ETH, Bitcoin, etc. 49 | * you can link to IPFS files, 50 | * or you can encode those IPFS files into the unixfs block format and include them in the transcations. 51 | * and those transactions are encoded in a well known format called CAR (kinda like git-pack files for IPFS), 52 | * and we just released this open source project that is a [cloud native implementation of IPFS](https://github.com/elastic-ipfs) 53 | * so all you need is a way to store CAR files and hand the URL to Elastic IPFS 54 | * which is a cloud native thing and pretty much as hard to operate as all the other cloud native things. 55 | * but if you don't want to run it yourself, and you really want your data in the public IPFS network 56 | just DM me on twitter (@mikeal) and we'll figure something out, because we're already running 57 | this and it's not that hard to hook up more data-sources, we just haven't productized it yet, 58 | we're just running it to keep ALL the NFT's safe and available (god bless the gifs) 59 | 60 | And merkle trees are very cool, you can do all kinds of diffing and CRDT structures, but I won't 61 | get into all that yet because, just storing these little trees allow you to build graphs of 62 | incredible complexity and if we start there we won't see the basic stuff we also get from leveraging 63 | these ObjectStores to store them. 64 | 65 | Conceptually, you can think of IPLD-over-ObjectStores as being 66 | * IPLD databases, 67 | * that are key/value stores, 68 | * with a single index, 69 | * with a fairly powerful query language, 70 | * that can implement some interesting privacy and access patterns 71 | * cause **hashes,** 72 | * that could represent an IPLD "network," 73 | * which you can decide to keep open, closed, limited access, whatever. 74 | * and they can also represent an IPLD replication set, 75 | * that could be a filter 76 | * or an index 77 | * that can be streaming to another index 78 | * which has all the same properties as the database we started with 79 | recursively until you stop extending this particular network branch 80 | and that's how you know you're working with a graph. 81 | 82 | And each instance of an ObjectStore can be all of these things ***simultaneously***. 83 | These features aren't mutually exclusive, they're combinatory, as long as you follow 84 | a few simple patterns. 85 | 86 | All these services have a roughly equivalent interface 87 | * S3 (AWS, DigitalOcean, pretty much every cloud provider has a compatible interface), 88 | * R2 (Cloudflare's wonderful new product), 89 | * Also CouchDB, and PouchDB, cause I got roots, 90 | * and while we're at it, the whole [level](https://github.com/level) ecosystem. 91 | 92 | Some of them do A LOT more, but they all have at least these properties: 93 | * You can store a binary value, even if that isn't the default value type. 94 | * You can store that binary value by a **string key**. 95 | * Which forms a stored index, 96 | * that you can perform range queries against, 97 | * that, while highly distributed, tends to slow down if you bang on the same keyspace enough. 98 | * Which is pretty different than some of the databases we're used to. Most open source databases 99 | have a local disc optimization in the file writer that, when you group data together like this, will 100 | get bulked together. 101 | * But these big distributed things like S3, the keyspace is distributed across a bunch of machines, 102 | so when they do a similar optimization on the read side, adding locality to the keyspace, you lose the 103 | distribution of your writes across the bucket. 104 | * But we're working with hashes! 105 | * We've got perfectly balanced distribution across a keyspace for days! 106 | * So if we key things by hash prefixes we'll always evenly distribute across the keyspace. 107 | * As the keyspace grows, the distribution of writes is even across whatever load balancing any of these vendors are doing 108 | which means that **the writes just get faster the bigger it gets.** It's beautiful to watch. 109 | * Something I started telling people to do a while back was move from using 110 | * `/$hash` to using 111 | * `/$hash/data` for their keys in S3. 112 | * Because S3's performance docs said that performance was only limited "per prefix" which gave an indication into 113 | how they were optimizing some of this by looking at the '/' as a key prefix. 114 | * I pointed about 4K concurrent Lambdas at open data encoding for the Filecoin launch, each one writing thousands of individual blocks 115 | this way, so i put a few Billion 116 | keys into an S3 bucket as fast as it could take them, and when i went over a billion keys it got noticably faster. I had 117 | to ask AWS to raise the cap on our Lambdas (this is way easier now, and is per cloud formation stack) 118 | 119 | So we can really blow these things up with IPLD data. 120 | 121 | This means that, anything you build on this is something pretty close to the fastest cloud database offering available 122 | * at whatever price these gigantic companies have driven the price down to in a rapidly commoditizing market. 123 | * that is now competing with blockchains like Filecoin 124 | * which you can also store those CAR files in natively. 125 | * Cloudflare even has free egress w/ R2, and it's cheaper than S3. 126 | * That's bananas! Free reads! 127 | * I've been at this a while, I wrote PouchDB in 2010, which apparently you could now configure to write to R2 and get free reads from a CDN! 128 | * Anyway, you can also write these little graphs into it and they are even more powerful. 129 | 130 | And, if you write a cloud function that derives a **single string key** from the transaction, 131 | you've got a query language in all of these vendors for range queries across the index of that 132 | key 133 | * that can return queries with or without the values included, 134 | * with pagination, 135 | * and a buncha client libraries that already exist, 136 | * and HTTP caching infra already built for them and integrated into these vendors. 137 | 138 | And if you stick to the rule of only deriving the key **from the data itself** you never bake outside 139 | context into the key that can't be replicated along with the data to another location when you need to 140 | solve a new problem. 141 | 142 | And of course, you can configure cloud functions to fire on every write, 143 | * so you can do filtered replication to other buckets and datasources 144 | * which can create new transactions using the same library above 145 | * each of which will inherit all the same replication abilities of this database, 146 | * so there's **no longer any differentiation between the capabilties of primary stores and indexes**. 147 | * because we're not building flat databases anymore, 148 | * this is much more useful, and way cooler, 149 | * we're just writing branches of gigantic graphs to little (or huge!) transaction tables, 150 | * so don't think of it as a KV store, the key AND THE VALUE are in or derived from the value data, 151 | * and that produces a single index over *those transactions*, 152 | * and if we want to write multiple indexes for the same data we have two choices, 153 | * store the data again in two buckets (potentially discarding blocks in the transaction we don't need in the value) 154 | * or take the hash of the transaction (CAR CID) and put that at the end of the key with a zero byte value. 155 | * which i don't think S3 knows how to charge you for other than per-request because they don't charge you for keyspace 🤩 156 | * which gives you the choice between paying for a copy of the data or eating the performance hit of a 157 | secondary read when you query those indexes. 158 | * and since all this data can easily be put on IPFS, 159 | * all the graphs you write can be read as a single graph by anyone who traverses it 160 | * and their graphs can link to yours 161 | * and vice versa 162 | * and that's why we've been calling it Web3 this whole time  163 | * and it's not a blockchain 164 | * until you put a consensus layer over it. 165 | * so if you need this to be a blockchain thing, 166 | * or you really need this NOT to be a blockchain thing, 167 | * it's whatever one you want it to be. 168 | 169 | Because what was looking like a flat database a moment ago is actually 170 | * an even larger graph database that can travel like a graph itself, 171 | * mutate into other states, 172 | * filter out data, 173 | * combine data from different sources, 174 | 175 | And it can do this across 176 | * different ObjectStores 177 | * in the same data center, 178 | * the same cloud provider, 179 | * different providers, 180 | * p2p networks, 181 | * local nodes 182 | * disc 183 | * memory 184 | * browsers 185 | * that's entirely up to you. 186 | 187 | And that string key we're using for the index. 188 | 189 | Something I wish more people knew about, is this brilliant library Dominic Tarr wrote 190 | * who also wrote ssb 191 | * and is one of the nicest most genuine human beings on the planet 192 | * who lives on a boat 193 | * and with an excited smile on his face will describe himself as a "cyber hobo." 194 | 195 | He wrote [this library that implements the typewise/bytewise encoder/decoder in regular 196 | strings](https://github.com/dominictarr/charwise). 197 | So you can use it for the keys on these ObjectStores and then use the ListObject 198 | interfaces to write queries against these more advanced sorting functions. 199 | 200 | You might be used to modelling your bucket keys to leverage the "file heirachy" in S3. 201 | These products tend to describe this as a feature of '/' to trigger your familiarity with 202 | file directory heirarchy, which are more widely understood that something like typewise/bytewise. 203 | But under the hood they just implement regular utf8 string sorting, which gives the sorting properties 204 | you'd want in a '/' based directory heirarchy. 205 | 206 | What you get with `charwise` is **nested sorting** within any element of the keyspace 207 | to an arbitrary depth. First, just read the rules about [which JSON types get ordered where](https://github.com/deanlandolt/bytewise#order-of-supported-structures). Now, notice that sorting within objects and arrays enables the nesting of keys. 208 | 209 | So you can do something like 210 | 211 | ```js 212 | [ null, [ 'a' ] ] 213 | [ 'a' , [ 'a' ] ] 214 | [ 'a' , [ 'b', [ 'and on and on' ] ] ] 215 | ``` 216 | 217 | And since, when you're working with these little graphs, they all have hash addresses, you get to do some 218 | cool things within these nested sorting structures. 219 | * Like if you want to make sure any part of it is evenly distributed, use a hash that describes the item 220 | potency of the index. 221 | * And if you have some security or privacy context you're enforcing over reading the index, you put 222 | the hash of something they would need to know into the nested sorting structure, which saves you 223 | from maintaining a secondary index of permissions 224 | * cause cryptography is really cool like that. 225 | 226 | Just remember, 227 | * you'll want to get *something* with a hash in it into something before `/` when you 228 | derive the key, that'll force distribution across all the key's you're writing. 229 | * And if you want to **optimize** for the reader, you'll need to bake that into the entire prefix ahead of each 230 | `/` you stick in the key. 231 | 232 | Working within those two constraints you can optimize each index for your particular use case. 233 | 234 | And there's more, but I'm tired of typing, and I think that the next set of things I write about this will 235 | include examples of cool things others are doing with what is already here. Happy hacking! 236 | 237 | Much love to all the old Node.js database hackers who used to bounce around Oakland and Berlin and 238 | a hundred JavaScript conferences in the 2010's. I miss ya'll and I forget how much cool stuff we figured out 239 | that we haven't shared with everyone who didn't read that code. 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | --------------------------------------------------------------------------------