├── package.json
├── test
    └── test-basics.js
├── index.js
└── README.md


/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "car-transaction",
 3 |   "version": "1.0.1",
 4 |   "description": "",
 5 |   "type": "module",
 6 |   "main": "index.js",
 7 |   "scripts": {
 8 |     "test": "echo \"Error: no test specified\" && exit 1"
 9 |   },
10 |   "keywords": [],
11 |   "author": "",
12 |   "license": "ISC",
13 |   "devDependencies": {
14 |     "standard": "^17.0.0"
15 |   },
16 |   "dependencies": {
17 |     "@ipld/car": "^5.1.0",
18 |     "@ipld/dag-cbor": "^9.0.0",
19 |     "multiformats": "^11.0.1"
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/test/test-basics.js:
--------------------------------------------------------------------------------
 1 | import Transaction from '../index.js' 
 2 | 
 3 | const test = async () => {
 4 |   // start a basic transaction
 5 |   const t = Transaction.create()
 6 | 
 7 |   const subCID = await t.write({ some: 'data' })
 8 |   await t.write({ sub: subCID })
 9 |   const buffer = await t.commit()
10 | 
11 |   // read a transaction
12 |   const { root, get } = await Transaction.load(buffer)
13 |   // root is a cid
14 |   const { sub } = await get(root)
15 |   const { some } = await get(sub)
16 |   // get retrieves the block and decodes it
17 |   if (some !== 'data') throw new Error('data error')
18 | }
19 | 
20 | test()
21 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
 1 | import { CarReader, CarBufferWriter as CBW } from '@ipld/car'
 2 | import { bytes as byteslib } from 'multiformats'
 3 | import { decode as digest } from 'multiformats/hashes/digest'
 4 | import * as dagcbor from '@ipld/dag-cbor'
 5 | import * as raw from 'multiformats/codecs/raw'
 6 | import { sha256 } from 'multiformats/hashes/sha2'
 7 | import * as Block from 'multiformats/block'
 8 | 
 9 | const { isBinary } = byteslib
10 | 
11 | const encode = value => {
12 |   if (isBinary(value)) {
13 |     return Block.encode({ value, hasher: sha256, codec: raw })
14 |   }
15 |   return Block.encode({ value, hasher: sha256, codec: dagcbor })
16 | }
17 | 
18 | const decode = ({ bytes, cid }) => {
19 |   let hasher, codec
20 |   const { code } = cid
21 |   const hashcode = cid.multihash.code || digest(cid.multihash).code
22 | 
23 |   if (hashcode === 0x12) {
24 |     hasher = sha256
25 |   } else {
26 |     throw new Error('Unsupported hash function: ' + hashcode)
27 |   }
28 | 
29 |   if (code === 0x71) {
30 |     codec = dagcbor
31 |   } else if (code === 0x55) {
32 |     codec = raw
33 |   } else {
34 |     throw new Error('Unsupported codec: ' + code)
35 |   }
36 | 
37 |   return Block.decode({ bytes, cid, codec, hasher })
38 | }
39 | 
40 | class Transaction {
41 |   constructor () {
42 |     this.blocks = []
43 |   }
44 | 
45 |   static create () {
46 |     return new this()
47 |   }
48 | 
49 |   static async load (buffer) {
50 |     const reader = await CarReader.fromBytes(buffer)
51 |     const [ root ] = await reader.getRoots()
52 |     const get = cid => reader.get(cid).then(block => decode(block)).then(({ value }) => value )
53 |     return { root, get }
54 |   }
55 | 
56 |   async write (obj) {
57 |     const block = await encode(obj)
58 |     this.last = block
59 |     this.blocks.push(block)
60 |     return block.cid
61 |   }
62 | 
63 |   async commit () {
64 |     const cid = this.last.cid
65 |     let size = 0
66 |     let headerSize = CBW.headerLength({ roots: [cid] })
67 |     size += headerSize
68 |     for (const block of this.blocks) {
69 |       size += CBW.blockLength(block)
70 |     }
71 |     const buffer = new Uint8Array(size)
72 |     const writer = await CBW.createWriter(buffer, { headerSize })
73 |     writer.addRoot(cid)
74 |     for (const block of this.blocks) {
75 |       writer.write(block)
76 |     }
77 |     await writer.close()
78 |     return writer.bytes
79 |   }
80 | }
81 | 
82 | export default Transaction
83 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # car-transaction
  2 | 
  3 | IPLD transaction as CAR buffer that can be used as a database transaction.
  4 | 
  5 | ## Usage
  6 | 
  7 | ```js
  8 | import Transaction from 'car-transaction' 
  9 | 
 10 | const run = async () => {
 11 |   // start a basic transaction
 12 |   const t = Transaction.create()
 13 | 
 14 |   const subCID = await t.write({ some: 'data' })
 15 |   await t.write({ sub: subCID })
 16 |   const buffer = await t.commit()
 17 | 
 18 |   // read a transaction
 19 |   // the last write is always the root
 20 |   const { root, get } = await Transaction.load(buffer)
 21 |   // root is a cid
 22 |   const { sub } = await get(root)
 23 |   const { some } = await get(sub)
 24 |   // get retrieves the block and decodes it
 25 |   if (some !== 'data') throw new Error('data error')
 26 | }
 27 | 
 28 | run()
 29 | ```
 30 | 
 31 | # Guide to IPLD-over-ObjectStores (S3, R2, etc)
 32 | 
 33 | This is how we build a decentralized web of all web3 application
 34 | data on top of widely available and competitively priced
 35 | cloud object stores.
 36 | 
 37 | IPLD is the data structure layer beneath IPFS. It works in IPFS
 38 | protocols and outside them, on disc, in memory, etc.
 39 | 
 40 | So you can build these little merkle trees with the library above that
 41 | * are encoded in `dag-cbor`, a fairly efficient format,
 42 | * and all the hash addressed web3 and blockchain stuff works and interops,
 43 | * and it's as easy as working with JSON,
 44 |   * with inline binary,
 45 |   * and you have these hash links that allow you to make little trees,
 46 |   * which is how you can get de-duplication and diffing properties of git, cause it's all merkle graphs,
 47 |   * and all the cool graph things you can do with graph databases,
 48 |   * and you can also link to data in git, bittorrent, ETH, Bitcoin, etc.
 49 |   * you can link to IPFS files,
 50 |     * or you can encode those IPFS files into the unixfs block format and include them in the transcations.
 51 | * and those transactions are encoded in a well known format called CAR (kinda like git-pack files for IPFS),
 52 |   * and we just released this open source project that is a [cloud native implementation of IPFS](https://github.com/elastic-ipfs)
 53 |   * so all you need is a way to store CAR files and hand the URL to Elastic IPFS
 54 |     * which is a cloud native thing and pretty much as hard to operate as all the other cloud native things.
 55 |   * but if you don't want to run it yourself, and you really want your data in the public IPFS network
 56 |     just DM me on twitter (@mikeal) and we'll figure something out, because we're already running
 57 |     this and it's not that hard to hook up more data-sources, we just haven't productized it yet,
 58 |     we're just running it to keep ALL the NFT's safe and available (god bless the gifs)
 59 | 
 60 | And merkle trees are very cool, you can do all kinds of diffing and CRDT structures, but I won't
 61 | get into all that yet because, just storing these little trees allow you to build graphs of
 62 | incredible complexity and if we start there we won't see the basic stuff we also get from leveraging
 63 | these ObjectStores to store them.
 64 | 
 65 | Conceptually, you can think of IPLD-over-ObjectStores as being
 66 | * IPLD databases,
 67 |   * that are key/value stores,
 68 |   * with a single index,
 69 |     * with a fairly powerful query language,
 70 |     * that can implement some interesting privacy and access patterns
 71 |       * cause **hashes,**
 72 |   * that could represent an IPLD "network,"
 73 |     * which you can decide to keep open, closed, limited access, whatever.
 74 |   * and they can also represent an IPLD replication set,
 75 |     * that could be a filter
 76 |     * or an index
 77 |     * that can be streaming to another index
 78 |       * which has all the same properties as the database we started with
 79 |         recursively until you stop extending this particular network branch
 80 |         and that's how you know you're working with a graph.
 81 | 
 82 | And each instance of an ObjectStore can be all of these things ***simultaneously***.
 83 | These features aren't mutually exclusive, they're combinatory, as long as you follow
 84 | a few simple patterns.
 85 | 
 86 | All these services have a roughly equivalent interface
 87 | * S3 (AWS, DigitalOcean, pretty much every cloud provider has a compatible interface),
 88 | * R2 (Cloudflare's wonderful new product),
 89 | * Also CouchDB, and PouchDB, cause I got roots,
 90 |   * and while we're at it, the whole [level](https://github.com/level) ecosystem.
 91 | 
 92 | Some of them do A LOT more, but they all have at least these properties:
 93 | * You can store a binary value, even if that isn't the default value type.
 94 | * You can store that binary value by a **string key**.
 95 |   * Which forms a stored index,
 96 |     * that you can perform range queries against,
 97 |   * that, while highly distributed, tends to slow down if you bang on the same keyspace enough.
 98 |   * Which is pretty different than some of the databases we're used to. Most open source databases
 99 |       have a local disc optimization in the file writer that, when you group data together like this, will
100 |       get bulked together.
101 |   * But these big distributed things like S3, the keyspace is distributed across a bunch of machines,
102 |       so when they do a similar optimization on the read side, adding locality to the keyspace, you lose the
103 |       distribution of your writes across the bucket.
104 |   * But we're working with hashes!
105 |   * We've got perfectly balanced distribution across a keyspace for days!
106 |   * So if we key things by hash prefixes we'll always evenly distribute across the keyspace.
107 |   * As the keyspace grows, the distribution of writes is even across whatever load balancing any of these vendors are doing
108 |     which means that **the writes just get faster the bigger it gets.** It's beautiful to watch.
109 |   * Something I started telling people to do a while back was move from using
110 |     * `/$hash` to using
111 |     * `/$hash/data` for their keys in S3.
112 |   * Because S3's performance docs said that performance was only limited "per prefix" which gave an indication into
113 |     how they were optimizing some of this by looking at the '/' as a key prefix.
114 |   * I pointed about 4K concurrent Lambdas at open data encoding for the Filecoin launch, each one writing thousands of individual blocks
115 |     this way, so i put a few Billion
116 |     keys into an S3 bucket as fast as it could take them, and when i went over a billion keys it got noticably faster. I had
117 |     to ask AWS to raise the cap on our Lambdas (this is way easier now, and is per cloud formation stack)
118 | 
119 | So we can really blow these things up with IPLD data.
120 | 
121 | This means that, anything you build on this is something pretty close to the fastest cloud database offering available
122 | * at whatever price these gigantic companies have driven the price down to in a rapidly commoditizing market.
123 |   * that is now competing with blockchains like Filecoin
124 |     * which you can also store those CAR files in natively.
125 | * Cloudflare even has free egress w/ R2, and it's cheaper than S3.
126 | * That's bananas! Free reads!
127 | * I've been at this a while, I wrote PouchDB in 2010, which apparently you could now configure to write to R2 and get free reads from a CDN!
128 | * Anyway, you can also write these little graphs into it and they are even more powerful.
129 | 
130 | And, if you write a cloud function that derives a **single string key** from the transaction,
131 | you've got a query language in all of these vendors for range queries across the index of that
132 | key 
133 | * that can return queries with or without the values included,
134 | * with pagination, 
135 | * and a buncha client libraries that already exist,
136 | * and HTTP caching infra already built for them and integrated into these vendors.
137 | 
138 | And if you stick to the rule of only deriving the key **from the data itself** you never bake outside
139 | context into the key that can't be replicated along with the data to another location when you need to
140 | solve a new problem.
141 | 
142 | And of course, you can configure cloud functions to fire on every write,
143 | * so you can do filtered replication to other buckets and datasources
144 | * which can create new transactions using the same library above
145 | * each of which will inherit all the same replication abilities of this database,
146 | * so there's **no longer any differentiation between the capabilties of primary stores and indexes**.
147 | * because we're not building flat databases anymore,
148 |   * this is much more useful, and way cooler,
149 |   * we're just writing branches of gigantic graphs to little (or huge!) transaction tables,
150 |   * so don't think of it as a KV store, the key AND THE VALUE are in or derived from the value data,
151 |   * and that produces a single index over *those transactions*,
152 |   * and if we want to write multiple indexes for the same data we have two choices,
153 |     * store the data again in two buckets (potentially discarding blocks in the transaction we don't need in the value)
154 |     * or take the hash of the transaction (CAR CID) and put that at the end of the key with a zero byte value.
155 |       * which i don't think S3 knows how to charge you for other than per-request because they don't charge you for keyspace 🤩
156 |     * which gives you the choice between paying for a copy of the data or eating the performance hit of a
157 |       secondary read when you query those indexes.
158 |  * and since all this data can easily be put on IPFS,
159 |    * all the graphs you write can be read as a single graph by anyone who traverses it
160 |    * and their graphs can link to yours
161 |    * and vice versa
162 |    * and that's why we've been calling it Web3 this whole time 
163 | * and it's not a blockchain
164 |   * until you put a consensus layer over it.
165 |   * so if you need this to be a blockchain thing,
166 |   * or you really need this NOT to be a blockchain thing,
167 |   * it's whatever one you want it to be.
168 | 
169 | Because what was looking like a flat database a moment ago is actually 
170 | * an even larger graph database that can travel like a graph itself, 
171 | * mutate into other states,
172 | * filter out data,
173 | * combine data from different sources,
174 | 
175 | And it can do this across
176 | * different ObjectStores
177 |   * in the same data center,
178 |   * the same cloud provider,
179 |   * different providers,
180 |   * p2p networks,
181 |   * local nodes
182 |     * disc
183 |     * memory
184 |     * browsers
185 |   * that's entirely up to you.
186 | 
187 | And that string key we're using for the index.
188 | 
189 | Something I wish more people knew about, is this brilliant library Dominic Tarr wrote
190 | * who also wrote ssb
191 | * and is one of the nicest most genuine human beings on the planet
192 | * who lives on a boat
193 | * and with an excited smile on his face will describe himself as a "cyber hobo."
194 | 
195 | He wrote [this library that implements the typewise/bytewise encoder/decoder in regular
196 | strings](https://github.com/dominictarr/charwise). 
197 | So you can use it for the keys on these ObjectStores and then use the ListObject
198 | interfaces to write queries against these more advanced sorting functions.
199 | 
200 | You might be used to modelling your bucket keys to leverage the "file heirachy" in S3.
201 | These products tend to describe this as a feature of '/' to trigger your familiarity with
202 | file directory heirarchy, which are more widely understood that something like typewise/bytewise.
203 | But under the hood they just implement regular utf8 string sorting, which gives the sorting properties
204 | you'd want in a '/' based directory heirarchy.
205 | 
206 | What you get with `charwise` is **nested sorting** within any element of the keyspace
207 | to an arbitrary depth. First, just read the rules about [which JSON types get ordered where](https://github.com/deanlandolt/bytewise#order-of-supported-structures). Now, notice that sorting within objects and arrays enables the nesting of keys.
208 | 
209 | So you can do something like 
210 | 
211 | ```js
212 | [ null, [ 'a' ] ]
213 | [ 'a' , [ 'a' ] ]
214 | [ 'a' , [ 'b', [ 'and on and on' ] ] ]
215 | ```
216 | 
217 | And since, when you're working with these little graphs, they all have hash addresses, you get to do some
218 | cool things within these nested sorting structures.
219 | * Like if you want to make sure any part of it is evenly distributed, use a hash that describes the item
220 |   potency of the index.
221 | * And if you have some security or privacy context you're enforcing over reading the index, you put
222 |   the hash of something they would need to know into the nested sorting structure, which saves you
223 |   from maintaining a secondary index of permissions
224 |   * cause cryptography is really cool like that.
225 | 
226 | Just remember, 
227 | * you'll want to get *something* with a hash in it into something before `/` when you
228 | derive the key, that'll force distribution across all the key's you're writing.
229 | * And if you want to **optimize** for the reader, you'll need to bake that into the entire prefix ahead of each
230 | `/` you stick in the key. 
231 | 
232 | Working within those two constraints you can optimize each index for your particular use case.
233 | 
234 | And there's more, but I'm tired of typing, and I think that the next set of things I write about this will
235 | include examples of cool things others are doing with what is already here. Happy hacking!
236 | 
237 | Much love to all the old Node.js database hackers who used to bounce around Oakland and Berlin and
238 | a hundred JavaScript conferences in the 2010's. I miss ya'll and I forget how much cool stuff we figured out
239 | that we haven't shared with everyone who didn't read that code.
240 | 
241 | 
242 | 
243 | 
244 |   
245 | 
246 | 
247 | 
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 
254 | 


--------------------------------------------------------------------------------