├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── TODO.md ├── doc ├── hitchhiker.adoc └── redis-rc-design.md ├── env └── profiling │ └── hitchhiker │ └── bench.clj ├── project.clj ├── resources └── redis_test_data.clj ├── src └── hitchhiker │ ├── konserve.cljc │ ├── outboard.clj │ ├── redis.clj │ └── tree │ ├── core.cljc │ └── messaging.cljc ├── template_benchmark.xlsx ├── test └── hitchhiker │ ├── konserve_test.cljc │ ├── ops.cljc │ ├── redis_test.clj │ └── tree │ ├── core_test.clj │ └── messaging_test.clj └── test_node.js /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | *.swp 13 | *.swo 14 | *.swn 15 | *~ 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: clojure 2 | dist: trusty 3 | services: 4 | - redis-server 5 | before_install: 6 | - sudo apt-get install nodejs 7 | install: 8 | - lein cljsbuild once 9 | env: 10 | matrix: 11 | - TEST_CMD='lein test' 12 | - TEST_CMD='lein bench output --data-structure fractal -- --data-structure b-tree -- --data-structure sorted-set' 13 | - TEST_CMD='lein bench output --delete-pattern forward -- --delete-pattern reverse -- --delete-pattern shuffle -- --delete-pattern zero' 14 | - TEST_CMD='node ./test_node.js' 15 | script: $TEST_CMD 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. 
DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 
45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. 
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hitchhiker Tree 2 | 3 | Hitchhiker trees are a newly invented (by @dgrnbrg) datastructure, synthesizing fractal trees and functional data structures, to create fast, snapshottable, massively scalable databases. 4 | 5 | [Watch the talk from Strange Loop](https://www.youtube.com/watch?v=jdn617M3-P4) to learn more, especially about the concept! 6 | 7 | ## What's in this Repository? 8 | 9 | The hitchhiker namespaces contain a complete implementation of a persistent, serializable, lazily-loaded hitchhiker tree. 
10 | This is a sorted key-value datastructure, like a scalable `sorted-map`. 11 | It can incrementally persist and automatically lazily load itself from any backing store which implements a simple protocol. 12 | 13 | Outboard is a sample application for the hitchhiker tree. 14 | It includes an implementation of the IO subsystem backed by Redis, and it manages all of the incremental serialization and flushing. 15 | 16 | The hitchhiker tree is designed very similarly to how Datomic's backing trees must work--I would love to see integration with [DataScript](https://github.com/tonsky/datascript) for a fully open source [Datomic](http://www.datomic.com). 17 | 18 | ## Outboard 19 | 20 | Outboard is a simple API for your Clojure applications that enables you to make use of tens of gigabytes of local memory, far beyond what the JVM can manage. 21 | Outboard also allows you to restart your application and reuse all of that in-memory data, which dramatically reduces startup times due to data loading. 22 | 23 | Outboard has a simple API, which may be familiar if you've ever used Datomic. 24 | Unlike Datomic, however, Outboard trees can be "forked" like git repositories, not just transacted upon. 25 | Once you've created a tree, you can open a connection to it. 26 | The connection mediates all interactions with the outboard data: 27 | it can accept transactions, provide snapshots for querying, and be cloned. 
28 | 29 | ### API Usage Example 30 | 31 | ```clojure 32 | (require '[hitchhiker.outboard :as ob]) 33 | 34 | ;; First, we'll create a connection to a new outboard 35 | (def my-outboard (ob/create "first-outboard-tree")) 36 | 37 | ;; We'll get a snapshot of the outboard's current state, which is empty for now 38 | ;; Note that snapshots are only valid for 5 seconds, but making a new snapshot is free 39 | ;; It would be easy to write an "extend-life" function for snapshots 40 | (def first-snapshot (ob/snapshot my-outboard)) 41 | 42 | ;; This will insert the pair "hello" "world" only into the snapshot 43 | (-> first-snapshot 44 | (ob/insert "hello" "world") 45 | (ob/lookup "hello")) 46 | ;;=> "world" 47 | 48 | ;; Inserts must be done in a transaction to persist 49 | (-> (ob/snapshot my-outboard) 50 | (ob/lookup "hello")) 51 | ;;=> nil 52 | 53 | ;; We can insert some data into it via a transaction 54 | ;; The update! function is atomic, just like swap! for atoms 55 | ;; update! will pass its transaction function a snapshot of the outboard 56 | (ob/update! my-outboard (fn [snapshot] (ob/insert snapshot "goodbye" "moon"))) 57 | 58 | ;; Since the insert was transacted, it persists 59 | (-> (ob/snapshot my-outboard) 60 | (ob/lookup "goodbye")) 61 | ;;=> "moon" 62 | 63 | ;; If you'd like, you can "fork" an outboard. Let's fork our outboard. 64 | ;; To fork, you just save a snapshot under a new name 65 | (def forked-outboard (ob/save-as (ob/snapshot my-outboard) "forked-outboard")) 66 | 67 | ;; Now, we can transact into the snapshot, which will not affect other forks 68 | (ob/update! 
forked-outboard (fn [snapshot] (ob/insert snapshot "goodbye" "sun"))) 69 | 70 | ;; As we can see: 71 | (-> (ob/snapshot my-outboard) 72 | (ob/lookup "goodbye")) 73 | ;;=> "moon" 74 | (-> (ob/snapshot forked-outboard) 75 | (ob/lookup "goodbye")) 76 | ;;=> "sun" 77 | ``` 78 | 79 | You should check out the docstrings/usage of these functions, too: 80 | 81 | - `close` will gracefully shut down an outboard connection 82 | - `open` will reopen an outboard (you can only create outboards which don't exist) 83 | - `destroy` will delete all data related to the closed, named outboard 84 | - `lookup` and `lookup-fwd-iter` provide single and ordered sequence access to snapshots 85 | 86 | ## Background 87 | 88 | Outboard is an off-heap functionally persistent sorted map. 89 | This map allows your applications to retain huge data structures in memory across process restarts. 90 | 91 | Outboard is the first library to make use of hitchhiker trees. 92 | Hitchhiker trees are a functionally persistent, serializable, off-heap fractal B tree. 93 | They can be extended to contain a mechanism to make statistical analytics blazingly fast, and to support column-store facilities. 94 | 95 | Details about hitchhiker trees, including related work, can be found in `doc/hitchhiker.adoc`. 96 | 97 | ## Testing 98 | 99 | You'll need a local Redis instance running to run the tests. Once you have it, just run 100 | 101 | lein test 102 | 103 | 104 | ## Benchmarking 105 | 106 | This library includes a detailed, instrumented benchmarking suite. 107 | It's built to enable comparative benchmarks between different parameters or code changes, so that improvements to the structure can be correctly categorized as such, and bottlenecks can be reproduced and fixed. 
108 | 109 | To try it, just run 110 | 111 | lein bench 112 | 113 | The benchmark tool supports testing with different parameters, such as: 114 | 115 | - The tree's branching factor 116 | - Whether to enable fractal tree features, just use the B-tree features, or compare to a vanilla Clojure sorted map 117 | - Reordering of delete operations (to stress certain workloads) 118 | - Whether to use the in-memory or Redis-backed implementation 119 | 120 | The benchmarking tool is designed to make it convenient to run several benchmarks; 121 | each benchmark's parameters can be separated by a `--`. 122 | This makes it easy to understand the characteristics of the hitchhiker tree over a variety of settings for a parameter. 123 | 124 | You can run a more sophisticated experiment benchmark by doing 125 | 126 | lein bench OUTPUT_DIR options -- options-for-2nd-experiment -- options-for-3rd-experiment 127 | 128 | This generates an Excel workbook called "analysis.xlsx" with benchmark results. 129 | For instance, if you'd like to run experiments to understand the performance difference between various values of B (the branching factor), you can do: 130 | 131 | lein bench perf_diff_experiment -b 10 -- -b 20 -- -b 40 -- -b 80 -- -b 160 -- -b 320 -- -b 640 132 | 133 | And it will generate lots of data and the Excel workbook for analysis. 134 | 135 | If you'd like to see the options for the benchmarking tool, just run `lein bench`. 136 | 137 | ## Technical details 138 | 139 | See the `doc/` folder for technical details of the hitchhiker tree and Redis garbage collection system. 140 | 141 | ## Gratitude 142 | 143 | Thanks to the early reviewers, Kovas Boguta & Leif Walsh. 144 | Also, thanks to Tom Faulhaber for making the Excel analysis awesome! 
145 | 146 | ## License 147 | 148 | Copyright © 2016 David Greenberg 149 | 150 | Distributed under the Eclipse Public License version 1.0 151 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | Comparative benchmark modes--apples to apples (by params) and everything vs. one trial 2 | This will help guide optimization work 3 | 4 | Figure out where the huge spikes in operation time come from (tiered resizing?) 5 | 6 | We need a more thorough set of tests for the messaging system to saturate deletes, to ensure we don't have more lurking bugs 7 | 8 | Implementations of backwards scan, pred, and succ 9 | 10 | Write the WAL, back with in-mem and redis, then add to benchmarks. This is necessary for disk-backed work 11 | 12 | Choose splits and sizes based on serialized results--big perf gain 13 | 14 | benchmark dataset like (map + (repeatedly rand) (iterate #(+ 0.01 %) 0.0)) 15 | that's a sliding random window, for some random moving write heavy region 16 | with lots of cold nodes 17 | 18 | Add async writes 19 | 20 | Make a snapshot life-extender 21 | 22 | Add support for datascript 23 | -------------------------------------------------------------------------------- /doc/hitchhiker.adoc: -------------------------------------------------------------------------------- 1 | == Hitchhiker tree 2 | 3 | This document will attempt to sketch out the big ideas in the hitchhiker tree. 4 | It will also attempt to call out various locations in the implementation where features are built. 5 | 6 | === High-level understanding 7 | 8 | The goal of the hitchhiker tree is to wed three things: the query performance of a B+ tree, the write performance of an append-only log, and convenience of a functional, persistent datastructure. 9 | Let's look at each of these in some detail. 
10 | 11 | ==== B+ trees win at queries 12 | 13 | You might remember an important theorem from data structures: the best-performing data structure for looking up sorted keys cannot do those queries faster than `O(log(n))`. 14 | Since sorted trees provide a solution for this, we'll start with them. 15 | Now, a common sorted tree for this purpose is the Red-Black tree, whose actual query performance is between `log~2~(n)` and `2*log~2~(n)` (the write performance is `log~2~(n)`). 16 | The factor of 2 comes from the partial imbalances (which are still asymptotically balanced) that the algorithm allows, and the base 2 of the log comes from the fact that it's a binary search tree. 17 | 18 | A less popular sorted tree is the AVL tree--this tree achieves `log~2~(n)` query performance, at the cost of always paying `2*log~2~(n)` for inserts. 19 | We can already see a pattern--although many trees reach the asymptotic bound, they differ in their constant factors. 20 | 21 | The tree that the hitchhiker tree is based off of is the B+ tree, which achieves `log~b~(n)` query performance. 22 | Since `b` can be very large (on the order of 100s or 1000s), these trees are especially great when each node is stored on higher latency media, like remote storage or a disk. 23 | This is because each node can contain huge numbers of keys, meaning that by only keeping the index nodes in memory, we can access most keys with fewer, often just one, data accesses. 24 | 25 | Unlike the above sorted trees (and B trees, which we won't discuss), B+ trees only store their data (i.e. the values) in their leaves--internal nodes only need to store keys. 26 | 27 | ==== Event logs win at writing data 28 | 29 | Do you know the fastest way to write data? 30 | Append it to the end of the file. 31 | There's no pointers, no updating of data structures, no extra IO costs incurred. 32 | 33 | Unfortunately, to perform a query on an event log, we need to replay all the data to figure out what happened. 
34 | That replay costs `O(n)`, since it touches every event written. 35 | So, how can we fix this? 36 | 37 | ==== Unifying B+ trees and event logs 38 | 39 | The first idea to understand is this: how can we combine the write performance of an event log with the query performance of a B+ tree? 40 | The answer is that we're going to "overlay" an event log on the B+ tree! 41 | 42 | The idea of the overlay is this: each index node of the B+ tree will contain an event log. 43 | Whenever we write data, we'll just append the operation (insert or delete) to the end of the root index node's event log. 44 | In order to avoid the pitfall of appending every operation to an ever-growing event log (which would leave us stuck with linear queries), we'll put a limit on the number of events that fit in the log. 45 | Once the log has overflowed in the root, we'll split the events in that log towards their eventual destination, adding those events to the event logs of the children of that node. 46 | Eventually, the event log will overflow to a leaf node, at which point we'll actually do the insertion into the B+ tree. 47 | 48 | This process gives us several properties: 49 | 50 | - Most inserts are a single append to the root's event log 51 | - Although there are a linear number of events, nodes are exponentially less likely to overflow the deeper they are in the tree 52 | - All data needed for a query exists along a path of nodes between the root and a specific leaf node. Since the logs are constant in size, queries still only read `log(n)` nodes. 53 | 54 | Thus we dramatically improve the performance of insertions without hurting the IO cost of queries. 55 | 56 | ==== Functional Persistence 57 | 58 | Now that we get the sketch of how to combine event logs and B+ trees, let's see the beauty of making the whole thing functional and persistent! 
59 | Since the combined B+/log data structure primarily only modifies nodes near the root, we can take advantage of the reduced modification to achieve reduced IO when persisting the tree. 60 | We can use the standard path-copying technique from functional, persistent data structures. 61 | This gives great performance, since the structure is designed to avoid needing to copy entire paths--most writes will only touch the root. 62 | Furthermore, we can batch many modifications together, and wait to flush the tree, in order to further batch IO. 63 | 64 | === Code Structure 65 | 66 | The hitchhiker tree's core implementation lives in 2 namespaces: `hitchhiker.tree.core` and `hitchhiker.tree.messaging`. 67 | `hitchhiker.tree.core` implements the B+ tree and its extensibility hooks; `hitchhiker.tree.messaging` adds the messaging layer (aka log) to the B+ tree from `core`. 68 | 69 | ==== Protocols 70 | 71 | In `hitchhiker.tree.core`, we have several important protocols: 72 | 73 | `hitchhiker.tree.core/IKeyCompare`:: 74 | This protocol should be extended to support custom key comparators. 75 | It's just like `clojure.core/compare`. 76 | 77 | `hitchhiker.tree.core/IResolve`:: 78 | This protocol is the functionality for a minimal node. 79 | Not only will every node implement this, but also backends will use this to implement stubs which automatically & lazily load the full node into memory during queries. 80 | + 81 | `last-key` is used for searches, so that entire nodes can remain unloaded from memory when we only need their boundary key. 82 | + 83 | `dirty?` is used to determine whether the IO layer would need to flush this node, or whether it already exists in the backing storage. 84 | + 85 | `resolve` loads the node into memory--this could return itself (in the case of an already loaded node), or it could return a new object after waiting on some IO. 86 | 87 | `hitchhiker.tree.core/INode`:: 88 | This protocol implements a node fully in-memory. 
89 | Generally, this shouldn't need to be re-implemented; 90 | however, if the hitchhiker was to be enhanced with key & value size awareness during splits and merges, you'd want to adjust the methods of this protocol. 91 | 92 | `hitchhiker.tree.core/Config`:: 93 | This structure must be passed to the `hitchhiker.tree.core/b-tree` constructor, which is the only way to get a new tree. 94 | The `index-b` is the fanout on index nodes; the `data-b` is the key & value fanout on data nodes, and the `op-buf-size` is the size of the log at each index node. 95 | The internet told me that choosing `sqrt(b)` for the `op-buf-size` and `b-sqrt(b)` for the `index-b` was a good idea, but who knows? 96 | 97 | `hitchhiker.tree.core/IBackend`:: 98 | This protocol implements a backend for the tree. 99 | + 100 | `new-session` returns a "session" object, which is a convenient way to capture backend-specific stats. 101 | + 102 | `write-node` will write a node to storage, returning the stub object which implements `IResolve` for that backend. It can record stats by mutating or logging to the session. 103 | + 104 | `anchor-root` is called by the persistence functionality to ensure that the backend knows which nodes are roots; this is a hint to any sorts of garbage collectors. 105 | + 106 | `delete-addr` removes the given node from storage. 107 | + 108 | `TestingBackend` is a simple implementation of a backend which bypasses serialization and is entirely in memory. It can be a useful reference for the bare minimum implementation of a backend. 109 | 110 | `hitchhiker.tree.messaging/IOperation`:: 111 | This protocol describes an operation to the tree. 112 | Currently, there are only `InsertOp` and `DeleteOp`, but arbitrary mutation is supported by the data structure. 113 | 114 | ==== Useful APIs 115 | 116 | `hitchhiker.tree.core/flush-tree`:: 117 | This takes a tree, does a depth-first search to ensure each node's children are durably persisted before flushing the node itself. 
118 | It returns the updated tree & the session under which the IO was performed. 119 | `flush-tree` does block on the writing IO--a future improvement would be to make that non-blocking. 120 | 121 | `hitchhiker.tree.messaging/enqueue`:: 122 | This is the fundamental operation for adding to the event log in a hitchhiker tree. 123 | `enqueue` will handle the appending, overflow, and correct propagation of operations through the tree. 124 | 125 | `hitchhiker.tree.messaging/apply-ops-in-path`:: 126 | This is the fundamental operation for reading from the event log in a hitchhiker tree. 127 | This finds all the relevant operations on the path to a leaf node, and returns the data that leaf node would contain if all the operations along the path were fully committed. 128 | This is conveniently designed to work on entire leaf nodes, so that iteration is as easy as using the same logic as a non-augmented B+ tree, and simply expanding each leaf node from the standard iteration. 129 | 130 | `lookup`, `insert`, `delete`, `lookup-fwd-iter`:: 131 | These are the basic operations on hitchhiker trees. 132 | There are implementations in `hitchhiker.tree.core` and `hitchhiker.tree.messaging` which leverage their respective tree variants. 133 | They correspond to `get`, `assoc`, `dissoc`, and `subseq` on sorted maps. 134 | 135 | `hitchhiker.core.b-tree`:: 136 | This is how to make a new hitchhiker or B+ tree. 137 | You should either use the above mutation functions on it from one or the other namespace; it probably won't work if you mix them. 138 | 139 | === Related Work 140 | 141 | Hitchhiker trees are made persistent with the same method, path copying, https://www.cs.cmu.edu/~rwh/theses/okasaki.pdf[as used by Okasaki] 142 | The improved write performance is made possible thanks to the same buffering technique as a https://en.wikipedia.org/wiki/Fractal_tree_index[fractal tree index]. 
143 | As it turns out, after I implemented the fractal tree, I spoke with a former employee of Tokutek, a company that commercialized fractal tree indices. 144 | That person told me that we'd actually implemented fractal reads identically! 145 | This is funny because there's no documentation anywhere about how exactly you should structure your code to compute the query. 146 | -------------------------------------------------------------------------------- /doc/redis-rc-design.md: -------------------------------------------------------------------------------- 1 | # Refcounting (RC) System for Redis 2 | 3 | One challenge in storing large amounts of off-heap data is deciding when data is unreferenced and can be deleted. 4 | The hitchhiker tree is a functional data structure, which makes use of a variation of the classic technique of path-copying to reduce IO on updates. 5 | Path-copying relies on a garbage collector; thus, the need for this system. 6 | 7 | The system is written primarily as Lua scripts for Redis to ensure atomicity and portability. 8 | Since Redis has no API to set events in the future, a small amount of code must run on the client. 9 | Luckily, this code simply runs a Lua script, sleeps for the amount of time returned by the script, and repeats this in a loop. 10 | 11 | ## Design 12 | 13 | This garbage collector assumes all keys' references to other keys are immutable. 14 | It may be possible to allow mutation in the future, but the design doesn't currently support it. 15 | This system currently doesn't support any method of cycle-detection or cycle-breaking. 16 | 17 | We allow each key to refer to however many other keys it needs to keep alive. 18 | When a key has no more references, it is freed, along with anything it was keeping alive. 19 | 20 | This system not only supports explicitly allocated & freed garbage collection roots (i.e. named values), it also supports implicitly expiring roots. 
21 | Automatically expiring roots are great for garbage collected languages, when there's no guarantee of when a finalizer will run. 22 | By having the primitive of a root which expires, we can avoid memory leaks due to clients not explicitly freeing snapshots, which makes analytics simpler to write. 23 | Currently, data expires by default after 5 seconds, which is hardcoded in the `hitchhiker.redis.RedisBackend` implementation of `anchor-root`. 24 | 25 | ### Details 26 | 27 | Each refcounted key gets 2 extra auxiliary keys. 28 | For the purposes of this discussion, we'll discuss a key called `key1`, `key2`, etc. 29 | 30 | For `key1`, we also have `key1:rc` and `key1:rl`. 31 | The `...:rc` auxiliary stores a count of the number of references pointing to this key. 32 | The `...:rl` auxiliary stores a list of the target keys that this key has references to. 33 | 34 | When we want to store a new refcounted value (say, `key2`), we store its data in `key2`. 35 | Then, we store the names of any values it refers to in `key2:rl`, a Redis list. 36 | Suppose that `key2` refers to `key1`. 37 | Finally, we'd increment `key1:rc`, since there's an additional reference to it. 38 | This operation can be batched via `hitchhiker.redis/add-refs`, which takes a new value and the names of every key it points to. 39 | 40 | We also have a helper Lua function called `drop_ref` (located in `hitchhiker.redis/drop-ref-lua`). 41 | When `drop_ref` is called on a key, it attempts to decrement that key's `...:rc`. 42 | If the `...:rc` reaches zero, it follows all the links in `...:rl` and recursively drops their refs. 43 | Note that this functionality is actually implemented iteratively and incrementally to ensure deletions are efficient and scalable. 44 | The function `hitchhiker.redis/drop-ref` exposes this. 45 | 46 | We also store a zset of keys sorted by expiration time in the redis key `refcount:expiry`. 
47 | This zset enables the client to find out how long to sleep until the next expiring key will be ready, and then automatically expire & GC any old data. 48 | This functionality is implemented by the `hitchhiker.redis/get-next-expiry` function, which is called in a loop by the `start-expiry-thread!`. 49 | 50 | The current design isn't fully atomic, but it is conservatively written such that small amounts of data could leak during a crash, but valid data will always be stored. 51 | -------------------------------------------------------------------------------- /env/profiling/hitchhiker/bench.clj: -------------------------------------------------------------------------------- 1 | (ns hitchhiker.bench 2 | (:require [clojure.pprint :as pp] 3 | [clojure.string :as str] 4 | [clojure.tools.cli :refer [parse-opts]] 5 | [excel-templates.build :as excel] 6 | [hitchhiker.redis :as redis] 7 | [hitchhiker.tree.core :refer [Config b b 0))) 21 | :insert core/insert 22 | :delete core/delete 23 | :flush (fn [x] (Config sqrt-b b (- b sqrt-b)))) 30 | :insert msg/insert 31 | :delete msg/delete 32 | :flush (fn [x] ( (into {} last-flush) 96 | (assoc :ins-avg-ns avg-ns 97 | (if inserting? 98 | :insert 99 | :delete) true 100 | :n i')))))) 101 | (cond 102 | (seq data) 103 | (recur data 104 | (if log-inserts 105 | 0 106 | (+ t (- after before))) 107 | tree' 108 | (if stats (merge-with + last-flush @stats) last-flush) 109 | i' 110 | inserting? 111 | @updated-outputs) 112 | inserting? 113 | (recur (delete-xform dataset) 114 | 0 115 | tree' 116 | nil 117 | i' 118 | false 119 | @updated-outputs) 120 | :else 121 | @updated-outputs))))))) 122 | 123 | (def options 124 | [["-n" "--num-operations NUM_OPS" "The number of elements that will be applied to the data structure" 125 | :default 100000 126 | :parse-fn #(Long. %) 127 | :validate [pos? 
"n must be positive"]] 128 | [nil "--data-structure STRUCT" "Which data structure to run the test on" 129 | :default "fractal" 130 | :validate [#(#{"fractal" "b-tree" "sorted-set"} %) "Data structure must be fractal, b-tree, or sorted set"]] 131 | [nil "--backend testing" "Runs the benchmark with the specified backend" 132 | :default "testing" 133 | :validate [#(#{"redis" "testing"} %) "Backend must be redis or testing"]] 134 | ["-d" "--delete-pattern PATTERN" "Specifies how the operations will be reordered on delete" 135 | :default "forward" 136 | :validate [#(#{"forward" "reverse" "shuffle" "zero"} %) "Incorrect delete pattern"] 137 | ] 138 | [nil "--sorted-set" "Runs the benchmarks on a sorted set"] 139 | ["-b" "--tree-width WIDTH" "Determines the width of the trees. Fractal trees use sqrt(b) child pointers; the rest is for messages." 140 | :default 300 141 | :parse-fn #(Long. %) 142 | :validate [pos? "b must be positive"]] 143 | ["-f" "--flush-freq FREQ" "After how many operations should the tree get flushed?" 144 | :default 1000 145 | :parse-fn #(Long. %) 146 | :validate [pos? 
"flush frequency must be positive"]] 147 | ["-h" "--help" "Prints this help"]]) 148 | 149 | (defn exit 150 | [status msg] 151 | (println msg) 152 | (System/exit status)) 153 | 154 | (defn error-msg 155 | [errors] 156 | (str "The following errors occurred while parsing your command:\n\n" 157 | (str/join \newline errors))) 158 | 159 | (defn usage 160 | [options-summary] 161 | (str/join \newline 162 | ["Usage: bench output-dir [options] [-- [other-options]]*" 163 | "" 164 | "Options:" 165 | options-summary 166 | "" 167 | "Delete patterns:" 168 | "forward: we delete the elements in the order they were inserted" 169 | "reverse: we delete the elements in the reverse order they were inserted" 170 | "shuffle: we delete the elements in a random order" 171 | "zero: we repeatedly attempt to delete 0, thus never actually deleting" 172 | "" 173 | "Backends:" 174 | "testing: this backend serializes nothing, just using an extra indirection" 175 | "redis: this backend uses a local redis server"])) 176 | 177 | (defn make-template-for-one-tree-freq-combo 178 | [list-of-benchmark-results filter-by] 179 | ;(clojure.pprint/pprint list-of-benchmark-results) 180 | (assert (= 2 (count list-of-benchmark-results)) "Should be random and ordered") 181 | (let [indexed (group-by :ds list-of-benchmark-results)] 182 | (map #(vector (:n %1) (:ins-avg-ns %1) (:writes %1) (:ins-avg-ns %2) (:writes %2)) 183 | (filter filter-by (:results (first (get indexed "in-order")))) 184 | (filter filter-by (:results (first (get indexed "random"))))))) 185 | 186 | (defn template-one-sheet 187 | [pair-of-results-for-one-ds-config] 188 | (let [{:keys [tree ds freq n b results delete-pattern]} 189 | (first pair-of-results-for-one-ds-config) 190 | x {0 [["Data Structure" (name tree) "" "n" n "" "Data Set" ds]] 191 | 1 [["Flush Frequency" freq "" "b" b "" "delete pattern" delete-pattern]] 192 | [5 18] (make-template-for-one-tree-freq-combo pair-of-results-for-one-ds-config :insert) 193 | [22 35] 
(make-template-for-one-tree-freq-combo pair-of-results-for-one-ds-config :delete)}] 194 | x)) 195 | 196 | (defn -main 197 | [& [root & args]] 198 | (let [outputs (atom [])] 199 | (doseq [args (or (->> args 200 | (partition-by #(= % "--")) 201 | (map-indexed vector) 202 | (filter (comp even? first)) 203 | (map second) 204 | (seq)) 205 | [[]])] ; always do one iteration 206 | (let [{:keys [options arguments errors summary]} (parse-opts args options) 207 | tree-to-test (atom {}) 208 | results (atom [])] 209 | (cond 210 | (or (= "-h" root) 211 | (= "--help" root) 212 | (nil? root) 213 | (:help options)) (exit 0 (usage summary)) 214 | (not= (count arguments) 0) (exit 1 (usage summary)) 215 | errors (exit 1 (error-msg errors))) 216 | (let [backend (case (:backend options) 217 | "testing" (core/->TestingBackend) 218 | "redis" (do (redis/start-expiry-thread!) 219 | (redis/->RedisBackend))) 220 | delete-xform (case (:delete-pattern options) 221 | "forward" identity 222 | "reverse" reverse 223 | "shuffle" shuffle 224 | "zero" #(repeat (count %) 0.0)) 225 | [tree-name structure] 226 | (case (:data-structure options) 227 | "b-tree" ["b-tree" (core-b-tree (:tree-width options) backend)] 228 | "fractal" ["fractal" (msg-b-tree (:tree-width options) backend)] 229 | "sorted-set" ["sorted-set" (sorted-set-repr)]) 230 | flush-freq (:flush-freq options) 231 | codename (str tree-name 232 | "__flush_" 233 | flush-freq 234 | "__b_" 235 | (:tree-width options) 236 | "__" 237 | (:backend options) 238 | "__n_" 239 | (:num-operations options) 240 | "__del_" 241 | (:delete-pattern options))] 242 | (doseq [ds (generate-test-datasets) 243 | :let [codename (str codename 244 | "_" 245 | (:name ds)) 246 | out (create-output-dir 247 | root 248 | codename) 249 | _ (println "Doing" codename) 250 | bench-res (benchmark (:num-operations options) ds flush-freq structure out delete-xform)]] 251 | (swap! 
results conj 252 | {:tree tree-name 253 | :ds (:name ds) 254 | :freq flush-freq 255 | :n (:num-operations options) 256 | :b (:tree-width options) 257 | :delete-pattern (:delete-pattern options) 258 | :results bench-res})) 259 | ;(println "results") 260 | ;(clojure.pprint/pprint @results) 261 | (swap! outputs conj (template-one-sheet @results))))) 262 | (excel/render-to-file 263 | "template_benchmark.xlsx" 264 | (.getPath (File. root "analysis.xlsx")) 265 | {"SingleDS" 266 | (map-indexed (fn [i s] 267 | (assoc s :sheet-name (str "Trial " (inc i)))) 268 | @outputs)}))) 269 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject hitchhiker-tree "0.1.0-SNAPSHOT" 2 | :description "A Hitchhiker Tree Library" 3 | :url "https://github.com/dgrnbrg/hitchhiker-tree" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.8.0"] 7 | [org.clojure/clojurescript "1.8.51" :scope "provided"] 8 | [org.clojure/core.memoize "0.5.8"] 9 | [com.taoensso/carmine "2.12.2"] 10 | [org.clojure/core.rrb-vector "0.0.11"] 11 | [org.clojure/core.cache "0.6.5"] 12 | 13 | [io.replikativ/incognito "0.2.2-SNAPSHOT"] 14 | [io.replikativ/konserve "0.4.9"]] 15 | :aliases {"bench" ["with-profile" "profiling" "run" "-m" "hitchhiker.bench"]} 16 | :jvm-opts ["-server" "-Xmx3700m" "-Xms3700m"] 17 | :profiles {:test 18 | {:dependencies [[org.clojure/test.check "0.9.0"]]} 19 | :profiling 20 | {:main hitchhiker.bench 21 | :source-paths ["env/profiling"] 22 | :dependencies [[criterium "0.4.4"] 23 | [org.clojure/tools.cli "0.3.3"] 24 | [org.clojure/test.check "0.9.0"] 25 | [com.infolace/excel-templates "0.3.3"]]} 26 | :dev {:dependencies [[binaryage/devtools "0.8.2"] 27 | [figwheel-sidecar "0.5.8"] 28 | [com.cemerick/piggieback "0.2.1"] 29 | [org.clojure/test.check "0.9.0"]] 30 | :source-paths 
["src" "dev"] 31 | :plugins [[lein-figwheel "0.5.8"]] 32 | :repl-options {; for nREPL dev you really need to limit output 33 | :init (set! *print-length* 50) 34 | :nrepl-middleware [cemerick.piggieback/wrap-cljs-repl]}}} 35 | :clean-targets ^{:protect false} ["resources/public/js/compiled" "target"] 36 | 37 | 38 | :cljsbuild {:builds 39 | [{:id "dev" 40 | :figwheel true 41 | :source-paths ["src"] 42 | :compiler {:main hitchhiker.tree.core 43 | :asset-path "js/out" 44 | :output-to "resources/public/js/core.js" 45 | :output-dir "resources/public/js/out" }} 46 | ;; inspired by datascript project.clj 47 | {:id "test" 48 | :source-paths ["src" "test" "dev"] 49 | :compiler { 50 | :main hitchhiker-tree.konserve-test 51 | :output-to "target/test.js" 52 | :output-dir "target/none" 53 | :optimizations :none 54 | :source-map true 55 | :recompile-dependents false 56 | :parallel-build true 57 | }} 58 | ]} 59 | 60 | :plugins [[lein-figwheel "0.5.8"] 61 | [lein-cljsbuild "1.1.4" :exclusions [[org.clojure/clojure]]]]) 62 | -------------------------------------------------------------------------------- /resources/redis_test_data.clj: -------------------------------------------------------------------------------- 1 | 2 | [[:add 37] [:del 40] [:add -3] [:add 9] [:add 23] [:add -5] [:del 9] [:add -37] [:add -22] [:del 19] [:add -9] [:del 11] [:add 23] [:del 4] [:add 1] [:add 26] [:add 16] [:add 34] [:add -32] [:add -12] [:add -30] [:add 27] [:add -35] [:del -11] [:add -6] [:add 28] [:add -32] [:add -2] [:del 15] [:del -22] [:add 8] [:add 0] [:add 30] [:add 36] [:add -7] [:add -2] [:add -34] [:add 5] [:add 6] [:del 38] [:add 34] [:add 24] [:add 20] [:add -23] [:del -34] [:add 0] [:add 27] [:add -10] [:del 4] [:add 38] [:del -40] [:del 40] [:add 9] [:add -35] [:del -7] [:add -32] [:add -38] [:del -5] [:add -22] [:add 18] [:add -2] [:add -9] [:add -26] [:add 16] [:add -3] [:add -6] [:del 18] [:add -6] [:add 40] [:del -36] [:del -35] [:del -17] [:add 32] [:del -30] [:add -37] [:add -36] 
[:add 10] [:del -15] [:del -6] [:del -22] [:add -21] [:del 32] [:add -9] [:flush] [:add 26] [:add -26] [:add -17] [:add 10] [:add 24] [:add -20] [:add 3] [:add -31] [:add -11] [:add -3] [:add 28] [:add -11] [:add -2] [:add -21] [:add -37] [:add 19] [:del -30] [:add 1] [:add 27] [:del 39] [:add -13] [:add -29] [:add -6] [:add -2] [:del -17] [:add -33] [:add -7] [:add -27] [:add -22] [:add 24] [:add -38] [:add -7] [:add 6] [:del -4] [:add 1] [:add -35] [:add 30] [:add -6] [:add -16] [:add 3] [:add -15] [:del -40] [:add 22] [:add -22] [:add -35] [:add 16] [:add 18] [:add 26] [:add 24] [:add 33] [:add -9] [:del -17] [:add -13] [:add 30] [:add 32] [:add 17] [:del 38] [:add -16] [:add 6] [:add 16] [:add 36] [:add 28] [:add -19] [:add 9] [:add 30] [:del 5] [:add 4] [:add -8] [:del 24] [:add 3] [:add 38] [:add 33] [:add -35] [:add -30] [:add 9] [:add -18] [:add 26] [:add -24] [:add -30] [:add -38] [:add -32] [:add -1] [:del -22] [:add 17] [:add 9] [:add -35] [:add 0] [:add -28] [:add -5] [:add -12] [:add -18] [:add 0] [:add -7] [:add -38] [:add 29] [:add 34] [:add 38] [:add 3] [:add 30] [:add -10] [:add -26] [:del 14] [:add 4] [:add -37] [:add -12] [:add 31] [:add 12] [:del 38] [:add -36] [:add -28] [:del -31] [:flush] [:add -11] [:del 28] [:add -18] [:add 6] [:add 0] [:del 16] [:add 21] [:add 22] [:add 39] [:add -9] [:add -13] [:add -26] [:add -35] [:del 16] [:add 31] [:add 18] [:add 0] [:add -2] [:add -34] [:add 23] [:del -16] [:add -17] [:add 3] [:del -1] [:del 13] [:del -6] [:add 1] [:del 39] [:add 37] [:add -9] [:add 19] [:del 7] [:add -38] [:add 21] [:del 2] [:add 12] [:add 9] [:add 15] [:add 27] [:add 39] [:del 26] [:add -24] [:add 19] [:add -38] [:add 8] [:del -28] [:add 37] [:add -9] [:add 2] [:add -12] [:add -2] [:del -18] [:add 3] [:add -31] [:del -18] [:add -38] [:add -23] [:add 32] [:add -20] [:add 12] [:add -17] [:add 33] [:add 29] [:add -16] [:add 37] [:add -6] [:add -24] [:add 22] [:add 15] [:add -24] [:add -20] [:add 14] [:add 6] [:add -2] [:add -39] [:add 
15] [:add -38] [:add 24] [:del -10] [:add -35] [:add 24] [:add 26] [:del 5] [:add -3] [:add -3] [:add -13] [:add -10] [:add 23] [:del 33] [:del 36] [:add 11] [:add 7] [:add 28] [:del -37] [:del 22] [:add 33] [:add 19] [:add 12] [:add -25] [:add -21] [:flush] [:del -31] [:del -35] [:add -27] [:add 26] [:del -7] [:add -2] [:add -35] [:add 16] [:add 10] [:add -8] [:add 16] [:del 11] [:add 25] [:add -14] [:add 8] [:add -1] [:add 22] [:add -4] [:add 9] [:add 12] [:add -13] [:add 11] [:add -35] [:add -39] [:add 40] [:add -6] [:add 25] [:add 39] [:del 20] [:add -38] [:add 26] [:add 36] [:add -31] [:add 36] [:add 37] [:add 7] [:add -13] [:add -27] [:add -16] [:add -13] [:add 18] [:add -30] [:flush] [:add 34] [:add -20] [:add 40] [:add 3] [:add -8] [:add 12] [:add -14] [:add -5] [:del -21] [:add 40] [:add -5] [:add -12] [:add -9] [:add 0] [:add -7] [:del 11] [:add -35] [:add -36] [:add -30] [:add -37] [:add -15] [:add -28] [:del -16] [:add 36] [:del 22] [:add -8] [:del -30] [:add 24] [:del -6] [:add -1] [:add -22] [:del -29] [:add 16] [:add 13] [:add 36] [:del 40] [:add 15] [:add -25] [:add 19] [:add 9] [:del -22] [:add -6] [:del -8] [:del 17] [:add 4] [:add 15] [:add -10] [:add 26] [:add -36] [:add -15] [:add -20] [:del 35] [:del -5] [:add 17] [:add 2] [:add -36] [:add -28] [:add 8] [:add -38] [:add -17] [:add -24] [:add -13] [:add -20] [:add 29] [:add -9] [:add 15] [:add -36] [:del 14] [:add -15] [:add 9] [:add -14] [:add 12] [:add 8] [:add -36] [:add -27] [:add -36] [:del -40] [:del 24] [:del 6] [:add -11] [:add -2] [:add -21] [:add -32] [:add 9] [:add -5] [:add -23] [:add -33] [:add -19] [:add -30] [:add -10] [:add -21] [:add -22] [:add -10] [:del 4] [:add 6] [:add -11] [:add 3] [:del 26] [:add -18] [:add 5] [:add -11] [:add -21] [:add 0] [:add -12] [:add -40] [:add -8] [:add 11] [:add -7] [:add 21] [:add -32] [:add -5] [:add -38] [:add -14] [:add 30] [:add -3] [:del -6] [:flush] [:add -27] [:add 4] [:add 0] [:del -1] [:add 21] [:add -8] [:del -36] [:add 5] [:add 23] 
[:del 20] [:add -23] [:add 8] [:add 24] [:add -3] [:add -35] [:add -31] [:add -7] [:add 30] [:add 39] [:del -18] [:add 21] [:add 26] [:add -27] [:del 14] [:add 12] [:add 18] [:add 6] [:add -4] [:add -6] [:add -19] [:del -16] [:add -29] [:add -5] [:del 16] [:add 37] [:add 39] [:add -21] [:add 13] [:add 33] [:add -33] [:add 32] [:add -2] [:add -5] [:del 23] [:add -14] [:add -14] [:add -39] [:add 12] [:add -12] [:add 33] [:add 1] [:add 29] [:add 19] [:add 3] [:add 17] [:del 37] [:add -14] [:add 5] [:del -2] [:add -3] [:add 25] [:add -26] [:add 3] [:add 30] [:add 28] [:add -16] [:del 9] [:add 40] [:add 21] [:add 15] [:add 22] [:del -19] [:add -13] [:add 8] [:add 23] [:add -26] [:add 9] [:add 2] [:add -5] [:add 3] [:add 37] [:add 23] [:add -40] [:add 8] [:add -19] [:add 23] [:del -27] [:add 9] [:add 35] [:add -29] [:add -19] [:add -11] [:add -16] [:del 27] [:add -18] [:add 26] [:add 40] [:add 34] [:del 15] [:add 20] [:add 10] [:add 40] [:del -13] [:del 4] [:add -34] [:del -19] [:add -21] [:add 1] [:add 39] [:del -24] [:del 2] [:add -5] [:add -26] [:del 35] [:add 24] [:add -4] [:add -7] [:add -26] [:add 9] [:del 0] [:add 37] [:add 1] [:add -28] [:add 18] [:add 31] [:add 31] [:add 38] [:del 28] [:add -27] [:add 38] [:add -16] [:add 5] [:add 27] [:add 27] [:add -17] [:add -1] [:add 4] [:add -32] [:del -32] [:del -18] [:add 1] [:add -3] [:add -19] [:add -10] [:add 37] [:add -16] [:add 24] [:add -30] [:del 21] [:add 35] [:add -24] [:add 11] [:add -36] [:add -24] [:add -35] [:add -36] [:add 8] [:add 40] [:add -22] [:add 10] [:add 6] [:del -22] [:del -12] [:add 5] [:add 21] [:add 27] [:add 11] [:add -17] [:del 37] [:add -7] [:add 24] [:add 13] [:add -26] [:add 32] [:del 27] [:add -28] [:add -8] [:add 14] [:add -31] [:add -2] [:add -24] [:del 2] [:add -38] [:del 23] [:del -30] [:add 28] [:add -34] [:add 18] [:del -27] [:del 23] [:add -19] [:del 20] [:add -31] [:add 0] [:add 1] [:del -26] [:add 39] [:add 26] [:del 31] [:add 35] [:del -34] [:del 1] [:del 24] [:del 23] [:add -14] 
[:add -2] [:del -37] [:del 17] [:add -7] [:add 7] [:add 35] [:add 8] [:add -19] [:add -11] [:add -12] [:add -22] [:add 32] [:del 20] [:add 7] [:add -36] [:add -30] [:add 10] [:add -23] [:add 4] [:add 19] [:add -17] [:add 25] [:add 18] [:add -1] [:add -37] [:add -25] [:add -37] [:add -34] [:add 21] [:add -27] [:add -27] [:add -18] [:add -31] [:add 38] [:add -17] [:add -2] [:add -15] [:add 36] [:del -23] [:add 34] [:add -1] [:add -36] [:del 15] [:add 29] [:add 17] [:add -22] [:add 37] [:add 15] [:del -23] [:add 30] [:add -4] [:add 3] [:add 4] [:add -2] [:add 20] [:add -16] [:del 39] [:add -1] [:add 3] [:del -14] [:add -38] [:del -25] [:add 29] [:add -24] [:del 26] [:add 35] [:del 7] [:add -5] [:del 7] [:add 40] [:add -33] [:add 34] [:add 26] [:add 34] [:del 20] [:add 31] [:add 33] [:add 38] [:add 19] [:del 17] [:add 19] [:add -25] [:add -10] [:add -37] [:add 8] [:add -21] [:add 13] [:add 25] [:add 30] [:add 25] [:add -26] [:del -25] [:del -34] [:add 5] [:add -35] [:add 39] [:add -33] [:add 27] [:del 40] [:flush] [:add -34] [:add 10] [:del -21] [:add -8] [:add 22] [:add -4] [:add 38] [:add -17] [:add -34] [:add 34] [:add -20] [:add 30] [:del -1] [:add -3] [:del 7] [:add 21] [:del 24] [:add 17] [:del -38] [:add -2] [:del 9] [:del 11] [:add -2] [:add 3] [:add 35] [:add 8] [:del -6] [:add -37] [:add 17] [:del -35] [:add -32] [:add -13] [:add 7] [:add 21] [:del -18] [:del 34] [:add -26] [:del -27] [:add -21] [:add 4] [:add -36] [:del -4] [:add 9] [:del -21] [:add -15] [:add -4] [:del 4] [:add 0] [:add 15] [:add -30] [:del 12] [:del 33] [:add -22] [:add -19] [:add -22] [:add -12] [:add -2] [:flush] [:add -20] [:add -20] [:add -33] [:add -17] [:del 1] [:add -2] [:add 38] [:add -39] [:add 14] [:add -8] [:add -19] [:add 7] [:add -13] [:del 39] [:add 38] [:del -14] [:del -40] [:del -18] [:add 38] [:del 0] [:add 37] [:add -16] [:add -29] [:del 30] [:add 16] [:add -31] [:del -2] [:add 37] [:add -10] [:add -26] [:add 33] [:add -23] [:add -14] [:add -18] [:del 10] [:add 5] [:add 
-14] [:add -22] [:del 32] [:add -25] [:add -40] [:add -12] [:add -21] [:add -9] [:add -32] [:add 28] [:add -21] [:add 13] [:del -21] [:del 31] [:add 10] [:add -11] [:del -17] [:add -25] [:add 9] [:add 0] [:add -22] [:del 7] [:add -38] [:add 4] [:add -5] [:add -13] [:add 14] [:del 26] [:add 18] [:add -18] [:add 5] [:add 32] [:add 17] [:add -5] [:del -30] [:add -34] [:add -22] [:add -33] [:add -2] [:del 1] [:add 22] [:add 39] [:add 21] [:add -38] [:add 1] [:add 23] [:add 17] [:add 5] [:del -18] [:add -14] [:add 30] [:del 37] [:add 3] [:add -39] [:add -1] [:del 22] [:add 39] [:del -26] [:del -26] [:add -37] [:del 5] [:add 26] [:add -20] [:add -21] [:del 6] [:add -20] [:add 36] [:add 7] [:del 11] [:add -26] [:del 6] [:add -17] [:del -17] [:add -30] [:add 1] [:del -35] [:add 4] [:del -17] [:add -6] [:del 9] [:add 34] [:add -35] [:add 8] [:add -11] [:del -1] [:add 13] [:add 20] [:add -22] [:add 7] [:add -13] [:add -24] [:add -37] [:add 32] [:add -30] [:del -34] [:del -31] [:add 19] [:flush] [:add 38] [:add 9] [:add -29] [:add -32] [:del -23] [:del 8] [:del -6] [:add 17] [:add -34] [:add -15] [:del 13] [:add -40] [:add 37] [:add -18] [:add 33] [:add -9] [:add -23] [:add -37] [:add 2] [:add 16] [:add 20] [:add -8] [:add -28] [:add -27] [:add -20] [:add 20] [:add 26] [:add -38] [:add -2] [:add -21] [:add 17] [:add 9] [:del 32] [:del 1] [:add -4] [:add -3] [:del -9] [:add -21] [:add -31] [:add -19] [:add 12] [:add 14] [:add -33] [:del -33] [:del -9] [:add -22]] 3 | -------------------------------------------------------------------------------- /src/hitchhiker/konserve.cljc: -------------------------------------------------------------------------------- 1 | (ns hitchhiker.konserve 2 | (:refer-clojure :exclude [resolve subvec]) 3 | (:require [clojure.core.rrb-vector :refer [catvec subvec]] 4 | #?(:clj [clojure.core.async :refer [chan promise-chan put!] :as async] 5 | :cljs [cljs.core.async :refer [chan promise-chan put!] 
:as async]) 6 | [konserve.core :as k] 7 | [konserve.memory :refer [new-mem-store]] 8 | [hasch.core :refer [uuid]] 9 | [clojure.set :as set] 10 | #?(:clj [hitchhiker.tree.core :refer [go-try ( (assoc node :storage-addr nil) 43 | (update :children (fn [cs] (mapv #(assoc % :store nil 44 | :storage-addr nil) cs)))) 45 | (assoc node :storage-addr nil))] 46 | (let [id (uuid pnode)] 47 | (KonserveAddr store (core/last-key node) id (synthesize-storage-addr id)))))) 49 | (delete-addr [_ addr session] 50 | (swap! session update-in :deletes inc))) 51 | 52 | (defn get-root-key 53 | [tree] 54 | (-> tree :storage-addr (async/poll!))) 55 | 56 | (defn create-tree-from-root-key 57 | [store root-key] 58 | (go-try 59 | (let [val (KonserveAddr store last-key root-key (synthesize-storage-addr root-key))))))) 63 | 64 | 65 | (defn add-hitchhiker-tree-handlers [store] 66 | (swap! (:read-handlers store) merge 67 | {'hitchhiker.konserve.KonserveAddr 68 | #(-> % map->KonserveAddr 69 | (assoc :store store 70 | :storage-addr (synthesize-storage-addr (:konserve-key %)))) 71 | 'hitchhiker.tree.core.DataNode 72 | (fn [{:keys [children cfg]}] 73 | (core/->DataNode (into (sorted-map-by 74 | compare) children) 75 | (promise-chan) 76 | cfg)) 77 | 'hitchhiker.tree.core.IndexNode 78 | (fn [{:keys [children cfg op-buf]}] 79 | (core/->IndexNode (->> children 80 | vec) 81 | (promise-chan) 82 | (vec op-buf) 83 | cfg)) 84 | 'hitchhiker.tree.messaging.InsertOp 85 | msg/map->InsertOp 86 | 'hitchhiker.tree.messaging.DeleteOp 87 | msg/map->DeleteOp 88 | 'hitchhiker.tree.core.Config 89 | core/map->Config}) 90 | (swap! 
(:write-handlers store) merge 91 | {'hitchhiker.konserve.KonserveAddr 92 | (fn [addr] 93 | (assoc addr 94 | :store nil 95 | :storage-addr nil)) 96 | 'hitchhiker.tree.core.DataNode 97 | (fn [node] 98 | (assoc node :storage-addr nil)) 99 | 'hitchhiker.tree.core.IndexNode 100 | (fn [node] 101 | (-> node 102 | (assoc :storage-addr nil) 103 | (update-in [:children] 104 | (fn [cs] (map #(assoc % :store nil :storage-addr nil) cs)))))}) 105 | store) 106 | 107 | 108 | 109 | (comment 110 | 111 | 112 | (def store (add-read-handlers ( i 49)} 120 | (Config 17 300 (- 300 17)))) 122 | (range 20000))) 123 | (->KonserveBackend store) 124 | ))) 125 | 126 | (enable-console-print!) 127 | 128 | (go-try (def foo (Config 17 300 (- 300 17)))) 129 | 1 1)))) 130 | 131 | (go-try (def foos (Config 17 300 (- 300 17)))) 132 | 1 1)) 133 | 2 2)))) 134 | 135 | (go-try (def bar (Config 17 300 (- 300 17))))] 143 | (if (= i 2) 144 | t 145 | (recur (inc i) (Config 17 300 (- 300 17)))) 150 | (range 20000))) 151 | 152 | 153 | 154 | (def my-tree-updated (KonserveBackend store) 157 | ))) 158 | 159 | 160 | 161 | 162 | (time (KonserveBackend store))))] 168 | (when (= (mod i 100) 0) 169 | (let [delta (- (System/currentTimeMillis) 170 | st)] 171 | (println "Op for" i " took " delta " ms"))) 172 | tree)) 173 | (Config 17 300 (- 300 17)))) 174 | (range 20000)) 175 | 176 | 177 | (msg/lookup-fwd-iter (RedisBackend)))))) 45 | (let [new-root (redis/get-root-key @tree-atom)] 46 | (wcar 47 | {} 48 | (car/incr (str new-root ":rc")) 49 | (redis/drop-ref (wcar {} (car/hget "named-hhs" save-name))) 50 | (car/hset "named-hhs" save-name new-root))))] 51 | (reset! (:thread conn) 52 | (doto (Thread. 53 | (fn* [] 54 | (loop [pending-writes 0 55 | timed-out false] 56 | (if (not= :shutdown @close-signal) 57 | (if (or timed-out (> pending-writes 1000)) 58 | (do (flush-tree) 59 | (recur 0 false)) 60 | (if-let [update (try 61 | (.poll q 5 TimeUnit/SECONDS) 62 | (catch InterruptedException e 63 | nil))] 64 | (do (swap! 
tree-atom update) 65 | (recur (inc pending-writes) false)) 66 | (do (recur pending-writes true)))) 67 | (do (flush-tree) 68 | (println "Shutting down" save-name)))))) 69 | (.setName (str "Outboard processor for " save-name)) 70 | (.start))))) 71 | 72 | (defn create 73 | "Creates a new, empty outboard with the given name. Returns a connection to it." 74 | [new-name] 75 | (when (or (contains? @connection-registry new-name) 76 | (wcar {} 77 | (car/hget "named-hhs" new-name))) 78 | (throw (ex-info (str "Cannot create outboard with name " new-name 79 | ", its already in use") {:used-name new-name}))) 80 | ;;TODO race condition where additional calls to create could all succeed 81 | ;;we should guard against this 82 | (let [conn (->OutboardConnection (LinkedBlockingQueue.) (atom (Config 30 600 870)))) (atom :running) (atom nil) new-name)] 83 | (launch-outboard-processer! conn new-name) 84 | (swap! connection-registry assoc new-name conn) 85 | conn)) 86 | 87 | ;; Should have a global reg of opened outboards to prevent double-opening 88 | ;; or destroynig while in use 89 | (defn destroy 90 | "Destroys the named outboard." 91 | [name] 92 | (when-not (string? name) 93 | (throw (ex-info "destroy takes the name of an outboard" {:name name}))) 94 | (when (contains? @connection-registry name) 95 | (throw (ex-info "Cannot destroy outboard which is currently in use" {:name name}))) 96 | (wcar {} 97 | (redis/drop-ref (wcar {} (car/hget "named-hhs" name))) 98 | (car/hdel "named-hhs" name))) 99 | 100 | (defn open 101 | "Returns a connection to the named structure" 102 | [name] 103 | (or (get @connection-registry name) 104 | (if-let [root-key (wcar {} (car/hget "named-hhs" name))] 105 | (let [conn (->OutboardConnection 106 | (LinkedBlockingQueue.) 107 | (atom (redis/create-tree-from-root-key root-key)) 108 | (atom :running) 109 | (atom nil) 110 | name)] 111 | (launch-outboard-processer! conn name) 112 | (swap! 
connection-registry assoc name conn) 113 | conn) 114 | (throw (ex-info (str "Didn't find root-addr at " name) {}))))) 115 | 116 | (defn close 117 | "Frees the in-VM resources associated with the connection. The connection 118 | will no longer work." 119 | [conn] 120 | (when-not (instance? OutboardConnection conn) 121 | (throw (ex-info "close takes an outboard connection as an argument" {:conn conn}))) 122 | (reset! (:close-signal conn) :shutdown) 123 | (.interrupt ^Thread @(:thread conn)) 124 | (swap! connection-registry dissoc (:tree-name conn))) 125 | 126 | ;;TODO should return the before & after tree states as pointers 127 | (defn update! 128 | [conn update-fn] 129 | (.put ^LinkedBlockingQueue (:update-queue conn) update-fn)) 130 | 131 | (defn snapshot 132 | [conn] 133 | @(:tree-atom conn)) 134 | 135 | (defn insert 136 | "Inserts key/value pairs into the outboard data snapshot" 137 | [snapshot k v & kvs] 138 | (let [tree snapshot] 139 | (if (and (seq kvs) (even? (count kvs))) 140 | (loop [tree (RedisBackend))))] 178 | (wcar {} 179 | (car/hset "named-hhs" new-name (redis/get-root-key flushed-snapshot)) 180 | (car/incr (str (redis/get-root-key flushed-snapshot) ":rc"))) 181 | (reset! (:tree-atom new-conn) flushed-snapshot) 182 | new-conn)) 183 | 184 | #_(defn extend-lifetime 185 | "Ensures the given snapshot will be readable for at least additional-ms longer." 186 | ;;TODO this is complex b/c we need to find all the reachable non-dirty nodes, and either add or extend their lifetimes... 
187 | [snapshot additional-ms] 188 | ;((wcar {} (car/zincrby (re)))) 189 | ) 190 | 191 | (comment 192 | ;First we'll create a new tree 193 | (def my-tree (create "my-tree")) 194 | ;(def my-tree (open "my-tree")) 195 | (println (count @connection-registry)) 196 | ;This is how we'd close the tree 197 | (close my-tree) 198 | ;Once the tree is closed, you can destroy it to free its resources 199 | (destroy "my-tree") 200 | ;Here, we can iterate through the elements of the tree 201 | (lookup-fwd-iter (snapshot my-tree) "") 202 | 203 | ;save-as lets us take a snapshot and save it under another name 204 | (def other-tree (save-as (snapshot my-tree) "other-tree")) 205 | ;it returns a managed connection that can be interacted with like anything usual 206 | (lookup-fwd-iter (snapshot other-tree) "") 207 | (close other-tree) 208 | (destroy "other-tree") 209 | 210 | ; To write to a tree, send it an update function with update! 211 | ; Your function should take a snapshot as the argument, and return the modified snapshot to replace the data structure 212 | (update! my-tree (fn [snapshot] (insert snapshot "first key" "has a value of 22"))) 213 | (update! my-tree (fn [snapshot] (insert snapshot "second key" {:lol 33}))) 214 | (update! 
my-tree (fn [snapshot] (insert snapshot "3" 4))) 215 | 216 | (wcar {} (car/keys "*")) 217 | (wcar {} (car/flushall)) 218 | (wcar {} (car/zrange "refcount:expiry" 0 -1)) 219 | (wcar {} (car/hget "named-hhs" "my-tree")) 220 | (wcar {} (car/hget "named-hhs" "other-tree")) 221 | (wcar {} (car/get (str (wcar {} (car/hget "named-hhs" "my-tree")) ":rc"))) 222 | 223 | ) 224 | -------------------------------------------------------------------------------- /src/hitchhiker/redis.clj: -------------------------------------------------------------------------------- 1 | (ns hitchhiker.redis 2 | (:require [clojure.core.cache :as cache] 3 | [clojure.string :as str] 4 | [hitchhiker.tree.core :refer [go-try > (System/currentTimeMillis) 107 | (get-next-expiry) 108 | (wcar {}) 109 | (Thread/sleep))))) 110 | (.setName "redis rc refcounting expirer") 111 | (.setDaemon true) 112 | (.start))) 113 | 114 | (defn add-to-expiry 115 | "Takes a refcounting key and a time for that key to expire" 116 | [key when-to-expire] 117 | ;; Redis sorted sets us 64 bit floats, and the time only needs 41 bits 118 | ;; 64 bit floats have 52 bit mantissas, so all is fine for the next century 119 | (car/lua (str/join \newline 120 | ["redis.call('incr', _:my-key .. 
':rc')" 121 | "redis.call('zadd', 'refcount:expiry', _:when-to-expire, _:my-key)"]) 122 | {} {:my-key key :when-to-expire when-to-expire})) 123 | 124 | ;; How we'll represent the timer-pointers 125 | ;; We'll make a zset to store keys to expire by their time 126 | ;; One function takes "now" as the arg & expires all the keys that should be expired, returning the time of the next key 127 | ;; The other function takes now & a key, and adds the rc 128 | 129 | (let [cache (-> {} 130 | (cache/lru-cache-factory :threshold 10000) 131 | atom)] 132 | (defn totally-fetch 133 | [redis-key] 134 | (let [run (delay 135 | (loop [i 0] 136 | (if (= i 1000) 137 | (do (println "total fail") (throw (ex-info "total fail" {:key redis-key}))) 138 | (let [x (wcar {} (car/get redis-key))] 139 | (if x 140 | x 141 | (do (Thread/sleep 25) (recur (inc i)))))))) 142 | cs (swap! cache (fn [c] 143 | (if (cache/has? c redis-key) 144 | (cache/hit c redis-key) 145 | (cache/miss c redis-key run)))) 146 | val (cache/lookup cs redis-key)] 147 | (if val ( (totally-fetch redis-key) 176 | (assoc :storage-addr (synthesize-storage-addr redis-key)))))) 177 | 178 | (comment 179 | (:cfg (wcar {} (car/get "b89bb965-e584-45a2-9232-5b76bf47a21c"))) 180 | (update-in {:op-buf [1 2 3]} [:op-buf] into [4 5 6]) 181 | ) 182 | 183 | (defn redis-addr 184 | [last-key redis-key] 185 | (->RedisAddr last-key redis-key (synthesize-storage-addr redis-key))) 186 | 187 | (nippy/extend-freeze RedisAddr :b-tree/redis-addr 188 | [{:keys [last-key redis-key]} data-output] 189 | (nippy/freeze-to-out! data-output last-key) 190 | (nippy/freeze-to-out! data-output redis-key)) 191 | 192 | (nippy/extend-thaw :b-tree/redis-addr 193 | [data-input] 194 | (let [last-key (nippy/thaw-from-in! data-input) 195 | redis-key (nippy/thaw-from-in! 
data-input)] 196 | (redis-addr last-key redis-key))) 197 | 198 | 199 | (defrecord RedisBackend [#_service] 200 | core/IBackend 201 | (new-session [_] (atom {:writes 0 202 | :deletes 0})) 203 | (anchor-root [_ {:keys [redis-key] :as node}] 204 | (wcar {} (add-to-expiry redis-key (+ 5000 (System/currentTimeMillis)))) 205 | node) 206 | (write-node [_ node session] 207 | (go-try 208 | (swap! session update-in [:writes] inc) 209 | (let [key (str (java.util.UUID/randomUUID)) 210 | addr (redis-addr (core/last-key node) key)] 211 | ;(.submit service #(wcar {} (car/set key node))) 212 | (when (some #(not (satisfies? msg/IOperation %)) (:op-buf node)) 213 | (println (str "Found a broken node, has " (count (:op-buf node)) " ops")) 214 | (println (str "The node data is " node)) 215 | (println (str "and " (:op-buf node)))) 216 | (wcar {} 217 | (car/set key node) 218 | (when (core/index-node? node) 219 | (add-refs key 220 | (for [child (:children node) 221 | :let [child-key ( tree :storage-addr (async/poll!))) 232 | 233 | 234 | (defn create-tree-from-root-key 235 | [root-key] 236 | (let [last-key (core/last-key (wcar {} (car/get root-key)))] ; need last key to bootstrap 237 | (RedisAddr last-key root-key (synthesize-storage-addr root-key)))))) 239 | 240 | (comment 241 | (wcar {} (car/ping) (car/set "foo" "bar") (car/get "foo")) 242 | 243 | (println "cleared" 244 | (wcar {} (apply car/del 245 | (count (wcar {} (car/keys "*"))))))) 246 | 247 | ;; Benchmarks: 248 | ;; We'll have 2 workloads: in-order (the natural numbers) and random (doubles in 0-1) 249 | ;; We'll record 2 things: 250 | ;; - Series of timings per 0.1% of inserts 251 | ;; - Series of flush cost per X keys 252 | ;; The flush batch size should be a factor of b or of n--the benchmarks should 253 | ;; see results for both. 
254 | ;; We'll do this for msg and core versions 255 | ;; We'll also benchmark a sortedset 256 | ;; 257 | ;; We'll look at the plots with log & linear R^2 values 258 | ;; 259 | ;; There should also be a burn-in test to confirm 260 | ;; Will be easier for testing after we add KV. Then build a dataset to store there. 261 | ;; 262 | (comment 263 | 264 | (do 265 | (wcar {} 266 | (doseq [k ["foo" "bar" "baz" "quux"] 267 | e ["" ":rc" ":rs" ":rl"]] 268 | (car/del (str k e)))) 269 | 270 | (do (wcar {} (car/set "foo" 22)) 271 | ;(wcar {} (car/set "foo:rc" 1)) 272 | (wcar {} (car/set "bar" 33)) 273 | (wcar {} (car/set "baz" "onehundred")) 274 | (wcar {} (car/set "quux" "teply")) 275 | (wcar {} (add-refs "baz" ["quux"])) 276 | (wcar {} (add-refs "foo" ["bar" "baz"]))) 277 | (wcar {} (drop-ref "foo"))) 278 | (doseq [k ["foo" "bar" "baz" "quux"] 279 | e ["" ":rc" ":rs" ":rl"]] 280 | (println (str k e) "=" (wcar {} ((if (= e ":rl") 281 | #(car/lrange % 0 -1) 282 | car/get) (str k e))))) 283 | (wcar {} (drop-ref "foo")) 284 | 285 | (wcar {} (create-refcounted "foo" 22)) 286 | 287 | (wcar {} (car/flushall)) 288 | (count (wcar {} (car/keys "*"))) 289 | (count (msg/lookup-fwd-iter (create-tree-from-root-key (Config 17 300 (- 300 17))) 294 | (range 50000))) 295 | (->RedisBackend) 296 | )) 297 | (def my-tree-updated (core/flush-tree 298 | (msg/delete (:tree my-tree) 10) 299 | (->RedisBackend) 300 | )) 301 | (wcar {} (car/get (str @(:storage-addr (:tree my-tree))))) 302 | (wcar {} (car/get (str @(:storage-addr (:tree my-tree-updated))))) 303 | (wcar {} (car/set "foo" 10)) 304 | (wcar {} (car/get "foo")) 305 | (wcar {} (drop-ref "foo")) 306 | (wcar {} (drop-ref @(:storage-addr (:tree my-tree)))) 307 | (wcar {} (drop-ref @(:storage-addr (:tree my-tree-updated)))) 308 | ) 309 | -------------------------------------------------------------------------------- /src/hitchhiker/tree/core.cljc: -------------------------------------------------------------------------------- 1 | (ns 
hitchhiker.tree.core 2 | (:refer-clojure :exclude [compare resolve subvec]) 3 | (:require [clojure.core.rrb-vector :refer [catvec subvec]] 4 | #?(:clj [clojure.pprint :as pp]) 5 | #?(:clj [clojure.core.async :refer [go chan put! IndexNode) 199 | 200 | (defrecord IndexNode [children storage-addr op-buf cfg] 201 | IResolve 202 | (index? [this] true) 203 | (dirty? [this] (not (async/poll! storage-addr))) 204 | (resolve [this] (go this)) 205 | (last-key [this] 206 | ;;TODO should optimize by caching to reduce IOps (can use monad) 207 | (last-key (peek children))) 208 | INode 209 | (overflow? [this] 210 | (>= (count children) (* 2 (:index-b cfg)))) 211 | (underflow? [this] 212 | (< (count children) (:index-b cfg))) 213 | (split-node [this] 214 | (let [b (:index-b cfg) 215 | median (nth (index-node-keys children) (dec b)) 216 | [left-buf right-buf] (split-with #(not (pos? (compare (:key %) median))) 217 | ;;TODO this should use msg/affects-key 218 | (sort-by :key op-buf))] 219 | (->Split (->IndexNode (subvec children 0 b) 220 | (promise-chan) 221 | (vec left-buf) 222 | cfg) 223 | (->IndexNode (subvec children b) 224 | (promise-chan) 225 | (vec right-buf) 226 | cfg) 227 | median))) 228 | (merge-node [this other] 229 | (->IndexNode (catvec children (:children other)) 230 | (promise-chan) 231 | (catvec op-buf (:op-buf other)) 232 | cfg)) 233 | (lookup [root key] 234 | ;;This is written like so because it's performance critical 235 | (let [l (dec (count children)) 236 | a (object-array l) 237 | _ (dotimes [i l] 238 | (aset a i (last-key (nth children i)))) 239 | x #?(:clj (Arrays/binarySearch a 0 l key compare) 240 | :cljs (goog.array/binarySearch a key compare))] 241 | (if (neg? x) 242 | (- (inc x)) 243 | x)))) 244 | 245 | #?(:clj 246 | (nippy/extend-freeze IndexNode :b-tree/index-node 247 | [{:keys [storage-addr cfg children op-buf]} data-output] 248 | (nippy/freeze-to-out! data-output cfg) 249 | (nippy/freeze-to-out! 
data-output children) 250 | ;;TODO apparently RRB-vectors don't freeze correctly; 251 | ;;we'll force it to a normal vector as a workaround 252 | (nippy/freeze-to-out! data-output (into [] op-buf)))) 253 | 254 | #?(:clj 255 | (nippy/extend-thaw :b-tree/index-node 256 | [data-input] 257 | (let [cfg (nippy/thaw-from-in! data-input) 258 | children (nippy/thaw-from-in! data-input) 259 | op-buf (nippy/thaw-from-in! data-input)] 260 | (->IndexNode children nil op-buf cfg)))) 261 | 262 | (defn index-node? 263 | [node] 264 | (instance? IndexNode node)) 265 | 266 | #?(:clj 267 | (defn print-index-node 268 | "Optionally include" 269 | [node ^Writer writer fully-qualified?] 270 | (.write writer (if fully-qualified? 271 | (pr-str IndexNode) 272 | "IndexNode")) 273 | (.write writer (str {:keys (index-node-keys (:children node)) 274 | :children (:children node)})))) 275 | 276 | #?(:clj 277 | (defmethod print-method IndexNode 278 | [node writer] 279 | (print-index-node node writer false))) 280 | 281 | #?(:clj 282 | (defmethod print-dup IndexNode 283 | [node writer] 284 | (print-index-node node writer true))) 285 | 286 | #?(:clj 287 | (defn node-status-bits 288 | [node] 289 | (str "[" 290 | (if (dirty? node) "D" " ") 291 | "]"))) 292 | 293 | #?(:clj 294 | (defmethod pp/simple-dispatch IndexNode 295 | [node] 296 | (let [out ^Writer *out*] 297 | (.write out "IndexNode") 298 | (.write out (node-status-bits node)) 299 | (pp/pprint-logical-block 300 | :prefix "{" :suffix "}" 301 | (pp/pprint-logical-block 302 | (.write out ":keys ") 303 | (pp/write-out (index-node-keys (:children node))) 304 | (pp/pprint-newline :linear)) 305 | (pp/pprint-logical-block 306 | (.write out ":op-buf ") 307 | (pp/write-out (:op-buf node)) 308 | (pp/pprint-newline :linear)) 309 | (pp/pprint-logical-block 310 | (.write out ":children ") 311 | (pp/pprint-newline :mandatory) 312 | (pp/write-out (:children node))))))) 313 | 314 | (defn nth-of-set 315 | "Like nth, but for sorted sets. 
O(n)" 316 | [set index] 317 | (first (drop index set))) 318 | 319 | (defrecord DataNode [children storage-addr cfg] 320 | IResolve 321 | (index? [this] false) 322 | (resolve [this] (go this)) 323 | (dirty? [this] (not (async/poll! storage-addr))) 324 | (last-key [this] 325 | (when (seq children) 326 | (-> children 327 | (rseq) 328 | (first) 329 | (key)))) 330 | INode 331 | ;; Should have between b & 2b-1 children 332 | (overflow? [this] 333 | (>= (count children) (* 2 (:data-b cfg)))) 334 | (underflow? [this] 335 | (< (count children) (:data-b cfg))) 336 | (split-node [this] 337 | (->Split (data-node cfg (into (sorted-map-by compare) (take (:data-b cfg)) children)) 338 | (data-node cfg (into (sorted-map-by compare) (drop (:data-b cfg)) children)) 339 | (nth-of-set children (dec (:data-b cfg))))) 340 | (merge-node [this other] 341 | (data-node cfg (into children (:children other)))) 342 | (lookup [root key] 343 | (let [x #?(:clj (Collections/binarySearch (vec (keys children)) key compare) 344 | :cljs (goog.array/binarySearch (into-array (keys children)) key compare))] 345 | (if (neg? x) 346 | (- (inc x)) 347 | x)))) 348 | 349 | (defn data-node 350 | "Creates a new data node" 351 | [cfg children] 352 | (->DataNode children (promise-chan) cfg)) 353 | 354 | (defn data-node? 355 | [node] 356 | (instance? DataNode node)) 357 | 358 | #?(:clj 359 | (defmacro DataNode children nil cfg)))) 379 | 380 | ;(println (b-tree :foo :bar :baz)) 381 | ;(pp/pprint (apply b-tree (range 100))) 382 | #?(:clj 383 | (defn print-data-node 384 | [node ^Writer writer fully-qualified?] 385 | (.write writer (if fully-qualified? 
386 | (pr-str DataNode) 387 | "DataNode")) 388 | (.write writer (str {:children (:children node)})))) 389 | 390 | #?(:clj 391 | (defmethod print-method DataNode 392 | [node writer] 393 | (print-data-node node writer false))) 394 | 395 | #?(:clj 396 | (defmethod print-dup DataNode 397 | [node writer] 398 | (print-data-node node writer true))) 399 | 400 | #?(:clj 401 | (defmethod pp/simple-dispatch DataNode 402 | [node] 403 | (let [out ^Writer *out*] 404 | (.write out (str "DataNode" 405 | (node-status-bits node))) 406 | (.write out (str {:children (:children node)}))))) 407 | 408 | (defn backtrack-up-path-until 409 | "Given a path (starting with root and ending with an index), searches backwards, 410 | passing each pair of parent & index we just came from to the predicate function. 411 | When that function returns true, we return the path ending in the index for which 412 | it was true, or else we return the empty path" 413 | [path pred] 414 | (loop [path path] 415 | (when (seq path) 416 | (let [from-index (peek path) 417 | tmp (pop path) 418 | parent (peek tmp)] 419 | (if (pred parent from-index) 420 | path 421 | (recur (pop tmp))))))) 422 | 423 | 424 | (defn right-successor 425 | "Given a node on a path, find's that node's right successor node" 426 | [path] 427 | ;(clojure.pprint/pprint path) 428 | ;TODO this function would benefit from a prefetching hint 429 | ; to keep the next several sibs in mem 430 | (go-try 431 | (when-let [common-parent-path 432 | (backtrack-up-path-until 433 | path 434 | (fn [parent index] 435 | (< (inc index) (count (:children parent)))))] 436 | (let [next-index (-> common-parent-path peek inc) 437 | parent (-> common-parent-path pop peek) 438 | new-sibling ( s :children first) 444 | c (if (tree-node? 
c) 445 | ( (interleave sibling-lineage 452 | (repeat 0)) 453 | (butlast)) ; butlast ensures we end w/ node 454 | ] 455 | (-> (pop common-parent-path) 456 | (conj next-index) 457 | (into path-suffix)))))) 458 | 459 | 460 | (defn forward-iterator 461 | "Takes the result of a search and puts the iterated elements onto iter-ch 462 | going forward over the tree as needed. Does lg(n) backtracking sometimes." 463 | [iter-ch path start-key] 464 | (go-try 465 | (loop [path path] 466 | (if path 467 | (let [start-node (peek path) 468 | _ (assert (data-node? start-node)) 469 | elements (-> start-node 470 | :children ; Get the indices of it 471 | (subseq >= start-key))] 472 | ( (:children cur) 490 | ;;TODO what are the semantics for exceeding on the right? currently it's trunc to the last element 491 | (nth index (peek (:children cur))) 492 | ( 504 | (-> (IndexNode [left right] (promise-chan) [] cfg)) 541 | node) 542 | (let [index (peek path) 543 | {:keys [children keys] :as parent} (peek (pop path))] 544 | (if (overflow? 
node) ; splice the split into the parent 545 | ;;TODO refactor paths to be node/index pairs or 2 vectors or something 546 | (let [{:keys [left right median]} (split-node node) 547 | new-children (catvec (conj (subvec children 0 index) 548 | left right) 549 | (subvec children (inc index)))] 550 | (recur (-> parent 551 | (assoc :children new-children) 552 | (dirty!)) 553 | (pop (pop path)))) 554 | (recur (-> parent 555 | ;;TODO this assoc-in seems to be a bottleneck 556 | (assoc-in [:children index] node) 557 | (dirty!)) 558 | (pop (pop path)))))))))) 559 | 560 | ;;TODO: cool optimization: when merging children, push as many operations as you can 561 | ;;into them to opportunistically minimize overall IO costs 562 | 563 | (defn delete 564 | [{:keys [cfg] :as tree} key] 565 | (go-try 566 | (let [path ( (count (:children (nth children (dec index)))) 585 | (count (:children (nth children (inc index))))) 586 | (dec index) ; right sib bigger 587 | :else (inc index)) 588 | node-first? (> bigger-sibling-idx index) ; if true, `node` is left 589 | merged (if node-first? 590 | (merge-node node (IndexNode (catvec (conj old-left-children left right) 597 | old-right-children) 598 | (promise-chan) 599 | op-buf 600 | cfg) 601 | (pop (pop path)))) 602 | (recur (->IndexNode (catvec (conj old-left-children merged) 603 | old-right-children) 604 | (promise-chan) 605 | op-buf 606 | cfg) 607 | (pop (pop path))))) 608 | (recur (->IndexNode (assoc children index node) 609 | (promise-chan) 610 | op-buf 611 | cfg) 612 | (pop (pop path)))))))))) 613 | 614 | (defn b-tree 615 | [cfg & kvs] 616 | (go-try 617 | (loop [[[k v] & r] (partition 2 kvs) 618 | t (data-node cfg (sorted-map-by compare))] 619 | (if k 620 | (recur r (TestingAddr (last-key node) node))) 691 | (delete-addr [_ addr session ])) 692 | 693 | (defn flush-tree 694 | "Given the tree, finds all dirty nodes, delivering addrs into them. 695 | Every dirty node also gets replaced with its TestingAddr. 
696 | These form a GC cycle, have fun with the unmanaged memory port :)" 697 | ([tree backend] 698 | (go-try 699 | (let [session (new-session backend) 700 | flushed (> (flush-children (:children tree) backend stats) 711 | InsertOp ") 46 | (.write writer (pr-str (:key op))) 47 | (.write writer ", ") 48 | (.write writer (pr-str (:value op))) 49 | (.write writer ")"))) 50 | 51 | #?(:clj 52 | (defmethod pp/simple-dispatch InsertOp 53 | [op] 54 | (print op))) 55 | 56 | #?(:clj 57 | (defmethod print-method DeleteOp 58 | [op ^Writer writer] 59 | (.write writer "DeleteOp") 60 | (.write writer (str {:key (:key op)} " - " (:tag op))))) 61 | 62 | #?(:clj 63 | (defmethod print-dup DeleteOp 64 | [op ^Writer writer] 65 | (.write writer "(tree.messaging/->DeleteOp ") 66 | (.write writer (pr-str (:key op))) 67 | (.write writer ")"))) 68 | 69 | #?(:clj 70 | (defmethod pp/simple-dispatch DeleteOp 71 | [op] 72 | (print op))) 73 | 74 | (defn enqueue 75 | ([tree msgs] 76 | (go-try 77 | (let [deferred-ops (atom []) 78 | msg-buffers-propagated ( tree 95 | (core/dirty!) 96 | (update-in [:op-buf] into msgs)) 97 | :else ; overflow, should be IndexNode 98 | (do (assert (core/index-node? tree)) 99 | ;(println "overflowing node" (:keys tree) "with buf" (:op-buf tree) 100 | ; "with new msgs" msgs 101 | ; ) 102 | (loop [[child & children] (:children tree) 103 | rebuilt-children [] 104 | msgs (vec (sort-by affects-key ;must be a stable sort 105 | (concat (:op-buf tree) msgs)))] 106 | (let [took-msgs (into [] 107 | (take-while #(>= 0 (core/compare 108 | (affects-key %) 109 | (core/last-key child)))) 110 | msgs) 111 | extra-msgs (into [] 112 | (drop-while #(>= 0 (core/compare 113 | (affects-key %) 114 | (core/last-key child)))) 115 | msgs) 116 | ;_ (println "last-key:" (core/last-key child)) 117 | ;_ (println "goes left:" took-msgs) 118 | ;_ (println "goes right:" extra-msgs) 119 | on-the-last-child? (empty? children) 120 | 121 | ;; Any changes to the current child? 
122 | new-child 123 | (cond 124 | (and on-the-last-child? (seq extra-msgs)) 125 | ( tree 137 | (assoc :children (conj rebuilt-children new-child)) 138 | (assoc :op-buf []) 139 | (core/dirty!)) 140 | (recur children (conj rebuilt-children new-child) extra-msgs)))))))))) 141 | 142 | 143 | ;;TODO delete in core needs to stop using the index-node constructor to be more 144 | ;;careful about how we handle op-bufs during splits and merges. 145 | ;; 146 | ;;After we've got delete working, lookup, pred, and succ should be fixed 147 | ;; 148 | ;;broadcast nodes will need IDs so that they can combine during merges... 149 | ;; 150 | 151 | (defn general-max [e & r] 152 | ;; fast track for number keys 153 | (if (number? e) 154 | (apply max e r) 155 | (reduce (fn [old elem] 156 | (if (pos? (core/compare old elem)) 157 | old 158 | elem)) 159 | e r))) 160 | 161 | 162 | (defn apply-ops-in-path 163 | [path] 164 | (if (>= 1 (count path)) 165 | (:children (peek path)) 166 | (let [ops (->> path 167 | (into [] (comp (filter core/index-node?) 168 | (map :op-buf))) 169 | (rseq) ; highest node should be last in seq 170 | (apply catvec) 171 | (sort-by affects-key)) ;must be a stable sort 172 | this-node-index (-> path pop peek) 173 | parent (-> path pop pop peek) 174 | is-first? (zero? this-node-index) 175 | ;;We'll need to find the smallest last-key of the left siblings along the path 176 | [left-sibs-on-path is-last?] 177 | (loop [path path 178 | is-last? true 179 | left-sibs []] 180 | (if (= 1 (count path)) ; are we at the root? 181 | [left-sibs is-last?] 182 | (let [this-node-index (-> path pop peek) 183 | parent (-> path pop pop peek) 184 | is-first? (zero? this-node-index) 185 | local-last? (= (-> parent :children count dec) 186 | this-node-index)] 187 | (if is-first? 188 | (recur (pop (pop path)) (and is-last? local-last?) left-sibs) 189 | (recur (pop (pop path)) 190 | (and is-last? local-last?) 
191 | (conj left-sibs 192 | (nth (:children parent) 193 | (dec this-node-index)))))))) 194 | left-sibs-min-last (when (seq left-sibs-on-path) 195 | (->> left-sibs-on-path 196 | (map core/last-key) 197 | (apply general-max))) 198 | left-sib-filter (if left-sibs-min-last 199 | (drop-while #(>= 0 (core/compare (affects-key %) 200 | left-sibs-min-last))) 201 | identity) 202 | data-node (peek path) 203 | my-last (core/last-key data-node) 204 | right-side-filter (if is-last? 205 | identity 206 | (take-while #(>= 0 (core/compare (affects-key %) my-last)))) 207 | correct-ops (into [] (comp left-sib-filter right-side-filter) ops) 208 | 209 | ;;We include op if leq my left, and not if leq left's left 210 | ;;TODO we can't apply all ops, we should ensure to only apply ops whose keys are in the defined range, unless we're the last sibling 211 | ] 212 | ;(println "left-sibs-min-last" left-sibs-min-last) 213 | ;(println "is-last?" is-last?) 214 | ;(println "expanding data node" data-node "with ops" correct-ops) 215 | (reduce (fn [coll op] 216 | (apply-op-to-coll op coll)) 217 | (:children data-node) 218 | correct-ops)))) 219 | 220 | (defn lookup 221 | ([tree key] 222 | (lookup tree key nil)) 223 | ([tree key not-found] 224 | (go-try 225 | (let [path (InsertOp key value) 232 | :tag (uuid) 233 | )])) 234 | 235 | (defn delete 236 | [tree key] 237 | (enqueue tree [(assoc (->DeleteOp key) 238 | :tag (uuid) 239 | )])) 240 | 241 | (defn forward-iterator 242 | "Takes the result of a search and puts the iterated elements onto iter-ch 243 | going forward over the tree as needed. Does lg(n) backtracking sometimes." 244 | [iter-ch path start-key] 245 | (go-try 246 | (loop [path path] 247 | (if path 248 | (let [_ (assert (core/data-node? (peek path))) 249 | elements (drop-while (fn [[k v]] 250 | (neg? 
(core/compare k start-key))) 251 | (apply-ops-in-path path))] 252 | (KonserveBackend store) 42 | init-tree (Config 1 3 (- 3 1)))) 44 | (range 1 11))) 45 | flushed (KonserveBackend store) 66 | flushed (Config 1 3 (- 3 1)))) 70 | (range 1 11))) 71 | backend)) 72 | root-key (kons/get-root-key (:tree flushed)) 73 | tree (KonserveBackend store))) 123 | t (:tree flushed)] 124 | [t (Config 3 3 2))) nil #{}] 128 | ops))] 129 | (let [b-tree-order (map first (js m)) 195 | (if (cljs.test/successful? m) 196 | (println "Success!") 197 | (println "FAIL"))) 198 | (run-tests))) 199 | -------------------------------------------------------------------------------- /test/hitchhiker/redis_test.clj: -------------------------------------------------------------------------------- 1 | (ns hitchhiker.redis-test 2 | (:require [clojure.test.check.clojure-test :refer [defspec]] 3 | [clojure.test.check.generators :as gen] 4 | [clojure.test.check.properties :as prop] 5 | [hitchhiker.redis :as redis] 6 | [hitchhiker.tree.core :refer [RedisBackend))))] 41 | (when root 42 | (wcar {} (redis/drop-ref root))) 43 | #_(println "flush") 44 | [t (Config 3 3 2))) nil #{}] 48 | ops)] 49 | #_(println "Make it to the end of a test, tree has" (count (lookup-fwd-iter b-tree -1)) "keys left") 50 | (let [b-tree-order (lookup-fwd-iter b-tree -1) 51 | res (= b-tree-order (seq (sort set)))] 52 | (wcar {} (redis/drop-ref root)) 53 | (assert (let [ks (wcar {} (car/keys "*"))] 54 | (or (empty? 
ks) 55 | (= ["refcount:expiry"] ks) 56 | (= #{"refcount:expiry" nil} (into #{} ks)))) 57 | "End with no keys") 58 | (assert res (str "These are unequal: " (pr-str b-tree-order) " " (pr-str (seq (sort set))))) 59 | res)))) 60 | 61 | 62 | (defspec test-many-keys-bigger-trees 63 | 100 64 | (mixed-op-seq 800 200 10 1000 1000)) 65 | 66 | (comment 67 | (test-many-keys-bigger-trees) 68 | 69 | 70 | (count (remove (reduce (fn [t [op x]] 71 | (let [x-reduced (when x (mod x 1000))] 72 | (condp = op 73 | :flush t 74 | :add (conj t x-reduced) 75 | :del (disj t x-reduced)))) 76 | #{} 77 | (drop-last 2 opseq)) (lookup-fwd-iter (msg/delete test-tree -33) 0))) 78 | (:op-buf test-tree) 79 | (count (sort (reduce (fn [t [op x]] 80 | (let [x-reduced (when x (mod x 1000))] 81 | (condp = op 82 | :flush t 83 | :add (conj t x-reduced) 84 | :del (disj t x-reduced)))) 85 | #{} 86 | opseq))) 87 | 88 | 89 | (let [ops (->> (read-string (slurp "broken-data.edn")) 90 | (map (fn [[op x]] [op (mod x 100000)])) 91 | (drop-last 125))] 92 | (let [[b-tree s] (reduce (fn [[t s] [op x]] 93 | (let [x-reduced (mod x 100000)] 94 | (condp = op 95 | :add [(insert t x-reduced) 96 | (conj s x-reduced)] 97 | :del [(msg/delete t x-reduced) 98 | (disj s x-reduced)]))) 99 | [(core/b-tree (core/->Config 3 3 2)) #{}] 100 | ops)] 101 | (println ops) 102 | (println (->> (read-string (slurp "broken-data.edn")) 103 | (map (fn [[op x]] [op (mod x 100000)])) 104 | (take-last 125) 105 | first)) 106 | (println (lookup-fwd-iter b-tree -1)) 107 | (println (sort s)) 108 | )) 109 | (defn trial [] 110 | (let [opseq (read-string (slurp "broken-data.edn")) 111 | [b-tree root] (reduce (fn [[t root] [op x]] 112 | (let [x-reduced (when x (mod x 1000))] 113 | (condp = op 114 | :flush (let [_ (println "About to flush...") 115 | t (:tree (core/flush-tree t (redis/->RedisBackend)))] 116 | (when root 117 | (wcar {} (redis/drop-ref root))) 118 | (println "flushed") 119 | [t @(:storage-addr t)]) 120 | :add (do (println "about to add" 
x-reduced "...") 121 | (let [x [(insert t x-reduced) root]] 122 | (println "added") x 123 | )) 124 | :del (do (println "about to del" x-reduced "...") 125 | (let [x [(msg/delete t x-reduced) root]] 126 | (println "deled") x))))) 127 | [(core/b-tree (core/->Config 3 3 2))] 128 | opseq)] 129 | (def test-tree b-tree) 130 | (println "Got diff" 131 | (count (remove (reduce (fn [t [op x]] 132 | (let [x-reduced (when x (mod x 1000))] 133 | (condp = op 134 | :flush t 135 | :add (conj t x-reduced) 136 | :del (disj t x-reduced)))) 137 | #{} 138 | opseq) (lookup-fwd-iter test-tree 0)))) 139 | (println "balanced?" (hitchhiker.tree.core-test/check-node-is-balanced test-tree)) 140 | (def my-root root))) 141 | 142 | (map #(and (second %) (mod (second %) 1000)) opseq) 143 | 144 | 145 | (def opseq (read-string (io/resource "redis_test_data.clj")))) 146 | -------------------------------------------------------------------------------- /test/hitchhiker/tree/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns hitchhiker.tree.core-test 2 | (:refer-clojure :exclude [compare resolve]) 3 | (:require [clojure.test :refer :all] 4 | [clojure.test.check :as tc] 5 | [clojure.test.check.clojure-test :refer [defspec]] 6 | [clojure.test.check.generators :as gen] 7 | [clojure.test.check.properties :as prop] 8 | [hitchhiker.tree.core :refer :all] 9 | [clojure.core.async :refer [promise-chan] :as async])) 10 | 11 | (deftest reduce<-test 12 | (is (= 45 (Config 3 5 2) (sorted-map 1 1 2 2 3 3 4 4 5 5)) 20 | data2 (data-node (->Config 3 5 2) (sorted-map 6 6 7 7 8 8 9 9 10 10)) 21 | root (->IndexNode [data1 data2] (promise-chan) [] (->Config 3 5 2))] 22 | (is (= (Config 3 5 2) (sorted-map "1" 1 "10" 10 "2" 2 "3" 3 "4" 4)) 28 | data2 (data-node (->Config 3 5 2) (sorted-map "5" 5 "6" 6 "7" 7 "8" 8 "9" 9)) 29 | root (->IndexNode [data1 data2] (promise-chan) [] (->Config 3 5 2))] 30 | (is (= (Config 3 5 2) (sorted-map 1 1 2 2 3 3 4 4 5 5)) 36 | data2 
(data-node (->Config 3 5 2) (sorted-map 6 6 7 7 8 8 9 9 10 10)) 37 | root (->IndexNode [data1 data2] (promise-chan) [] (->Config 3 5 2))] 38 | (is (= (map first (lookup-fwd-iter root 4)) (range 4 11))) 39 | (is (= (map first (lookup-fwd-iter root 0)) (range 1 11))))) 40 | (testing "index nodes identified as such" 41 | (let [data (data-node (->Config 3 5 2) (sorted-map 1 1)) 42 | root (->IndexNode [data] (promise-chan) [] (->Config 3 5 2))] 43 | (is (index? root)) 44 | (is (not (index? data)))))) 45 | 46 | 47 | (defn insert-helper 48 | [t k] 49 | (Config 3 3 2))) 56 | v) 57 | b-tree-order (lookup-fwd-iter b-tree Integer/MIN_VALUE)] 58 | (= (seq sorted-set-order) (seq (map first b-tree-order)))))) 59 | 60 | (defspec b-tree-sorts-uniques-random-int-vector 61 | 1000 62 | added-keys-appear-in-order) 63 | 64 | (defspec test-insert 65 | 1000 66 | (prop/for-all [v (gen/vector gen/int)] 67 | (let [sorted-set-order (into (sorted-set) v) 68 | b-tree (reduce insert-helper (Config 3 3 2))) v) 69 | b-tree-order (lookup-fwd-iter b-tree Integer/MIN_VALUE)] 70 | (= (seq sorted-set-order) (seq (map first b-tree-order)))))) 71 | 72 | (defspec test-insert-string 73 | 1000 74 | (prop/for-all [v (gen/vector gen/string)] 75 | (let [sorted-set-order (into (sorted-set) v) 76 | b-tree (reduce insert-helper (Config 3 3 2))) v) 77 | b-tree-order (lookup-fwd-iter b-tree "")] 78 | (= (seq sorted-set-order) (seq (map first b-tree-order)))))) 79 | 80 | (defspec test-delete2 81 | 1000 82 | (prop/for-all [the-set (gen/vector-distinct gen/int) 83 | num gen/nat] 84 | (let [set-a (sort the-set) 85 | set-b (take num the-set) 86 | b-tree (reduce insert-helper (Config 3 3 2))) set-a) 87 | b-tree-without (reduce #(Config 3 3 2) (sorted-map 1 "1" 2 "2" 3 "3" 4 "4")) 94 | root (->IndexNode [data1] (promise-chan) [] (->Config 3 3 2))] 95 | (is (= (map second (lookup-fwd-iter (Config 3 3 2) (sorted-map 1 1 2 2 3 3 4 4 5 5)) 101 | root (->IndexNode [data1] (promise-chan) [] (->Config 3 3 2))] 102 | (are [x y] 
(= (map first (lookup-fwd-iter (Config 3 3 2))) (range 5))] 112 | (is (= (map first (lookup-fwd-iter (Config 3 3 2))) (range 10))] 114 | (is (= (map first (lookup-fwd-iter (Config 3 3 2))) (range 6))] 116 | (is (= (map first (lookup-fwd-iter (Config 3 3 2))) the-set)] 145 | (check-node-is-balanced b-tree)))) 146 | 147 | (defspec test-wider-balanced-after-many-inserts 148 | 1000 149 | (prop/for-all [the-set (gen/vector (gen/no-shrink gen/int))] 150 | (let [b-tree (reduce insert-helper (Config 200 250 17))) the-set)] 151 | (check-node-is-balanced b-tree)))) 152 | 153 | #_(require '[criterium.core :refer (quick-bench)]) 154 | ;;TODO this is very slow...why is it so much slower? 155 | ;;Probably insane overuse of catvec/subvec... 156 | ;;Need to profile! 157 | #_(quick-bench (reduce insert (b-tree (->Config 1000 1100 20)) (range 10000))) 158 | #_(quick-bench (into [] (range 10000))) 159 | 160 | (defn mixed-op-seq 161 | "Returns a property that ensures trees produced by a sequence of adds and deletes 162 | in the given ratio, with universe-size distinct values" 163 | [add-vs-del-ratio universe-size num-ops] 164 | (let [add-freq (long (* 1000 add-vs-del-ratio)) 165 | del-freq (long (* 1000 (- 1 add-vs-del-ratio)))] 166 | (prop/for-all [ops (gen/vector (gen/frequency 167 | [[add-freq (gen/tuple (gen/return :add) 168 | (gen/no-shrink gen/int))] 169 | [del-freq (gen/tuple (gen/return :del) 170 | (gen/no-shrink gen/int))]]) 171 | num-ops)] 172 | (let [b-tree (reduce (fn [t [op x]] 173 | (let [x-reduced (mod x universe-size)] 174 | (condp = op 175 | :add (insert-helper t x-reduced) 176 | :del (Config 3 3 2))) 178 | ops)] 179 | ; (println ops) 180 | (check-node-is-balanced b-tree))))) 181 | 182 | (defspec test-few-keys-many-ops 183 | 50 184 | (mixed-op-seq 0.5 250 5000)) 185 | 186 | (defspec test-many-keys-bigger-trees 187 | 1000 188 | (mixed-op-seq 0.8 1000 1000)) 189 | 190 | (defspec test-sparse-ops 191 | 1000 192 | (mixed-op-seq 0.7 100000 1000)) 193 | 194 | (comment 195 | 
(time (tc/quick-check 1 (mixed-op-seq 0.5 100 1000)))) 196 | 197 | (defspec test-flush 198 | 1000 199 | (prop/for-all [v (gen/vector gen/int)] 200 | (let [sorted-set-order (into (sorted-set) v) 201 | b-tree (reduce insert-helper (Config 3 3 2))) v) 202 | b-tree-order (map first (lookup-fwd-iter b-tree Integer/MIN_VALUE)) 203 | flushed-tree (:tree (TestingBackend)))) 204 | flushed-tree-order (map first (lookup-fwd-iter flushed-tree Integer/MIN_VALUE))] 205 | (= (seq sorted-set-order) 206 | (seq b-tree-order) 207 | (seq flushed-tree-order))))) 208 | ;;TODO should test that flushing can be interleaved without races 209 | -------------------------------------------------------------------------------- /test/hitchhiker/tree/messaging_test.clj: -------------------------------------------------------------------------------- 1 | (ns hitchhiker.tree.messaging-test 2 | (:require [clojure.test.check.clojure-test :refer [defspec]] 3 | [clojure.test.check.generators :as gen] 4 | [clojure.test.check.properties :as prop] 5 | [hitchhiker.tree.core :refer [go-try Config 3 3 2))) v) 22 | b-tree-order (lookup-fwd-iter b-tree Integer/MIN_VALUE)] 23 | (= (seq sorted-set-order) b-tree-order)))) 24 | 25 | (defspec test-insert 26 | 1000 27 | (prop/for-all [v (gen/vector gen/int)] 28 | (let [sorted-set-order (into (sorted-set) v) 29 | b-tree (reduce insert (Config 3 3 2))) v) 30 | b-tree-order (lookup-fwd-iter b-tree Integer/MIN_VALUE)] 31 | (= (seq sorted-set-order) b-tree-order)))) 32 | 33 | (defspec test-insert-string 34 | 1000 35 | (prop/for-all [v (gen/vector gen/string)] 36 | (let [sorted-set-order (into (sorted-set) v) 37 | b-tree (reduce insert (Config 3 3 2))) v) 38 | b-tree-order (lookup-fwd-iter b-tree "")] 39 | (= (seq sorted-set-order) b-tree-order)))) 40 | 41 | (defspec test-delete2 42 | 1000 43 | (prop/for-all [the-set (gen/vector-distinct gen/int) 44 | num gen/nat] 45 | (let [set-a (sort the-set) 46 | set-b (take num the-set) 47 | b-tree (reduce insert (Config 3 3 2))) 
set-a) 48 | b-tree-without (reduce #(Config 3 3 2)) [0 5 2 1 4 -1 3]) [0 5])) 54 | 55 | (defspec test-balanced-after-many-inserts 56 | 1000 57 | (prop/for-all [the-set (gen/vector (gen/no-shrink gen/int))] 58 | (let [b-tree (reduce insert (Config 3 3 2))) the-set)] 59 | (hitchhiker.tree.core-test/check-node-is-balanced b-tree)))) 60 | 61 | (defspec test-wider-balanced-after-many-inserts 62 | 1000 63 | (prop/for-all [the-set (gen/vector (gen/no-shrink gen/int))] 64 | (let [b-tree (reduce insert (Config 200 220 17))) the-set)] 65 | (hitchhiker.tree.core-test/check-node-is-balanced b-tree)))) 66 | 67 | ;; This test will show how if you apply killerop to the b-tree result, it corrupts 68 | ;; the tree by losing track of the element 20 69 | (comment 70 | (let [split-idx (+ 125 #_270) 71 | all-ops (->> (read-string (slurp "broken-data.edn")) 72 | (map (fn [[op x]] [op (mod x 100000)]))) 73 | ops (->> all-ops 74 | (drop-last split-idx)) 75 | killer-op (->> all-ops 76 | (take-last split-idx) 77 | first) 78 | killer-op-dos (->> all-ops 79 | (take-last split-idx) 80 | second) 81 | ] 82 | (let [[b-tree s] (reduce (fn [[t s] [op x]] 83 | (let [x-reduced (mod x 100000)] 84 | (condp = op 85 | :add [(insert t x-reduced) 86 | (conj s x-reduced)] 87 | :del [(msg/delete t x-reduced) 88 | (disj s x-reduced)]))) 89 | [(Config 3 3 2))) #{}] 90 | ops) 91 | f #(case (first %1) :add (insert %2 (second %1)) 92 | :del (msg/delete %2 (second %1))) 93 | ] 94 | ; (println ops) 95 | (println killer-op) 96 | (clojure.pprint/pprint b-tree) 97 | (println (lookup-fwd-iter b-tree -1)) 98 | (println (sort s)) 99 | (def cool-test-tree b-tree) 100 | ;; It appears that the insert op is leapfrogging the pending delete op 101 | (clojure.pprint/pprint (f killer-op b-tree)) 102 | (println (lookup-fwd-iter (f killer-op b-tree) -1)) 103 | (println (sort (disj s (second killer-op)))) 104 | (when killer-op-dos 105 | (println killer-op-dos) 106 | (clojure.pprint/pprint (f killer-op-dos (f killer-op b-tree))))) 
107 | ) 108 | 109 | (clojure.pprint/pprint cool-test-tree) 110 | (clojure.pprint/pprint (insert cool-test-tree 20)) 111 | (clojure.pprint/pprint (msg/delete cool-test-tree 32)) 112 | ) 113 | 114 | (defn mixed-op-seq 115 | "Returns a property that ensures trees produced by a sequence of adds and deletes 116 | in the given ratio, with universe-size distinct values" 117 | [add-vs-del-ratio universe-size num-ops] 118 | (let [add-freq (long (* 1000 add-vs-del-ratio)) 119 | del-freq (long (* 1000 (- 1 add-vs-del-ratio)))] 120 | (prop/for-all [ops (gen/vector (gen/frequency 121 | [[add-freq (gen/tuple (gen/return :add) 122 | (gen/no-shrink gen/int))] 123 | [del-freq (gen/tuple (gen/return :del) 124 | (gen/no-shrink gen/int))]]) 125 | num-ops)] 126 | (let [[b-tree s] (reduce (fn [[t s] [op x]] 127 | (let [x-reduced (mod x universe-size)] 128 | (condp = op 129 | :add [(insert t x-reduced) 130 | (conj s x-reduced)] 131 | :del [(Config 3 3 2))) #{}] 134 | ops)] 135 | ; (println ops) 136 | (and (= (lookup-fwd-iter b-tree -1) (seq (sort s))) 137 | (hitchhiker.tree.core-test/check-node-is-balanced b-tree)))))) 138 | 139 | (comment 140 | (let [data (read-string (slurp "broken-data2.edn")) 141 | universe-size 1000 142 | ] 143 | (let [[b-tree s] (reduce (fn [[t s] [op x]] 144 | (let [x-reduced (mod x universe-size)] 145 | (condp = op 146 | :add [(insert t x-reduced) 147 | (conj s x-reduced)] 148 | :del [(msg/delete t x-reduced) 149 | (disj s x-reduced)]))) 150 | [(Config 3 3 2))) #{}] 151 | data)] 152 | ; (println ops) 153 | (println (lookup-fwd-iter b-tree -1)) 154 | (println (sort s)) 155 | ) 156 | ) 157 | ) 158 | 159 | (defspec test-few-keys-many-ops 160 | 50 161 | (mixed-op-seq 0.5 250 5000)) 162 | 163 | (defspec test-many-keys-bigger-trees 164 | 1000 165 | (mixed-op-seq 0.8 1000 1000)) 166 | 167 | (defspec test-sparse-ops 168 | 1000 169 | (mixed-op-seq 0.7 100000 1000)) 170 | 171 | -------------------------------------------------------------------------------- /test_node.js: 
#!/usr/bin/env node

// Node-based test runner for the ClojureScript build.
// (Modeled after the datascript test runner.)
'use strict';

const fs = require('fs');
const vm = require('vm');

// The Google Closure bootstrap expects a global `goog` namespace object.
global.goog = {};

// Closure's debug loader calls this hook to load each dependency file;
// resolve paths relative to the compiled output directory.
global.CLOSURE_IMPORT_SCRIPT = function (src) {
  require('./target/none/goog/' + src);
  return true;
};

/**
 * Evaluate a JS file in the global context (rather than a module-local
 * scope) so the Closure namespaces it defines are visible everywhere.
 * @param {string} file - path of the script to load
 */
function nodeGlobalRequire(file) {
  // Read as a string: vm.runInThisContext throws a TypeError when handed
  // a Buffer on modern Node versions, and readFileSync without an
  // encoding returns a Buffer.
  vm.runInThisContext.call(global, fs.readFileSync(file, 'utf8'), file);
}

nodeGlobalRequire('./target/none/goog/base.js');
nodeGlobalRequire('./target/none/cljs_deps.js');
goog.require('hitchhiker.konserve_test');

// Run the test suite; exit non-zero when any test fails or errors so CI
// (see .travis.yml: TEST_CMD='node ./test_node.js') detects the failure.
hitchhiker.konserve_test.test_all(function (res) {
  process.exit(res.fail + res.error > 0 ? 1 : 0);
});