├── .gitattributes ├── .gitignore ├── ebin └── gb_merkle_trees.app ├── Makefile ├── .travis.yml ├── README.md ├── LICENSE.md └── src └── gb_merkle_trees.erl /.gitattributes: -------------------------------------------------------------------------------- 1 | erlang.mk -diff 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | *.o 4 | *.beam 5 | *.plt 6 | erl_crash.dump 7 | ebin/ 8 | doc/ 9 | *.d 10 | -------------------------------------------------------------------------------- /ebin/gb_merkle_trees.app: -------------------------------------------------------------------------------- 1 | {application, 'gb_merkle_trees', [ 2 | {description, "General balanced Merkle trees"}, 3 | {vsn, "0.2.2"}, 4 | {modules, ['gb_bench','gb_merkle_trees']}, 5 | {registered, []}, 6 | {applications, [kernel,stdlib,crypto]}, 7 | {env, []} 8 | ]}. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT = gb_merkle_trees 2 | PROJECT_DESCRIPTION = General balanced Merkle trees 3 | PROJECT_VERSION = 0.2.2 4 | 5 | TEST_DEPS = triq 6 | dep_triq = git https://gitlab.com/triq/triq.git e5ba907a11985bf8150f5b5b332d39516ab15857 7 | 8 | LOCAL_DEPS = crypto 9 | 10 | # Whitespace to be used when creating files from templates. 11 | SP = 2 12 | 13 | include erlang.mk 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | dist: trusty 3 | language: erlang 4 | otp_release: 5 | - 20.0.1 6 | - 19.3.6.1 7 | - 18.3 8 | addons: 9 | apt: 10 | sources: 11 | - debian-sid 12 | packages: 13 | ## GNU Make 4. 
GNU Make 3.81 as [in Ubuntu 14.04 14 | ## "trusty"](https://packages.ubuntu.com/trusty/make) is not 15 | ## sufficient, because of error on `triq` test dep when running 16 | ## `make tests`. Also erlang.mk reports the following warning: 17 | ## "erlang.mk:24: Please upgrade to GNU Make 4 or later: 18 | ## https://erlang.mk/guide/installation.html". 19 | - make 20 | before_install: 21 | - make --version 22 | script: 23 | - | 24 | case "${JOB:?}" in 25 | dotapp) 26 | A=ebin/gb_merkle_trees.app 27 | test -f ${A:?} 28 | C=$(cat ${A:?}) 29 | rm ${A:?} && test ! -e ${A:?} && make ${A:?} && test -f ${A:?} 30 | printf "%b" "${C:?}" | diff -u - ${A:?} 31 | ;; 32 | *) 33 | make ${JOB:?} 34 | ;; 35 | esac 36 | matrix: 37 | include: 38 | - otp_release: 20.0.1 39 | env: JOB=edoc 40 | - otp_release: 20.0.1 41 | env: JOB=dotapp 42 | env: 43 | - JOB=check 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gb_merkle_trees 2 | 3 | An Erlang library that provides a dictionary-like storage for binaries using general balanced binary Merkle trees, with an interface similar to `gb_trees`. 4 | 5 | This library uses [semantic versioning 2.0](http://semver.org/). If a change causes different root hashes to be generated for the same input data when entering or deleting, it is considered backwards incompatible. 6 | 7 | [erlang.mk](https://erlang.mk/) is used as a build tool. 8 | 9 | ## Documentation 10 | 11 | Run `make edoc` and open `doc/index.html`. 12 | 13 | ## Contributing 14 | 15 | Unless you’re deleting code or making pure optimizations, write tests. Except for basic cases, testing of this library is done using [triq](https://github.com/triqng/triq). To run tests, execute `make tests`. 16 | 17 | Write function specifications. To run Dialyzer, execute `make dialyze`. 18 | 19 | No hard line length limit is imposed. 
20 | 21 | ## License 22 | 23 | This software is licensed under [the Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) (the “License”); you may not use this software except in compliance with the License. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 24 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<http://www.apache.org/licenses/>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 
26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files. 30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. 
For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. 
If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. 
You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 
169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright [yyyy] [name of copyright owner] 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 195 | -------------------------------------------------------------------------------- /src/gb_merkle_trees.erl: -------------------------------------------------------------------------------- 1 | %% Licensed under the Apache License, Version 2.0 (the “License”); 2 | %% you may not use this file except in compliance with the License. 3 | %% You may obtain a copy of the License at 4 | %% 5 | %% http://www.apache.org/licenses/LICENSE-2.0 6 | %% 7 | %% Unless required by applicable law or agreed to in writing, software 8 | %% distributed under the License is distributed on an “AS IS” BASIS, 9 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
10 | %% See the License for the specific language governing permissions and 11 | %% limitations under the License. 12 | %% 13 | %% @doc General balanced binary Merkle trees. Similar to {@link //stdlib/gb_trees}, but with Merkle proofs. 14 | %% 15 | %% Keys and values need to be binaries. Values are stored only in leaf nodes to shorten Merkle proofs. 16 | %% 17 | %% Hashes of leaf nodes are based on concatenation of hashes of key and value. Hashes of inner nodes are based on concatenation of hashes of left and right node. 18 | %% 19 | %% Similarly as in {@link //stdlib/gb_trees}, deletions do not cause trees to rebalance. 20 | %% 21 | %% SHA-256 is used as the default hashing algorithm. You can define the `GB_MERKLE_TREES_HASH_ALGORITHM' macro to use another algorithm. See documentation of {@link //crypto/crypto:hash/2} for available choices. 22 | %% 23 | %% @author Krzysztof Jurewicz [http://jurewicz.org.pl] 24 | %% 25 | %% @reference See Arne Andersson’s “General Balanced Trees” article for insights about the balancing algorithm. The original balance condition has been changed to 2^h(T) ≤ |T|^2. 26 | %% @reference See go-merkle for a similar in purpose library written in Go which uses AVL trees instead of general balanced trees. 27 | %% @see //stdlib/gb_trees 28 | %% @see //crypto/crypto:hash/2 29 | 30 | -module(gb_merkle_trees). 31 | -export([balance/1, 32 | delete/2, 33 | empty/0, 34 | enter/3, 35 | foldr/3, 36 | from_list/1, 37 | from_orddict/1, 38 | from_orddict/2, 39 | keys/1, 40 | lookup/2, 41 | merkle_proof/2, 42 | root_hash/1, 43 | size/1, 44 | to_orddict/1, 45 | verify_merkle_proof/4]). 46 | 47 | -ifdef(TEST). 48 | -include_lib("triq/include/triq.hrl"). 49 | -include_lib("eunit/include/eunit.hrl"). 50 | -endif. 51 | 52 | -ifndef(GB_MERKLE_TREES_HASH_ALGORITHM). 53 | -define(GB_MERKLE_TREES_HASH_ALGORITHM, sha256). 54 | -endif. 55 | -define(HASH(X), crypto:hash(?GB_MERKLE_TREES_HASH_ALGORITHM, X)). 
%% Balance condition maintained for every tree T: 2^h(T) ≤ |T|^C.
-define(C, 2).

-type key() :: binary().
-type value() :: binary().
-type hash() :: binary().

%% Inner nodes and leaf nodes are told apart by tuple arity (4 vs. 3) rather than by records, which saves some space.
-type leaf_node() :: {key(), value(), hash()}.
-type inner_node() :: {key(), hash() | to_be_computed, Left :: inner_node() | leaf_node(), Right :: inner_node() | leaf_node()}.
-type tree_node() :: leaf_node() | inner_node() | empty.
-opaque tree() :: {Size :: non_neg_integer(), RootNode :: tree_node()}.
-type merkle_proof() :: {hash() | merkle_proof(), hash() | merkle_proof()}.

-export_type(
   [key/0,
    value/0,
    hash/0,
    tree/0,
    merkle_proof/0]).

-spec delete(key(), tree()) -> tree().
%% @doc Delete a key from the tree. The key must be present in the tree.
delete(Key, {Size, RootNode}) ->
    NewSize = Size - 1,
    NewRootNode = delete_1(Key, RootNode),
    {NewSize, NewRootNode}.

-spec delete_1(key(), tree_node()) -> tree_node().
%% Remove the leaf holding Key from the subtree.
%% A matching leaf becomes `empty'; an inner node whose child became `empty' is replaced
%% by the remaining child, otherwise its hash is recomputed from the updated children.
delete_1(Key, {Key, _Value, _Hash}) ->
    empty;
delete_1(Key, {InnerKey, _OldHash, LeftNode, RightNode}) when Key < InnerKey ->
    %% Keys smaller than the inner key live in the left subtree.
    case delete_1(Key, LeftNode) of
        empty ->
            RightNode;
        NewLeftNode ->
            NewHash = inner_hash(node_hash(NewLeftNode), node_hash(RightNode)),
            {InnerKey, NewHash, NewLeftNode, RightNode}
    end;
delete_1(Key, {InnerKey, _OldHash, LeftNode, RightNode}) ->
    %% All other keys live in the right subtree.
    case delete_1(Key, RightNode) of
        empty ->
            LeftNode;
        NewRightNode ->
            NewHash = inner_hash(node_hash(LeftNode), node_hash(NewRightNode)),
            {InnerKey, NewHash, LeftNode, NewRightNode}
    end.

-spec empty() -> tree().
%% @doc Return a tree that contains no elements.
empty() ->
    {0, empty}.

-spec size(tree()) -> non_neg_integer().
%% @doc Return the number of elements stored in the tree.
size({Size, _RootNode}) ->
    Size.

-spec leaf_hash(key(), value()) -> hash().
%% Hash of a leaf node: the hash of the concatenation of the key hash and the value hash.
leaf_hash(Key, Value) ->
    KeyHash = ?HASH(Key),
    ValueHash = ?HASH(Value),
    ?HASH(<<KeyHash/binary, ValueHash/binary>>).

-spec inner_hash(hash(), hash()) -> hash().
%% Hash of an inner node: the hash of the concatenation of the hashes of its left and right child.
inner_hash(LeftHash, RightHash) ->
    ?HASH(<<LeftHash/binary, RightHash/binary>>).

-spec root_hash(tree()) -> hash() | undefined.
%% @doc Return the hash of root node. For an empty tree the result is `undefined'.
root_hash({_, RootNode}) ->
    node_hash(RootNode).

-spec merkle_proof(key(), tree()) -> merkle_proof().
%% @doc For a given key return a proof that, along with its value, it is contained in tree.
%% Hash for root node is not included in the proof.
%% The key must be present in the tree; otherwise no clause of the internal traversal matches and the call fails.
merkle_proof(Key, {_Size, RootNode}) ->
    merkle_proof_node(Key, RootNode).

-spec merkle_proof_node(key(), tree_node()) -> merkle_proof().
%% Build the proof for the subtree: the path towards Key is expanded recursively,
%% while each sibling subtree off the path is represented by its hash only.
%% The innermost pair is {hash of key, hash of value}.
merkle_proof_node(Key, {Key, Value, _}) ->
    {?HASH(Key), ?HASH(Value)};
merkle_proof_node(Key, {InnerKey, _, Left, Right}) ->
    case Key < InnerKey of
        true ->
            {merkle_proof_node(Key, Left), node_hash(Right)};
        _ ->
            {node_hash(Left), merkle_proof_node(Key, Right)}
    end.

-spec verify_merkle_proof(key(), value(), Root::hash(), merkle_proof()) ->
                                 ok | {error, Reason} when
      Reason :: {key_hash_mismatch, hash()}
              | {value_hash_mismatch, hash()}
              | {root_hash_mismatch, hash()}.
%% @doc Verify a proof against a leaf and a root node hash.
%% The checks are performed in order: the key hash at the bottom of the proof, the value hash
%% at the bottom of the proof, and finally the hash obtained by folding the whole proof.
%% Each error carries the offending hash found in (or computed from) the proof.
verify_merkle_proof(Key, Value, RootHash, Proof) ->
    {KH, VH} = {?HASH(Key), ?HASH(Value)},
    {PKH, PVH} = bottom_merkle_proof_pair(Proof),
    if
        PKH =/= KH ->
            {error, {key_hash_mismatch, PKH}};
        PVH =/= VH ->
            %% Report the value hash from the proof (previously the key hash was returned here by mistake).
            {error, {value_hash_mismatch, PVH}};
        true ->
            PRH = merkle_fold(Proof),
            if
                PRH =/= RootHash ->
                    {error, {root_hash_mismatch, PRH}};
                true ->
                    ok
            end
    end.

-spec from_list(list({key(), value()})) -> tree().
%% @doc Create a tree from a list.
%% The tree is built by inserting the elements one by one, so the result is not necessarily perfectly balanced, unlike the one obtained from {@link from_orddict/1}.
from_list(List) ->
    EmptyTree = empty(),
    from_list(List, EmptyTree).

-spec from_list(list({key(), value()}), Acc :: tree()) -> tree().
%% Enter every key-value pair of the list, in list order, into the accumulator tree.
from_list(List, Acc) ->
    lists:foldl(
      fun ({Key, Value}, Tree) ->
              enter(Key, Value, Tree)
      end,
      Acc,
      List).

-spec from_orddict(OrdDict :: list({key(), value()})) -> tree().
%% @equiv from_orddict(OrdDict, length(OrdDict))
from_orddict(OrdDict) ->
    Size = length(OrdDict),
    from_orddict(OrdDict, Size).

-spec from_orddict(list({key(), value()}), Size :: non_neg_integer()) -> tree().
%% @doc Build a perfectly balanced tree out of an ordered dictionary holding Size elements.
from_orddict(OrdDict, Size) ->
    RootNode = balance_orddict(OrdDict, Size),
    {Size, RootNode}.

-spec to_orddict(tree()) -> list({key(), value()}).
%% @doc Return the contents of the tree as an orddict.
to_orddict(Tree) ->
    Prepend = fun (KV, Acc) -> [KV | Acc] end,
    foldr(Prepend, [], Tree).

-spec keys(tree()) -> list(key()).
%% @doc Return all keys of the tree as an ordered list.
keys(Tree) ->
    PrependKey = fun ({Key, _Value}, Acc) -> [Key | Acc] end,
    foldr(PrependKey, [], Tree).

-spec foldr(fun(({key(), value()}, Acc :: any()) -> any()), Acc :: any(), tree()) -> Acc :: any().
%% @doc Fold a function over all key-value pairs, visiting them from the highest key down to the lowest.
foldr(Fun, Acc, {_Size, RootNode}) ->
    foldr_1(Fun, Acc, RootNode).

-spec foldr_1(fun(({key(), value()}, Acc :: any()) -> any()), Acc :: any(), tree_node()) -> Acc :: any().
%% Right subtree first, then left, so that leaves are visited in descending key order.
foldr_1(Fun, Acc0, {_InnerKey, _Hash, Left, Right}) ->
    Acc1 = foldr_1(Fun, Acc0, Right),
    foldr_1(Fun, Acc1, Left);
foldr_1(Fun, Acc, {Key, Value, _Hash}) ->
    Fun({Key, Value}, Acc);
foldr_1(_Fun, Acc, empty) ->
    Acc.

-spec node_hash(tree_node()) -> hash() | undefined.
%% Return the hash element stored in a node; an empty node has no hash.
node_hash({_Key, _Value, Hash}) ->
    Hash;
node_hash({_Key, Hash, _Left, _Right}) ->
    Hash;
node_hash(empty) ->
    undefined.

-spec enter(key(), value(), tree()) -> tree().
%% @doc Store a value under a key, either adding a new element or replacing the value of an existing key.
enter(Key, Value, {Size, RootNode}) ->
    %% Any pending rebalancing must have been carried out by the time the root is reached.
    {NewRootNode, undefined, undefined, KeyExists} = enter_1(Key, Value, RootNode, 0, Size),
    SizeDelta =
        case KeyExists of
            true -> 0;
            _ -> 1
        end,
    {Size + SizeDelta, NewRootNode}.

-spec enter_1(key(), value(), tree_node(), Depth :: non_neg_integer(), TreeSize :: non_neg_integer()) ->
                     {tree_node(), RebalancingCount :: pos_integer() | undefined, Height :: non_neg_integer() | undefined, KeyExists :: boolean()}.
%% Insert into a subtree located at the given depth of a tree that had TreeSize elements before the insertion.
%% The result carries the new subtree and whether the key was already present.
%% RebalancingCount and Height are `undefined' when no rebalancing is pending; otherwise they hold
%% the number of leaves and the height of the returned subtree, whose hashes are left `to_be_computed'
%% until an ancestor rebuilds it.
enter_1(Key, Value, empty, _Depth, _TreeSize) ->
    {{Key, Value, leaf_hash(Key, Value)}, undefined, undefined, false};
enter_1(Key, Value, {Key, _OldValue, _OldHash}, _Depth, _TreeSize) ->
    %% Same key: replace the leaf in place.
    {{Key, Value, leaf_hash(Key, Value)}, undefined, undefined, true};
enter_1(Key, Value, ExistingLeaf = {ExistingKey, _, _}, Depth, TreeSize) ->
    %% Different key: the leaf is replaced by an inner node with both leaves as children.
    %% The inner key is the greater of the two keys, i.e. the key of the right child.
    NewLeaf = {Key, Value, leaf_hash(Key, Value)},
    {InnerKey, Left, Right} =
        case Key > ExistingKey of
            true ->
                {Key, ExistingLeaf, NewLeaf};
            _ ->
                {ExistingKey, NewLeaf, ExistingLeaf}
        end,
    case rebalancing_needed(TreeSize + 1, Depth + 1) of
        true ->
            %% Two leaves, height one; hashing is postponed until the rebalancing happens.
            {{InnerKey, to_be_computed, Left, Right}, 2, 1, false};
        _ ->
            Hash = inner_hash(node_hash(Left), node_hash(Right)),
            {{InnerKey, Hash, Left, Right}, undefined, undefined, false}
    end;
enter_1(Key, Value, InnerNode = {InnerKey, _, LeftNode, RightNode}, Depth, TreeSize) ->
    GoLeft = Key < InnerKey,
    Child =
        case GoLeft of
            true -> LeftNode;
            false -> RightNode
        end,
    {NewChild, RebalancingCount, Height, KeyExists} = enter_1(Key, Value, Child, Depth + 1, TreeSize),
    {NewLeftNode, NewRightNode, Sibling} =
        case GoLeft of
            true -> {NewChild, RightNode, RightNode};
            false -> {LeftNode, NewChild, LeftNode}
        end,
    case RebalancingCount of
        undefined ->
            {update_inner_node(InnerNode, NewLeftNode, NewRightNode), undefined, undefined, KeyExists};
        _ ->
            %% A rebalancing is pending below: extend the leaf count and height to cover this node.
            Count = RebalancingCount + node_size(Sibling),
            NewHeight = Height + 1,
            Unbalanced = {InnerKey, to_be_computed, NewLeftNode, NewRightNode},
            case may_be_rebalanced(Count, NewHeight) of
                true ->
                    {balance_node(Unbalanced, Count), undefined, undefined, KeyExists};
                _ ->
                    {Unbalanced, Count, NewHeight, KeyExists}
            end
    end.

-spec rebalancing_needed(TreeSize :: non_neg_integer(), Depth :: non_neg_integer()) -> boolean().
%% True when a leaf at the given depth breaks the balance condition for a tree of TreeSize elements.
rebalancing_needed(TreeSize, Depth) ->
    math:pow(TreeSize, ?C) < math:pow(2, Depth).

-spec may_be_rebalanced(Count :: non_neg_integer(), Height :: non_neg_integer()) -> boolean().
%% True when a subtree with Count leaves and the given height breaks the balance condition itself.
may_be_rebalanced(Count, Height) ->
    math:pow(Count, ?C) < math:pow(2, Height).

-spec node_size(tree_node()) -> non_neg_integer().
%% Number of leaves in the subtree.
node_size({_Key, _Hash, Left, Right}) ->
    node_size(Left) + node_size(Right);
node_size({_Key, _Value, _Hash}) ->
    1;
node_size(empty) ->
    0.

-spec balance_orddict(list({key(), value()}), Size :: non_neg_integer()) -> tree_node().
%% Build a balanced subtree out of exactly Size elements; the match on `[]' asserts that nothing is left over.
balance_orddict(KVOrdDict, Size) ->
    {RootNode, []} = balance_orddict_1(KVOrdDict, Size),
    RootNode.

-spec balance_orddict_1(list({key(), value()}), Size :: non_neg_integer()) -> {tree_node(), list({key(), value()})}.
%% Consume the first Size elements of the ordered list and build a balanced subtree from them.
%% Returns the subtree together with the unconsumed rest of the list.
%% The left subtree takes the larger half (Size - Size div 2 elements); the key of an inner node
%% is the first key of its right subtree.
balance_orddict_1(OrdDict, Size) when Size > 1 ->
    Size2 = Size div 2,
    Size1 = Size - Size2,
    {LeftNode, OrdDict1=[{Key, _} | _]} = balance_orddict_1(OrdDict, Size1),
    {RightNode, OrdDict2} = balance_orddict_1(OrdDict1, Size2),
    InnerNode = {Key, inner_hash(node_hash(LeftNode), node_hash(RightNode)), LeftNode, RightNode},
    {InnerNode, OrdDict2};
balance_orddict_1([{Key, Value} | OrdDict], 1) ->
    {{Key, Value, leaf_hash(Key, Value)}, OrdDict};
balance_orddict_1(OrdDict, 0) ->
    {empty, OrdDict}.

-spec node_to_orddict(tree_node()) -> list({key(), value()}).
%% Collect the key-value pairs of a subtree in ascending key order.
node_to_orddict(Node) ->
    foldr_1(
      fun (KV, Acc) ->
              [KV|Acc]
      end,
      [],
      Node).

-spec balance_node(tree_node(), Size :: non_neg_integer()) -> tree_node().
%% Rebuild a subtree holding Size leaves into a perfectly balanced one, recomputing all hashes.
balance_node(Node, Size) ->
    KVOrdDict = node_to_orddict(Node),
    balance_orddict(KVOrdDict, Size).

-spec balance(tree()) -> tree().
%% @doc Perfectly balance a tree.
balance({Size, RootNode}) ->
    {Size, balance_node(RootNode, Size)}.

-spec lookup(key(), tree()) -> value() | none.
%% @doc Fetch value for key from tree. Returns `none' when the key is not present.
lookup(Key, {_, RootNode}) ->
    lookup_1(Key, RootNode).

-spec lookup_1(key(), tree_node()) -> value() | none.
%% Descend towards the leaf that would hold Key. The root of an empty tree is `empty',
%% so the spec covers every tree_node(), not only inner and leaf nodes.
lookup_1(Key, {Key, Value, _}) ->
    Value;
lookup_1(Key, {InnerKey, _, Left, Right}) ->
    case Key < InnerKey of
        true ->
            lookup_1(Key, Left);
        _ ->
            lookup_1(Key, Right)
    end;
lookup_1(_Key, {_OtherKey, _Value, _Hash}) ->
    %% Reached a leaf holding a different key.
    none;
lookup_1(_Key, empty) ->
    none.

-spec update_inner_node(inner_node(), Left :: tree_node(), Right :: tree_node()) -> inner_node().
%% Return an inner node with the given (possibly updated) children.
%% The original node is reused when both child hashes are unchanged; otherwise the node hash is recomputed.
update_inner_node(Node={Key, _, Left, Right}, NewLeft, NewRight) ->
    case lists:map(fun node_hash/1, [Left, Right, NewLeft, NewRight]) of
        [LeftHash, RightHash, LeftHash, RightHash] ->
            %% Nothing changed, no need to rehash.
            Node;
        [_, _, NewLeftHash, NewRightHash] ->
            {Key, inner_hash(NewLeftHash, NewRightHash), NewLeft, NewRight}
    end.

-spec merkle_fold(merkle_proof() | hash()) -> hash().
%% Reduce a proof to a single hash: every pair is replaced by the hash of the concatenation
%% of its (recursively folded) elements; a bare hash folds to itself.
merkle_fold({Left, Right}) ->
    LeftHash = merkle_fold(Left),
    RightHash = merkle_fold(Right),
    ?HASH(<<LeftHash/binary, RightHash/binary>>);
merkle_fold(Hash) ->
    Hash.

-spec bottom_merkle_proof_pair(merkle_proof()) -> {hash(), hash()}.
%% Follow the nested pairs of a proof down to the innermost pair and return it.
bottom_merkle_proof_pair({Pair, Hash}) when is_tuple(Pair), is_binary(Hash) ->
    bottom_merkle_proof_pair(Pair);
bottom_merkle_proof_pair({_Hash, Pair}) when is_tuple(Pair) ->
    bottom_merkle_proof_pair(Pair);
bottom_merkle_proof_pair(Pair) ->
    Pair.

-ifdef(TEST).
empty_test_() ->
    [?_assertEqual(0, ?MODULE:size(empty()))].

%% Types for Triq.
key() ->
    binary().
value() ->
    binary().
kv_orddict() ->
    ?LET(L, list({key(), value()}), orddict:from_list(L)).
tree() ->
    %% The validity of data generated by this generator depends on the validity of the `from_list' function.
    %% This should not be a problem as long as the `from_list' function itself is tested.
    ?LET(KVO, list({key(), value()}), from_list(KVO)).
non_empty_tree() ->
    ?SUCHTHAT(Tree, tree(), element(1, Tree) > 0).

%% Helper functions for Triq.
-spec height(tree()) -> non_neg_integer().
%% Length of the longest path from the root to a leaf.
height({_, RootNode}) ->
    node_height(RootNode).

-spec node_height(tree_node()) -> non_neg_integer().
node_height(empty) ->
    %% Strictly speaking, there is no height for empty tree.
    0;
node_height({_, _, _}) ->
    0;
node_height({_, _, Left, Right}) ->
    1 + max(node_height(Left), node_height(Right)).

-spec shallow_height(tree()) -> non_neg_integer().
%% Length of the shortest path from the root to a leaf.
shallow_height({_, RootNode}) ->
    node_shallow_height(RootNode).

-spec node_shallow_height(tree_node()) -> non_neg_integer().
node_shallow_height(empty) ->
    %% Strictly speaking, there is no height for empty tree.
    0;
node_shallow_height({_, _, _}) ->
    0;
node_shallow_height({_, _, Left, Right}) ->
    1 + min(node_shallow_height(Left), node_shallow_height(Right)).

-spec is_perfectly_balanced(tree()) -> boolean().
%% A tree counts as perfectly balanced when its longest and shortest root-to-leaf paths differ by at most one.
is_perfectly_balanced(Tree) ->
    height(Tree) - shallow_height(Tree) =< 1.

-spec fun_idempotent(F :: fun((X) -> X), X) -> boolean().
%% @doc Return true if F(X) =:= X.
fun_idempotent(F, X) ->
    F(X) =:= X.

prop_lookup_does_not_fetch_deleted_key() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            none =:= lookup(Key, delete(Key, enter(Key, Value, Tree)))).
prop_deletion_decreases_size_by_1() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            ?MODULE:size(enter(Key, Value, Tree)) - 1 =:= ?MODULE:size(delete(Key, enter(Key, Value, Tree)))).
prop_merkle_proofs_fold_to_root_hash() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            root_hash(enter(Key, Value, Tree)) =:= merkle_fold(merkle_proof(Key, enter(Key, Value, Tree)))).
prop_merkle_proofs_contain_kv_hashes_at_the_bottom() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            bottom_merkle_proof_pair(merkle_proof(Key, enter(Key, Value, Tree))) =:= {?HASH(Key), ?HASH(Value)}).
%% A proof generated for a present key verifies against the root hash of the same tree.
prop_merkle_proofs_can_be_verified() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                Entered = enter(Key, Value, Tree),
                ok =:= verify_merkle_proof(Key, Value, root_hash(Entered), merkle_proof(Key, Entered))
            end).

%% Verifying with a key different from the proven one reports a key hash mismatch.
prop_merkle_proofs_verification_reports_mismatch_for_wrong_key() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                Entered = enter(Key, Value, Tree),
                %% Prefixing guarantees that the key differs from the proven one.
                WrongKey = <<"X", Key/binary>>,
                case verify_merkle_proof(WrongKey, Value, root_hash(Entered), merkle_proof(Key, Entered)) of
                    {error, {key_hash_mismatch, H}} when is_binary(H) ->
                        true;
                    _ ->
                        false
                end
            end).

%% Verifying with a value different from the proven one reports a value hash mismatch.
prop_merkle_proofs_verification_reports_mismatch_for_wrong_value() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                Entered = enter(Key, Value, Tree),
                %% Prefixing guarantees that the value differs from the proven one.
                WrongValue = <<"X", Value/binary>>,
                case verify_merkle_proof(Key, WrongValue, root_hash(Entered), merkle_proof(Key, Entered)) of
                    {error, {value_hash_mismatch, H}} when is_binary(H) ->
                        true;
                    _ ->
                        false
                end
            end).

%% Verifying against a root hash different from the real one reports a root hash mismatch.
prop_merkle_proofs_verification_reports_mismatch_for_wrong_root_hash() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                Entered = enter(Key, Value, Tree),
                RH = root_hash(Entered),
                %% Prefixing guarantees that the root hash differs from the real one.
                WrongRootHash = <<"X", RH/binary>>,
                case verify_merkle_proof(Key, Value, WrongRootHash, merkle_proof(Key, Entered)) of
                    {error, {root_hash_mismatch, H}} when is_binary(H) ->
                        true;
                    _ ->
                        false
                end
            end).

%% `proplists:get_keys/1' lists every key once, so duplicated keys in the input are counted a single time.
prop_from_list_size() ->
    ?FORALL(KVList, list({key(), value()}),
            length(proplists:get_keys(KVList)) =:= ?MODULE:size(from_list(KVList))).

%% A tree built with `from_orddict/1' has as many elements as the orddict it was built from.
prop_from_orddict_size() ->
    ?FORALL(KVO, kv_orddict(),
            length(KVO) =:= ?MODULE:size(from_orddict(KVO))).

%% Converting an orddict to a tree and back returns the original orddict.
prop_orddict_conversion_idempotence() ->
    ?FORALL(KVO, kv_orddict(), KVO =:= to_orddict(from_orddict(KVO))).

prop_from_orddict_returns_a_perfectly_balanced_tree() ->
    ?FORALL(KVO, kv_orddict(), is_perfectly_balanced(from_orddict(KVO))).
%% `keys/1' lists the same keys, in the same order, as `to_orddict/1'.
prop_keys() ->
    ?FORALL(Tree, tree(),
            [K || {K, _} <- to_orddict(Tree)] =:= keys(Tree)).

%% Triq is expected to find some input list for which `from_list/1' yields a tree that is
%% not perfectly balanced, i.e. `triq:counterexample/1' must not return `true'.
from_list_sometimes_doesnt_return_a_perfectly_balanced_tree_test() ->
    AlwaysBalanced =
        ?FORALL(KVList, list({key(), value()}), is_perfectly_balanced(from_list(KVList))),
    ?assertNotEqual(true, triq:counterexample(AlwaysBalanced)).

%% The keys collected by `foldr/3' are unchanged by `lists:usort/1', i.e. already sorted and unique.
prop_foldr_iterates_on_proper_ordering_and_contains_no_duplicates() ->
    ?FORALL(Tree, tree(),
            begin
                Keys = foldr(fun ({Key, _}, Acc) -> [Key | Acc] end, [], Tree),
                lists:usort(Keys) =:= Keys
            end).

%% Entering the same key-value pair a second time leaves the tree unchanged.
prop_enter_is_idempotent() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                EnterAgain = fun (Tree_) -> enter(Key, Value, Tree_) end,
                fun_idempotent(EnterAgain, enter(Key, Value, Tree))
            end).

prop_entered_value_can_be_retrieved() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                Entered = enter(Key, Value, Tree),
                lookup(Key, Entered) =:= Value
            end).

prop_entered_value_can_be_retrieved_after_balancing() ->
    ?FORALL({Tree, Key, Value},
            {tree(), key(), value()},
            begin
                Balanced = balance(enter(Key, Value, Tree)),
                lookup(Key, Balanced) =:= Value
            end).

%% Checks 2^height =< size^C, with `?C' being the macro defined elsewhere in this module.
prop_height_constrained() ->
    ?FORALL(Tree, non_empty_tree(),
            begin
                TwoToHeight = math:pow(2, height(Tree)),
                Bound = math:pow(?MODULE:size(Tree), ?C),
                TwoToHeight =< Bound
            end).

%% Balancing rearranges nodes only; the stored pairs and their order stay the same.
prop_balancing_yields_same_orddict() ->
    ?FORALL(Tree, tree(),
            to_orddict(balance(Tree)) =:= to_orddict(Tree)).

%% Overwriting the value of a present key keeps the size unchanged.
prop_entering_key_second_time_does_not_increase_size() ->
    ?FORALL({Tree, Key, Value1, Value2},
            {tree(), key(), value(), value()},
            begin
                EnteredOnce = enter(Key, Value1, Tree),
                EnteredTwice = enter(Key, Value2, EnteredOnce),
                ?MODULE:size(EnteredOnce) =:= ?MODULE:size(EnteredTwice)
            end).

prop_tree_after_explicit_balancing_is_perfectly_balanced() ->
    ?FORALL(Tree, tree(),
            is_perfectly_balanced(balance(Tree))).
-endif.