├── .gitignore ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── doc └── overview.edoc ├── include └── cbt.hrl ├── rebar.config ├── rebar_dev.config ├── src ├── cbt.app.src ├── cbt_backend.erl ├── cbt_btree.erl ├── cbt_btree_copy.erl ├── cbt_compress.erl ├── cbt_ets.erl ├── cbt_file.erl ├── cbt_ramfile.erl ├── cbt_stream.erl └── cbt_util.erl └── test ├── cbt_btree_copy_tests.erl ├── cbt_btree_ramfile_tests.erl ├── cbt_btree_tests.erl ├── cbt_ets_btree_copy_tests.erl ├── cbt_ets_tests.erl ├── cbt_file_tests.erl ├── cbt_ramfile_tests.erl ├── cbt_stream_tests.erl └── cbt_tests.hrl /.gitignore: -------------------------------------------------------------------------------- 1 | ebin 2 | *.swp 3 | *.dump 4 | *.dat 5 | edoc-info 6 | .DS_Store 7 | deps/ 8 | doc/*.html 9 | doc/*.css 10 | doc/erlang.png 11 | doc/edoc-info 12 | t/*.beam 13 | t/temp.* 14 | .rebar 15 | .eunit 16 | test/temp 17 | test/temp/* 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BASE_DIR = $(shell pwd) 2 | SUPPORT_DIR=$(BASE_DIR)/support 3 | ERLC ?= $(shell which erlc) 4 | ESCRIPT ?= $(shell which escript) 5 | ERL ?= $(shell which erl) 6 | APP := cbt 7 | REBAR?= rebar 8 | 9 | $(if $(ERLC),,$(warning "Warning: No Erlang found in your path, this will probably not work")) 10 | 11 | $(if $(ESCRIPT),,$(warning "Warning: No escript found in your path, this will probably not work")) 12 | 13 | .PHONY: deps doc test 14 | 15 | all: deps compile 16 | 17 | dev: devbuild 18 | 19 | compile: 20 | @$(REBAR) compile 21 | 22 | deps: 23 | @$(REBAR) get-deps 24 | 25 | doc: dev 26 | $(REBAR) -C rebar_dev.config doc skip_deps=true 27 | 28 | test: dev 29 | ${REBAR} -C rebar_dev.config eunit skip_deps=true 30 | 31 | clean: 32 | @$(REBAR) clean 33 | @rm -f t/*.beam t/temp.* 34 | @rm -f doc/*.html doc/*.css doc/edoc-info doc/*.png 35 | 36 | distclean: clean 37 | @$(REBAR) delete-deps 38 | @rm -rf deps 39 | 40 | dialyzer: compile 41 | @dialyzer 
-Wno_return -c ebin 42 | 43 | # development 44 | # 45 | devclean: 46 | $(REBAR) -C rebar_dev.config clean 47 | 48 | devbuild: devdeps 49 | $(REBAR) -C rebar_dev.config compile 50 | 51 | devdeps: 52 | $(REBAR) -C rebar_dev.config get-deps 53 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | CBT - The Couchdb Btree 2 | 3 | Copyright 2007-2012 - The Apache Software foundation 4 | Copyright 2011-2014 - Couchbase 5 | Copyright 2014-2015 - Benoit Chesneau & The Refuge Project 6 | 7 | This library includes software developed at 8 | The Apache Software Foundation (http://www.apache.org/): 9 | 10 | Portions of this software contains code derived from the couchdb version 11 | of Couchbase: https://github.com/couchbase/couchdb 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #cbt 2 | 3 | multi-layer MVCC log append-only database library based on the Apache CouchDB btree. 4 | 5 | ## Changes compared to couchdb 6 | 7 | - Pluggable Storage backends 8 | - use CRC32 to check data integrity instead of MD5 9 | - rewrote the file init part to make it more robust 10 | - removed the need of an external config. Provides correct default and use 11 | erlang environment. 12 | - documentation and specs 13 | - some syntax improvements. 14 | 15 | ## build 16 | 17 | ### 1. install rebar 18 | To build cbt you need to install rebar in your `PATH`. Rebar is 19 | available on Github: 20 | 21 | https://github.com/rebar/rebar 22 | 23 | Follow the 24 | [README](https://github.com/rebar/rebar/blob/master/README.md) to 25 | install it. 26 | 27 | ### 2. build 28 | 29 | Fetch the source code: 30 | 31 | $ git clone https://bitbucket.org/refugeio/cbt.git 32 | 33 | Build the source, run the `make` command. It will fetch any needed 34 | dependencies. 
35 | 36 | $ cd //cbt 37 | $ make 38 | 39 | > **Note:** To use snappy or lz4 compression methods you need to install the 40 | > following dependencies: 41 | > - `snappy`: https://github.com/fdmanana/snappy-erlang-nif 42 | > - ` lz4`: https://github.com/krestenkrab/erlang-lz4 43 | > 44 | > And launch your application installed in the library path. 45 | 46 | ### 3. test CBT 47 | 48 | Run the following command line: 49 | 50 | $ make test 51 | 52 | 53 | ### 3. Build the doc 54 | 55 | $ make doc 56 | 57 | and open the `index.html` file in the doc folder. Or read it 58 | [online](http://cbt.cowdb.org). 59 | 60 | ## Example of usage with the file backend 61 | 62 | Example of usage: 63 | 64 | Store a {Key Value} pair in a btree: 65 | 66 | 1> {ok, Fd} = cbt_file:open("test.db", [create_if_missing]). 67 | {ok,<0.35.0>} 68 | 2> {ok, Btree} = cbt_btree:open(nil, Fd). 69 | {ok,{btree,<0.35.0>,nil,undefined,undefined,undefined,nil, 70 | snappy,1279}} 71 | 3> 72 | 3> {ok, Btree2} = cbt_btree:add(Btree, [{a, 1}]). 73 | {ok,{btree,<0.35.0>, 74 | {0,[],32}, 75 | undefined,undefined,undefined,nil,snappy,1279}} 76 | 4> Root = cbt_btree:get_state(Btree2). 77 | {0,[],32} 78 | 5> Header = {1, Root}. 79 | {1,{0,[],32}} 80 | 6> cbt_file:write_header(Fd, Header). 81 | ok 82 | 83 | What we did here is to open a file, create a btree inside and add a key 84 | value. Until we write the header, the database value is not changed. 85 | 86 | Now open the database in a new process and read the btree using the last 87 | header: 88 | 89 | 7> {ok, Fd1} = cbt_file:open("test.db"). 90 | {ok,<0.44.0>} 91 | 8> 92 | 8> {ok, Header1} = cbt_file:read_header(Fd1). 93 | {ok,{1,{0,[],32}}} 94 | 9> Header1 == Header 95 | 9> . 96 | true 97 | 10> {_, ReaderRoot} = Header1. 98 | {1,{0,[],32}} 99 | 11> {ok, SnapshotBtree} = cbt_btree:open(ReaderRoot, Fd1). 100 | {ok,{btree,<0.44.0>, 101 | {0,[],32}, 102 | undefined,undefined,undefined,nil,snappy,1279}} 103 | 12> cbt_btree:lookup(SnapshotBtree, [a]). 
104 | [{ok,{a,1}}] 105 | 106 | You can check that the database value is not change until we store the 107 | header: 108 | 109 | 13> {ok, Btree4} = cbt_btree:add(Btree2, [{a, 1}, {b, 2}]). 110 | {ok,{btree,<0.35.0>, 111 | {4160,[],39}, 112 | undefined,undefined,undefined,nil,snappy,1279}} 113 | 14> cbt_btree:lookup(Btree4, [a, b]). 114 | [{ok,{a,1}},{ok,{b,2}}] 115 | 15> Root2 = cbt_btree:get_state(Btree4). 116 | {4160,[],39} 117 | 16> Header2 = {1, Root2}. 118 | {1,{4160,[],39}} 119 | 17> cbt_file:write_header(Fd, Header2). 120 | ok 121 | 18> cbt_btree:lookup(SnapshotBtree, [a, b]). 122 | [{ok,{a,1}},not_found] 123 | 124 | 125 | ## ETS backend 126 | 127 | Find here a simple usage of the ETS backend of cbt allowing you to store one 128 | database in an ETS. 129 | 130 | 1> cbt_ets:new(test). 131 | test 132 | 2> {ok, Bt} = cbt_ets:open_btree(test, test). 133 | {ok,{btree,test,cbt_ets,nil,identity,identity, 134 | #Fun,nil,none,1279,2558}} 135 | 3> {ok, Bt2} = cbt_btree:add(Bt, [{a, 1}]). 136 | {ok,{btree,test,cbt_ets, 137 | {1,[],28}, 138 | identity,identity,#Fun,nil,none,1279, 139 | 2558}} 140 | 4> cbt_ets:update_btree(test, test, Bt2). 141 | true 142 | 5> {ok, SnapshotBtree} = cbt_ets:open_btree(test, test). 143 | {ok,{btree,test,cbt_ets, 144 | {1,[],28}, 145 | identity,identity,#Fun,nil,none,1279, 146 | 2558}} 147 | 6> cbt_btree:lookup(SnapshotBtree, [a]). 148 | [{ok,{a,1}}] 149 | 7> {ok, Bt3} = cbt_btree:add(Bt2, [{b, 2}]). 150 | {ok,{btree,test,cbt_ets, 151 | {2,[],36}, 152 | identity,identity,#Fun,nil,none,1279, 153 | 2558}} 154 | 8> cbt_ets:update_btree(test, test, Bt3). 155 | true 156 | 9> cbt_btree:lookup(SnapshotBtree, [a, b]). 157 | [{ok,{a,1}},not_found] 158 | 10> {ok, SnapshotBtree2} = cbt_ets:open_btree(test, test). 159 | {ok,{btree,test,cbt_ets, 160 | {2,[],36}, 161 | identity,identity,#Fun,nil,none,1279, 162 | 2558}} 163 | 11> cbt_btree:lookup(SnapshotBtree2, [a, b]). 
164 | [{ok,{a,1}},{ok,{b,2}}]i 165 | 166 | ## Custom storage backend 167 | 168 | CBT provides you 2 different backends by default: 169 | 170 | - `cbt_file`: Backend to store data in a file 171 | - `cbt_ets`: Backend to store data in ETS. 172 | 173 | But can use a custom backends to store the btree data if you need it. For 174 | example if you want to store the btree in a custom file backend when you want 175 | to change the data types or when you want to store the BTREE over a Key/Value 176 | store. 177 | 178 | To do it just pass the backend module to the btree and give it the Reference 179 | (atom or pid) that have been created when initializing the backend. Have a 180 | look in the `cbt_ets' module for more informations. 181 | -------------------------------------------------------------------------------- /doc/overview.edoc: -------------------------------------------------------------------------------- 1 | title cbt - couchdb btree library 2 | @author Benoit Chesneau 3 | [http://refuge.io/] 4 | @version 1.2.2 5 | @copyright 2014-2015 Benoit Chesneau (Apache 2 License) 6 | 7 | @doc cbt is a multi-layer MVCC log append-only database based on the Apache CouchDB btree. 8 | 9 | 10 | The source code can be obtained from the bitbucket repo. 12 | 13 | Included modules are: 14 | 15 |
16 |
{@link cbt_file}
17 |
18 | File module used by other modules. 19 |
20 | 21 |
{@link cbt_btree}
22 |
23 | Main module to write and query multiple btrees in a file created 24 | with the cbt_file module. 25 |
26 | 27 |
{@link cbt_stream}
28 |
29 | Module to store a large binary (stream) in the database file and 30 | get the list of each chunk. 31 |
32 | 33 |
{@link cbt_btree_copy}
34 |
35 | Module to copy the contents of one btree to another. 36 |
37 |
38 | 39 | 40 | Example of usage: 41 | 42 | Store a {Key Value} pair in a btree: 43 | 44 | ``` 45 | 1> {ok, Fd} = cbt_file:open("test.db"). 46 | {ok,<0.35.0>} 47 | 2> {ok, Btree} = cbt_btree:opeb(nil, Fd). 48 | {ok,{btree,<0.35.0>,nil,undefined,undefined,undefined,nil, 49 | snappy,1279}} 50 | 3> 51 | 3> {ok, Btree2} = cbt_btree:add(Btree, [{a, 1}]). 52 | {ok,{btree,<0.35.0>, 53 | {0,[],32}, 54 | undefined,undefined,undefined,nil,snappy,1279}} 55 | 4> Root = cbt_btree:get_state(Btree2). 56 | {0,[],32} 57 | 5> Header = {1, Root}. 58 | {1,{0,[],32}} 59 | 6> cbt_file:write_header(Fd, Header). 60 | ok 61 | ``` 62 | 63 | What we did here is to open a file, create a btree inside and add a key 64 | value. Until we write the header, the database value is not changed. 65 | 66 | Now open the database in a new process and read the btree using the last 67 | header: 68 | 69 | ``` 70 | 7> {ok, Fd1} = cbt_file:open("test.db"). 71 | {ok,<0.44.0>} 72 | 8> 73 | 8> {ok, Header1} = cbt_file:read_header(Fd1). 74 | {ok,{1,{0,[],32}}} 75 | 9> Header1 == Header 76 | 9> . 77 | true 78 | 10> {_, ReaderRoot} = Header1. 79 | {1,{0,[],32}} 80 | 11> {ok, SnapshotBtree} = cbt_btree:open(ReaderRoot, Fd1). 81 | {ok,{btree,<0.44.0>, 82 | {0,[],32}, 83 | undefined,undefined,undefined,nil,snappy,1279}} 84 | 12> cbt_btree:lookup(SnapshotBtree, [a]). 85 | [{ok,{a,1}}] 86 | ``` 87 | 88 | You can check that the database value is not change until we store the 89 | header: 90 | 91 | ``` 92 | 13> {ok, Btree4} = cbt_btree:add(Btree2, [{a, 1}, {b, 2}]). 93 | {ok,{btree,<0.35.0>, 94 | {4160,[],39}, 95 | undefined,undefined,undefined,nil,snappy,1279}} 96 | 14> cbt_btree:lookup(Btree4, [a, b]). 97 | [{ok,{a,1}},{ok,{b,2}}] 98 | 15> Root2 = cbt_btree:get_state(Btree4). 99 | {4160,[],39} 100 | 16> Header2 = {1, Root2}. 101 | {1,{4160,[],39}} 102 | 17> cbt_file:write_header(Fd, Header2). 103 | ok 104 | 18> cbt_btree:lookup(SnapshotBtree, [a, b]). 
105 | [{ok,{a,1}},not_found] 106 | ''' 107 | 108 | ## ETS backend 109 | 110 | Find here a simple usage of the ETS backend of cbt allowing you to store one 111 | database in an ETS. 112 | 113 | ``` 114 | 1> cbt_ets:new(test). 115 | test 116 | 2> {ok, Bt} = cbt_ets:open_btree(test, test). 117 | {ok,{btree,test,cbt_ets,nil,identity,identity, 118 | #Fun,nil,none,1279,2558}} 119 | 3> {ok, Bt2} = cbt_btree:add(Bt, [{a, 1}]). 120 | {ok,{btree,test,cbt_ets, 121 | {1,[],28}, 122 | identity,identity,#Fun,nil,none,1279, 123 | 2558}} 124 | 4> cbt_ets:update_btree(test, test, Bt2). 125 | true 126 | 5> {ok, SnapshotBtree} = cbt_ets:open_btree(test, test). 127 | {ok,{btree,test,cbt_ets, 128 | {1,[],28}, 129 | identity,identity,#Fun,nil,none,1279, 130 | 2558}} 131 | 6> cbt_btree:lookup(SnapshotBtree, [a]). 132 | [{ok,{a,1}}] 133 | 7> {ok, Bt3} = cbt_btree:add(Bt2, [{b, 2}]). 134 | {ok,{btree,test,cbt_ets, 135 | {2,[],36}, 136 | identity,identity,#Fun,nil,none,1279, 137 | 2558}} 138 | 8> cbt_ets:update_btree(test, test, Bt3). 139 | true 140 | 9> cbt_btree:lookup(SnapshotBtree, [a, b]). 141 | [{ok,{a,1}},not_found] 142 | 10> {ok, SnapshotBtree2} = cbt_ets:open_btree(test, test). 143 | {ok,{btree,test,cbt_ets, 144 | {2,[],36}, 145 | identity,identity,#Fun,nil,none,1279, 146 | 2558}} 147 | 11> cbt_btree:lookup(SnapshotBtree2, [a, b]). 148 | [{ok,{a,1}},{ok,{b,2}}] 149 | ''' 150 | 151 | ## Custom storage backend 152 | 153 | CBT provides you 2 different backends by default: 154 | 155 | - `cbt_file': Backend to store data in a file 156 | - `cbt_ets': Backend to store data in ETS. 157 | 158 | But can use a custom backends to store the btree data if you need it. For 159 | example if you want to store the btree in a custom file backend when you want 160 | to change the data types or when you want to store the BTREE over a Key/Value 161 | store. 
162 | 163 | To do it just pass the backend module to the btree and give it the Reference 164 | (atom or pid) that have been created when initializing the backend. Have a 165 | look in the `cbt_ets' module for more informations. 166 | -------------------------------------------------------------------------------- /include/cbt.hrl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -define(DEFAULT_COMPRESSION, none). 14 | 15 | -define(b2l(V), binary_to_list(V)). 16 | -define(l2b(V), list_to_binary(V)). 17 | -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])). 18 | -define(term_size(T), 19 | try 20 | erlang:external_size(T) 21 | catch _:_ -> 22 | byte_size(?term_to_bin(T)) 23 | end). 24 | 25 | -record(btree, { 26 | ref, 27 | mod, 28 | root, 29 | extract_kv = identity, % fun({_Key, _Value} = KV) -> KV end,, 30 | assemble_kv = identity, % fun({Key, Value}) -> {Key, Value} end, 31 | less = fun(A, B) -> A < B end, 32 | reduce = nil, 33 | compression = ?DEFAULT_COMPRESSION, 34 | kv_chunk_threshold = 16#4ff, 35 | kp_chunk_threshold = 2 * 16#4ff 36 | }). 37 | 38 | -record(ets_btree_meta, {key, 39 | write_loc=0}). 40 | 41 | -record(ets_btree, {name, 42 | root=nil}). 43 | 44 | -record(ets_btree_data, {pos, 45 | data}). 46 | 47 | 48 | -define(ETS_META_KEY, '_db_meta_'). 
49 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*- 2 | %% ex: ft=erlang ts=4 sw=4 et 3 | 4 | {erl_opts, [fail_on_warning, 5 | warn_unused_vars, 6 | warn_export_all, 7 | warn_shadow_vars, 8 | warn_unused_import, 9 | warn_unused_function, 10 | warn_bif_clash, 11 | warn_unused_record, 12 | warn_deprecated_function, 13 | warn_obsolete_guard, 14 | warn_export_vars, 15 | warn_exported_vars, 16 | %%warn_untyped_record, 17 | % warn_missing_spec, 18 | % strict_validation, 19 | debug_info]}. 20 | 21 | {erl_first_files, [ "src/cbt_backend.erl"]}. 22 | 23 | 24 | {profiles, [ 25 | {test, [ 26 | {deps, [snappyer, lz4]} 27 | ]} 28 | ]}. 29 | -------------------------------------------------------------------------------- /rebar_dev.config: -------------------------------------------------------------------------------- 1 | %%-*- mode: erlang -*- 2 | 3 | {lib_dirs, ["deps"]}. 4 | {src_dirs, ["./src"]}. 5 | 6 | {erl_opts, 7 | [{i, "include"}, 8 | {i, "deps"}, 9 | {src_dirs, ["src"]}, 10 | debug_info, 11 | bin_opt_info 12 | ] 13 | }. 14 | 15 | {cover_enabled, true}. 16 | {edoc_opts, [{preprocess, true}]}. 17 | 18 | {erl_opts, [{platform_define, "R15", 'crypto_compat'}, 19 | fail_on_warning, 20 | warn_unused_vars, 21 | warn_export_all, 22 | warn_shadow_vars, 23 | warn_unused_import, 24 | warn_unused_function, 25 | warn_bif_clash, 26 | warn_unused_record, 27 | warn_deprecated_function, 28 | warn_obsolete_guard, 29 | warn_export_vars, 30 | warn_exported_vars, 31 | %%warn_untyped_record, 32 | % warn_missing_spec, 33 | % strict_validation, 34 | bin_opt_info, 35 | debug_info]}. 36 | 37 | {eunit_opts, [ 38 | no_tty, 39 | {report, {eunit_progress, [colored, profile]}} 40 | ]}. 41 | 42 | {erl_first_files, [ "src/cbt_backend.erl"]}. 
43 | 44 | {deps, [ 45 | {snappy, ".*", 46 | {git, "https://github.com/fdmanana/snappy-erlang-nif.git", 47 | {branch, "master"}}}, 48 | 49 | {lz4, ".*", {git, "git://github.com/krestenkrab/erlang-lz4.git", 50 | {branch, "master"}}}, 51 | 52 | {eunit_formatters, ".*", 53 | {git, "git://github.com/seancribbs/eunit_formatters", 54 | "master"}} 55 | 56 | 57 | ]}. 58 | 59 | 60 | -------------------------------------------------------------------------------- /src/cbt.app.src: -------------------------------------------------------------------------------- 1 | {application, cbt, 2 | [ 3 | {description, "couch btree library"}, 4 | {vsn, "1.2.2"}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib 9 | ]}, 10 | {env, []} 11 | ]}. 12 | -------------------------------------------------------------------------------- /src/cbt_backend.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | %% @doc backend behaviour to handle the btree storage. 14 | 15 | -module(cbt_backend). 16 | 17 | -type ref() :: pid() | atom(). 18 | -export_types([ref/0]). 19 | 20 | %% append an Erlang term to the backend storage. 21 | -callback append_term(Ref :: ref(), Term :: term()) -> 22 | {ok, Pos::any(), NumBytesWritten:: integer()} 23 | | {error, term()}. 
24 | 25 | -callback append_term(Ref :: ref(), Term :: term(), Options :: any()) -> 26 | {ok, Pos::any(), NumBytesWritten:: integer()} 27 | | {error, term()}. 28 | 29 | %% read a term from the backend storage appended with append_term 30 | -callback pread_term(Ref :: ref(), Pos :: any()) -> 31 | {ok, Term::term()} 32 | | {error, term()}. 33 | 34 | -callback sync(Ref :: ref()) -> 35 | ok 36 | | {error, term()}. 37 | 38 | -callback empty(Ref :: ref()) -> 39 | ok 40 | | {error, term()}. 41 | -------------------------------------------------------------------------------- /src/cbt_btree_copy.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -module(cbt_btree_copy). 14 | 15 | -export([copy/3]). 16 | 17 | -include("cbt.hrl"). 18 | 19 | -define(CHUNK_THRESHOLD, 16#4ff). 20 | 21 | -record(acc, { 22 | btree, 23 | ref, 24 | mod, 25 | before_kv_write = {fun(Item, Acc) -> {Item, Acc} end, []}, 26 | filter = fun(_) -> true end, 27 | compression = ?DEFAULT_COMPRESSION, 28 | kv_chunk_threshold, 29 | kp_chunk_threshold, 30 | nodes = dict:from_list([{1, []}]), 31 | cur_level = 1, 32 | max_level = 1 33 | }). 34 | 35 | -type cbt_copy_options() :: [{backend, cbt_file | atom()} 36 | | {before_kv_write, {fun(), any()}} 37 | | {filter, fun()} 38 | | override 39 | | {compression, cbt_compress:compression_method()}]. 40 | -export_type([cbt_copy_options/0]). 
41 | 42 | %% @doc copy a btree to a cbt file process. 43 | %% Options are: 44 | %%
    45 | %% - `{backend, Module}': backend module used to read/append the btree
  46 | %%   items. Default is `cbt_file'.
  47 | %% - `{before_kv_write, {fun(Item, Acc) -> {NewItem, NewAcc} end, InitAcc}}':
  48 | %%   to edit a Key/Value before it is written to the new btree.
  49 | %% - `{filter, fun(Item) -> true | false end}': function to filter the
  50 | %%   items copied to the new btree.
  51 | %% - `override': if the new file should be truncated.
  52 | %% - `{compression, Method}': to change the compression method used on the
  53 | %%   new btree.
  54 | %%
55 | -spec copy(Bt :: cbt_btree:cbtree(), Ref :: cbt_backend:ref(), cbt_copy_options()) -> 56 | {ok, cbt_btree:cbtree_root(), any()}. 57 | copy(Btree, Ref, Options) -> 58 | %% use custom backend if needed 59 | Mod = proplists:get_value(backend, Options, cbt_file), 60 | %% override the file 61 | case lists:member(override, Options) of 62 | true -> 63 | ok = Mod:empty(Ref); 64 | false -> 65 | ok 66 | end, 67 | Acc0 = #acc{btree = Btree, 68 | ref = Ref, 69 | mod = Mod, 70 | kv_chunk_threshold = Btree#btree.kv_chunk_threshold, 71 | kp_chunk_threshold = Btree#btree.kp_chunk_threshold}, 72 | Acc = apply_options(Options, Acc0), 73 | {ok, _, #acc{cur_level = 1} = FinalAcc0} = cbt_btree:fold( 74 | Btree, fun fold_copy/3, Acc, []), 75 | {ok, CopyRootState, FinalAcc} = finish_copy(FinalAcc0), 76 | ok = Mod:sync(Ref), 77 | {_, LastUserAcc} = FinalAcc#acc.before_kv_write, 78 | {ok, CopyRootState, LastUserAcc}. 79 | 80 | apply_options([], Acc) -> 81 | Acc; 82 | apply_options([{backend, Mod} | Rest], Acc) -> 83 | apply_options(Rest, Acc#acc{mod = Mod}); 84 | apply_options([{before_kv_write, {Fun, UserAcc}} | Rest], Acc) -> 85 | apply_options(Rest, Acc#acc{before_kv_write = {Fun, UserAcc}}); 86 | apply_options([{filter, Fun} | Rest], Acc) -> 87 | apply_options(Rest, Acc#acc{filter = Fun}); 88 | apply_options([override | Rest], Acc) -> 89 | apply_options(Rest, Acc); 90 | apply_options([{compression, Comp} | Rest], Acc) -> 91 | apply_options(Rest, Acc#acc{compression = Comp}); 92 | apply_options([{kv_chunk_threshold, Threshold} | Rest], Acc) -> 93 | apply_options(Rest, Acc#acc{kv_chunk_threshold = Threshold}); 94 | apply_options([{kp_chunk_threshold, Threshold} | Rest], Acc) -> 95 | apply_options(Rest, Acc#acc{kp_chunk_threshold = Threshold}). 96 | 97 | extract(#acc{btree = #btree{extract_kv = identity}}, Value) -> 98 | Value; 99 | extract(#acc{btree = #btree{extract_kv = Extract}}, Value) -> 100 | Extract(Value). 
101 | 102 | assemble(#acc{btree = #btree{assemble_kv = identity}}, KeyValue) -> 103 | KeyValue; 104 | assemble(#acc{btree = #btree{assemble_kv = Assemble}}, KeyValue) -> 105 | Assemble(KeyValue). 106 | 107 | 108 | before_leaf_write(#acc{before_kv_write = {Fun, UserAcc0}} = Acc, KVs) -> 109 | {NewKVs, NewUserAcc} = lists:mapfoldl( 110 | fun({K, _V}=KV, UAcc) -> 111 | Item = assemble(Acc, KV), 112 | {NewItem, UAcc2} = Fun(Item, UAcc), 113 | {K, _NewValue} = NewKV = extract(Acc, NewItem), 114 | {NewKV, UAcc2} 115 | end, 116 | UserAcc0, KVs), 117 | {NewKVs, Acc#acc{before_kv_write = {Fun, NewUserAcc}}}. 118 | 119 | 120 | write_leaf(#acc{ref = Ref, mod=Mod, compression = Comp}, Node, Red) -> 121 | {ok, Pos, Size} = Mod:append_term(Ref, Node, [{compression, Comp}]), 122 | {ok, {Pos, Red, Size}}. 123 | 124 | 125 | write_kp_node(#acc{ref = Ref, mod=Mod, btree = Bt, compression = Comp}, NodeList) -> 126 | {ChildrenReds, ChildrenSize} = lists:foldr( 127 | fun({_Key, {_P, Red, Sz}}, {AccR, AccSz}) -> 128 | {[Red | AccR], Sz + AccSz} 129 | end, 130 | {[], 0}, NodeList), 131 | Red = case Bt#btree.reduce of 132 | nil -> []; 133 | _ -> 134 | cbt_btree:final_reduce(Bt, {[], ChildrenReds}) 135 | end, 136 | {ok, Pos, Size} = Mod:append_term( 137 | Ref, {kp_node, NodeList}, [{compression, Comp}]), 138 | {ok, {Pos, Red, ChildrenSize + Size}}. 139 | 140 | 141 | fold_copy(Item, _Reds, #acc{nodes = Nodes, cur_level = 1, filter = Filter} = Acc) -> 142 | case Filter(Item) of 143 | false -> 144 | {ok, Acc}; 145 | true -> 146 | {K, V} = extract(Acc, Item), 147 | LevelNode = dict:fetch(1, Nodes), 148 | LevelNodes2 = [{K, V} | LevelNode], 149 | NextAcc = case ?term_size(LevelNodes2) >= Acc#acc.kv_chunk_threshold of 150 | true -> 151 | {LeafState, Acc2} = flush_leaf(LevelNodes2, Acc), 152 | bubble_up({K, LeafState}, Acc2); 153 | false -> 154 | Acc#acc{nodes = dict:store(1, LevelNodes2, Nodes)} 155 | end, 156 | {ok, NextAcc} 157 | end. 
158 | 159 | 160 | bubble_up({Key, NodeState}, #acc{cur_level = Level} = Acc) -> 161 | bubble_up({Key, NodeState}, Level, Acc). 162 | 163 | bubble_up({Key, NodeState}, Level, #acc{max_level = MaxLevel, 164 | nodes = Nodes} = Acc) -> 165 | Acc2 = Acc#acc{nodes = dict:store(Level, [], Nodes)}, 166 | case Level of 167 | MaxLevel -> 168 | Acc2#acc{ 169 | nodes = dict:store(Level + 1, [{Key, NodeState}], Acc2#acc.nodes), 170 | max_level = Level + 1 171 | }; 172 | _ when Level < MaxLevel -> 173 | NextLevelNodes = dict:fetch(Level + 1, Acc2#acc.nodes), 174 | NextLevelNodes2 = [{Key, NodeState} | NextLevelNodes], 175 | case ?term_size(NextLevelNodes2) >= Acc#acc.kp_chunk_threshold of 176 | true -> 177 | {ok, NewNodeState} = write_kp_node( 178 | Acc2, lists:reverse(NextLevelNodes2)), 179 | bubble_up({Key, NewNodeState}, Level + 1, Acc2); 180 | false -> 181 | Acc2#acc{ 182 | nodes = dict:store(Level + 1, NextLevelNodes2, Acc2#acc.nodes) 183 | } 184 | end 185 | end. 186 | 187 | 188 | finish_copy(#acc{cur_level = 1, max_level = 1, nodes = Nodes} = Acc) -> 189 | case dict:fetch(1, Nodes) of 190 | [] -> 191 | {ok, nil, Acc}; 192 | [{_Key, _Value} | _] = KvList -> 193 | {RootState, Acc2} = flush_leaf(KvList, Acc), 194 | {ok, RootState, Acc2} 195 | end; 196 | 197 | finish_copy(#acc{cur_level = Level, max_level = Level, nodes = Nodes} = Acc) -> 198 | case dict:fetch(Level, Nodes) of 199 | [{_Key, {Pos, Red, Size}}] -> 200 | {ok, {Pos, Red, Size}, Acc}; 201 | NodeList -> 202 | {ok, RootState} = write_kp_node(Acc, lists:reverse(NodeList)), 203 | {ok, RootState, Acc} 204 | end; 205 | 206 | finish_copy(#acc{cur_level = Level, nodes = Nodes} = Acc) -> 207 | case dict:fetch(Level, Nodes) of 208 | [] -> 209 | Acc2 = Acc#acc{cur_level = Level + 1}, 210 | finish_copy(Acc2); 211 | [{LastKey, _} | _] = NodeList -> 212 | {UpperNodeState, Acc2} = case Level of 213 | 1 -> 214 | flush_leaf(NodeList, Acc); 215 | _ when Level > 1 -> 216 | {ok, KpNodeState} = write_kp_node(Acc, 
lists:reverse(NodeList)), 217 | {KpNodeState, Acc} 218 | end, 219 | ParentNode = dict:fetch(Level + 1, Nodes), 220 | Acc3 = Acc2#acc{ 221 | nodes = dict:store(Level + 1, [{LastKey, UpperNodeState} | ParentNode], Nodes), 222 | cur_level = Level + 1 223 | }, 224 | finish_copy(Acc3) 225 | end. 226 | 227 | 228 | flush_leaf(KVs, #acc{btree = Btree} = Acc) -> 229 | {NewKVs, Acc2} = before_leaf_write(Acc, lists:reverse(KVs)), 230 | Red = case Btree#btree.reduce of 231 | nil -> []; 232 | _ -> 233 | Items = case Btree#btree.assemble_kv of 234 | identity -> 235 | NewKVs; 236 | _ -> 237 | [assemble(Acc2, Kv) || Kv <- NewKVs] 238 | end, 239 | cbt_btree:final_reduce(Btree, {Items, []}) 240 | end, 241 | {ok, LeafState} = write_leaf(Acc2, {kv_node, NewKVs}, Red), 242 | {LeafState, Acc2}. 243 | -------------------------------------------------------------------------------- /src/cbt_compress.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -module(cbt_compress). 14 | 15 | -export([compress/2, decompress/1, is_compressed/2]). 16 | 17 | -include("cbt.hrl"). 18 | 19 | -define(NO_COMPRESSION, 0). 20 | % binaries compressed with snappy have their first byte set to this value 21 | -define(SNAPPY_PREFIX, 1). 22 | % binaries compressed with gzip have their first byte set to this value 23 | -define(GZIP_PREFIX, 2). 
24 | % binaries compressed with lz4 have their first byte set to this value 25 | -define(LZ4_PREFIX, 3). 26 | % Term prefixes documented at: 27 | % http://www.erlang.org/doc/apps/erts/erl_ext_dist.html 28 | -define(TERM_PREFIX, 131). 29 | -define(COMPRESSED_TERM_PREFIX, 131, 80). 30 | 31 | -type compression_method() :: snappy | lz4 | gzip | none. 32 | -export_type([compression_method/0]). 33 | 34 | 35 | use_compressed(UncompressedSize, CompressedSize) 36 | when CompressedSize < UncompressedSize -> 37 | true; 38 | use_compressed(_UncompressedSize, _CompressedSize) -> 39 | false. 40 | 41 | 42 | %% @doc compress an encoded binary with the following type. When an 43 | %% erlang term is given it is encoded to a binary. 44 | -spec compress(Bin::binary()|term(), Method::compression_method()) -> Bin::binary(). 45 | compress(Term, snappy) -> 46 | Bin = ?term_to_bin(Term), 47 | {ok, CompressedBin} = snappyer:compress(Bin), 48 | case use_compressed(byte_size(Bin), byte_size(CompressedBin)) of 49 | true -> 50 | <>; 51 | false -> 52 | << ?NO_COMPRESSION, Bin/binary >> 53 | end; 54 | compress(Term, gzip) -> 55 | Bin = ?term_to_bin(Term), 56 | CompressedBin = zlib:gzip(Bin), 57 | case use_compressed(byte_size(Bin), byte_size(CompressedBin)) of 58 | true -> 59 | <>; 60 | false -> 61 | << ?NO_COMPRESSION, Bin/binary >> 62 | end; 63 | compress(Term, lz4) -> 64 | Bin = ?term_to_bin(Term), 65 | {ok, CompressedBin} = lz4:compress(erlang:iolist_to_binary(Bin)), 66 | case use_compressed(byte_size(Bin), byte_size(CompressedBin)) of 67 | true -> 68 | <>; 69 | false -> 70 | << ?NO_COMPRESSION, Bin/binary >> 71 | end; 72 | compress(Term, none) -> 73 | Bin = ?term_to_bin(Term), 74 | << ?NO_COMPRESSION, Bin/binary >>. 75 | 76 | %% @doc decompress a binary to an erlang decoded term. 77 | -spec decompress(Bin::binary()) -> Term::term(). 
78 | 79 | decompress(<>) -> 80 | binary_to_term(TermBin); 81 | decompress(<>) -> 82 | {ok, TermBin} = snappyer:decompress(Rest), 83 | binary_to_term(TermBin); 84 | decompress(<>) -> 85 | TermBin = zlib:gunzip(Rest), 86 | binary_to_term(TermBin); 87 | decompress(<>) -> 88 | TermBin = lz4:uncompress(Rest), 89 | binary_to_term(TermBin). 90 | 91 | %% @doc check if the binary has been compressed. 92 | -spec is_compressed(Bin::binary()|term(), 93 | Method::compression_method()) -> true | false. 94 | is_compressed(<>, none) -> 95 | false; 96 | is_compressed(<>, snappy) -> 97 | true; 98 | is_compressed(<>, gzip) -> 99 | true; 100 | is_compressed(<>, lz4) -> 101 | true; 102 | is_compressed(_Term, _Method) -> 103 | false. 104 | -------------------------------------------------------------------------------- /src/cbt_ets.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -module(cbt_ets). 14 | -behaviour(cbt_backend). 15 | 16 | -include("cbt.hrl"). 17 | 18 | %% public API 19 | -export([new/1, delete/1]). 20 | -export([open_btree/2, open_btree/3, 21 | update_btree/3, 22 | delete_btree/2, 23 | bytes/1]). 24 | 25 | %% backend API 26 | %% 27 | -export([append_term/2, append_term/3, 28 | pread_term/2, 29 | sync/1, 30 | empty/1]). 31 | 32 | 33 | %% @doc create new ETS storage 34 | -spec new(DbName :: atom) -> atom(). 
35 | new(DbName) when is_atom(DbName) -> 36 | Tid = ets:new(DbName, [named_table, ordered_set, public, {keypos, 2}]), 37 | %% make meta 38 | ets:insert_new(DbName, #ets_btree_meta{key=?ETS_META_KEY}), 39 | Tid. 40 | 41 | %% @doc delete ETS storage 42 | -spec delete(Tab :: atom()) -> ok. 43 | delete(Tab) -> 44 | true = ets:delete(Tab), 45 | ok. 46 | 47 | %% @doc open a btree from the storage 48 | -spec open_btree(Tab :: atom(), BtName :: any()) -> 49 | {ok, cbt_btree:cbbtree()} | {error, term()}. 50 | open_btree(Tab, BtName) -> 51 | open_btree(Tab, BtName, []). 52 | 53 | open_btree(Tab, BtName, Options0) when Tab /= ?ETS_META_KEY -> 54 | Options = [{backend, cbt_ets}] ++ Options0, 55 | case ets:lookup(Tab, BtName) of 56 | [] -> 57 | %% create a new btree if missing 58 | cbt_btree:open(nil, Tab, Options); 59 | [#ets_btree{root=BtState}] -> 60 | %% reopen the btree 61 | cbt_btree:open(BtState, Tab, Options) 62 | end. 63 | 64 | %% @doc update the btree state in the storage which allows the new changes to 65 | %% be read by others. 66 | -spec update_btree(Tab :: atom(), BtName :: any(), 67 | Btree :: cbt_btre:cbtree()) -> true. 68 | update_btree(Tab, BtName, Btree) -> 69 | BtState = cbt_btree:get_state(Btree), 70 | ets:insert(Tab, #ets_btree{name=BtName, root = BtState}). 71 | 72 | %% @doc delete the btree reference in the storage 73 | -spec delete_btree(Tab :: atom(), BtName :: any()) -> true. 74 | delete_btree(Tab, BtName) -> 75 | ets:delete(Tab, BtName). 76 | 77 | %% @doc return the size in memory of the storage 78 | -spec bytes(Tab :: atom()) -> integer(). 79 | bytes(Tab) -> 80 | ets:info(Tab, memory). 81 | 82 | %% BACKEND API 83 | %% 84 | 85 | append_term(Tab, Term) -> 86 | append_term(Tab, Term, []). 
87 | 88 | 89 | append_term(Tab, Term, Options) -> 90 | % compress term 91 | Comp = cbt_util:get_value(compression, Options, ?DEFAULT_COMPRESSION), 92 | Data = cbt_compress:compress(Term, Comp), 93 | NewPos = ets:update_counter(Tab, ?ETS_META_KEY, 94 | {#ets_btree_meta.write_loc, 1}), 95 | ets:insert(Tab, #ets_btree_data{pos=NewPos, data=Data}), 96 | {ok, NewPos, byte_size(Data)}. 97 | 98 | pread_term(Tab, Pos) -> 99 | case ets:lookup(Tab, Pos) of 100 | [] -> {missing_btree_item, Pos}; 101 | [#ets_btree_data{data=Bin}] -> {ok, cbt_compress:decompress(Bin)} 102 | end. 103 | 104 | sync(_Tab) -> 105 | ok. 106 | 107 | empty(Tab) -> 108 | %% delete all objects in the table 109 | ets:delete_all_objects(Tab), 110 | %% reiitialize the meta data 111 | ets:new(Tab, [named_table, ordered_set, public, {keypos, 2}]), 112 | ets:insert_new(Tab, #ets_btree_meta{key=?ETS_META_KEY}), 113 | ok. 114 | -------------------------------------------------------------------------------- /src/cbt_file.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -module(cbt_file). 14 | -behaviour(gen_server). 15 | -behaviour(cbt_backend). 16 | 17 | -include("cbt.hrl"). 18 | 19 | -define(SIZE_BLOCK, 16#1000). % 4 KiB 20 | -define(RETRY_TIME_MS, 1000). 21 | -define(MAX_RETRY_TIME_MS, 10000). 22 | 23 | -record(file, { 24 | fd, 25 | eof = 0, 26 | file_path, 27 | open_options 28 | }). 
29 | 30 | %% Punlic BTREE API 31 | %% 32 | -export([append_term/2, append_term/3, 33 | pread_term/2, 34 | sync/1, 35 | empty/1]). 36 | 37 | % Public File API 38 | -export([open/1, open/2, close/1, bytes/1, truncate/2, rename/2, reopen/1]). 39 | -export([pread_iolist/2, pread_binary/2]). 40 | -export([append_binary/2, append_binary_crc32/2]). 41 | -export([append_raw_chunk/2, assemble_file_chunk/1, assemble_file_chunk/2]). 42 | -export([append_term_crc32/2, append_term_crc32/3]). 43 | -export([write_header/2, read_header/1]). 44 | -export([delete/2, delete/3, nuke_dir/2, init_delete_dir/1]). 45 | 46 | % gen_server callbacks 47 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, 48 | terminate/2, code_change/3]). 49 | 50 | 51 | -ifdef(DEBUG). 52 | -define(log(Fmt,Args), io:format(Fmt, Args)). 53 | -else. 54 | -define(log(Fmt,Args), ok). 55 | -endif. 56 | 57 | 58 | -type cbt_file() :: pid(). 59 | -type file_option() :: create | overwrite. 60 | -type file_options() :: [file_option()]. 61 | -type append_options() :: [{compression, cbt_compress:compression_method()}]. 62 | 63 | -export_type([cbt_file/0]). 64 | -export_type([file_option/0, file_options/0]). 65 | -export_type([append_options/0]). 66 | 67 | %% @doc open a file in a gen_server that will be used to handle btree 68 | %% I/Os. 69 | -spec open(FilePath::string()) -> {ok, cbt_file()} | {error, term()}. 70 | open(FilePath) -> 71 | open(FilePath, []). 72 | 73 | 74 | -spec open(FilePath::string(), Options::file_options()) 75 | -> {ok, cbt_file()} | {error, term()}. 76 | open(FilePath, Options) -> 77 | proc_lib:start_link(?MODULE, init, [{FilePath, Options}]). 78 | 79 | 80 | %% @doc append an Erlang term to the end of the file. 81 | %% Args: Erlang term to serialize and append to the file. 82 | %% Returns: {ok, Pos, NumBytesWritten} where Pos is the file offset to 83 | %% the beginning the serialized term. Use pread_term to read the term 84 | %% back. 85 | %% or {error, Reason}. 
86 | -spec append_term(Fd::cbt_file(), Term::term()) -> 87 | {ok, Pos::integer(), NumBytesWritten::integer()} 88 | | {error, term()}. 89 | append_term(Fd, Term) -> 90 | append_term(Fd, Term, []). 91 | 92 | 93 | -spec append_term(Fd::cbt_file(), Term::term(), 94 | Options::append_options()) -> 95 | {ok, Pos::integer(), NumBytesWritten::integer()} 96 | | {error, term()}. 97 | append_term(Fd, Term, Options) -> 98 | Comp = cbt_util:get_value(compression, Options, ?DEFAULT_COMPRESSION), 99 | append_binary(Fd, cbt_compress:compress(Term, Comp)). 100 | 101 | 102 | %% @doc append an Erlang term to the end of the file and sign with an 103 | %% crc32 prefix. 104 | -spec append_term_crc32(Fd::cbt_file(), Term::term()) -> 105 | {ok, Pos::integer(), NumBytesWritten::integer()} 106 | | {error, term()}. 107 | append_term_crc32(Fd, Term) -> 108 | append_term_crc32(Fd, Term, []). 109 | 110 | -spec append_term_crc32(Fd::cbt_file(), Term::term(), 111 | Options::append_options()) -> 112 | {ok, Pos::integer(), NumBytesWritten::integer} 113 | | {error, term}. 114 | append_term_crc32(Fd, Term, Options) -> 115 | Comp = cbt_util:get_value(compression, Options, ?DEFAULT_COMPRESSION), 116 | append_binary_crc32(Fd, cbt_compress:compress(Term, Comp)). 117 | 118 | %% @doc append an Erlang binary to the end of the file. 119 | %% Args: Erlang term to serialize and append to the file. 120 | %% Returns: {ok, Pos, NumBytesWritten} where Pos is the file offset to the 121 | %% beginning the serialized term. Use pread_term to read the term back. 122 | %% or {error, Reason}. 123 | -spec append_binary(Fd::cbt_file(), Bin::binary()) -> 124 | {ok, Pos::integer(), NumBytesWriiten::integer} 125 | | {error, term}. 126 | append_binary(Fd, Bin) -> 127 | gen_server:call(Fd, {append_bin, assemble_file_chunk(Bin)}, infinity). 128 | 129 | %% @doc append an Erlang binary to the end of the file and sign in with 130 | %% crc32. 
131 | -spec append_binary_crc32(Fd::cbt_file(), Bin::binary()) -> 132 | {ok, Pos::integer(), NumBytesWritten::integer()} 133 | | {error, term()}. 134 | append_binary_crc32(Fd, Bin) -> 135 | gen_server:call(Fd, 136 | {append_bin, assemble_file_chunk(Bin, erlang:crc32(Bin))}, infinity). 137 | 138 | 139 | %% @doc like append_binary but wihout manipulating the binary, it is 140 | %% stored as is. 141 | -spec append_raw_chunk(Fd::cbt_file(), Bin::binary()) -> 142 | {ok, Pos::integer(), NumBytesWriiten::integer} 143 | | {error, term}. 144 | append_raw_chunk(Fd, Chunk) -> 145 | gen_server:call(Fd, {append_bin, Chunk}, infinity). 146 | 147 | 148 | assemble_file_chunk(Bin) -> 149 | [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin]. 150 | 151 | assemble_file_chunk(Bin, Crc32) -> 152 | [<<1:1/integer, (iolist_size(Bin)):31/integer, Crc32:32/integer>>, Bin]. 153 | 154 | %% @doc Reads a term from a file that was written with append_term 155 | %% Args: Pos, the offset into the file where the term is serialized. 156 | -spec pread_term(Fd::cbt_file(), Pos::integer()) -> 157 | {ok, Term::term()} | {error, term()}. 158 | pread_term(Fd, Pos) -> 159 | {ok, Bin} = pread_binary(Fd, Pos), 160 | {ok, cbt_compress:decompress(Bin)}. 161 | 162 | 163 | empty(Fd) -> 164 | truncate(Fd, 0). 165 | 166 | 167 | %% @doc: Reads a binrary from a file that was written with append_binary 168 | %% Args: Pos, the offset into the file where the term is serialized. 169 | -spec pread_binary(Fd::cbt_file(), Pos::integer()) -> 170 | {ok, Bin::binary()} | {error, term()}. 171 | pread_binary(Fd, Pos) -> 172 | {ok, L} = pread_iolist(Fd, Pos), 173 | {ok, iolist_to_binary(L)}. 
174 | 175 | 176 | pread_iolist(Fd, Pos) -> 177 | case gen_server:call(Fd, {pread_iolist, Pos}, infinity) of 178 | {ok, IoList, <<>>} -> 179 | {ok, IoList}; 180 | {ok, IoList, <>} -> 181 | case erlang:crc32(IoList) of 182 | Crc32 -> 183 | {ok, IoList}; 184 | _ -> 185 | error_logger:info_msg("File corruption in ~p at position ~B", 186 | [Fd, Pos]), 187 | exit({file_corruption, <<"file corruption">>}) 188 | end; 189 | Error -> 190 | Error 191 | end. 192 | 193 | %% @doc get he length of a file, in bytes. 194 | -spec bytes(Fd::cbt_file()) -> {ok, Bytes::integer()} | {error, term()}. 195 | bytes(Fd) -> 196 | gen_server:call(Fd, bytes, infinity). 197 | 198 | %% @doc Truncate a file to the number of bytes. 199 | -spec truncate(Fd::cbt_file(), Pos::integer()) -> ok | {error, term()}. 200 | truncate(Fd, Pos) -> 201 | gen_server:call(Fd, {truncate, Pos}, infinity). 202 | 203 | %% @doc rename a file safely 204 | -spec rename(Fd::cbt_file(), NewFilePath::string()) -> ok | {error,term()}. 205 | rename(Fd, NewFilePath) -> 206 | gen_server:call(Fd, {rename, NewFilePath}, infinity). 207 | 208 | %% @doc Ensure all bytes written to the file are flushed to disk. 209 | -spec sync(FdOrPath::cbt_file()|string()) -> ok | {error, term()}. 210 | sync(FilePath) when is_list(FilePath) -> 211 | {ok, Fd} = file:open(FilePath, [append, raw]), 212 | try ok = file:sync(Fd) after ok = file:close(Fd) end; 213 | sync(Fd) -> 214 | gen_server:call(Fd, sync, infinity). 215 | 216 | %% @doc reopen a file 217 | -spec reopen(cbt_file()) -> ok | {error, term()}. 218 | reopen(Fd) -> 219 | gen_server:call(Fd, reopen, infinity). 220 | 221 | 222 | %% @doc Close the file. 223 | -spec close(Fd::cbt_file()) -> ok. 224 | close(Fd) -> 225 | try 226 | gen_server:call(Fd, close, infinity) 227 | catch 228 | exit:{noproc,_} -> ok; 229 | exit:noproc -> ok; 230 | %% Handle the case where the monitor triggers 231 | exit:{normal, _} -> ok 232 | end. 233 | 234 | 235 | 236 | %% @doc delete a file synchronously. 
237 | %% Root dir is the root where to find the file. This call is blocking 238 | %% until the file is deleted. 239 | -spec delete(RootDir::string(), FilePath::string()) -> ok | {error, term()}. 240 | delete(RootDir, FilePath) -> 241 | delete(RootDir, FilePath, true). 242 | 243 | %% @doc delete a file asynchronously or not 244 | -spec delete(RootDir::string(), FilePath::string(), Async::boolean()) -> 245 | ok | {error, term()}. 246 | delete(RootDir, FilePath, Async) -> 247 | DelFile = filename:join([RootDir,".delete", cbt_util:uniqid()]), 248 | case file:rename(FilePath, DelFile) of 249 | ok -> 250 | if (Async) -> 251 | spawn(file, delete, [DelFile]), 252 | ok; 253 | true -> 254 | file:delete(DelFile) 255 | end; 256 | Error -> 257 | Error 258 | end. 259 | 260 | 261 | %% @doc utility function to remove completely the content of a directory 262 | -spec nuke_dir(RootDelDir::string(), Dir::string()) -> ok. 263 | nuke_dir(RootDelDir, Dir) -> 264 | FoldFun = fun(File) -> 265 | Path = Dir ++ "/" ++ File, 266 | case filelib:is_dir(Path) of 267 | true -> 268 | ok = nuke_dir(RootDelDir, Path), 269 | file:del_dir(Path); 270 | false -> 271 | delete(RootDelDir, Path, false) 272 | end 273 | end, 274 | case file:list_dir(Dir) of 275 | {ok, Files} -> 276 | lists:foreach(FoldFun, Files), 277 | ok = file:del_dir(Dir); 278 | {error, enoent} -> 279 | ok 280 | end. 281 | 282 | %% @doc utility function to init the deletion directory where the 283 | %% deleted files will be temporarely stored. 284 | -spec init_delete_dir(RootDir::string()) -> ok. 285 | init_delete_dir(RootDir) -> 286 | Dir = filename:join(RootDir,".delete"), 287 | % note: ensure_dir requires an actual filename companent, which is the 288 | % reason for "foo". 289 | filelib:ensure_dir(filename:join(Dir,"foo")), 290 | filelib:fold_files(Dir, ".*", true, 291 | fun(Filename, _) -> 292 | ok = file:delete(Filename) 293 | end, ok). 
294 | 295 | 296 | %% @doc read the database header from the database file 297 | -spec read_header(Fd::cbt_file()) 298 | -> {ok, Header::term(), Pos::integer()} | {error, term()}. 299 | read_header(Fd) -> 300 | case gen_server:call(Fd, find_header, infinity) of 301 | {ok, Bin, Pos} -> 302 | {ok, binary_to_term(Bin), Pos}; 303 | Else -> 304 | Else 305 | end. 306 | 307 | %% @doc write the database header at the end of the the database file 308 | -spec write_header(Fd::cbt_file(), Header::term()) 309 | -> {ok, Pos::integer()} | {error, term()}. 310 | write_header(Fd, Data) -> 311 | Bin = term_to_binary(Data), 312 | Crc32 = erlang:crc32(Bin), 313 | % now we assemble the final header binary and write to disk 314 | FinalBin = <>, 315 | gen_server:call(Fd, {write_header, FinalBin}, infinity). 316 | 317 | 318 | % server functions 319 | 320 | init({FilePath, Options}) -> 321 | ok = maybe_create_file(FilePath, Options), 322 | case file:read_file_info(FilePath) of 323 | {ok, _} -> 324 | OpenOptions = case lists:member(read_only, Options) of 325 | true -> [binary, read, raw]; 326 | false -> [binary, read, append, raw] 327 | end, 328 | case try_open_fd(FilePath, OpenOptions, ?RETRY_TIME_MS, 329 | ?MAX_RETRY_TIME_MS) of 330 | {ok, Fd} -> 331 | process_flag(trap_exit, true), 332 | {ok, Eof} = file:position(Fd, eof), 333 | 334 | proc_lib:init_ack({ok, self()}), 335 | InitState = #file{fd=Fd, 336 | eof=Eof, 337 | file_path=FilePath, 338 | open_options=OpenOptions}, 339 | gen_server:enter_loop(?MODULE, [], InitState); 340 | Error -> 341 | proc_lib:init_ack(Error) 342 | end; 343 | Error -> 344 | proc_lib:init_ack(Error) 345 | end. 
346 | 347 | handle_call(close, _From, #file{fd=Fd}=File) -> 348 | {stop, normal, file:close(Fd), File#file{fd = nil}}; 349 | 350 | handle_call({pread_iolist, Pos}, _From, File) -> 351 | {RawData, NextPos} = try 352 | % up to 8Kbs of read ahead 353 | read_raw_iolist_int(File, Pos, 2 * ?SIZE_BLOCK - (Pos rem ?SIZE_BLOCK)) 354 | catch 355 | _:_ -> 356 | read_raw_iolist_int(File, Pos, 4) 357 | end, 358 | {Begin, RestRawData} = split_iolist(RawData, 4, []), 359 | <> = iolist_to_binary(Begin), 360 | case Prefix of 361 | 1 -> 362 | {Crc32, IoList} = extract_crc32( 363 | maybe_read_more_iolist(RestRawData, 4 + Len, NextPos, File)), 364 | {reply, {ok, IoList, Crc32}, File}; 365 | 0 -> 366 | IoList = maybe_read_more_iolist(RestRawData, Len, NextPos, File), 367 | {reply, {ok, IoList, <<>>}, File} 368 | end; 369 | 370 | handle_call(bytes, _From, #file{fd = Fd} = File) -> 371 | {reply, file:position(Fd, eof), File}; 372 | 373 | handle_call(sync, _From, #file{fd=Fd}=File) -> 374 | {reply, file:sync(Fd), File}; 375 | 376 | handle_call({truncate, Pos}, _From, #file{fd=Fd}=File) -> 377 | {ok, Pos} = file:position(Fd, Pos), 378 | case file:truncate(Fd) of 379 | ok -> 380 | {reply, ok, File#file{eof = Pos}}; 381 | Error -> 382 | {reply, Error, File} 383 | end; 384 | 385 | handle_call({set_path, NewFilePath}, _From, File) -> 386 | {reply, ok, File#file{file_path=NewFilePath}}; 387 | 388 | handle_call({rename, NewFilePath}, _From, #file{file_path=FilePath} = File) -> 389 | Reply = file:rename(FilePath, NewFilePath), 390 | {reply, Reply, File#file{file_path=NewFilePath}}; 391 | 392 | handle_call(reopen, _From, #file{fd=Fd, file_path=FilePath, 393 | open_options=OpenOptions}=File) -> 394 | 395 | ok = file:close(Fd), 396 | case try_open_fd(FilePath, OpenOptions, ?RETRY_TIME_MS, 397 | ?MAX_RETRY_TIME_MS) of 398 | {ok, Fd2} -> 399 | {ok, Eof} = file:position(Fd, eof), 400 | {reply, ok, File#file{fd=Fd2, eof=Eof}}; 401 | Error -> 402 | {stop, Error, File} 403 | end; 404 | 405 | 
handle_call({append_bin, Bin}, _From, #file{fd = Fd, eof = Pos} = File) -> 406 | Blocks = make_blocks(Pos rem ?SIZE_BLOCK, Bin), 407 | Size = iolist_size(Blocks), 408 | case file:write(Fd, Blocks) of 409 | ok -> 410 | {reply, {ok, Pos, Size}, File#file{eof = Pos + Size}}; 411 | Error -> 412 | {reply, Error, File} 413 | end; 414 | 415 | handle_call({write_header, Bin}, _From, #file{fd = Fd, eof = Pos} = File) -> 416 | BinSize = byte_size(Bin), 417 | {Padding, Pos2} = case Pos rem ?SIZE_BLOCK of 418 | 0 -> 419 | {<<>>, Pos}; 420 | BlockOffset -> 421 | Pos1 = Pos + (?SIZE_BLOCK - BlockOffset), 422 | {<<0:(8*(?SIZE_BLOCK-BlockOffset))>>, Pos1} 423 | end, 424 | FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(5, [Bin])], 425 | case file:write(Fd, FinalBin) of 426 | ok -> 427 | {reply, {ok, Pos2}, File#file{eof = Pos + iolist_size(FinalBin)}}; 428 | Error -> 429 | {reply, Error, File} 430 | end; 431 | 432 | handle_call(find_header, _From, #file{fd = Fd, eof = Pos} = File) -> 433 | {reply, find_header(Fd, Pos div ?SIZE_BLOCK), File}. 434 | 435 | handle_cast(close, Fd) -> 436 | {stop,normal,Fd}. 437 | 438 | handle_info({'EXIT', _, normal}, Fd) -> 439 | {noreply, Fd}; 440 | handle_info({'EXIT', _, Reason}, Fd) -> 441 | {stop, Reason, Fd}. 442 | 443 | code_change(_OldVsn, State, _Extra) -> 444 | {ok, State}. 445 | 446 | terminate(_Reason, #file{fd = nil}) -> 447 | ok; 448 | terminate(_Reason, #file{fd = Fd}) -> 449 | ok = file:close(Fd). 
450 | 451 | 452 | 453 | maybe_create_file(FilePath, Options) -> 454 | IfCreate = case lists:member(create_if_missing, Options) of 455 | true -> 456 | case file:read_file_info(FilePath) of 457 | {error, enoent} -> true; 458 | _ -> lists:member(overwrite, Options) 459 | end; 460 | false -> 461 | lists:member(create, Options) 462 | end, 463 | 464 | case IfCreate of 465 | true -> 466 | filelib:ensure_dir(FilePath), 467 | case file:open(FilePath, [read, write, binary]) of 468 | {ok, Fd} -> 469 | {ok, Length} = file:position(Fd, eof), 470 | case Length > 0 of 471 | true -> 472 | % this means the file already exists and has data. 473 | % FYI: We don't differentiate between empty files and non-existant 474 | % files here. 475 | case lists:member(overwrite, Options) of 476 | true -> 477 | {ok, 0} = file:position(Fd, 0), 478 | ok = file:truncate(Fd), 479 | ok = file:sync(Fd), 480 | file:close(Fd); 481 | false -> 482 | ok = file:close(Fd), 483 | file_exists 484 | end; 485 | false -> 486 | file:close(Fd) 487 | end; 488 | Error -> 489 | Error 490 | end; 491 | false -> 492 | ok 493 | end. 494 | 495 | try_open_fd(FilePath, Options, _Timewait, TotalTimeRemain) 496 | when TotalTimeRemain < 0 -> 497 | % Out of retry time. 498 | % Try one last time and whatever we get is the returned result. 
499 | file:open(FilePath, Options); 500 | try_open_fd(FilePath, Options, Timewait, TotalTimeRemain) -> 501 | case file:open(FilePath, Options) of 502 | {ok, Fd} -> 503 | {ok, Fd}; 504 | {error, emfile} -> 505 | error_logger:info_msg("Too many file descriptors open, waiting" 506 | ++ " ~pms to retry", [Timewait]), 507 | receive 508 | after Timewait -> 509 | try_open_fd(FilePath, Options, Timewait, 510 | TotalTimeRemain - Timewait) 511 | end; 512 | {error, eacces} -> 513 | error_logger:info_msg("eacces error opening file ~p waiting" 514 | ++ " ~pms to retry", [FilePath, Timewait]), 515 | receive 516 | after Timewait -> 517 | try_open_fd(FilePath, Options, Timewait, 518 | TotalTimeRemain - Timewait) 519 | end; 520 | Error -> 521 | Error 522 | end. 523 | 524 | 525 | 526 | find_header(_Fd, -1) -> 527 | no_valid_header; 528 | find_header(Fd, Block) -> 529 | case (catch load_header(Fd, Block)) of 530 | {ok, Bin} -> 531 | {ok, Bin, Block * ?SIZE_BLOCK}; 532 | _Error -> 533 | find_header(Fd, Block -1) 534 | end. 535 | 536 | load_header(Fd, Block) -> 537 | {ok, <<1, HeaderLen:32/integer, RestBlock/binary>>} = 538 | file:pread(Fd, Block * ?SIZE_BLOCK, ?SIZE_BLOCK), 539 | TotalBytes = calculate_total_read_len(5, HeaderLen), 540 | RawBin = case TotalBytes > byte_size(RestBlock) of 541 | false -> 542 | <> = RestBlock, 543 | RawBin1; 544 | true -> 545 | {ok, Missing} = file:pread( 546 | Fd, (Block * ?SIZE_BLOCK) + 5 + byte_size(RestBlock), 547 | TotalBytes - byte_size(RestBlock)), 548 | <> 549 | end, 550 | <> = 551 | iolist_to_binary(remove_block_prefixes(RawBin, 5)), 552 | Crc32Sig = erlang:crc32(HeaderBin), 553 | {ok, HeaderBin}. 
%% @private Return exactly DataSize payload bytes: take them from
%% Buffer when it is large enough, otherwise read the remainder from
%% the file starting at NextPos.
maybe_read_more_iolist(Buffer, DataSize, NextPos, Fd) ->
    case iolist_size(Buffer) of
        BufferSize when DataSize =< BufferSize ->
            {Buffer2, _} = split_iolist(Buffer, DataSize, []),
            Buffer2;
        BufferSize ->
            {Missing, _} = read_raw_iolist_int(Fd, NextPos, DataSize - BufferSize),
            [Buffer, Missing]
    end.

-spec read_raw_iolist_int(#file{}, Pos::non_neg_integer(), Len::non_neg_integer()) ->
    {Data::iolist(), CurPos::non_neg_integer()}.
read_raw_iolist_int(Fd, {Pos, _Size}, Len) -> % 0110 UPGRADE CODE
    read_raw_iolist_int(Fd, Pos, Len);
read_raw_iolist_int(#file{fd = Fd}, Pos, Len) ->
    BlockOffset = Pos rem ?SIZE_BLOCK,
    TotalBytes = calculate_total_read_len(BlockOffset, Len),
    case file:pread(Fd, Pos, TotalBytes) of
        {ok, <<RawBin:TotalBytes/binary>>} ->
            % Full read: strip interleaved block-prefix bytes.
            {remove_block_prefixes(RawBin, BlockOffset), Pos + TotalBytes};
        {ok, RawBin} ->
            % Short read: report exactly what was wanted vs. received.
            UnexpectedBin = {
                unexpected_binary,
                {at, Pos},
                {wanted_bytes, TotalBytes},
                {got, byte_size(RawBin), RawBin}
            },
            throw({read_error, UnexpectedBin});
        Else ->
            throw({read_error, Else})
    end.

%% @private Split the leading 4-byte CRC32 off an iolist.
-spec extract_crc32(iolist()) -> {binary(), iolist()}.
extract_crc32(FullIoList) ->
    {CrcList, IoList} = split_iolist(FullIoList, 4, []),
    {iolist_to_binary(CrcList), IoList}.

%% @private Number of bytes to read from disk so that FinalLen payload
%% bytes remain once the 1-byte-per-block prefixes are stripped.
calculate_total_read_len(0, FinalLen) ->
    calculate_total_read_len(1, FinalLen) + 1;
calculate_total_read_len(BlockOffset, FinalLen) ->
    case ?SIZE_BLOCK - BlockOffset of
        BlockLeft when BlockLeft >= FinalLen ->
            FinalLen;
        BlockLeft ->
            FinalLen + ((FinalLen - BlockLeft) div (?SIZE_BLOCK - 1)) +
                if ((FinalLen - BlockLeft) rem (?SIZE_BLOCK - 1)) =:= 0 -> 0;
                   true -> 1
                end
    end.
%% @private Strip the 1-byte prefix found at the start of every
%% ?SIZE_BLOCK-sized block from Bin, returning the payload as an iolist.
%% BlockOffset is the position of Bin's first byte within its block.
remove_block_prefixes(<<>>, _BlockOffset) ->
    [];
remove_block_prefixes(<<_BlockPrefix, Rest/binary>>, 0) ->
    remove_block_prefixes(Rest, 1);
remove_block_prefixes(Bin, BlockOffset) ->
    BlockBytesAvailable = ?SIZE_BLOCK - BlockOffset,
    case byte_size(Bin) of
        Size when Size > BlockBytesAvailable ->
            <<DataBlock:BlockBytesAvailable/binary, Rest/binary>> = Bin,
            [DataBlock | remove_block_prefixes(Rest, 0)];
        _Size ->
            [Bin]
    end.

%% @private Interleave a <<0>> prefix byte at every block boundary so
%% the data can later be recovered with remove_block_prefixes/2.
make_blocks(_BlockOffset, []) ->
    [];
make_blocks(0, IoList) ->
    [<<0>> | make_blocks(1, IoList)];
make_blocks(BlockOffset, IoList) ->
    case split_iolist(IoList, (?SIZE_BLOCK - BlockOffset), []) of
        {Begin, End} ->
            [Begin | make_blocks(0, End)];
        _SplitRemaining ->
            % IoList was shorter than the space left in this block.
            IoList
    end.

%% @doc Returns a tuple where the first element contains the leading SplitAt
%% bytes of the original iolist, and the 2nd element is the tail. If SplitAt
%% is larger than byte_size(IoList), return the difference.
-spec split_iolist(IoList::iolist(), SplitAt::non_neg_integer(), Acc::list()) ->
    {iolist(), iolist()} | non_neg_integer().
split_iolist(List, 0, BeginAcc) ->
    {lists:reverse(BeginAcc), List};
split_iolist([], SplitAt, _BeginAcc) ->
    % Ran out of data: report how many bytes are still missing.
    SplitAt;
split_iolist([Bin | Rest], SplitAt, BeginAcc) when is_binary(Bin), SplitAt > byte_size(Bin) ->
    split_iolist(Rest, SplitAt - byte_size(Bin), [Bin | BeginAcc]);
split_iolist([Bin | Rest], SplitAt, BeginAcc) when is_binary(Bin) ->
    % The split point falls inside this binary.
    <<Begin:SplitAt/binary, End/binary>> = Bin,
    split_iolist([End | Rest], 0, [Begin | BeginAcc]);
split_iolist([Sublist | Rest], SplitAt, BeginAcc) when is_list(Sublist) ->
    case split_iolist(Sublist, SplitAt, BeginAcc) of
        {Begin, End} ->
            {Begin, [End | Rest]};
        SplitRemaining ->
            split_iolist(Rest, SplitAt - (SplitAt - SplitRemaining), [Sublist | BeginAcc])
    end;
split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) ->
    split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]).
-------------------------------------------------------------------------------- /src/cbt_ramfile.erl: --------------------------------------------------------------------------------
-module(cbt_ramfile).
-behaviour(gen_server).
-behaviour(cbt_backend).


-export([append_term/2, append_term/3,
         append_term_crc32/2,
         pread_term/2,
         sync/1,
         truncate/2,
         bytes/1,
         empty/1]).

-export([open/1, open/2,
         close/1,
         read_header/1,
         write_header/2,
         append_binary/2,
         append_binary_crc32/2,
         append_raw_chunk/2,
         pread_binary/2,
         pread_iolist/2]).

-export([init/1,
         handle_call/3,
         handle_cast/2,
         terminate/2]).


-define(SIZE_BLOCK, 16#1000). % 4 KiB

-include("cbt.hrl").


append_term(Pid, Term) -> append_term(Pid, Term, []).

%% @doc Compress Term per the `compression' option and append it.
append_term(Pid, Term, Options) ->
    Comp = cbt_util:get_value(compression, Options, ?DEFAULT_COMPRESSION),
    append_binary(Pid, cbt_compress:compress(Term, Comp)).
append_term_crc32(Fd, Term) ->
    append_term_crc32(Fd, Term, []).

%% @doc Compress Term per the `compression' option and append it with a
%% CRC32 checksum.
append_term_crc32(Fd, Term, Options) ->
    Comp = cbt_util:get_value(compression, Options, ?DEFAULT_COMPRESSION),
    append_binary_crc32(Fd, cbt_compress:compress(Term, Comp)).


%% @doc Read back and decompress the term stored at Pos.
pread_term(Pid, Pos) ->
    {ok, Bin} = pread_binary(Pid, Pos),
    {ok, cbt_compress:decompress(Bin)}.


%% RAM-backed file: there is no stable storage to flush to.
sync(_Pid) -> ok.

%% @doc Find and decode the most recently written header.
read_header(Pid) ->
    case find_header(Pid) of
        {ok, Bin, Pos} ->
            {ok, binary_to_term(Bin), Pos};
        Else ->
            Else
    end.

%% @doc Serialize Data and store it as a header guarded by a CRC32.
write_header(Pid, Data) ->
    Bin = term_to_binary(Data),
    Crc32 = erlang:crc32(Bin),
    % now we assemble the final header binary and write to disk
    FinalBin = <<Crc32:32/integer, Bin/binary>>,
    write_header_bin(Pid, FinalBin).


open(Name) -> open(Name, []).

%% Name and options are ignored: the backing store is an in-memory file.
open(_Name, _Options) ->
    gen_server:start_link(?MODULE, [], []).

close(Pid) -> gen_server:call(Pid, close).

append_binary(Pid, Bin) ->
    % infinity timeout for consistency with append_binary_crc32/2 and
    % append_raw_chunk/2 (large appends may exceed the default 5s).
    gen_server:call(Pid, {append_bin, assemble_file_chunk(Bin)}, infinity).

append_binary_crc32(Fd, Bin) ->
    gen_server:call(
        Fd,
        {append_bin, assemble_file_chunk(Bin, erlang:crc32(Bin))}, infinity).

append_raw_chunk(Fd, Chunk) ->
    gen_server:call(Fd, {append_bin, Chunk}, infinity).


pread_binary(Pid, Pos) ->
    case pread_iolist(Pid, Pos) of
        {ok, IoList} ->
            {ok, iolist_to_binary(IoList)};
        Error ->
            Error
    end.

%% @doc Read the iolist stored at Pos, verifying its CRC32 when one was
%% written alongside it. Exits the caller on checksum mismatch.
pread_iolist(Pid, Pos) ->
    Res = gen_server:call(Pid, {pread_iolist, Pos}),
    case Res of
        {ok, IoList, <<>>} -> {ok, IoList};
        {ok, IoList, << Crc32:32/integer >>} ->
            case erlang:crc32(IoList) of
                Crc32 ->
                    {ok, IoList};
                _ ->
                    error_logger:info_msg("File corruption in ~p at position ~B",
                                          [Pid, Pos]),
                    exit({file_corruption, <<"file corruption">>})
            end;
        Error ->
            Error
    end.
write_header_bin(Pid, Bin) ->
    gen_server:call(Pid, {write_header, Bin}, infinity).

find_header(Pid) ->
    gen_server:call(Pid, find_header, infinity).

bytes(Pid) ->
    gen_server:call(Pid, bytes).

%% @doc Truncate the file to zero length.
empty(Pid) ->
    truncate(Pid, 0).

truncate(Pid, Pos) ->
    gen_server:call(Pid, {truncate, Pos}).

%% Chunk layout: 1 flag bit (1 = crc32 present), 31 bits of payload
%% length, optional 32-bit crc32, then the payload itself.
assemble_file_chunk(Bin) ->
    [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin].

assemble_file_chunk(Bin, Crc32) ->
    [<<1:1/integer, (iolist_size(Bin)):31/integer, Crc32:32/integer>>, Bin].


%% State: the ram file descriptor and the current logical end of file.
init([]) ->
    {ok, Fd} = file:open("", [ram, read, write, binary]),
    {ok, #{ fd => Fd, eof => 0 }}.


handle_call({append_bin, Bin}, _From, #{ fd := Fd, eof := Pos } = State) ->
    Blocks = make_blocks(Pos rem ?SIZE_BLOCK, Bin),
    Size = iolist_size(Blocks),
    {Reply, NewState} = case file:pwrite(Fd, Pos, Blocks) of
        ok ->
            Eof = Pos + Size,
            {{ok, Pos, Size}, State#{ eof => Eof }};
        Error ->
            {Error, State}
    end,
    {reply, Reply, NewState};
handle_call({pread_iolist, Pos}, _From, #{ fd := Fd } = State) ->
    % Optimistically read through the end of the next block; if that
    % fails (e.g. near EOF) fall back to reading just the 4-byte prefix.
    {RawData, NextPos} = try
        read_raw_iolist_int(
            Fd, Pos, 2 * ?SIZE_BLOCK - (Pos rem ?SIZE_BLOCK)
        )
    catch
        _:_ ->
            read_raw_iolist_int(Fd, Pos, 4)
    end,
    {Begin, RestRawData} = split_iolist(RawData, 4, []),
    <<Prefix:1/integer, Len:31/integer>> = iolist_to_binary(Begin),
    Reply = case Prefix of
        1 ->
            % Checksummed chunk: payload is preceded by a 4-byte CRC32.
            {Crc32, IoList} = extract_crc32(
                maybe_read_more_iolist(RestRawData,
                                       4 + Len, NextPos,
                                       Fd)
            ),
            {ok, IoList, Crc32};
        0 ->
            IoList = maybe_read_more_iolist(RestRawData, Len, NextPos, Fd),
            {ok, IoList, <<>>}
    end,
    {reply, Reply, State};
handle_call({write_header, Bin}, _From, #{ fd := Fd, eof := Pos } = State) ->
    BinSize = byte_size(Bin),
    % Headers always start on a block boundary; pad with zeros up to it.
    {Padding, Pos2} = case (Pos rem ?SIZE_BLOCK) of
        0 -> { <<>>, Pos };
        BlockOffset ->
            Delta = (?SIZE_BLOCK - BlockOffset),
            Pos1 = Pos + Delta,
            { << 0:(8 * Delta) >>, Pos1 }
    end,
    IoList = [Padding, << 1, BinSize:32/integer >> | make_blocks(5, [Bin])],
    {Reply, NewState} = case file:pwrite(Fd, Pos, IoList) of
        ok ->
            {{ok, Pos2}, State#{ eof => Pos + iolist_size(IoList) }};
        Error ->
            {Error, State}
    end,
    {reply, Reply, NewState};
handle_call(find_header, _From, #{ fd := Fd, eof := Pos } = State) ->
    Reply = find_header(Fd, Pos div ?SIZE_BLOCK),
    {reply, Reply, State};
handle_call(bytes, _From, #{ fd := Fd } = State) ->
    {reply, file:position(Fd, eof), State};

handle_call({truncate, Pos}, _From, #{ fd := Fd } = State) ->
    {ok, Pos} = file:position(Fd, Pos),
    {Reply, NewState} = case file:truncate(Fd) of
        ok ->
            {ok, State#{ eof => Pos }};
        Error ->
            {Error, State}
    end,
    {reply, Reply, NewState};
handle_call(close, _From, State) ->
    {stop, normal, ok, State}.

handle_cast(_Msg, State) -> {noreply, State}.

terminate(_Reason, #{ fd := Fd }) ->
    file:close(Fd).


%% Scan backwards, block by block, for the latest valid header.
find_header(_Fd, -1) ->
    no_valid_header;
find_header(Fd, Block) ->
    case (catch load_header(Fd, Block)) of
        {ok, Bin} ->
            {ok, Bin, Block * ?SIZE_BLOCK};
        _Error ->
            find_header(Fd, Block - 1)
    end.
%% @private Read the header stored at Block. On-disk layout (with the
%% per-block prefix bytes interleaved): <<1, HeaderLen:32, Crc32:32,
%% HeaderBin>>. Any mismatch crashes; find_header/2 catches it.
load_header(Fd, Block) ->
    {ok, <<1, HeaderLen:32/integer, RestBlock/binary>>} =
        file:pread(Fd, Block * ?SIZE_BLOCK, ?SIZE_BLOCK),
    TotalBytes = calculate_total_read_len(5, HeaderLen),
    RawBin = case TotalBytes > byte_size(RestBlock) of
        false ->
            <<RawBin1:TotalBytes/binary, _/binary>> = RestBlock,
            RawBin1;
        true ->
            {ok, Missing} = file:pread(
                Fd, (Block * ?SIZE_BLOCK) + 5 + byte_size(RestBlock),
                TotalBytes - byte_size(RestBlock)),
            <<RestBlock/binary, Missing/binary>>
    end,
    <<Crc32Sig:32/integer, HeaderBin/binary>> =
        iolist_to_binary(remove_block_prefixes(RawBin, 5)),
    % Assert the stored checksum matches the payload.
    Crc32Sig = erlang:crc32(HeaderBin),
    {ok, HeaderBin}.


%% @private Return exactly DataSize payload bytes: take them from
%% Buffer when possible, otherwise read the remainder from NextPos.
maybe_read_more_iolist(Buffer, DataSize, NextPos, Fd) ->
    case iolist_size(Buffer) of
        BufferSize when DataSize =< BufferSize ->
            {Buffer2, _} = split_iolist(Buffer, DataSize, []),
            Buffer2;
        BufferSize ->
            {Missing, _} = read_raw_iolist_int(Fd, NextPos, DataSize - BufferSize),
            [Buffer, Missing]
    end.

%% @private Read Len payload bytes starting at Pos, stripping the
%% interleaved block-prefix bytes. Throws {read_error, _} on failure.
read_raw_iolist_int(Fd, Pos, Len) ->
    BlockOffset = Pos rem ?SIZE_BLOCK,
    TotalBytes = calculate_total_read_len(BlockOffset, Len),
    case file:pread(Fd, Pos, TotalBytes) of
        {ok, <<RawBin:TotalBytes/binary>>} ->
            {remove_block_prefixes(RawBin, BlockOffset), Pos + TotalBytes};
        {ok, RawBin} ->
            % Short read: report what was wanted vs. received.
            UnexpectedBin = {
                unexpected_binary,
                {at, Pos},
                {wanted_bytes, TotalBytes},
                {got, byte_size(RawBin), RawBin}
            },
            throw({read_error, UnexpectedBin});
        Else ->
            throw({read_error, Else})
    end.

%% @private Split the leading 4-byte CRC32 off an iolist.
extract_crc32(FullIoList) ->
    {CrcList, IoList} = split_iolist(FullIoList, 4, []),
    {iolist_to_binary(CrcList), IoList}.
%% @private Number of bytes to read from disk so that FinalLen payload
%% bytes remain once the 1-byte-per-block prefixes are stripped.
calculate_total_read_len(0, FinalLen) ->
    calculate_total_read_len(1, FinalLen) + 1;
calculate_total_read_len(BlockOffset, FinalLen) ->
    case ?SIZE_BLOCK - BlockOffset of
        BlockLeft when BlockLeft >= FinalLen ->
            FinalLen;
        BlockLeft ->
            FinalLen + ((FinalLen - BlockLeft) div (?SIZE_BLOCK - 1)) +
                if ((FinalLen - BlockLeft) rem (?SIZE_BLOCK - 1)) =:= 0 -> 0;
                   true -> 1
                end
    end.


%% @private Strip the 1-byte prefix found at the start of every
%% ?SIZE_BLOCK-sized block from Bin, returning the payload as an iolist.
remove_block_prefixes(<<>>, _BlockOffset) ->
    [];
remove_block_prefixes(<<_BlockPrefix, Rest/binary>>, 0) ->
    remove_block_prefixes(Rest, 1);
remove_block_prefixes(Bin, BlockOffset) ->
    BlockBytesAvailable = ?SIZE_BLOCK - BlockOffset,
    case byte_size(Bin) of
        Size when Size > BlockBytesAvailable ->
            <<DataBlock:BlockBytesAvailable/binary, Rest/binary>> = Bin,
            [DataBlock | remove_block_prefixes(Rest, 0)];
        _Size ->
            [Bin]
    end.

%% @private Interleave a <<0>> prefix byte at every block boundary so
%% the data can later be recovered with remove_block_prefixes/2.
make_blocks(_BlockOffset, []) ->
    [];
make_blocks(0, IoList) ->
    [<<0>> | make_blocks(1, IoList)];
make_blocks(BlockOffset, IoList) ->
    case split_iolist(IoList, (?SIZE_BLOCK - BlockOffset), []) of
        {Begin, End} ->
            [Begin | make_blocks(0, End)];
        _SplitRemaining ->
            % IoList fits in the space left in this block.
            IoList
    end.

%% @doc Returns a tuple where the first element contains the leading SplitAt
%% bytes of the original iolist, and the 2nd element is the tail. If SplitAt
%% is larger than byte_size(IoList), return the difference.
split_iolist(List, 0, BeginAcc) ->
    {lists:reverse(BeginAcc), List};
split_iolist([], SplitAt, _BeginAcc) ->
    % Ran out of data: report how many bytes are still missing.
    SplitAt;
split_iolist([Bin | Rest], SplitAt, BeginAcc) when is_binary(Bin), SplitAt > byte_size(Bin) ->
    split_iolist(Rest, SplitAt - byte_size(Bin), [Bin | BeginAcc]);
split_iolist([Bin | Rest], SplitAt, BeginAcc) when is_binary(Bin) ->
    % The split point falls inside this binary.
    <<Begin:SplitAt/binary, End/binary>> = Bin,
    split_iolist([End | Rest], 0, [Begin | BeginAcc]);
split_iolist([Sublist | Rest], SplitAt, BeginAcc) when is_list(Sublist) ->
    case split_iolist(Sublist, SplitAt, BeginAcc) of
        {Begin, End} ->
            {Begin, [End | Rest]};
        SplitRemaining ->
            split_iolist(Rest, SplitAt - (SplitAt - SplitRemaining), [Sublist | BeginAcc])
    end;
split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) ->
    split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]).
-------------------------------------------------------------------------------- /src/cbt_stream.erl: --------------------------------------------------------------------------------
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.
%
%
% @doc module to store a large binary (stream) in the database file and
% get the list of each chunk

-module(cbt_stream).
-behaviour(gen_server).

% public API
-export([open/1, open/2, close/1]).
-export([foldl/4, foldl/5, foldl_decode/6, range_foldl/6]).
-export([copy_to_new_stream/3, write/2]).

% gen_server callbacks
-export([init/1, terminate/2, code_change/3,
         handle_cast/2, handle_call/3, handle_info/2]).

-include("cbt.hrl").

-define(DEFAULT_BUFFER_SIZE, 4096).

-record(stream,
        {fd = 0,
         written_pointers = [],
         buffer_list = [],
         buffer_len = 0,
         max_buffer,
         written_len = 0,
         md5,
         % md5 of the content without any transformation applied (e.g. compression)
         % needed for the attachment upload integrity check (ticket 558)
         identity_md5,
         identity_len = 0,
         encoding_fun,
         end_encoding_fun
        }).

-type cbt_stream() :: pid().
-type cbt_stream_options() :: [encoding | {compression_level, integer()}
                               | {buffer_size, integer()}].
-export_type([cbt_stream/0]).

%%% Interface functions %%%

%% @doc open a new stream
-spec open(Fd::cbt_file:cbt_file()) -> {ok, cbt_stream()}.
open(Fd) ->
    open(Fd, []).

%% @doc open a new stream
-spec open(Fd::cbt_file:cbt_file(), Options::cbt_stream_options())
        -> {ok, cbt_stream()}.
open(Fd, Options) ->
    gen_server:start_link(cbt_stream, {Fd, Options}, []).

%% @doc close the stream
-spec close(Stream::cbt_stream()) -> ok.
close(Pid) ->
    gen_server:call(Pid, close, infinity).

%% @doc copy a stream from one file to another
-spec copy_to_new_stream(Fd::cbt_stream(), PosList::[integer()],
                         DestFd::cbt_stream()) -> ok | {error, term()}.
copy_to_new_stream(SrcFd, PosList, DestFd) ->
    {ok, DestStream} = open(DestFd),
    % Pipe every chunk of the source straight into the destination stream.
    foldl(SrcFd, PosList,
          fun(Chunk, _) ->
              ok = write(DestStream, Chunk)
          end, ok),
    close(DestStream).

%% @doc retrieve all chunks from a list of their positions in the file.
%% Results are passed to a function:
%%
%% ```
%% fun(Chunk, Acc) -> Acc2
%% '''
-spec foldl(Fd::cbt_stream(), PosList::[integer()], Fun::fun(), Acc::any()) -> Acc2::any().
foldl(_Fd, [], _Fun, Acc) ->
    Acc;
foldl(Fd, [Pos | Remaining], Fun, Acc) ->
    {ok, Chunk} = cbt_file:pread_iolist(Fd, Pos),
    foldl(Fd, Remaining, Fun, Fun(Chunk, Acc)).

%% @doc like `fold/4' but check the signature.
%%
-spec foldl(Fd::cbt_stream(), PosList::[integer()], Md5::binary(),
            Fun::fun(), Acc::any()) -> Acc2::any().
foldl(Fd, PosList, <<>>, Fun, Acc) ->
    % No signature to verify: fall back to the plain fold.
    foldl(Fd, PosList, Fun, Acc);
foldl(Fd, PosList, Md5, Fun, Acc) ->
    foldl(Fd, PosList, Md5, cbt_util:md5_init(), Fun, Acc).

%% @doc same as fold but decode the chunk if needed.
-spec foldl_decode(Fd::cbt_stream(), PosList::[integer()], Md5::binary(),
                   Encoding::gzip | identity, Fun::fun(), Acc::any()) -> Acc2::any().
foldl_decode(Fd, PosList, Md5, Enc, Fun, Acc) ->
    % Select the decoder pair matching the stream's encoding.
    {DecDataFun, DecEndFun} = case Enc of
        gzip ->
            ungzip_init();
        identity ->
            identity_enc_dec_funs()
    end,
    Result = foldl_decode(
        DecDataFun, Fd, PosList, Md5, cbt_util:md5_init(), Fun, Acc
    ),
    DecEndFun(),
    Result.
%% Internal checksumming fold: the Md5 signature is asserted when the
%% last chunk has been consumed (badmatch on corruption). The
%% {Pos, _Size} clauses accept the old 0.11 pointer format.
foldl(_Fd, [], Md5, Md5Acc, _Fun, Acc) ->
    Md5 = cbt_util:md5_final(Md5Acc),
    Acc;
foldl(Fd, [{Pos, _Size}], Md5, Md5Acc, Fun, Acc) -> % 0110 UPGRADE CODE
    foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc);
foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc) ->
    % Last chunk: finalize and assert the digest before applying Fun.
    {ok, Chunk} = cbt_file:pread_iolist(Fd, Pos),
    Md5 = cbt_util:md5_final(cbt_util:md5_update(Md5Acc, Chunk)),
    Fun(Chunk, Acc);
foldl(Fd, [{Pos, _Size} | Rest], Md5, Md5Acc, Fun, Acc) ->
    foldl(Fd, [Pos | Rest], Md5, Md5Acc, Fun, Acc);
foldl(Fd, [Pos | Rest], Md5, Md5Acc, Fun, Acc) ->
    {ok, Chunk} = cbt_file:pread_iolist(Fd, Pos),
    foldl(Fd, Rest, Md5, cbt_util:md5_update(Md5Acc, Chunk), Fun, Fun(Chunk, Acc)).


%% @doc retrieve all chunks in a range.
-spec range_foldl(Fd::cbt_stream(), PosList::[integer()],
                  From::integer(), To::integer(), Fun::fun(), Acc::any())
        -> Acc2::any().
range_foldl(Fd, PosList, From, To, Fun, Acc) ->
    range_foldl(Fd, PosList, From, To, 0, Fun, Acc).
%% Internal range fold: Offset tracks how many stream bytes precede the
%% current chunk; chunks wholly before From are skipped, chunks that
%% straddle the range boundaries are sliced.
range_foldl(_Fd, _PosList, _From, To, Offset, _Fun, Acc) when Offset >= To ->
    Acc;
range_foldl(Fd, [Pos | Rest], From, To, Offset, Fun, Acc) when is_integer(Pos) -> % old-style attachment
    % Old-style pointer carries no size: read once to learn it.
    {ok, Chunk} = cbt_file:pread_iolist(Fd, Pos),
    range_foldl(Fd, [{Pos, iolist_size(Chunk)}] ++ Rest, From, To, Offset, Fun, Acc);
range_foldl(Fd, [{_Pos, Size} | Rest], From, To, Offset, Fun, Acc) when From > Offset + Size ->
    % Entirely before the requested range: skip without reading.
    range_foldl(Fd, Rest, From, To, Offset + Size, Fun, Acc);
range_foldl(Fd, [{Pos, Size} | Rest], From, To, Offset, Fun, Acc) ->
    {ok, Chunk} = cbt_file:pread_iolist(Fd, Pos),
    Visible = if
        From =< Offset andalso To >= Offset + Size -> Chunk; %% the whole block is covered
        true ->
            PrefixLen = clip(From - Offset, 0, Size),
            PostfixLen = clip(Offset + Size - To, 0, Size),
            MatchLen = Size - PrefixLen - PostfixLen,
            <<_Prefix:PrefixLen/binary, Match:MatchLen/binary, _Postfix:PostfixLen/binary>> = iolist_to_binary(Chunk),
            Match
    end,
    range_foldl(Fd, Rest, From, To, Offset + Size, Fun, Fun(Visible, Acc)).

%% Constrain Value to the inclusive interval [Lo, Hi] (Lo wins when
%% both bounds would apply, matching the original check order).
clip(Value, Lo, _Hi) when Value < Lo -> Lo;
clip(Value, _Lo, Hi) when Value > Hi -> Hi;
clip(Value, _Lo, _Hi) -> Value.
%% Internal decoding fold: the MD5 is computed over the ENCODED bytes,
%% while Fun receives the decoded chunk.
foldl_decode(_DecFun, _Fd, [], Md5, Md5Acc, _Fun, Acc) ->
    Md5 = cbt_util:md5_final(Md5Acc),
    Acc;
foldl_decode(DecFun, Fd, [{Pos, _Size}], Md5, Md5Acc, Fun, Acc) ->
    foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc);
foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc) ->
    % Last chunk: assert the digest before handing the decoded data over.
    {ok, EncChunk} = cbt_file:pread_iolist(Fd, Pos),
    Md5 = cbt_util:md5_final(cbt_util:md5_update(Md5Acc, EncChunk)),
    Fun(DecFun(EncChunk), Acc);
foldl_decode(DecFun, Fd, [{Pos, _Size} | Rest], Md5, Md5Acc, Fun, Acc) ->
    foldl_decode(DecFun, Fd, [Pos | Rest], Md5, Md5Acc, Fun, Acc);
foldl_decode(DecFun, Fd, [Pos | Rest], Md5, Md5Acc, Fun, Acc) ->
    {ok, EncChunk} = cbt_file:pread_iolist(Fd, Pos),
    NextMd5Acc = cbt_util:md5_update(Md5Acc, EncChunk),
    foldl_decode(DecFun, Fd, Rest, Md5, NextMd5Acc, Fun, Fun(DecFun(EncChunk), Acc)).

%% Build a {deflate, finish} fun pair for the requested compression
%% level; levels outside 1..9 degrade to the identity pair.
gzip_init(Options) ->
    case cbt_util:get_value(compression_level, Options, 0) of
        Lvl when Lvl >= 1 andalso Lvl =< 9 ->
            Z = zlib:open(),
            % 15 = ?MAX_WBITS (defined in the zlib module)
            % the 16 + ?MAX_WBITS formula was obtained by inspecting zlib:gzip/1
            ok = zlib:deflateInit(Z, Lvl, deflated, 16 + 15, 8, default),
            {
                fun(Data) ->
                    zlib:deflate(Z, Data)
                end,
                fun() ->
                    Last = zlib:deflate(Z, [], finish),
                    ok = zlib:deflateEnd(Z),
                    ok = zlib:close(Z),
                    Last
                end
            };
        _ ->
            identity_enc_dec_funs()
    end.

%% Build an {inflate, finish} fun pair for gzip-encoded streams.
ungzip_init() ->
    Z = zlib:open(),
    zlib:inflateInit(Z, 16 + 15),
    {
        fun(Data) ->
            zlib:inflate(Z, Data)
        end,
        fun() ->
            ok = zlib:inflateEnd(Z),
            ok = zlib:close(Z)
        end
    }.

%% No-op encoder/decoder pair: data passes through untouched.
identity_enc_dec_funs() ->
    {
        fun(Data) -> Data end,
        fun() -> [] end
    }.


%% @doc write a chunk from the stream on the database file.
-spec write(Stream::cbt_stream(), Bin::binary()) -> ok | {error, term()}.
write(_Pid, <<>>) ->
    % Empty writes are a no-op; do not bother the server.
    ok;
write(Pid, Bin) ->
    gen_server:call(Pid, {write, Bin}, infinity).



%% @private
init({Fd, Options}) ->
    % Choose the encoder pair for this stream (identity or gzip).
    {EncodingFun, EndEncodingFun} =
        case cbt_util:get_value(encoding, Options, identity) of
            identity ->
                identity_enc_dec_funs();
            gzip ->
                gzip_init(Options)
        end,
    {ok, #stream{
        fd = Fd,
        md5 = cbt_util:md5_init(),
        identity_md5 = cbt_util:md5_init(),
        encoding_fun = EncodingFun,
        end_encoding_fun = EndEncodingFun,
        max_buffer = cbt_util:get_value(
            buffer_size, Options, ?DEFAULT_BUFFER_SIZE)
    }}.

%% @private
terminate(_Reason, _Stream) ->
    ok.

%% @private
handle_call({write, Bin}, _From, Stream) ->
    BinSize = iolist_size(Bin),
    #stream{
        fd = Fd,
        written_len = WrittenLen,
        written_pointers = Written,
        buffer_len = BufferLen,
        buffer_list = Buffer,
        max_buffer = Max,
        md5 = Md5,
        identity_md5 = IdenMd5,
        identity_len = IdenLen,
        encoding_fun = EncodingFun} = Stream,
    if BinSize + BufferLen > Max ->
        % Buffer is full: encode and flush it to the backing file.
        WriteBin = lists:reverse(Buffer, [Bin]),
        IdenMd5_2 = cbt_util:md5_update(IdenMd5, WriteBin),
        {WrittenLen2, Md5_2, Written2} = case EncodingFun(WriteBin) of
            [] ->
                % case where the encoder did some internal buffering
                % (zlib does it for example)
                {WrittenLen, Md5, Written};
            WriteBin2 ->
                {ok, Pos, _} = cbt_file:append_binary(Fd, WriteBin2),
                WrittenLen1 = WrittenLen + iolist_size(WriteBin2),
                Md5_1 = cbt_util:md5_update(Md5, WriteBin2),
                Written1 = [{Pos, iolist_size(WriteBin2)} | Written],
                {WrittenLen1, Md5_1, Written1}
        end,

        {reply, ok, Stream#stream{
            written_len = WrittenLen2,
            written_pointers = Written2,
            buffer_list = [],
            buffer_len = 0,
            md5 = Md5_2,
            identity_md5 = IdenMd5_2,
            identity_len = IdenLen + BinSize}};
    true ->
        % Still below the buffer limit: just accumulate.
        {reply, ok, Stream#stream{
            buffer_list = [Bin | Buffer],
            buffer_len = BufferLen + BinSize,
            identity_len = IdenLen + BinSize}}
    end;
handle_call(close, _From, Stream) ->
    #stream{
        fd = Fd,
        written_len = WrittenLen,
        written_pointers = Written,
        buffer_list = Buffer,
        md5 = Md5,
        identity_md5 = IdenMd5,
        identity_len = IdenLen,
        encoding_fun = EncodingFun,
        end_encoding_fun = EndEncodingFun} = Stream,

    % Flush whatever is buffered plus the encoder's trailing bytes,
    % then hand back pointers, lengths and both digests.
    WriteBin = lists:reverse(Buffer),
    IdenMd5Final = cbt_util:md5_final(cbt_util:md5_update(IdenMd5, WriteBin)),
    WriteBin2 = EncodingFun(WriteBin) ++ EndEncodingFun(),
    Md5Final = cbt_util:md5_final(cbt_util:md5_update(Md5, WriteBin2)),
    Result = case WriteBin2 of
        [] ->
            {lists:reverse(Written), WrittenLen, IdenLen, Md5Final, IdenMd5Final};
        _ ->
            {ok, Pos, _} = cbt_file:append_binary(Fd, WriteBin2),
            StreamInfo = lists:reverse(Written, [{Pos, iolist_size(WriteBin2)}]),
            StreamLen = WrittenLen + iolist_size(WriteBin2),
            {StreamInfo, StreamLen, IdenLen, Md5Final, IdenMd5Final}
    end,
    {stop, normal, Result, Stream}.

%% @private
handle_cast(_Msg, State) ->
    {noreply, State}.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% @private
handle_info(_Info, State) ->
    {noreply, State}.
-------------------------------------------------------------------------------- /src/cbt_util.erl: --------------------------------------------------------------------------------
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License.
% You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(cbt_util).

-export([should_flush/0, should_flush/1]).
-export([rand32/0, implode/2]).
-export([encodeBase64Url/1, decodeBase64Url/1]).
-export([get_value/2, get_value/3]).
-export([md5/1, md5_init/0, md5_update/2, md5_final/1]).
-export([reorder_results/2]).
-export([ensure_all_started/1, ensure_all_started/2]).
-export([uniqid/0]).
-export([get_opt/2, get_opt/3]).

% Older OTP releases keep md5 primitives in dedicated crypto functions;
% newer ones expose them through the generic hash API.
-ifdef(crypto_compat).
-define(MD5(Data), crypto:md5(Data)).
-define(MD5_INIT(), crypto:md5_init()).
-define(MD5_UPDATE(Ctx, Data), crypto:md5_update(Ctx, Data)).
-define(MD5_FINAL(Ctx), crypto:md5_final(Ctx)).
-else.
-define(MD5(Data), crypto:hash(md5, Data)).
-define(MD5_INIT(), crypto:hash_init(md5)).
-define(MD5_UPDATE(Ctx, Data), crypto:hash_update(Ctx, Data)).
-define(MD5_FINAL(Ctx), crypto:hash_final(Ctx)).
-endif.

-include("cbt.hrl").

% arbitrarily chosen amount of memory to use before flushing to disk
-define(FLUSH_MAX_MEM, 10000000).

-spec ensure_all_started(Application) -> {'ok', Started} | {'error', Reason} when
      Application :: atom(),
      Started :: [atom()],
      Reason :: term().
ensure_all_started(Application) ->
    ensure_all_started(Application, temporary).
-spec ensure_all_started(Application, Type) -> {'ok', Started} | {'error', Reason} when
      Application :: atom(),
      Type :: 'permanent' | 'transient' | 'temporary',
      Started :: [atom()],
      Reason :: term().
ensure_all_started(Application, Type) ->
    % On failure, stop everything that was started along the way so the
    % system is left in its original state.
    case ensure_all_started(Application, Type, []) of
        {ok, Started} ->
            {ok, lists:reverse(Started)};
        {error, Reason, Started} ->
            [application:stop(App) || App <- Started],
            {error, Reason}
    end.

%% Start Application and, recursively, any dependency it reports as
%% not started; Started accumulates apps in reverse start order.
ensure_all_started(Application, Type, Started) ->
    case application:start(Application, Type) of
        ok ->
            {ok, [Application | Started]};
        {error, {already_started, Application}} ->
            {ok, Started};
        {error, {not_started, Dependency}} ->
            case ensure_all_started(Dependency, Type, Started) of
                {ok, NewStarted} ->
                    ensure_all_started(Application, Type, NewStarted);
                Error ->
                    Error
            end;
        {error, Reason} ->
            {error, {Application, Reason}, Started}
    end.


get_value(Key, List) ->
    get_value(Key, List, undefined).

%% Look up Key in a list of {Key, Value} pairs; Default when absent.
get_value(Key, List, Default) ->
    case lists:keyfind(Key, 1, List) of
        {Key, Value} ->
            Value;
        false ->
            Default
    end.

% returns a random integer
rand32() ->
    rand:uniform(16#100000000).

%% Join the elements of List with Sep, flattening the result.
implode(List, Sep) ->
    implode(List, Sep, []).

implode([], _Sep, Acc) ->
    lists:flatten(lists:reverse(Acc));
implode([Last], Sep, Acc) ->
    % No separator after the final element.
    implode([], Sep, [Last | Acc]);
implode([H | T], Sep, Acc) ->
    implode(T, Sep, [Sep, H | Acc]).

should_flush() ->
    should_flush(?FLUSH_MAX_MEM).
%% True when this process (heap + referenced binaries) exceeds the
%% threshold even after a garbage collection.
should_flush(MemThreshHold) ->
    {memory, ProcMem} = process_info(self(), memory),
    BinMem = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size + Acc end,
                         0, element(2, process_info(self(), binary))),
    if ProcMem + BinMem > 2 * MemThreshHold ->
           % Over twice the limit: collect garbage and re-measure before
           % deciding.
           garbage_collect(),
           {memory, ProcMem2} = process_info(self(), memory),
           BinMem2 = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size + Acc end,
                                 0, element(2, process_info(self(), binary))),
           ProcMem2 + BinMem2 > MemThreshHold;
       true ->
           false
    end.

%% URL-safe base64: strip '=' padding, '/'->'_', '+'->'-'.
encodeBase64Url(Url) ->
    Url1 = re:replace(base64:encode(Url), ["=+", $$], ""),
    Url2 = re:replace(Url1, "/", "_", [global]),
    re:replace(Url2, "\\+", "-", [global, {return, binary}]).

%% Inverse of encodeBase64Url/1: restore the alphabet and re-pad to a
%% multiple of four before decoding.
decodeBase64Url(Url64) ->
    Url1 = re:replace(Url64, "-", "+", [global]),
    Url2 = re:replace(Url1, "_", "/", [global]),
    Padding = lists:duplicate((4 - iolist_size(Url2) rem 4) rem 4, $=),
    base64:decode(iolist_to_binary([Url2, Padding])).

-type hash() :: 'md4' | 'md5' | 'ripemd160' | 'sha' | 'sha224' | 'sha256' | 'sha384' | 'sha512'.
-type hash_ctx() :: {hash(), binary()}.
-type hash_data() :: binary() | maybe_improper_list(binary() |
    maybe_improper_list(any(), binary() | []) | byte(), binary() | []).

%% Thin wrappers around the version-dependent crypto md5 macros.
-spec md5(Data::hash_data()) -> Digest::binary().
md5(Data) ->
    ?MD5(Data).

-spec md5_init() -> Context::hash_ctx().
md5_init() ->
    ?MD5_INIT().

-spec md5_update(Context::hash_ctx(), Data::hash_data()) ->
          NewContext::hash_ctx().
md5_update(Ctx, D) ->
    ?MD5_UPDATE(Ctx, D).

-spec md5_final(Context::hash_ctx()) -> Digest::binary().
md5_final(Ctx) ->
    ?MD5_FINAL(Ctx).
% linear search is faster for small lists, length() is 0.5 ms for 100k list
%% @doc Reorder `SortedResults' ({Key, Value} pairs) so the values follow
%% the order of `Keys'. For large inputs every key in `Keys' must be
%% present in `SortedResults' or the lookup crashes (badkey).
reorder_results(Keys, SortedResults) when length(Keys) < 100 ->
    [cbt_util:get_value(Key, SortedResults) || Key <- Keys];
reorder_results(Keys, SortedResults) ->
    %% maps replaces the legacy dict module: maps:from_list/1 has the same
    %% last-key-wins behaviour as dict:from_list/1, and maps:get/2 crashes
    %% on a missing key exactly as dict:fetch/2 did.
    KeyMap = maps:from_list(SortedResults),
    [maps:get(Key, KeyMap) || Key <- Keys].

%% @doc Short identifier string, unique per created reference on this node.
uniqid() ->
    integer_to_list(erlang:phash2(make_ref())).


%% @doc Fetch option `Key' from the proplist `Opts', falling back to the
%% application environment, then `undefined'.
get_opt(Key, Opts) ->
    get_opt(Key, Opts, undefined).

%% @doc Same as get_opt/2 but with an explicit `Default'.
get_opt(Key, Opts, Default) ->
    case proplists:get_value(Key, Opts) of
        undefined ->
            %% Not in Opts: consult the app env of this module's application.
            case application:get_env(?MODULE, Key) of
                {ok, Value} -> Value;
                undefined -> Default
            end;
        Value ->
            Value
    end.


%% =====================================================================
%% file: test/cbt_btree_copy_tests.erl
%% =====================================================================

-module(cbt_btree_copy_tests).

-include_lib("include/cbt.hrl").
-include("cbt_tests.hrl").


%% foreachx setup: a counting reduce fun plus fresh source/destination
%% files for the copy under test.
setup_copy(_) ->
    ReduceFun = fun(reduce, KVs) ->
            length(KVs);
        (rereduce, Reds) ->
            lists:sum(Reds)
    end,

    OriginalFileName = ?tempfile(),
    CopyFileName = OriginalFileName ++ ".copy",
    {ok, Fd} = cbt_file:open(OriginalFileName, [create, overwrite]),
    {ok, FdCopy} = cbt_file:open(CopyFileName, [create, overwrite]),
    {ReduceFun, OriginalFileName, CopyFileName, Fd, FdCopy}.

%% foreachx teardown: close and delete both files.
teardown(_, {_, OriginalFileName, CopyFileName, Fd, FdCopy}) ->
    ok = cbt_file:close(Fd),
    ok = cbt_file:close(FdCopy),
    ok = file:delete(OriginalFileName),
    ok = file:delete(CopyFileName).
%% Item counts exercised by the copy tests: small trees (kv-node root)
%% through large multi-level kp trees.
btree_copy_test_() ->
    TNumItems = [50, 100, 300, 700, 811, 2333, 6594, 9999, 15003, 21477,
        38888, 66069, 150123, 420789, 711321],
    {
        "Copy BTree",
        {
            foreachx,
            fun setup_copy/1, fun teardown/2,
            [{N, fun should_copy_btree/2} || N <- TNumItems]
        }
    }.

btree_copy_compressed_test_() ->
    TNumItems = [50, 100, 300, 700, 811, 2333, 6594, 9999, 15003, 21477,
        38888, 66069, 150123, 420789, 711321],
    {
        "Copy Compressed BTree",
        {
            foreachx,
            fun setup_copy/1, fun teardown/2,
            [{N, fun should_copy_compressed_btree/2} || N <- TNumItems]
        }
    }.

%% Copy an uncompressed btree and verify the before_kv_write callback
%% accumulator, the copied reduction and the fold order of the copy.
should_copy_btree(NumItems, {ReduceFun, _OriginalFileName, _CopyFileName,
        Fd, FdCopy}) ->
    KVs = [{I, I} || I <- lists:seq(1, NumItems)],
    {ok, Btree} = make_btree(Fd, KVs, ReduceFun),

    {_, Red, _} = cbt_btree:get_state(Btree),

    CopyCallback = fun(KV, Acc) -> {KV, Acc + 1} end,
    {ok, RootCopy, FinalAcc} = cbt_btree_copy:copy(
        Btree, FdCopy, [{before_kv_write, {CopyCallback, 0}}]),

    %% assertEqual rather than assertMatch: FinalAcc is already bound, so
    %% this is an equality check and should report both values on failure.
    ?assertEqual(length(KVs), FinalAcc),

    {ok, BtreeCopy} = cbt_btree:open(
        RootCopy, FdCopy, [{compression, none}, {reduce, ReduceFun}]),

    %% check copy
    {_, RedCopy, _} = cbt_btree:get_state(BtreeCopy),
    ?assertEqual(Red, RedCopy),
    {ok, _, CopyKVs} = cbt_btree:fold(
        BtreeCopy,
        fun(KV, _, Acc) -> {ok, [KV | Acc]} end,
        [], []),
    ?_assertEqual(KVs, lists:reverse(CopyKVs)).

%% Same as should_copy_btree/2 but with snappy compression on both the
%% source and the copy.
should_copy_compressed_btree(NumItems, {ReduceFun, _OriginalFileName,
        _CopyFileName, Fd, FdCopy}) ->

    KVs = [{I, I} || I <- lists:seq(1, NumItems)],
    {ok, Btree} = make_btree(Fd, KVs, ReduceFun, snappy),

    {_, Red, _} = cbt_btree:get_state(Btree),

    CopyCallback = fun(KV, Acc) -> {KV, Acc + 1} end,
    {ok, RootCopy, FinalAcc} = cbt_btree_copy:copy(
        Btree, FdCopy, [{before_kv_write, {CopyCallback, 0}}]),

    ?assertEqual(length(KVs), FinalAcc),

    {ok, BtreeCopy} = cbt_btree:open(
        RootCopy, FdCopy, [{compression, snappy}, {reduce, ReduceFun}]),

    %% check copy
    {_, RedCopy, _} = cbt_btree:get_state(BtreeCopy),
    ?assertEqual(Red, RedCopy),
    {ok, _, CopyKVs} = cbt_btree:fold(
        BtreeCopy,
        fun(KV, _, Acc) -> {ok, [KV | Acc]} end,
        [], []),
    ?_assertEqual(KVs, lists:reverse(CopyKVs)).




%% Build a btree on `Fd' containing `KVs', check its reduction equals the
%% item count, and sync the file before handing the tree back.
make_btree(Fd, KVs, ReduceFun) ->
    make_btree(Fd, KVs, ReduceFun, none).

make_btree(Fd, KVs, ReduceFun, Compression) ->

    {ok, Btree} = cbt_btree:open(nil, Fd, [{compression, Compression},
        {reduce, ReduceFun}]),
    {ok, Btree2} = cbt_btree:add_remove(Btree, KVs, []),
    {_, Red, _} = cbt_btree:get_state(Btree2),
    ?assertEqual(length(KVs), Red),
    ok = cbt_file:sync(Fd),
    {ok, Btree2}.

%% =====================================================================
%% file: test/cbt_btree_ramfile_tests.erl
%% =====================================================================

% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License.
% You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(cbt_btree_ramfile_tests).

-include_lib("include/cbt.hrl").
-include("cbt_tests.hrl").

-define(ROWS, 1000).


%% Open a RAM-backed file and an empty btree on top of it.
setup() ->
    {ok, Fd} = cbt_ramfile:open(?tempfile()),
    {ok, Btree} = cbt_btree:open(nil, Fd, [{backend, cbt_ramfile},
                                           {compression, none},
                                           {reduce, fun reduce_fun/2}]),
    {Fd, Btree}.

setup_kvs(_) ->
    setup().

%% Populate a btree with ?ROWS rows keyed alternately {"even", N} /
%% {"odd", N} (value 1 each) so reduce tests can group by the tag.
setup_red() ->
    {_, EvenOddKVs} = lists:foldl(
        fun(Idx, {Key, Acc}) ->
            case Key of
                "even" -> {"odd", [{{Key, Idx}, 1} | Acc]};
                _ -> {"even", [{{Key, Idx}, 1} | Acc]}
            end
        end, {"odd", []}, lists:seq(1, ?ROWS)),
    {Fd, Btree} = setup(),
    {ok, Btree1} = cbt_btree:add_remove(Btree, EvenOddKVs, []),
    {Fd, Btree1}.
setup_red(_) ->
    setup_red().

%% NOTE(review): closes via cbt_file even though setup/0 opened the fd
%% with cbt_ramfile — confirm cbt_file:close/1 is valid for ramfile fds.
teardown(Fd) when is_pid(Fd) ->
    ok = cbt_file:close(Fd);
teardown({Fd, _}) ->
    teardown(Fd).
teardown(_, {Fd, _}) ->
    teardown(Fd).


%% Per-key-order battery run by the *_kvs_test_ generators below.
kvs_test_funs() ->
    [
        fun should_set_fd_correctly/2,
        fun should_set_root_correctly/2,
        fun should_create_zero_sized_btree/2,
        fun should_set_reduce_option/2,
        fun should_fold_over_empty_btree/2,
        fun should_add_all_keys/2,
        fun should_continuously_add_new_kv/2,
        fun should_continuously_remove_keys/2,
        fun should_insert_keys_in_reversed_order/2,
        fun should_add_every_odd_key_remove_every_even/2,
        fun should_add_every_even_key_remove_every_old/2
    ].

red_test_funs() ->
    [
        fun should_reduce_whole_range/2,
        fun should_reduce_first_half/2,
        fun should_reduce_second_half/2
    ].


%% NOTE(review): opens with cbt_file in the ramfile suite (likely copied
%% from cbt_btree_tests) — confirm this is intentional.
btree_open_test_() ->
    {ok, Fd} = cbt_file:open(?tempfile(), [create, overwrite]),
    {ok, Btree} = cbt_btree:open(nil, Fd, [{compression, none}]),
    {
        "Ensure that created btree is really a btree record",
        ?_assert(is_record(Btree, btree))
    }.

sorted_kvs_test_() ->
    Funs = kvs_test_funs(),
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    {
        "BTree with sorted keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Sorted, Fun} || Fun <- Funs]
        }
    }.

rsorted_kvs_test_() ->
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    Funs = kvs_test_funs(),
    %% BUG FIX: this previously read `Reversed = Sorted`, so the
    %% "backward sorted keys" suite never actually reversed the keys and
    %% just duplicated sorted_kvs_test_.
    Reversed = lists:reverse(Sorted),
    {
        "BTree with backward sorted keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Reversed, Fun} || Fun <- Funs]
        }
    }.

shuffled_kvs_test_() ->
    Funs = kvs_test_funs(),
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    Shuffled = shuffle(Sorted),
    {
        "BTree with shuffled keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Shuffled, Fun} || Fun <- Funs]
        }
    }.
%% Reduce tests: whole-tree cases plus ranged requests in both directions.
reductions_test_() ->
    {
        "BTree reductions",
        [
            {
                "Common tests",
                {
                    foreach,
                    fun setup_red/0, fun teardown/1,
                    [
                        fun should_reduce_without_specified_direction/1,
                        fun should_reduce_forward/1,
                        fun should_reduce_backward/1
                    ]
                }
            },
            {
                "Range requests",
                [
                    {
                        "Forward direction",
                        {
                            foreachx,
                            fun setup_red/1, fun teardown/2,
                            [{fwd, F} || F <- red_test_funs()]
                        }
                    },
                    {
                        "Backward direction",
                        {
                            foreachx,
                            fun setup_red/1, fun teardown/2,
                            [{rev, F} || F <- red_test_funs()]
                        }
                    }
                ]
            }
        ]
    }.


should_set_fd_correctly(_, {Fd, Btree}) ->
    ?_assertMatch(Fd, Btree#btree.ref).

should_set_root_correctly(_, {_, Btree}) ->
    ?_assertMatch(nil, Btree#btree.root).

should_create_zero_sized_btree(_, {_, Btree}) ->
    ?_assertMatch(0, cbt_btree:size(Btree)).

should_set_reduce_option(_, {_, Btree}) ->
    ReduceFun = fun reduce_fun/2,
    Btree1 = cbt_btree:set_options(Btree, [{reduce, ReduceFun}]),
    ?_assertMatch(ReduceFun, Btree1#btree.reduce).

should_fold_over_empty_btree(_, {_, Btree}) ->
    {ok, _, EmptyRes} = cbt_btree:fold(Btree, fun(_, X) -> {ok, X+1} end, 0),
    %% eunit convention: expected value first (was reversed).
    ?_assertEqual(0, EmptyRes).

%% Bulk-load all keys, then run the size/validity sub-checks on the result.
should_add_all_keys(KeyValues, {Fd, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    [
        should_return_complete_btree_on_adding_all_keys(KeyValues, Btree1),
        should_have_non_zero_size(Btree1),
        should_have_lesser_size_than_file(Fd, Btree1),
        should_keep_root_pointer_to_kp_node(Fd, Btree1),
        should_remove_all_keys(KeyValues, Btree1)
    ].

should_return_complete_btree_on_adding_all_keys(KeyValues, Btree) ->
    ?_assert(test_btree(Btree, KeyValues)).

should_have_non_zero_size(Btree) ->
    ?_assert(cbt_btree:size(Btree) > 0).

should_have_lesser_size_than_file(Fd, Btree) ->
    ?_assert((cbt_btree:size(Btree) =< cbt_file:bytes(Fd))).

should_keep_root_pointer_to_kp_node(Fd, Btree) ->
    ?_assertMatch({ok, {kp_node, _}},
        cbt_file:pread_term(Fd, element(1, Btree#btree.root))).

should_remove_all_keys(KeyValues, Btree) ->
    Keys = keys(KeyValues),
    {ok, Btree1} = cbt_btree:add_remove(Btree, [], Keys),
    {
        "Should remove all the keys",
        [
            should_produce_valid_btree(Btree1, []),
            should_be_empty(Btree1)
        ]
    }.

%% Add one kv at a time; on-disk size must grow monotonically.
should_continuously_add_new_kv(KeyValues, {_, Btree}) ->
    {Btree1, _} = lists:foldl(
        fun(KV, {BtAcc, PrevSize}) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            ?assert(cbt_btree:size(BtAcc2) > PrevSize),
            {BtAcc2, cbt_btree:size(BtAcc2)}
        end, {Btree, cbt_btree:size(Btree)}, KeyValues),
    {
        "Should continuously add key-values to btree",
        [
            should_produce_valid_btree(Btree1, KeyValues),
            should_not_be_empty(Btree1)
        ]
    }.

%% Remove one key at a time; size must shrink monotonically.
should_continuously_remove_keys(KeyValues, {_, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    {Btree2, _} = lists:foldl(
        fun({K, _}, {BtAcc, PrevSize}) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [], [K]),
            ?assert(cbt_btree:size(BtAcc2) < PrevSize),
            {BtAcc2, cbt_btree:size(BtAcc2)}
        end, {Btree1, cbt_btree:size(Btree1)}, KeyValues),
    {
        "Should continuously remove keys from btree",
        [
            should_produce_valid_btree(Btree2, []),
            should_be_empty(Btree2)
        ]
    }.

should_insert_keys_in_reversed_order(KeyValues, {_, Btree}) ->
    KeyValuesRev = lists:reverse(KeyValues),
    {Btree1, _} = lists:foldl(
        fun(KV, {BtAcc, PrevSize}) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            ?assert(cbt_btree:size(BtAcc2) > PrevSize),
            {BtAcc2, cbt_btree:size(BtAcc2)}
        end, {Btree, cbt_btree:size(Btree)}, KeyValuesRev),
    should_produce_valid_btree(Btree1, KeyValues).

%% Split KeyValues by even/odd position, then exercise remove+re-add of
%% one half while the other stays in place.
should_add_every_odd_key_remove_every_even(KeyValues, {_, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) ->
        case Count rem 2 == 0 of
            true -> {Count + 1, [X | Left], Right};
            false -> {Count + 1, Left, [X | Right]}
        end
    end, {0, [], []}, KeyValues),
    ?_assert(test_add_remove(Btree1, Rem2Keys0, Rem2Keys1)).

should_add_every_even_key_remove_every_old(KeyValues, {_, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) ->
        case Count rem 2 == 0 of
            true -> {Count + 1, [X | Left], Right};
            false -> {Count + 1, Left, [X | Right]}
        end
    end, {0, [], []}, KeyValues),
    ?_assert(test_add_remove(Btree1, Rem2Keys1, Rem2Keys0)).


%% ?ROWS rows split evenly between "even" and "odd" groups (setup_red/0).
should_reduce_without_specified_direction({_, Btree}) ->
    ?_assertMatch(
        {ok, [{{"odd", _}, ?ROWS div 2}, {{"even", _}, ?ROWS div 2}]},
        fold_reduce(Btree, [])).

should_reduce_forward({_, Btree}) ->
    ?_assertMatch(
        {ok, [{{"odd", _}, ?ROWS div 2}, {{"even", _}, ?ROWS div 2}]},
        fold_reduce(Btree, [{dir, fwd}])).

should_reduce_backward({_, Btree}) ->
    ?_assertMatch(
        {ok, [{{"even", _}, ?ROWS div 2}, {{"odd", _}, ?ROWS div 2}]},
        fold_reduce(Btree, [{dir, rev}])).
%% Ranged reductions over the full key range, in each direction, with the
%% end key included (end_key) and excluded (end_key_gt).
should_reduce_whole_range(fwd, {_, Btree}) ->
    {SK, EK} = {{"even", 0}, {"odd", ?ROWS - 1}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, ?ROWS div 2},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, (?ROWS div 2) - 1},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ];
should_reduce_whole_range(rev, {_, Btree}) ->
    {SK, EK} = {{"odd", ?ROWS - 1}, {"even", 2}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, ?ROWS div 2},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, (?ROWS div 2) - 1},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ].

%% Same as above but the range covers only the first half of the rows.
should_reduce_first_half(fwd, {_, Btree}) ->
    {SK, EK} = {{"even", 0}, {"odd", (?ROWS div 2) - 1}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, ?ROWS div 4},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK}, {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, (?ROWS div 4) - 1},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ];
should_reduce_first_half(rev, {_, Btree}) ->
    {SK, EK} = {{"odd", ?ROWS - 1}, {"even", ?ROWS div 2}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, (?ROWS div 4) + 1},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, ?ROWS div 4},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ].

%% Same as above but the range covers only the second half of the rows.
should_reduce_second_half(fwd, {_, Btree}) ->
    {SK, EK} = {{"even", ?ROWS div 2}, {"odd", ?ROWS - 1}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, ?ROWS div 2},
                      {{"even", ?ROWS div 2}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, (?ROWS div 2) - 1},
                      {{"even", ?ROWS div 2}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ];
should_reduce_second_half(rev, {_, Btree}) ->
    {SK, EK} = {{"odd", (?ROWS div 2) + 1}, {"even", 2}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, ?ROWS div 2},
                      {{"odd", (?ROWS div 2) + 1}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, (?ROWS div 2) - 1},
                      {{"odd", (?ROWS div 2) + 1}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ].

should_produce_valid_btree(Btree, KeyValues) ->
    ?_assert(test_btree(Btree, KeyValues)).

should_be_empty(Btree) ->
    %% eunit convention: expected value first (was reversed).
    ?_assertEqual(0, cbt_btree:size(Btree)).

should_not_be_empty(Btree) ->
    ?_assert(cbt_btree:size(Btree) > 0).

%% Group-reduce over Btree, prepending one {GroupedKey, Reduction} per
%% group — results therefore come back in reverse visit order.
fold_reduce(Btree, Opts) ->
    GroupFun = fun({K1, _}, {K2, _}) ->
        K1 == K2
    end,
    FoldFun = fun(GroupedKey, Unreduced, Acc) ->
        {ok, [{GroupedKey, cbt_btree:final_reduce(Btree, Unreduced)} | Acc]}
    end,
    cbt_btree:fold_reduce(Btree, FoldFun, [],
        [{key_group_fun, GroupFun}] ++ Opts).


keys(KVs) ->
    [K || {K, _} <- KVs].
%% Counting reduce used by the fixtures: reduce counts kvs, rereduce sums
%% the partial counts.
reduce_fun(reduce, KVs) ->
    length(KVs);
reduce_fun(rereduce, Reds) ->
    lists:sum(Reds).


%% Shuffle by repeated random-keysort passes; the pass count grows with
%% log(length(List)) to improve mixing.
shuffle(List) ->
    randomize(round(math:log(length(List)) + 0.5), List).

randomize(1, List) ->
    randomize(List);
randomize(T, List) ->
    lists:foldl(
        fun(_E, Acc) ->
            randomize(Acc)
        end, randomize(List), lists:seq(1, (T - 1))).

%% One shuffle pass: tag with random floats, sort on the tags, drop them.
randomize(List) ->
    D = lists:map(fun(A) -> {rand:uniform(), A} end, List),
    {_, D1} = lists:unzip(lists:keysort(1, D)),
    D1.

%% Run the whole battery of invariants against Btree and KeyValues.
test_btree(Btree, KeyValues) ->
    ok = test_key_access(Btree, KeyValues),
    ok = test_lookup_access(Btree, KeyValues),
    ok = test_final_reductions(Btree, KeyValues),
    ok = test_traversal_callbacks(Btree, KeyValues),
    true.

%% Remove OutKeyValues, verify the remainder, re-add them, verify again.
test_add_remove(Btree, OutKeyValues, RemainingKeyValues) ->
    Btree2 = lists:foldl(
        fun({K, _}, BtAcc) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [], [K]),
            BtAcc2
        end, Btree, OutKeyValues),
    true = test_btree(Btree2, RemainingKeyValues),

    Btree3 = lists:foldl(
        fun(KV, BtAcc) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            BtAcc2
        end, Btree2, OutKeyValues),
    true = test_btree(Btree3, OutKeyValues ++ RemainingKeyValues).

%% Fold in both directions and check every visited element against the
%% expected ordering, by position.
%% BUG FIX: the old fold fun compared `Element == HAcc' but returned the
%% same result in both branches, so a mismatching element was never
%% detected (and the rev fold was fed forward-sorted expectations, which
%% only "passed" because of that no-op check). Matching the head in the
%% fun head makes any mismatch crash with function_clause, failing the
%% test as intended; the rev fold now checks against the reversed order.
test_key_access(Btree, List) ->
    FoldFun = fun(Element, {[Element | TAcc], Count}) ->
        {ok, {TAcc, Count + 1}}
    end,
    Length = length(List),
    Sorted = lists:sort(List),
    {ok, _, {[], Length}} = cbt_btree:fold(Btree, FoldFun, {Sorted, 0}),
    {ok, _, {[], Length}} = cbt_btree:fold(Btree, FoldFun,
        {lists:reverse(Sorted), 0}, [{dir, rev}]),
    ok.

%% Every key must be found by lookup/2 and be the first element of a fold
%% started at that key.
test_lookup_access(Btree, KeyValues) ->
    FoldFun = fun({Key, Value}, {Key, Value}) -> {stop, true} end,
    lists:foreach(
        fun({Key, Value}) ->
            [{ok, {Key, Value}}] = cbt_btree:lookup(Btree, [Key]),
            {ok, _, true} = cbt_btree:fold(Btree, FoldFun,
                {Key, Value}, [{start_key, Key}])
        end, KeyValues).

%% Start folds a third of the way in from each end and check the leading
%% reductions handed to the fold fun count the skipped elements.
test_final_reductions(Btree, KeyValues) ->
    KVLen = length(KeyValues),
    FoldLFun = fun(_X, LeadingReds, Acc) ->
        CountToStart = KVLen div 3 + Acc,
        CountToStart = cbt_btree:final_reduce(Btree, LeadingReds),
        {ok, Acc + 1}
    end,
    FoldRFun = fun(_X, LeadingReds, Acc) ->
        CountToEnd = KVLen - KVLen div 3 + Acc,
        CountToEnd = cbt_btree:final_reduce(Btree, LeadingReds),
        {ok, Acc + 1}
    end,
    {LStartKey, _} = case KVLen of
        0 -> {nil, nil};
        _ -> lists:nth(KVLen div 3 + 1, lists:sort(KeyValues))
    end,
    {RStartKey, _} = case KVLen of
        0 -> {nil, nil};
        _ -> lists:nth(KVLen div 3, lists:sort(KeyValues))
    end,
    {ok, _, FoldLRed} = cbt_btree:fold(Btree, FoldLFun, 0,
        [{start_key, LStartKey}]),
    {ok, _, FoldRRed} = cbt_btree:fold(Btree, FoldRFun, 0,
        [{dir, rev}, {start_key, RStartKey}]),
    KVLen = FoldLRed + FoldRRed,
    ok.

test_traversal_callbacks(Btree, _KeyValues) ->
    FoldFun = fun
        (visit, _GroupedKey, _Unreduced, Acc) ->
            {ok, Acc andalso false};
        (traverse, _LK, _Red, Acc) ->
            {skip, Acc andalso true}
    end,
    % With 250 items the root is a kp. Always skipping should reduce to true.
    {ok, _, true} = cbt_btree:fold(Btree, FoldFun, true, [{dir, fwd}]),
    ok.
%% =====================================================================
%% file: test/cbt_btree_tests.erl
%% =====================================================================

% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(cbt_btree_tests).

-include_lib("include/cbt.hrl").
-include("cbt_tests.hrl").

-define(ROWS, 1000).


%% Open a fresh disk-backed file and an empty btree on top of it.
setup() ->
    {ok, Fd} = cbt_file:open(?tempfile(), [create, overwrite]),
    {ok, Btree} = cbt_btree:open(nil, Fd, [{compression, none},
                                           {reduce, fun reduce_fun/2}]),
    {Fd, Btree}.

setup_kvs(_) ->
    setup().

%% Populate a btree with ?ROWS rows keyed alternately {"even", N} /
%% {"odd", N} (value 1 each) so reduce tests can group by the tag.
setup_red() ->
    {_, EvenOddKVs} = lists:foldl(
        fun(Idx, {Key, Acc}) ->
            case Key of
                "even" -> {"odd", [{{Key, Idx}, 1} | Acc]};
                _ -> {"even", [{{Key, Idx}, 1} | Acc]}
            end
        end, {"odd", []}, lists:seq(1, ?ROWS)),
    {Fd, Btree} = setup(),
    {ok, Btree1} = cbt_btree:add_remove(Btree, EvenOddKVs, []),
    {Fd, Btree1}.
setup_red(_) ->
    setup_red().

teardown(Fd) when is_pid(Fd) ->
    ok = cbt_file:close(Fd);
teardown({Fd, _}) ->
    teardown(Fd).
teardown(_, {Fd, _}) ->
    teardown(Fd).


%% Per-key-order battery run by the *_kvs_test_ generators below.
kvs_test_funs() ->
    [
        fun should_set_fd_correctly/2,
        fun should_set_root_correctly/2,
        fun should_create_zero_sized_btree/2,
        fun should_set_reduce_option/2,
        fun should_fold_over_empty_btree/2,
        fun should_add_all_keys/2,
        fun should_continuously_add_new_kv/2,
        fun should_continuously_remove_keys/2,
        fun should_insert_keys_in_reversed_order/2,
        fun should_add_every_odd_key_remove_every_even/2,
        fun should_add_every_even_key_remove_every_old/2
    ].

red_test_funs() ->
    [
        fun should_reduce_whole_range/2,
        fun should_reduce_first_half/2,
        fun should_reduce_second_half/2
    ].


btree_open_test_() ->
    {ok, Fd} = cbt_file:open(?tempfile(), [create, overwrite]),
    {ok, Btree} = cbt_btree:open(nil, Fd, [{compression, none}]),
    {
        "Ensure that created btree is really a btree record",
        ?_assert(is_record(Btree, btree))
    }.

sorted_kvs_test_() ->
    Funs = kvs_test_funs(),
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    {
        "BTree with sorted keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Sorted, Fun} || Fun <- Funs]
        }
    }.

rsorted_kvs_test_() ->
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    Funs = kvs_test_funs(),
    %% BUG FIX: this previously read `Reversed = Sorted`, so the
    %% "backward sorted keys" suite never actually reversed the keys and
    %% just duplicated sorted_kvs_test_.
    Reversed = lists:reverse(Sorted),
    {
        "BTree with backward sorted keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Reversed, Fun} || Fun <- Funs]
        }
    }.

shuffled_kvs_test_() ->
    Funs = kvs_test_funs(),
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    Shuffled = shuffle(Sorted),
    {
        "BTree with shuffled keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Shuffled, Fun} || Fun <- Funs]
        }
    }.
%% Reduce tests: whole-tree cases plus ranged requests in both directions.
reductions_test_() ->
    {
        "BTree reductions",
        [
            {
                "Common tests",
                {
                    foreach,
                    fun setup_red/0, fun teardown/1,
                    [
                        fun should_reduce_without_specified_direction/1,
                        fun should_reduce_forward/1,
                        fun should_reduce_backward/1
                    ]
                }
            },
            {
                "Range requests",
                [
                    {
                        "Forward direction",
                        {
                            foreachx,
                            fun setup_red/1, fun teardown/2,
                            [{fwd, F} || F <- red_test_funs()]
                        }
                    },
                    {
                        "Backward direction",
                        {
                            foreachx,
                            fun setup_red/1, fun teardown/2,
                            [{rev, F} || F <- red_test_funs()]
                        }
                    }
                ]
            }
        ]
    }.


should_set_fd_correctly(_, {Fd, Btree}) ->
    ?_assertMatch(Fd, Btree#btree.ref).

should_set_root_correctly(_, {_, Btree}) ->
    ?_assertMatch(nil, Btree#btree.root).

should_create_zero_sized_btree(_, {_, Btree}) ->
    ?_assertMatch(0, cbt_btree:size(Btree)).

should_set_reduce_option(_, {_, Btree}) ->
    ReduceFun = fun reduce_fun/2,
    Btree1 = cbt_btree:set_options(Btree, [{reduce, ReduceFun}]),
    ?_assertMatch(ReduceFun, Btree1#btree.reduce).

should_fold_over_empty_btree(_, {_, Btree}) ->
    {ok, _, EmptyRes} = cbt_btree:fold(Btree, fun(_, X) -> {ok, X+1} end, 0),
    %% eunit convention: expected value first (was reversed).
    ?_assertEqual(0, EmptyRes).

%% Bulk-load all keys, then run the size/validity sub-checks on the result.
should_add_all_keys(KeyValues, {Fd, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    [
        should_return_complete_btree_on_adding_all_keys(KeyValues, Btree1),
        should_have_non_zero_size(Btree1),
        should_have_lesser_size_than_file(Fd, Btree1),
        should_keep_root_pointer_to_kp_node(Fd, Btree1),
        should_remove_all_keys(KeyValues, Btree1)
    ].

should_return_complete_btree_on_adding_all_keys(KeyValues, Btree) ->
    ?_assert(test_btree(Btree, KeyValues)).

should_have_non_zero_size(Btree) ->
    ?_assert(cbt_btree:size(Btree) > 0).

should_have_lesser_size_than_file(Fd, Btree) ->
    ?_assert((cbt_btree:size(Btree) =< cbt_file:bytes(Fd))).

should_keep_root_pointer_to_kp_node(Fd, Btree) ->
    ?_assertMatch({ok, {kp_node, _}},
        cbt_file:pread_term(Fd, element(1, Btree#btree.root))).

should_remove_all_keys(KeyValues, Btree) ->
    Keys = keys(KeyValues),
    {ok, Btree1} = cbt_btree:add_remove(Btree, [], Keys),
    {
        "Should remove all the keys",
        [
            should_produce_valid_btree(Btree1, []),
            should_be_empty(Btree1)
        ]
    }.

%% Add one kv at a time; on-disk size must grow monotonically.
should_continuously_add_new_kv(KeyValues, {_, Btree}) ->
    {Btree1, _} = lists:foldl(
        fun(KV, {BtAcc, PrevSize}) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            ?assert(cbt_btree:size(BtAcc2) > PrevSize),
            {BtAcc2, cbt_btree:size(BtAcc2)}
        end, {Btree, cbt_btree:size(Btree)}, KeyValues),
    {
        "Should continuously add key-values to btree",
        [
            should_produce_valid_btree(Btree1, KeyValues),
            should_not_be_empty(Btree1)
        ]
    }.

%% Remove one key at a time; size must shrink monotonically.
should_continuously_remove_keys(KeyValues, {_, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    {Btree2, _} = lists:foldl(
        fun({K, _}, {BtAcc, PrevSize}) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [], [K]),
            ?assert(cbt_btree:size(BtAcc2) < PrevSize),
            {BtAcc2, cbt_btree:size(BtAcc2)}
        end, {Btree1, cbt_btree:size(Btree1)}, KeyValues),
    {
        "Should continuously remove keys from btree",
        [
            should_produce_valid_btree(Btree2, []),
            should_be_empty(Btree2)
        ]
    }.

should_insert_keys_in_reversed_order(KeyValues, {_, Btree}) ->
    KeyValuesRev = lists:reverse(KeyValues),
    {Btree1, _} = lists:foldl(
        fun(KV, {BtAcc, PrevSize}) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            ?assert(cbt_btree:size(BtAcc2) > PrevSize),
            {BtAcc2, cbt_btree:size(BtAcc2)}
        end, {Btree, cbt_btree:size(Btree)}, KeyValuesRev),
    should_produce_valid_btree(Btree1, KeyValues).

%% Split KeyValues by even/odd position, then exercise remove+re-add of
%% one half while the other stays in place.
should_add_every_odd_key_remove_every_even(KeyValues, {_, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) ->
        case Count rem 2 == 0 of
            true -> {Count + 1, [X | Left], Right};
            false -> {Count + 1, Left, [X | Right]}
        end
    end, {0, [], []}, KeyValues),
    ?_assert(test_add_remove(Btree1, Rem2Keys0, Rem2Keys1)).

should_add_every_even_key_remove_every_old(KeyValues, {_, Btree}) ->
    {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []),
    {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) ->
        case Count rem 2 == 0 of
            true -> {Count + 1, [X | Left], Right};
            false -> {Count + 1, Left, [X | Right]}
        end
    end, {0, [], []}, KeyValues),
    ?_assert(test_add_remove(Btree1, Rem2Keys1, Rem2Keys0)).


%% ?ROWS rows split evenly between "even" and "odd" groups (setup_red/0).
should_reduce_without_specified_direction({_, Btree}) ->
    ?_assertMatch(
        {ok, [{{"odd", _}, ?ROWS div 2}, {{"even", _}, ?ROWS div 2}]},
        fold_reduce(Btree, [])).

should_reduce_forward({_, Btree}) ->
    ?_assertMatch(
        {ok, [{{"odd", _}, ?ROWS div 2}, {{"even", _}, ?ROWS div 2}]},
        fold_reduce(Btree, [{dir, fwd}])).

should_reduce_backward({_, Btree}) ->
    ?_assertMatch(
        {ok, [{{"even", _}, ?ROWS div 2}, {{"odd", _}, ?ROWS div 2}]},
        fold_reduce(Btree, [{dir, rev}])).
%% Ranged reductions over the full key range, in each direction, with the
%% end key included (end_key) and excluded (end_key_gt).
should_reduce_whole_range(fwd, {_, Btree}) ->
    {SK, EK} = {{"even", 0}, {"odd", ?ROWS - 1}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, ?ROWS div 2},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, (?ROWS div 2) - 1},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ];
should_reduce_whole_range(rev, {_, Btree}) ->
    {SK, EK} = {{"odd", ?ROWS - 1}, {"even", 2}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, ?ROWS div 2},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, (?ROWS div 2) - 1},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ].

%% Same as above but the range covers only the first half of the rows.
should_reduce_first_half(fwd, {_, Btree}) ->
    {SK, EK} = {{"even", 0}, {"odd", (?ROWS div 2) - 1}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, ?ROWS div 4},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK}, {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, (?ROWS div 4) - 1},
                      {{"even", 2}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ];
should_reduce_first_half(rev, {_, Btree}) ->
    {SK, EK} = {{"odd", ?ROWS - 1}, {"even", ?ROWS div 2}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, (?ROWS div 4) + 1},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, ?ROWS div 4},
                      {{"odd", ?ROWS - 1}, ?ROWS div 2}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ].

%% Same as above but the range covers only the second half of the rows.
should_reduce_second_half(fwd, {_, Btree}) ->
    {SK, EK} = {{"even", ?ROWS div 2}, {"odd", ?ROWS - 1}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, ?ROWS div 2},
                      {{"even", ?ROWS div 2}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"odd", 1}, (?ROWS div 2) - 1},
                      {{"even", ?ROWS div 2}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, fwd},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ];
should_reduce_second_half(rev, {_, Btree}) ->
    {SK, EK} = {{"odd", (?ROWS div 2) + 1}, {"even", 2}},
    [
        {
            "include endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, ?ROWS div 2},
                      {{"odd", (?ROWS div 2) + 1}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key, EK}]))
        },
        {
            "exclude endkey",
            ?_assertMatch(
                {ok, [{{"even", ?ROWS}, (?ROWS div 2) - 1},
                      {{"odd", (?ROWS div 2) + 1}, (?ROWS div 4) + 1}]},
                fold_reduce(Btree, [{dir, rev},
                                    {start_key, SK},
                                    {end_key_gt, EK}]))
        }
    ].

should_produce_valid_btree(Btree, KeyValues) ->
    ?_assert(test_btree(Btree, KeyValues)).

should_be_empty(Btree) ->
    %% eunit convention: expected value first (was reversed).
    ?_assertEqual(0, cbt_btree:size(Btree)).

should_not_be_empty(Btree) ->
    ?_assert(cbt_btree:size(Btree) > 0).

%% Group-reduce over Btree, prepending one {GroupedKey, Reduction} per
%% group — results therefore come back in reverse visit order.
fold_reduce(Btree, Opts) ->
    GroupFun = fun({K1, _}, {K2, _}) ->
        K1 == K2
    end,
    FoldFun = fun(GroupedKey, Unreduced, Acc) ->
        {ok, [{GroupedKey, cbt_btree:final_reduce(Btree, Unreduced)} | Acc]}
    end,
    cbt_btree:fold_reduce(Btree, FoldFun, [],
        [{key_group_fun, GroupFun}] ++ Opts).


keys(KVs) ->
    [K || {K, _} <- KVs].
%% Reduce function for the btree under test: counts KVs on reduce and
%% sums the partial counts on rereduce.
reduce_fun(reduce, KVs) ->
    length(KVs);
reduce_fun(rereduce, Reds) ->
    lists:sum(Reds).


%% Shuffle a list by repeated random re-tagging; the number of passes grows
%% with log(length) so larger lists get mixed more thoroughly.
shuffle(List) ->
    randomize(round(math:log(length(List)) + 0.5), List).

%% Run T randomization passes over List.
randomize(1, List) ->
    randomize(List);
randomize(T, List) ->
    lists:foldl(
        fun(_E, Acc) ->
            randomize(Acc)
        end, randomize(List), lists:seq(1, (T - 1))).

%% Single pass: tag each element with a random float and sort on the tag.
randomize(List) ->
    D = lists:map(fun(A) -> {rand:uniform(), A} end, List),
    {_, D1} = lists:unzip(lists:keysort(1, D)),
    D1.

%% Verify the btree holds exactly KeyValues: key ordering, lookups,
%% reductions and traversal callbacks. Returns true or crashes.
test_btree(Btree, KeyValues) ->
    ok = test_key_access(Btree, KeyValues),
    ok = test_lookup_access(Btree, KeyValues),
    ok = test_final_reductions(Btree, KeyValues),
    ok = test_traversal_callbacks(Btree, KeyValues),
    true.

%% Remove OutKeyValues one by one, verify the remainder, then re-add them
%% and verify the full set again. Returns true or crashes.
test_add_remove(Btree, OutKeyValues, RemainingKeyValues) ->
    Btree2 = lists:foldl(
        fun({K, _}, BtAcc) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [], [K]),
            BtAcc2
        end, Btree, OutKeyValues),
    true = test_btree(Btree2, RemainingKeyValues),

    Btree3 = lists:foldl(
        fun(KV, BtAcc) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            BtAcc2
        end, Btree2, OutKeyValues),
    true = test_btree(Btree3, OutKeyValues ++ RemainingKeyValues).

%% Fold over the btree in both directions, asserting each visited element
%% equals the next expected element for that direction.
%%
%% Fix: the two case branches used to be identical, so an element that did
%% NOT match the expected head was silently counted as a success, and the
%% reverse fold compared descending elements against the ascending list.
%% A mismatch now raises; the reverse fold walks the reversed sorted list.
test_key_access(Btree, List) ->
    FoldFun = fun(Element, {[HAcc | TAcc], Count}) ->
        case Element == HAcc of
            true -> {ok, {TAcc, Count + 1}};
            false -> erlang:error({unexpected_element, Element,
                                   expected, HAcc})
        end
    end,
    Length = length(List),
    Sorted = lists:sort(List),
    {ok, _, {[], Length}} = cbt_btree:fold(Btree, FoldFun, {Sorted, 0}),
    {ok, _, {[], Length}} = cbt_btree:fold(Btree, FoldFun,
                                           {lists:reverse(Sorted), 0},
                                           [{dir, rev}]),
    ok.
505 | 506 | test_lookup_access(Btree, KeyValues) -> 507 | FoldFun = fun({Key, Value}, {Key, Value}) -> {stop, true} end, 508 | lists:foreach( 509 | fun({Key, Value}) -> 510 | [{ok, {Key, Value}}] = cbt_btree:lookup(Btree, [Key]), 511 | {ok, _, true} = cbt_btree:fold(Btree, FoldFun, 512 | {Key, Value}, [{start_key, Key}]) 513 | end, KeyValues). 514 | 515 | test_final_reductions(Btree, KeyValues) -> 516 | KVLen = length(KeyValues), 517 | FoldLFun = fun(_X, LeadingReds, Acc) -> 518 | CountToStart = KVLen div 3 + Acc, 519 | CountToStart = cbt_btree:final_reduce(Btree, LeadingReds), 520 | {ok, Acc + 1} 521 | end, 522 | FoldRFun = fun(_X, LeadingReds, Acc) -> 523 | CountToEnd = KVLen - KVLen div 3 + Acc, 524 | CountToEnd = cbt_btree:final_reduce(Btree, LeadingReds), 525 | {ok, Acc + 1} 526 | end, 527 | {LStartKey, _} = case KVLen of 528 | 0 -> {nil, nil}; 529 | _ -> lists:nth(KVLen div 3 + 1, lists:sort(KeyValues)) 530 | end, 531 | {RStartKey, _} = case KVLen of 532 | 0 -> {nil, nil}; 533 | _ -> lists:nth(KVLen div 3, lists:sort(KeyValues)) 534 | end, 535 | {ok, _, FoldLRed} = cbt_btree:fold(Btree, FoldLFun, 0, 536 | [{start_key, LStartKey}]), 537 | {ok, _, FoldRRed} = cbt_btree:fold(Btree, FoldRFun, 0, 538 | [{dir, rev}, {start_key, RStartKey}]), 539 | KVLen = FoldLRed + FoldRRed, 540 | ok. 541 | 542 | test_traversal_callbacks(Btree, _KeyValues) -> 543 | FoldFun = fun 544 | (visit, _GroupedKey, _Unreduced, Acc) -> 545 | {ok, Acc andalso false}; 546 | (traverse, _LK, _Red, Acc) -> 547 | {skip, Acc andalso true} 548 | end, 549 | % With 250 items the root is a kp. Always skipping should reduce to true. 550 | {ok, _, true} = cbt_btree:fold(Btree, FoldFun, true, [{dir, fwd}]), 551 | ok. 552 | -------------------------------------------------------------------------------- /test/cbt_ets_btree_copy_tests.erl: -------------------------------------------------------------------------------- 1 | -module(cbt_ets_btree_copy_tests). 2 | 3 | -include_lib("include/cbt.hrl"). 
4 | -include("cbt_tests.hrl"). 5 | 6 | 7 | setup_copy(_) -> 8 | ReduceFun = fun(reduce, KVs) -> 9 | length(KVs); 10 | (rereduce, Reds) -> 11 | lists:sum(Reds) 12 | end, 13 | 14 | OriginalFileName = ?tempfile(), 15 | CopyEts = test_db, 16 | {ok, Fd} = cbt_file:open(OriginalFileName, [create, overwrite]), 17 | cbt_ets:new(CopyEts), 18 | {ReduceFun, OriginalFileName, CopyEts, Fd}. 19 | 20 | teardown(_, {_, OriginalFileName, CopyEts, Fd}) -> 21 | ok = cbt_file:close(Fd), 22 | ok = cbt_ets:delete(CopyEts), 23 | ok = file:delete(OriginalFileName). 24 | 25 | btree_copy_test_() -> 26 | TNumItems = [50, 100, 300, 700, 811, 2333, 6594, 9999, 15003, 21477, 27 | 38888, 66069, 150123, 420789, 711321], 28 | { 29 | "Copy BTree", 30 | { 31 | foreachx, 32 | fun setup_copy/1, fun teardown/2, 33 | [{N, fun should_copy_btree/2} || N <- TNumItems] 34 | } 35 | }. 36 | 37 | btree_copy_compressed_test_() -> 38 | TNumItems = [50, 100, 300, 700, 811, 2333, 6594, 9999, 15003, 21477, 39 | 38888, 66069, 150123, 420789, 711321], 40 | { 41 | "Copy Compressed BTree", 42 | { 43 | foreachx, 44 | fun setup_copy/1, fun teardown/2, 45 | [{N, fun should_copy_compressed_btree/2} || N <- TNumItems] 46 | } 47 | }. 
48 | 49 | should_copy_btree(NumItems, {ReduceFun, _OriginalFileName, CopyEts, Fd}) -> 50 | KVs = [{I, I} || I <- lists:seq(1, NumItems)], 51 | {ok, Btree} = make_btree(Fd, KVs, ReduceFun), 52 | 53 | {_, Red, _} = cbt_btree:get_state(Btree), 54 | 55 | CopyCallback = fun(KV, Acc) -> {KV, Acc + 1} end, 56 | {ok, RootCopy, FinalAcc} = cbt_btree_copy:copy( 57 | Btree, CopyEts, [{backend, cbt_ets}, 58 | {before_kv_write, {CopyCallback, 0}}]), 59 | 60 | ?assertMatch(FinalAcc, length(KVs)), 61 | 62 | {ok, BtreeCopy} = cbt_btree:open(RootCopy, CopyEts, [{backend, cbt_ets}, 63 | {compression, none}, 64 | {reduce, ReduceFun}]), 65 | 66 | %% check copy 67 | {_, RedCopy, _} = cbt_btree:get_state(BtreeCopy), 68 | ?assertMatch(Red, RedCopy), 69 | {ok, _, CopyKVs} = cbt_btree:fold( 70 | BtreeCopy, 71 | fun(KV, _, Acc) -> {ok, [KV | Acc]} end, 72 | [], []), 73 | ?_assertMatch(KVs, lists:reverse(CopyKVs)). 74 | 75 | should_copy_compressed_btree(NumItems, {ReduceFun, _OriginalFileName, CopyEts, 76 | Fd}) -> 77 | 78 | KVs = [{I, I} || I <- lists:seq(1, NumItems)], 79 | {ok, Btree} = make_btree(Fd, KVs, ReduceFun, snappy), 80 | 81 | {_, Red, _} = cbt_btree:get_state(Btree), 82 | 83 | CopyCallback = fun(KV, Acc) -> {KV, Acc + 1} end, 84 | {ok, RootCopy, FinalAcc} = cbt_btree_copy:copy(Btree, CopyEts, 85 | [{backend, cbt_ets}, 86 | {before_kv_write, {CopyCallback, 0}}]), 87 | 88 | ?assertMatch(FinalAcc, length(KVs)), 89 | 90 | {ok, BtreeCopy} = cbt_btree:open(RootCopy, CopyEts, [{backend, cbt_ets}, 91 | {compression, snappy}, 92 | {reduce, ReduceFun}]), 93 | 94 | %% check copy 95 | {_, RedCopy, _} = cbt_btree:get_state(BtreeCopy), 96 | ?assertMatch(Red, RedCopy), 97 | {ok, _, CopyKVs} = cbt_btree:fold( 98 | BtreeCopy, 99 | fun(KV, _, Acc) -> {ok, [KV | Acc]} end, 100 | [], []), 101 | ?_assertMatch(KVs, lists:reverse(CopyKVs)). 102 | 103 | make_btree(Fd, KVs, ReduceFun) -> 104 | make_btree(Fd, KVs, ReduceFun, none). 
105 | 106 | make_btree(Fd, KVs, ReduceFun, Compression) -> 107 | 108 | {ok, Btree} = cbt_btree:open(nil, Fd, [{compression, Compression}, 109 | {reduce, ReduceFun}]), 110 | {ok, Btree2} = cbt_btree:add_remove(Btree, KVs, []), 111 | {_, Red, _} = cbt_btree:get_state(Btree2), 112 | ?assertMatch(Red, length(KVs)), 113 | ok = cbt_file:sync(Fd), 114 | {ok, Btree2}. 115 | -------------------------------------------------------------------------------- /test/cbt_ets_tests.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -module(cbt_ets_tests). 14 | 15 | -include_lib("include/cbt.hrl"). 16 | -include("cbt_tests.hrl"). 17 | 18 | -define(ROWS, 1000). 19 | 20 | 21 | setup() -> 22 | Ref = cbt_ets:new(test_db), 23 | {ok, Btree} = cbt_ets:open_btree(Ref, test, [{backend, cbt_ets}, 24 | {compression, none}, 25 | {reduce, fun reduce_fun/2}]), 26 | {Ref, Btree}. 27 | 28 | setup_kvs(_) -> 29 | setup(). 30 | 31 | setup_red() -> 32 | {_, EvenOddKVs} = lists:foldl( 33 | fun(Idx, {Key, Acc}) -> 34 | case Key of 35 | "even" -> {"odd", [{{Key, Idx}, 1} | Acc]}; 36 | _ -> {"even", [{{Key, Idx}, 1} | Acc]} 37 | end 38 | end, {"odd", []}, lists:seq(1, ?ROWS)), 39 | {Ref, Btree} = setup(), 40 | {ok, Btree1} = cbt_btree:add_remove(Btree, EvenOddKVs, []), 41 | {Ref, Btree1}. 42 | setup_red(_) -> 43 | setup_red(). 
%% Clean up the ETS backend created by setup/0 / setup_red/0. The 1-arity
%% clauses serve `foreach` fixtures, the 2-arity clause serves `foreachx`.
teardown(Ref) when is_atom(Ref) orelse is_reference(Ref) ->
    cbt_ets:delete(Ref);
teardown({Ref, _}) ->
    cbt_ets:delete(Ref).
teardown(_, {Ref, _}) ->
    cbt_ets:delete(Ref).


%% Test generators run against each key/value fixture list.
kvs_test_funs() ->
    [
        fun should_set_fd_correctly/2,
        fun should_set_root_correctly/2,
        fun should_create_zero_sized_btree/2,
        fun should_set_reduce_option/2,
        fun should_fold_over_empty_btree/2,
        fun should_add_all_keys/2,
        fun should_continuously_add_new_kv/2,
        fun should_continuously_remove_keys/2,
        fun should_insert_keys_in_reversed_order/2,
        fun should_add_every_odd_key_remove_every_even/2,
        fun should_add_every_even_key_remove_every_old/2
    ].

%% Test generators for range/reduce checks, parameterized by direction.
red_test_funs() ->
    [
        fun should_reduce_whole_range/2,
        fun should_reduce_first_half/2,
        fun should_reduce_second_half/2
    ].


btree_open_test_() ->
    cbt_ets:new(test_ets_db),
    {ok, Btree} = cbt_ets:open_btree(test_ets_db, test_btree,
                                     [{compression, none}]),
    cbt_ets:delete(test_ets_db),
    {
        "Ensure that created btree is really a btree record",
        ?_assert(is_record(Btree, btree))
    }.

sorted_kvs_test_() ->
    Funs = kvs_test_funs(),
    %% rand replaces the 'random' module, which was removed in OTP 24.
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    {
        "BTree with sorted keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Sorted, Fun} || Fun <- Funs]
        }
    }.

rsorted_kvs_test_() ->
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    Funs = kvs_test_funs(),
    %% Fix: the list was previously bound unreversed (Reversed = Sorted),
    %% so this suite silently duplicated sorted_kvs_test_.
    Reversed = lists:reverse(Sorted),
    {
        "BTree with backward sorted keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Reversed, Fun} || Fun <- Funs]
        }
    }.
shuffled_kvs_test_() ->
    Funs = kvs_test_funs(),
    %% rand replaces the 'random' module, which was removed in OTP 24.
    Sorted = [{Seq, rand:uniform()} || Seq <- lists:seq(1, ?ROWS)],
    Shuffled = shuffle(Sorted),
    {
        "BTree with shuffled keys",
        {
            foreachx,
            fun setup_kvs/1, fun teardown/2,
            [{Shuffled, Fun} || Fun <- Funs]
        }
    }.

reductions_test_() ->
    {
        "BTree reductions",
        [
            {
                "Common tests",
                {
                    foreach,
                    fun setup_red/0, fun teardown/1,
                    [
                        fun should_reduce_without_specified_direction/1,
                        fun should_reduce_forward/1,
                        fun should_reduce_backward/1
                    ]
                }
            },
            {
                "Range requests",
                [
                    {
                        "Forward direction",
                        {
                            foreachx,
                            fun setup_red/1, fun teardown/2,
                            [{fwd, F} || F <- red_test_funs()]
                        }
                    },
                    {
                        "Backward direction",
                        {
                            foreachx,
                            fun setup_red/1, fun teardown/2,
                            [{rev, F} || F <- red_test_funs()]
                        }
                    }
                ]
            }
        ]
    }.


%% The ETS backend stores the table reference in #btree.ref.
should_set_fd_correctly(_, {Ref, Btree}) ->
    ?_assertMatch(Ref, Btree#btree.ref).

%% A freshly opened btree has no root yet.
should_set_root_correctly(_, {_, Btree}) ->
    ?_assertMatch(nil, Btree#btree.root).

should_create_zero_sized_btree(_, {_, Btree}) ->
    ?_assertMatch(0, cbt_btree:size(Btree)).

should_set_reduce_option(_, {_, Btree}) ->
    ReduceFun = fun reduce_fun/2,
    Btree1 = cbt_btree:set_options(Btree, [{reduce, ReduceFun}]),
    ?_assertMatch(ReduceFun, Btree1#btree.reduce).

%% Folding an empty btree must not invoke the fold fun at all.
should_fold_over_empty_btree(_, {_, Btree}) ->
    {ok, _, EmptyRes} = cbt_btree:fold(Btree, fun(_, X) -> {ok, X+1} end, 0),
    ?_assertEqual(EmptyRes, 0).
181 | 182 | should_add_all_keys(KeyValues, {Ref, Btree}) -> 183 | {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []), 184 | [ 185 | should_return_complete_btree_on_adding_all_keys(KeyValues, Btree1), 186 | should_have_non_zero_size(Btree1), 187 | should_keep_root_pointer_to_kp_node(Ref, Btree1), 188 | should_remove_all_keys(KeyValues, Btree1) 189 | ]. 190 | 191 | should_return_complete_btree_on_adding_all_keys(KeyValues, Btree) -> 192 | ?_assert(test_btree(Btree, KeyValues)). 193 | 194 | should_have_non_zero_size(Btree) -> 195 | ?_assert(cbt_btree:size(Btree) > 0). 196 | 197 | should_keep_root_pointer_to_kp_node(Ref, Btree) -> 198 | ?_assertMatch({ok, {kp_node, _}}, 199 | cbt_ets:pread_term(Ref, element(1, Btree#btree.root))). 200 | 201 | should_remove_all_keys(KeyValues, Btree) -> 202 | Keys = keys(KeyValues), 203 | {ok, Btree1} = cbt_btree:add_remove(Btree, [], Keys), 204 | { 205 | "Should remove all the keys", 206 | [ 207 | should_produce_valid_btree(Btree1, []), 208 | should_be_empty(Btree1) 209 | ] 210 | }. 211 | 212 | should_continuously_add_new_kv(KeyValues, {_, Btree}) -> 213 | {Btree1, _} = lists:foldl( 214 | fun(KV, {BtAcc, PrevSize}) -> 215 | {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []), 216 | ?assert(cbt_btree:size(BtAcc2) > PrevSize), 217 | {BtAcc2, cbt_btree:size(BtAcc2)} 218 | end, {Btree, cbt_btree:size(Btree)}, KeyValues), 219 | { 220 | "Should continuously add key-values to btree", 221 | [ 222 | should_produce_valid_btree(Btree1, KeyValues), 223 | should_not_be_empty(Btree1) 224 | ] 225 | }. 
226 | 227 | should_continuously_remove_keys(KeyValues, {_, Btree}) -> 228 | {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []), 229 | {Btree2, _} = lists:foldl( 230 | fun({K, _}, {BtAcc, PrevSize}) -> 231 | {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [], [K]), 232 | ?assert(cbt_btree:size(BtAcc2) < PrevSize), 233 | {BtAcc2, cbt_btree:size(BtAcc2)} 234 | end, {Btree1, cbt_btree:size(Btree1)}, KeyValues), 235 | { 236 | "Should continuously remove keys from btree", 237 | [ 238 | should_produce_valid_btree(Btree2, []), 239 | should_be_empty(Btree2) 240 | ] 241 | }. 242 | 243 | should_insert_keys_in_reversed_order(KeyValues, {_, Btree}) -> 244 | KeyValuesRev = lists:reverse(KeyValues), 245 | {Btree1, _} = lists:foldl( 246 | fun(KV, {BtAcc, PrevSize}) -> 247 | {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []), 248 | ?assert(cbt_btree:size(BtAcc2) > PrevSize), 249 | {BtAcc2, cbt_btree:size(BtAcc2)} 250 | end, {Btree, cbt_btree:size(Btree)}, KeyValuesRev), 251 | should_produce_valid_btree(Btree1, KeyValues). 252 | 253 | should_add_every_odd_key_remove_every_even(KeyValues, {_, Btree}) -> 254 | {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []), 255 | {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) -> 256 | case Count rem 2 == 0 of 257 | true -> {Count + 1, [X | Left], Right}; 258 | false -> {Count + 1, Left, [X | Right]} 259 | end 260 | end, {0, [], []}, KeyValues), 261 | ?_assert(test_add_remove(Btree1, Rem2Keys0, Rem2Keys1)). 262 | 263 | should_add_every_even_key_remove_every_old(KeyValues, {_, Btree}) -> 264 | {ok, Btree1} = cbt_btree:add_remove(Btree, KeyValues, []), 265 | {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) -> 266 | case Count rem 2 == 0 of 267 | true -> {Count + 1, [X | Left], Right}; 268 | false -> {Count + 1, Left, [X | Right]} 269 | end 270 | end, {0, [], []}, KeyValues), 271 | ?_assert(test_add_remove(Btree1, Rem2Keys1, Rem2Keys0)). 
272 | 273 | 274 | should_reduce_without_specified_direction({_, Btree}) -> 275 | ?_assertMatch( 276 | {ok, [{{"odd", _}, ?ROWS div 2}, {{"even", _}, ?ROWS div 2}]}, 277 | fold_reduce(Btree, [])). 278 | 279 | should_reduce_forward({_, Btree}) -> 280 | ?_assertMatch( 281 | {ok, [{{"odd", _}, ?ROWS div 2}, {{"even", _}, ?ROWS div 2}]}, 282 | fold_reduce(Btree, [{dir, fwd}])). 283 | 284 | should_reduce_backward({_, Btree}) -> 285 | ?_assertMatch( 286 | {ok, [{{"even", _}, ?ROWS div 2}, {{"odd", _}, ?ROWS div 2}]}, 287 | fold_reduce(Btree, [{dir, rev}])). 288 | 289 | should_reduce_whole_range(fwd, {_, Btree}) -> 290 | {SK, EK} = {{"even", 0}, {"odd", ?ROWS - 1}}, 291 | [ 292 | { 293 | "include endkey", 294 | ?_assertMatch( 295 | {ok, [{{"odd", 1}, ?ROWS div 2}, 296 | {{"even", 2}, ?ROWS div 2}]}, 297 | fold_reduce(Btree, [{dir, fwd}, 298 | {start_key, SK}, 299 | {end_key, EK}])) 300 | }, 301 | { 302 | "exclude endkey", 303 | ?_assertMatch( 304 | {ok, [{{"odd", 1}, (?ROWS div 2) - 1}, 305 | {{"even", 2}, ?ROWS div 2}]}, 306 | fold_reduce(Btree, [{dir, fwd}, 307 | {start_key, SK}, 308 | {end_key_gt, EK}])) 309 | } 310 | ]; 311 | should_reduce_whole_range(rev, {_, Btree}) -> 312 | {SK, EK} = {{"odd", ?ROWS - 1}, {"even", 2}}, 313 | [ 314 | { 315 | "include endkey", 316 | ?_assertMatch( 317 | {ok, [{{"even", ?ROWS}, ?ROWS div 2}, 318 | {{"odd", ?ROWS - 1}, ?ROWS div 2}]}, 319 | fold_reduce(Btree, [{dir, rev}, 320 | {start_key, SK}, 321 | {end_key, EK}])) 322 | }, 323 | { 324 | "exclude endkey", 325 | ?_assertMatch( 326 | {ok, [{{"even", ?ROWS}, (?ROWS div 2) - 1}, 327 | {{"odd", ?ROWS - 1}, ?ROWS div 2}]}, 328 | fold_reduce(Btree, [{dir, rev}, 329 | {start_key, SK}, 330 | {end_key_gt, EK}])) 331 | } 332 | ]. 
333 | 334 | should_reduce_first_half(fwd, {_, Btree}) -> 335 | {SK, EK} = {{"even", 0}, {"odd", (?ROWS div 2) - 1}}, 336 | [ 337 | { 338 | "include endkey", 339 | ?_assertMatch( 340 | {ok, [{{"odd", 1}, ?ROWS div 4}, 341 | {{"even", 2}, ?ROWS div 2}]}, 342 | fold_reduce(Btree, [{dir, fwd}, 343 | {start_key, SK}, {end_key, EK}])) 344 | }, 345 | { 346 | "exclude endkey", 347 | ?_assertMatch( 348 | {ok, [{{"odd", 1}, (?ROWS div 4) - 1}, 349 | {{"even", 2}, ?ROWS div 2}]}, 350 | fold_reduce(Btree, [{dir, fwd}, 351 | {start_key, SK}, 352 | {end_key_gt, EK}])) 353 | } 354 | ]; 355 | should_reduce_first_half(rev, {_, Btree}) -> 356 | {SK, EK} = {{"odd", ?ROWS - 1}, {"even", ?ROWS div 2}}, 357 | [ 358 | { 359 | "include endkey", 360 | ?_assertMatch( 361 | {ok, [{{"even", ?ROWS}, (?ROWS div 4) + 1}, 362 | {{"odd", ?ROWS - 1}, ?ROWS div 2}]}, 363 | fold_reduce(Btree, [{dir, rev}, 364 | {start_key, SK}, 365 | {end_key, EK}])) 366 | }, 367 | { 368 | "exclude endkey", 369 | ?_assertMatch( 370 | {ok, [{{"even", ?ROWS}, ?ROWS div 4}, 371 | {{"odd", ?ROWS - 1}, ?ROWS div 2}]}, 372 | fold_reduce(Btree, [{dir, rev}, 373 | {start_key, SK}, 374 | {end_key_gt, EK}])) 375 | } 376 | ]. 
377 | 378 | should_reduce_second_half(fwd, {_, Btree}) -> 379 | {SK, EK} = {{"even", ?ROWS div 2}, {"odd", ?ROWS - 1}}, 380 | [ 381 | { 382 | "include endkey", 383 | ?_assertMatch( 384 | {ok, [{{"odd", 1}, ?ROWS div 2}, 385 | {{"even", ?ROWS div 2}, (?ROWS div 4) + 1}]}, 386 | fold_reduce(Btree, [{dir, fwd}, 387 | {start_key, SK}, 388 | {end_key, EK}])) 389 | }, 390 | { 391 | "exclude endkey", 392 | ?_assertMatch( 393 | {ok, [{{"odd", 1}, (?ROWS div 2) - 1}, 394 | {{"even", ?ROWS div 2}, (?ROWS div 4) + 1}]}, 395 | fold_reduce(Btree, [{dir, fwd}, 396 | {start_key, SK}, 397 | {end_key_gt, EK}])) 398 | } 399 | ]; 400 | should_reduce_second_half(rev, {_, Btree}) -> 401 | {SK, EK} = {{"odd", (?ROWS div 2) + 1}, {"even", 2}}, 402 | [ 403 | { 404 | "include endkey", 405 | ?_assertMatch( 406 | {ok, [{{"even", ?ROWS}, ?ROWS div 2}, 407 | {{"odd", (?ROWS div 2) + 1}, (?ROWS div 4) + 1}]}, 408 | fold_reduce(Btree, [{dir, rev}, 409 | {start_key, SK}, 410 | {end_key, EK}])) 411 | }, 412 | { 413 | "exclude endkey", 414 | ?_assertMatch( 415 | {ok, [{{"even", ?ROWS}, (?ROWS div 2) - 1}, 416 | {{"odd", (?ROWS div 2) + 1}, (?ROWS div 4) + 1}]}, 417 | fold_reduce(Btree, [{dir, rev}, 418 | {start_key, SK}, 419 | {end_key_gt, EK}])) 420 | } 421 | ]. 422 | 423 | should_produce_valid_btree(Btree, KeyValues) -> 424 | ?_assert(test_btree(Btree, KeyValues)). 425 | 426 | should_be_empty(Btree) -> 427 | ?_assertEqual(cbt_btree:size(Btree), 0). 428 | 429 | should_not_be_empty(Btree) -> 430 | ?_assert(cbt_btree:size(Btree) > 0). 431 | 432 | fold_reduce(Btree, Opts) -> 433 | GroupFun = fun({K1, _}, {K2, _}) -> 434 | K1 == K2 435 | end, 436 | FoldFun = fun(GroupedKey, Unreduced, Acc) -> 437 | {ok, [{GroupedKey, cbt_btree:final_reduce(Btree, Unreduced)} | Acc]} 438 | end, 439 | cbt_btree:fold_reduce(Btree, FoldFun, [], 440 | [{key_group_fun, GroupFun}] ++ Opts). 441 | 442 | 443 | keys(KVs) -> 444 | [K || {K, _} <- KVs]. 
%% Reduce function for the btree under test: counts KVs on reduce and
%% sums the partial counts on rereduce.
reduce_fun(reduce, KVs) ->
    length(KVs);
reduce_fun(rereduce, Reds) ->
    lists:sum(Reds).


%% Shuffle a list by repeated random re-tagging; the number of passes grows
%% with log(length) so larger lists get mixed more thoroughly.
shuffle(List) ->
    randomize(round(math:log(length(List)) + 0.5), List).

%% Run T randomization passes over List.
randomize(1, List) ->
    randomize(List);
randomize(T, List) ->
    lists:foldl(
        fun(_E, Acc) ->
            randomize(Acc)
        end, randomize(List), lists:seq(1, (T - 1))).

%% Single pass: tag each element with a random float and sort on the tag.
%% Fix: use rand; the 'random' module was removed in OTP 24.
randomize(List) ->
    D = lists:map(fun(A) -> {rand:uniform(), A} end, List),
    {_, D1} = lists:unzip(lists:keysort(1, D)),
    D1.

%% Verify the btree holds exactly KeyValues: key ordering, lookups,
%% reductions and traversal callbacks. Returns true or crashes.
test_btree(Btree, KeyValues) ->
    ok = test_key_access(Btree, KeyValues),
    ok = test_lookup_access(Btree, KeyValues),
    ok = test_final_reductions(Btree, KeyValues),
    ok = test_traversal_callbacks(Btree, KeyValues),
    true.

%% Remove OutKeyValues one by one, verify the remainder, then re-add them
%% and verify the full set again. Returns true or crashes.
test_add_remove(Btree, OutKeyValues, RemainingKeyValues) ->
    Btree2 = lists:foldl(
        fun({K, _}, BtAcc) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [], [K]),
            BtAcc2
        end, Btree, OutKeyValues),
    true = test_btree(Btree2, RemainingKeyValues),

    Btree3 = lists:foldl(
        fun(KV, BtAcc) ->
            {ok, BtAcc2} = cbt_btree:add_remove(BtAcc, [KV], []),
            BtAcc2
        end, Btree2, OutKeyValues),
    true = test_btree(Btree3, OutKeyValues ++ RemainingKeyValues).

%% Fold over the btree in both directions, asserting each visited element
%% equals the next expected element for that direction.
%%
%% Fix: the two case branches used to be identical, so an element that did
%% NOT match the expected head was silently counted as a success, and the
%% reverse fold compared descending elements against the ascending list.
%% A mismatch now raises; the reverse fold walks the reversed sorted list.
test_key_access(Btree, List) ->
    FoldFun = fun(Element, {[HAcc | TAcc], Count}) ->
        case Element == HAcc of
            true -> {ok, {TAcc, Count + 1}};
            false -> erlang:error({unexpected_element, Element,
                                   expected, HAcc})
        end
    end,
    Length = length(List),
    Sorted = lists:sort(List),
    {ok, _, {[], Length}} = cbt_btree:fold(Btree, FoldFun, {Sorted, 0}),
    {ok, _, {[], Length}} = cbt_btree:fold(Btree, FoldFun,
                                           {lists:reverse(Sorted), 0},
                                           [{dir, rev}]),
    ok.
503 | 504 | test_lookup_access(Btree, KeyValues) -> 505 | FoldFun = fun({Key, Value}, {Key, Value}) -> {stop, true} end, 506 | lists:foreach( 507 | fun({Key, Value}) -> 508 | [{ok, {Key, Value}}] = cbt_btree:lookup(Btree, [Key]), 509 | {ok, _, true} = cbt_btree:fold(Btree, FoldFun, 510 | {Key, Value}, [{start_key, Key}]) 511 | end, KeyValues). 512 | 513 | test_final_reductions(Btree, KeyValues) -> 514 | KVLen = length(KeyValues), 515 | FoldLFun = fun(_X, LeadingReds, Acc) -> 516 | CountToStart = KVLen div 3 + Acc, 517 | CountToStart = cbt_btree:final_reduce(Btree, LeadingReds), 518 | {ok, Acc + 1} 519 | end, 520 | FoldRFun = fun(_X, LeadingReds, Acc) -> 521 | CountToEnd = KVLen - KVLen div 3 + Acc, 522 | CountToEnd = cbt_btree:final_reduce(Btree, LeadingReds), 523 | {ok, Acc + 1} 524 | end, 525 | {LStartKey, _} = case KVLen of 526 | 0 -> {nil, nil}; 527 | _ -> lists:nth(KVLen div 3 + 1, lists:sort(KeyValues)) 528 | end, 529 | {RStartKey, _} = case KVLen of 530 | 0 -> {nil, nil}; 531 | _ -> lists:nth(KVLen div 3, lists:sort(KeyValues)) 532 | end, 533 | {ok, _, FoldLRed} = cbt_btree:fold(Btree, FoldLFun, 0, 534 | [{start_key, LStartKey}]), 535 | {ok, _, FoldRRed} = cbt_btree:fold(Btree, FoldRFun, 0, 536 | [{dir, rev}, {start_key, RStartKey}]), 537 | KVLen = FoldLRed + FoldRRed, 538 | ok. 539 | 540 | test_traversal_callbacks(Btree, _KeyValues) -> 541 | FoldFun = fun 542 | (visit, _GroupedKey, _Unreduced, Acc) -> 543 | {ok, Acc andalso false}; 544 | (traverse, _LK, _Red, Acc) -> 545 | {skip, Acc andalso true} 546 | end, 547 | % With 250 items the root is a kp. Always skipping should reduce to true. 548 | {ok, _, true} = cbt_btree:fold(Btree, FoldFun, true, [{dir, fwd}]), 549 | ok. 
550 | -------------------------------------------------------------------------------- /test/cbt_file_tests.erl: -------------------------------------------------------------------------------- 1 | % Licensed under the Apache License, Version 2.0 (the "License"); you may not 2 | % use this file except in compliance with the License. You may obtain a copy of 3 | % the License at 4 | % 5 | % http://www.apache.org/licenses/LICENSE-2.0 6 | % 7 | % Unless required by applicable law or agreed to in writing, software 8 | % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 9 | % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 10 | % License for the specific language governing permissions and limitations under 11 | % the License. 12 | 13 | -module(cbt_file_tests). 14 | 15 | -include("cbt_tests.hrl"). 16 | 17 | -define(BLOCK_SIZE, 4096). 18 | -define(setup(F), {setup, fun setup/0, fun teardown/1, F}). 19 | -define(foreach(Fs), {foreach, fun setup/0, fun teardown/1, Fs}). 20 | 21 | 22 | setup() -> 23 | {ok, Fd} = cbt_file:open(?tempfile(), [create, overwrite]), 24 | Fd. 25 | 26 | teardown(Fd) -> 27 | ok = cbt_file:close(Fd). 28 | 29 | 30 | open_close_test_() -> 31 | { 32 | "Test for proper file open and close", 33 | [ 34 | should_return_enoent_if_missed(), 35 | should_ignore_invalid_flags_with_open(), 36 | ?setup(fun should_return_pid_on_file_open/1), 37 | should_close_file_properly(), 38 | ?setup(fun should_create_empty_new_files/1) 39 | ] 40 | }. 41 | 42 | should_return_enoent_if_missed() -> 43 | ?_assertEqual({error, enoent}, cbt_file:open("not a real file")). 44 | 45 | should_ignore_invalid_flags_with_open() -> 46 | ?_assertMatch({ok, _}, 47 | cbt_file:open(?tempfile(), [create, invalid_option])). 48 | 49 | should_return_pid_on_file_open(Fd) -> 50 | ?_assert(is_pid(Fd)). 
51 | 52 | should_close_file_properly() -> 53 | {ok, Fd} = cbt_file:open(?tempfile(), [create, overwrite]), 54 | ok = cbt_file:close(Fd), 55 | ?_assert(true). 56 | 57 | should_create_empty_new_files(Fd) -> 58 | ?_assertMatch({ok, 0}, cbt_file:bytes(Fd)). 59 | 60 | 61 | read_write_test_() -> 62 | { 63 | "Common file read/write tests", 64 | ?foreach([ 65 | fun should_increase_file_size_on_write/1, 66 | fun should_return_current_file_size_on_write/1, 67 | fun should_write_and_read_term/1, 68 | fun should_write_and_read_binary/1, 69 | fun should_write_and_read_large_binary/1, 70 | fun should_read_iolist/1, 71 | fun should_fsync/1, 72 | fun should_not_read_beyond_eof/1, 73 | fun should_truncate/1 74 | ]) 75 | }. 76 | 77 | 78 | should_increase_file_size_on_write(Fd) -> 79 | {ok, 0, _} = cbt_file:append_term(Fd, foo), 80 | {ok, Size} = cbt_file:bytes(Fd), 81 | ?_assert(Size > 0). 82 | 83 | should_return_current_file_size_on_write(Fd) -> 84 | {ok, 0, _} = cbt_file:append_term(Fd, foo), 85 | {ok, Size} = cbt_file:bytes(Fd), 86 | ?_assertMatch({ok, Size, _}, cbt_file:append_term(Fd, bar)). 87 | 88 | should_write_and_read_term(Fd) -> 89 | {ok, Pos, _} = cbt_file:append_term(Fd, foo), 90 | ?_assertMatch({ok, foo}, cbt_file:pread_term(Fd, Pos)). 91 | 92 | should_write_and_read_binary(Fd) -> 93 | {ok, Pos, _} = cbt_file:append_binary(Fd, <<"fancy!">>), 94 | ?_assertMatch({ok, <<"fancy!">>}, cbt_file:pread_binary(Fd, Pos)). 95 | 96 | should_write_and_read_large_binary(Fd) -> 97 | BigBin = list_to_binary(lists:duplicate(100000, 0)), 98 | {ok, Pos, _} = cbt_file:append_binary(Fd, BigBin), 99 | ?_assertMatch({ok, BigBin}, cbt_file:pread_binary(Fd, Pos)). 100 | 101 | should_read_iolist(Fd) -> 102 | %% append_binary == append_iolist? 103 | %% Possible bug in pread_iolist or iolist() -> append_binary 104 | {ok, Pos, _} = cbt_file:append_binary(Fd, ["foo", $m, <<"bam">>]), 105 | {ok, IoList} = cbt_file:pread_iolist(Fd, Pos), 106 | ?_assertMatch(<<"foombam">>, iolist_to_binary(IoList)). 
107 | 108 | should_fsync(Fd) -> 109 | {"How does on test fsync?", ?_assertMatch(ok, cbt_file:sync(Fd))}. 110 | 111 | should_not_read_beyond_eof(_) -> 112 | {"No idea how to test reading beyond EOF", ?_assert(true)}. 113 | 114 | should_truncate(Fd) -> 115 | {ok, 0, _} = cbt_file:append_term(Fd, foo), 116 | {ok, Size} = cbt_file:bytes(Fd), 117 | BigBin = list_to_binary(lists:duplicate(100000, 0)), 118 | {ok, _, _} = cbt_file:append_binary(Fd, BigBin), 119 | ok = cbt_file:truncate(Fd, Size), 120 | ?_assertMatch({ok, foo}, cbt_file:pread_term(Fd, 0)). 121 | 122 | 123 | header_test_() -> 124 | { 125 | "File header read/write tests", 126 | [ 127 | ?foreach([ 128 | fun should_write_and_read_atom_header/1, 129 | fun should_write_and_read_tuple_header/1, 130 | fun should_write_and_read_second_header/1, 131 | fun should_truncate_second_header/1, 132 | fun should_produce_same_file_size_on_rewrite/1, 133 | fun should_save_headers_larger_than_block_size/1 134 | ]), 135 | should_recover_header_marker_corruption(), 136 | should_recover_header_size_corruption(), 137 | should_recover_header_crc32sig_corruption(), 138 | should_recover_header_data_corruption() 139 | ] 140 | }. 141 | 142 | 143 | should_write_and_read_atom_header(Fd) -> 144 | {ok, HeaderPos} = cbt_file:write_header(Fd, hello), 145 | ?_assertMatch({ok, hello, HeaderPos}, cbt_file:read_header(Fd)). 146 | 147 | should_write_and_read_tuple_header(Fd) -> 148 | {ok, _} = cbt_file:write_header(Fd, {<<"some_data">>, 32}), 149 | ?_assertMatch({ok, {<<"some_data">>, 32}, _}, cbt_file:read_header(Fd)). 150 | 151 | should_write_and_read_second_header(Fd) -> 152 | {ok, 0} = cbt_file:write_header(Fd, {<<"some_data">>, 32}), 153 | {ok, 4096} = cbt_file:write_header(Fd, [foo, <<"more">>]), 154 | ?_assertMatch({ok, [foo, <<"more">>], 4096}, cbt_file:read_header(Fd)). 

%% Truncating past the second header makes the first one current again.
should_truncate_second_header(Fd) ->
    {ok, _} = cbt_file:write_header(Fd, {<<"some_data">>, 32}),
    {ok, SizeAfterFirst} = cbt_file:bytes(Fd),
    {ok, _} = cbt_file:write_header(Fd, [foo, <<"more">>]),
    ok = cbt_file:truncate(Fd, SizeAfterFirst),
    ?_assertMatch({ok, {<<"some_data">>, 32}, 0}, cbt_file:read_header(Fd)).

%% Rewriting the same header after truncating back reproduces the same
%% file size.
should_produce_same_file_size_on_rewrite(Fd) ->
    {ok, _} = cbt_file:write_header(Fd, {<<"some_data">>, 32}),
    {ok, SizeAfterFirst} = cbt_file:bytes(Fd),
    {ok, _} = cbt_file:write_header(Fd, [foo, <<"more">>]),
    {ok, SizeAfterSecond} = cbt_file:bytes(Fd),
    ok = cbt_file:truncate(Fd, SizeAfterFirst),
    {ok, _} = cbt_file:write_header(Fd, [foo, <<"more">>]),
    ?_assertMatch({ok, SizeAfterSecond}, cbt_file:bytes(Fd)).

%% Headers larger than one 4096-byte block are stored and read back whole.
should_save_headers_larger_than_block_size(Fd) ->
    Header = erlang:make_tuple(5000, <<"CouchDB">>),
    {ok, _} = cbt_file:write_header(Fd, Header),
    {"COUCHDB-1319",
     ?_assertMatch({ok, Header, 0}, cbt_file:read_header(Fd))}.


%% Zeroing the newest header's marker byte must make read_header/1 skip it
%% and fall back to the previous intact header.
should_recover_header_marker_corruption() ->
    ?_assertMatch(
       ok,
       check_header_recovery(
         fun(Fd, RawFd, Expect, HeaderPos) ->
                 ?assertNotMatch(Expect, cbt_file:read_header(Fd)),
                 file:pwrite(RawFd, HeaderPos, <<0>>),
                 ?assertMatch(Expect, cbt_file:read_header(Fd))
         end)).

%% Same recovery check with the newest header's size field overwritten.
should_recover_header_size_corruption() ->
    ?_assertMatch(
       ok,
       check_header_recovery(
         fun(Fd, RawFd, Expect, HeaderPos) ->
                 ?assertNotMatch(Expect, cbt_file:read_header(Fd)),
                 %% +1 skips the 0x1 marker byte.
                 file:pwrite(RawFd, HeaderPos + 1, <<10/integer>>),
                 ?assertMatch(Expect, cbt_file:read_header(Fd))
         end)).

%% Overwriting the newest header's CRC32 signature must make read_header/1
%% fall back to the previous intact header.
should_recover_header_crc32sig_corruption() ->
    ?_assertMatch(
        ok,
        check_header_recovery(
            fun(Fd, RawFd, Expect, HeaderPos) ->
                ?assertNotMatch(Expect, cbt_file:read_header(Fd)),
                % +5 = +1 for 0x1 byte and +4 for term size.
                file:pwrite(RawFd, HeaderPos + 5, <<"F01034F88D320B22">>),
                ?assertMatch(Expect, cbt_file:read_header(Fd))
            end)
    ).

%% Overwriting the newest header's serialized term data must likewise fall
%% back to the previous intact header.
should_recover_header_data_corruption() ->
    ?_assertMatch(
        ok,
        check_header_recovery(
            fun(Fd, RawFd, Expect, HeaderPos) ->
                ?assertNotMatch(Expect, cbt_file:read_header(Fd)),
                % +9 = +1 for 0x1 byte, +4 for term size and +4 for CRC32 sig.
                % (fixed a stray '%' that had crept into this comment)
                file:pwrite(RawFd, HeaderPos + 9, <<"some data goes here!">>),
                ?assertMatch(Expect, cbt_file:read_header(Fd))
            end)
    ).


%% Fixture driver for the recovery tests: write a known-good header, pad
%% with random data, write a newer "bait" header, then let CheckFun corrupt
%% the newer header through the raw fd and assert read_header/1 falls back
%% to the known-good one. Returns ok on success.
check_header_recovery(CheckFun) ->
    Path = ?tempfile(),
    {ok, Fd} = cbt_file:open(Path, [create, overwrite]),
    {ok, RawFd} = file:open(Path, [read, write, raw, binary]),

    {ok, _} = write_random_data(Fd),
    ExpectHeader = {some_atom, <<"a binary">>, 756},
    {ok, ExpectHeaderPos} = cbt_file:write_header(Fd, ExpectHeader),

    {ok, HeaderPos} = write_random_data(Fd),
    {ok, _} = cbt_file:write_header(Fd, {2342, <<"corruption! greed!">>}),

    CheckFun(Fd, RawFd, {ok, ExpectHeader, ExpectHeaderPos}, HeaderPos),

    ok = file:close(RawFd),
    ok = cbt_file:close(Fd),
    ok.

%% Append 100-1100 random terms, then return the next block-aligned offset
%% (where a subsequently written header will start).
write_random_data(Fd) ->
    write_random_data(Fd, 100 + rand:uniform(1000)).

write_random_data(Fd, 0) ->
    {ok, Bytes} = cbt_file:bytes(Fd),
    {ok, (1 + Bytes div ?BLOCK_SIZE) * ?BLOCK_SIZE};
write_random_data(Fd, N) ->
    Choices = [foo, bar, <<"bizzingle">>, "bank", ["rough", stuff]],
    % BUGFIX: rand:uniform(4) + 1 yields 2..5, so the first choice (foo)
    % could never be selected; draw uniformly over the whole list instead.
    Term = lists:nth(rand:uniform(length(Choices)), Choices),
    {ok, _, _} = cbt_file:append_term(Fd, Term),
    write_random_data(Fd, N - 1).
--------------------------------------------------------------------------------
/test/cbt_ramfile_tests.erl:
--------------------------------------------------------------------------------
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(cbt_ramfile_tests).

-include("cbt_tests.hrl").

%% Block size used for header alignment (mirrors cbt_file_tests).
-define(BLOCK_SIZE, 4096).
%% Fixture wrappers: run one test (or a list of tests) against a fresh
%% ram file opened by setup/0 and released by teardown/1.
-define(setup(F), {setup, fun setup/0, fun teardown/1, F}).
-define(foreach(Fs), {foreach, fun setup/0, fun teardown/1, Fs}).


%% Open a fresh ram file for each test.
setup() ->
    {ok, File} = cbt_ramfile:open(?tempfile()),
    File.

%% Release the ram file after each test.
teardown(File) ->
    ok = cbt_ramfile:close(File).


%% Generator: open/close sanity checks.
open_close_test_() ->
    Tests = [?setup(fun should_return_pid_on_file_open/1),
             should_close_file_properly(),
             ?setup(fun should_create_empty_new_files/1)],
    {"Test for proper file open and close", Tests}.


%% cbt_ramfile:open/1 hands back a process id.
should_return_pid_on_file_open(File) ->
    ?_assert(is_pid(File)).

%% Opening and closing a ram file succeeds without crashing.
should_close_file_properly() ->
    {ok, File} = cbt_ramfile:open(?tempfile()),
    ok = cbt_ramfile:close(File),
    ?_assert(true).

%% A freshly opened ram file is empty.
should_create_empty_new_files(File) ->
    ?_assertMatch({ok, 0}, cbt_ramfile:bytes(File)).


%% Generator: read/write round-trip tests, each against a fresh ram file.
read_write_test_() ->
    {
        "Common file read/write tests",
        ?foreach([
            fun should_increase_file_size_on_write/1,
            % BUGFIX: this test was defined below but missing from the
            % list, so it never ran and the function was dead code
            % (cf. the same list in cbt_file_tests).
            fun should_return_current_file_size_on_write/1,
            fun should_write_and_read_term/1,
            fun should_write_and_read_binary/1,
            fun should_write_and_read_large_binary/1,
            fun should_read_iolist/1,
            fun should_fsync/1,
            fun should_not_read_beyond_eof/1,
            fun should_truncate/1
        ])
    }.


%% Appending a term to an empty ram file makes its size non-zero.
should_increase_file_size_on_write(Fd) ->
    {ok, 0, _} = cbt_ramfile:append_term(Fd, foo),
    {ok, Size} = cbt_ramfile:bytes(Fd),
    ?_assert(Size > 0).

%% The position returned by a second append equals the current file size.
should_return_current_file_size_on_write(Fd) ->
    {ok, 0, _} = cbt_ramfile:append_term(Fd, foo),
    {ok, Size} = cbt_ramfile:bytes(Fd),
    ?_assertMatch({ok, Size, _}, cbt_ramfile:append_term(Fd, bar)).

%% Terms survive an append/pread round trip.
should_write_and_read_term(Fd) ->
    {ok, Pos, _} = cbt_ramfile:append_term(Fd, foo),
    ?_assertMatch({ok, foo}, cbt_ramfile:pread_term(Fd, Pos)).

%% Binaries survive an append/pread round trip.
should_write_and_read_binary(Fd) ->
    {ok, Pos, _} = cbt_ramfile:append_binary(Fd, <<"fancy!">>),
    ?_assertMatch({ok, <<"fancy!">>}, cbt_ramfile:pread_binary(Fd, Pos)).

%% Large (multi-block) binaries also round-trip intact.
should_write_and_read_large_binary(Fd) ->
    BigBin = list_to_binary(lists:duplicate(100000, 0)),
    {ok, Pos, _} = cbt_ramfile:append_binary(Fd, BigBin),
    ?_assertMatch({ok, BigBin}, cbt_ramfile:pread_binary(Fd, Pos)).

%% An iolist written with append_binary reads back as its flattened bytes.
should_read_iolist(Fd) ->
    %% append_binary == append_iolist?
    %% Possible bug in pread_iolist or iolist() -> append_binary
    {ok, Pos, _} = cbt_ramfile:append_binary(Fd, ["foo", $m, <<"bam">>]),
    {ok, IoList} = cbt_ramfile:pread_iolist(Fd, Pos),
    ?_assertMatch(<<"foombam">>, iolist_to_binary(IoList)).

%% sync/1 returns ok; an actual fsync is not observable from here.
should_fsync(Fd) ->
    %% BUGFIX: typo in the test title ("on" -> "one").
    {"How does one test fsync?", ?_assertMatch(ok, cbt_ramfile:sync(Fd))}.

should_not_read_beyond_eof(_) ->
    {"No idea how to test reading beyond EOF", ?_assert(true)}.

%% Truncating back to the recorded size drops later data but keeps the
%% first appended term readable.
should_truncate(File) ->
    {ok, 0, _} = cbt_ramfile:append_term(File, foo),
    {ok, Bytes} = cbt_ramfile:bytes(File),
    Filler = list_to_binary(lists:duplicate(100000, 0)),
    {ok, _, _} = cbt_ramfile:append_binary(File, Filler),
    ok = cbt_ramfile:truncate(File, Bytes),
    ?_assertMatch({ok, foo}, cbt_ramfile:pread_term(File, 0)).


%% Generator: header read/write tests against a fresh ram file.
header_test_() ->
    Tests = ?foreach([fun should_write_and_read_atom_header/1,
                      fun should_write_and_read_tuple_header/1,
                      fun should_write_and_read_second_header/1,
                      fun should_truncate_second_header/1,
                      fun should_produce_same_file_size_on_rewrite/1,
                      fun should_save_headers_larger_than_block_size/1]),
    {"File header read/write tests", [Tests]}.


%% An atom header round-trips and read_header/1 reports its position.
should_write_and_read_atom_header(File) ->
    {ok, HeaderPos} = cbt_ramfile:write_header(File, hello),
    ?_assertMatch({ok, hello, HeaderPos}, cbt_ramfile:read_header(File)).

%% A tuple header round-trips.
should_write_and_read_tuple_header(File) ->
    {ok, _} = cbt_ramfile:write_header(File, {<<"some_data">>, 32}),
    ?_assertMatch({ok, {<<"some_data">>, 32}, _},
                  cbt_ramfile:read_header(File)).

%% A second header lands on the next 4096-byte block boundary and becomes
%% the one read_header/1 returns.
should_write_and_read_second_header(File) ->
    {ok, 0} = cbt_ramfile:write_header(File, {<<"some_data">>, 32}),
    {ok, 4096} = cbt_ramfile:write_header(File, [foo, <<"more">>]),
    ?_assertMatch({ok, [foo, <<"more">>], 4096},
                  cbt_ramfile:read_header(File)).

%% Truncating past the second header makes the first one current again.
should_truncate_second_header(File) ->
    {ok, _} = cbt_ramfile:write_header(File, {<<"some_data">>, 32}),
    {ok, Bytes} = cbt_ramfile:bytes(File),
    {ok, _} = cbt_ramfile:write_header(File, [foo, <<"more">>]),
    ok = cbt_ramfile:truncate(File, Bytes),
    ?_assertMatch({ok, {<<"some_data">>, 32}, 0},
                  cbt_ramfile:read_header(File)).

%% Rewriting the same header after truncating back reproduces the same
%% file size.
should_produce_same_file_size_on_rewrite(Fd) ->
    {ok, _} = cbt_ramfile:write_header(Fd, {<<"some_data">>, 32}),
    {ok, Size1} = cbt_ramfile:bytes(Fd),
    {ok, _} = cbt_ramfile:write_header(Fd, [foo, <<"more">>]),
    {ok, Size2} = cbt_ramfile:bytes(Fd),
    ok = cbt_ramfile:truncate(Fd, Size1),
    {ok, _} = cbt_ramfile:write_header(Fd, [foo, <<"more">>]),
    ?_assertMatch({ok, Size2}, cbt_ramfile:bytes(Fd)).

%% Headers larger than one 4096-byte block are stored and read back whole.
should_save_headers_larger_than_block_size(Fd) ->
    Header = erlang:make_tuple(5000, <<"CouchDB">>),
    {ok, _} = cbt_ramfile:write_header(Fd, Header),
    {"COUCHDB-1319", ?_assertMatch({ok, Header, 0},
                                   cbt_ramfile:read_header(Fd))}.


%% NOTE(review): write_random_data/1,2 are not referenced anywhere in this
%% module (the header-recovery tests that use them exist only in
%% cbt_file_tests); kept for parity with that module.

%% Append 100-1100 random terms, then return the next block-aligned offset.
write_random_data(Fd) ->
    write_random_data(Fd, 100 + rand:uniform(1000)).

write_random_data(Fd, 0) ->
    {ok, Bytes} = cbt_ramfile:bytes(Fd),
    {ok, (1 + Bytes div ?BLOCK_SIZE) * ?BLOCK_SIZE};
write_random_data(Fd, N) ->
    Choices = [foo, bar, <<"bizzingle">>, "bank", ["rough", stuff]],
    % BUGFIX: rand:uniform(4) + 1 yields 2..5, so the first choice (foo)
    % could never be selected; draw uniformly over the whole list instead.
    Term = lists:nth(rand:uniform(length(Choices)), Choices),
    {ok, _, _} = cbt_ramfile:append_term(Fd, Term),
    write_random_data(Fd, N - 1).
--------------------------------------------------------------------------------
/test/cbt_stream_tests.erl:
--------------------------------------------------------------------------------
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the
% License for the specific language governing permissions and limitations under
% the License.

-module(cbt_stream_tests).

-include("cbt_tests.hrl").


%% Fixture: a scratch file plus a stream opened on it.
setup() ->
    {ok, Fd} = cbt_file:open(?tempfile(), [create, overwrite]),
    {ok, Stream} = cbt_stream:open(Fd),
    {Fd, Stream}.

%% Only the backing file is closed here; tests that need the stream's
%% result call cbt_stream:close/1 themselves.
teardown({Fd, _Stream}) ->
    ok = cbt_file:close(Fd).


%% Generator: stream write/close behavior against a fresh file fixture.
stream_test_() ->
    Tests = [fun should_write/1,
             fun should_write_consecutive/1,
             fun should_write_empty_binary/1,
             fun should_return_file_pointers_on_close/1,
             fun should_return_stream_size_on_close/1,
             fun should_return_valid_pointers/1,
             fun should_recall_last_pointer_position/1,
             fun should_stream_more_with_4K_chunk_size/1],
    {"CBT stream tests", {foreach, fun setup/0, fun teardown/1, Tests}}.


%% A plain binary write returns ok.
should_write({_Fd, Stream}) ->
    ?_assertEqual(ok, cbt_stream:write(Stream, <<"food">>)).

%% Consecutive writes both succeed.
should_write_consecutive({_Fd, Stream}) ->
    cbt_stream:write(Stream, <<"food">>),
    ?_assertEqual(ok, cbt_stream:write(Stream, <<"foob">>)).

%% Writing the empty binary is accepted.
should_write_empty_binary({_Fd, Stream}) ->
    ?_assertEqual(ok, cbt_stream:write(Stream, <<>>)).

%% close/1 reports the {Pos, Len} pointers of the written chunks.
should_return_file_pointers_on_close({_Fd, Stream}) ->
    cbt_stream:write(Stream, <<"foodfoob">>),
    {Ptrs, _, _, _, _} = cbt_stream:close(Stream),
    ?_assertEqual([{0, 8}], Ptrs).

%% close/1 reports the total number of bytes streamed.
should_return_stream_size_on_close({_Fd, Stream}) ->
    cbt_stream:write(Stream, <<"foodfoob">>),
    {_, Length, _, _, _} = cbt_stream:close(Stream),
    ?_assertEqual(8, Length).

%% The pointers returned by close/1 read back exactly the streamed bytes.
should_return_valid_pointers({Fd, Stream}) ->
    cbt_stream:write(Stream, <<"foodfoob">>),
    {Ptrs, _, _, _, _} = cbt_stream:close(Stream),
    ?_assertEqual(<<"foodfoob">>, read_all(Fd, Ptrs)).

%% A second stream on the same file starts writing at the current EOF and
%% close/1 reports that offset in its pointers.
should_recall_last_pointer_position({Fd, Stream}) ->
    cbt_stream:write(Stream, <<"foodfoob">>),
    {_, _, _, _, _} = cbt_stream:close(Stream),
    {ok, ExpPtr} = cbt_file:bytes(Fd),
    {ok, Stream2} = cbt_stream:open(Fd),
    ZeroBits = <<0:(8 * 10)>>,
    OneBits = <<1:(8 * 10)>>,
    ok = cbt_stream:write(Stream2, OneBits),
    ok = cbt_stream:write(Stream2, ZeroBits),
    {Ptrs, 20, _, _, _} = cbt_stream:close(Stream2),
    [{ExpPtr, 20}] = Ptrs,
    AllBits = iolist_to_binary([OneBits, ZeroBits]),
    ?_assertEqual(AllBits, read_all(Fd, Ptrs)).

%% With a 4K buffer, 1024 x 5-byte writes are split into two chunks whose
%% pointers and total length (5120) are reported on close.
should_stream_more_with_4K_chunk_size({Fd, _}) ->
    {ok, Stream} = cbt_stream:open(Fd, [{buffer_size, 4096}]),
    % FIX(idiom): the original lists:foldl/3 built an accumulator that was
    % never used; lists:foreach/2 expresses the intent directly.
    lists:foreach(
        fun(_) -> cbt_stream:write(Stream, <<"a1b2c">>) end,
        lists:seq(1, 1024)),
    ?_assertMatch({[{0, 4100}, {4106, 1020}], 5120, _, _, _},
                  cbt_stream:close(Stream)).


%% Read every {Pos, Len} chunk back from Fd and return the joined binary.
read_all(Fd, PosList) ->
    Data = cbt_stream:foldl(Fd, PosList, fun(Bin, Acc) -> [Bin, Acc] end, []),
    iolist_to_binary(Data).
--------------------------------------------------------------------------------
/test/cbt_tests.hrl:
--------------------------------------------------------------------------------
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-include_lib("eunit/include/eunit.hrl").

%% Absolute path of the build directory (parent of the dir holding the
%% including module's beam file).
-define(BUILDDIR,
        filename:absname(
          filename:join([filename:dirname(code:which(?MODULE)), ".."]))).

%% Scratch directory used by the tests for temporary files.
-define(TEMPDIR, filename:join([?BUILDDIR, "test", "temp"])).

%% Fun returning a unique temp-file path derived from the node name and
%% the current timestamp.
-define(tempfile,
        fun() ->
                {Mega, Sec, Micro} = erlang:timestamp(),
                Node = node(),
                Name = lists:flatten(
                         io_lib:format("~p-~p.~p.~p",
                                       [Node, Mega, Sec, Micro])),
                filename:join([?TEMPDIR, Name])
        end).

%% Fun returning a unique eunit test database name as a binary.
-define(tempdb,
        fun() ->
                Parts = tuple_to_list(erlang:timestamp()),
                Prefix = "eunit-test-db",
                Suffix = lists:concat([integer_to_list(P) || P <- Parts]),
                list_to_binary(Prefix ++ "-" ++ Suffix)
        end).

--------------------------------------------------------------------------------