├── .github └── workflows │ └── test.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── bucket.go ├── bucket_test.go ├── compaction.go ├── compaction_test.go ├── control.sh ├── db.go ├── db_test.go ├── docs └── design.md ├── go.mod ├── go.sum ├── load.go ├── load_test.go ├── options.go ├── pkg ├── binaryx │ ├── binary.go │ └── binary_test.go ├── buffer │ └── buffer.go ├── codec │ ├── codec.go │ └── codec_test.go ├── fsx │ └── fs.go ├── logx │ └── logger.go ├── rescue │ └── rescue.go ├── slice │ ├── slice.go │ └── slice_test.go ├── uint64set │ ├── bloomfilter.go │ ├── bloomfilter_test.go │ └── uint64set.go └── wait │ └── wait.go ├── segment.go └── status.go /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | pull_request: 8 | push: 9 | branches: 10 | - 'master' 11 | tags: 12 | - 'v*' 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - name: Set up Go 21 | uses: actions/setup-go@v3 22 | with: 23 | go-version: '^1.19' 24 | 25 | - name: Run coverage 26 | run: go test -race -coverprofile=coverage.txt -covermode=atomic 27 | 28 | - name: Upload coverage to Codecov 29 | uses: codecov/codecov-action@v3 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # IDE 15 | .idea/ 16 | .vscode/ 17 | .DS_Store 18 | 19 | grogudb 20 | coverage.txt 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # REQUIRED COMPONENTS 2 | # https://github.com/mvdan/gofumpt => $ go install mvdan.cc/gofumpt@latest 3 | # https://github.com/incu6us/goimports-reviser => $ go install github.com/incu6us/goimports-reviser/v3@v3.1.1 4 | # https://github.com/golangci/golangci-lint => $ go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.50.1 5 | 6 | GO ?= go 7 | SHELL := bash 8 | 9 | .PHONY: help 10 | help: 11 | @echo "Make Targets: " 12 | @echo " lint: Lint Go code" 13 | @echo " test: Run unit tests" 14 | 15 | .PHONY: lint 16 | lint: 17 | diff -u <(echo -n) <(gofumpt -w .) 18 | diff -u <(echo -n) <(goimports-reviser -project-name "github.com/chenjiandongx/grogudb" ./...) 19 | 20 | .PHONY: test 21 | test: 22 | ./control.sh test 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # grogudb 2 | 3 | [![Docs](https://godoc.org/github.com/chenjiandongx/grogudb?status.svg)](https://pkg.go.dev/github.com/chenjiandongx/grogudb) 4 | [![Build Status](https://github.com/chenjiandongx/grogudb/actions/workflows/test.yaml/badge.svg?branch=master)](https://github.com/chenjiandongx/grogudb/actions) 5 | [![Go Report Card](https://goreportcard.com/badge/chenjiandongx/grogudb "Go Report Card")](https://goreportcard.com/report/chenjiandongx/grogudb) 6 | [![Codecov](https://codecov.io/gh/chenjiandongx/grogudb/branch/master/graph/badge.svg)](https://codecov.io/gh/chenjiandongx/grogudb) 7 | 8 | grogudb is a persistent KV database designed for high-frequency Put/Has/Del/Range operations. 9 | 10 | image 11 | 12 | ## Features 13 | 14 | * Pure Go implementation, embeddable into your program. 15 | * Efficient Put/Has/Del/Range operations. 16 | * Thread-safe. 17 | * Stores datasets larger than physical memory. 18 | * Simple API. 19 | 20 | For design details, see the [Design Documentation](./docs/design.md) 21 | 22 | ## Usage 23 | 24 | ### DB operations 25 | 26 | Open a DB 27 | 28 | ```golang 29 | package main 30 | 31 | import "github.com/chenjiandongx/grogudb" 32 | 33 | func main() { 34 | db, err := grogudb.Open("/path/to/db", nil) 35 | if err != nil { 36 | // handle err 37 | } 38 | defer db.Close() 39 | 40 | // db.Gc() runs Gc manually; normally there is no need to call it yourself 41 | // db.Compact() runs Compact manually; normally there is no need to call it yourself 42 | } 43 | ``` 44 | 45 | Set the logger 46 | 47 | ```golang 48 | package main 49 | 50 | import "github.com/chenjiandongx/grogudb/pkg/logx" 51 | 52 | func main() { 53 | // Logger interface definition 54 | // type Logger interface { 55 | // Infof(format string, v ...interface{}) 56 | // Errorf(format string, v ...interface{}) 57 | // } 58 | 59 | // or pass nil to disable logging entirely 60 | logx.SetLogger(nil) 61 | } 62 | ``` 63 | 64 | ### Bucket write operations 65 | 66 | ```golang 67 | bucket := db.GetOrCreateBucket("bucket0") 68 | 69 | // Put adds a Key/Value record 70 | if err := bucket.Put([]byte("key1"), []byte("val1")); err != nil { 71 | // handle err 72 | } 73 | 74 | // PutIf sets Key/Value only when the Key does not exist; it is a no-op if the Key already exists 75 | if err := bucket.PutIf([]byte("key1"), []byte("val1")); err != nil { 76 | // handle err 77 | } 78 | 79 | if err := bucket.Del([]byte("key1")); err != nil { 80 | // handle err 81 | } 82 | 83 | // bucket.Clear() // removes all keys in the Bucket 84 | ``` 85 | 86 | ### Bucket read operations 87 | 88 | ```golang 89 | bucket := db.GetOrCreateBucket("bucket0") 90 | 91 | 92 | // Has reports whether the Key exists 93 | if bucket.Has([]byte("key1")) { 94 | // ...
95 | } 96 | 97 | // Get returns the Value for the given Key 98 | // 99 | // Data returned by Get must not be modified in place; if you need to modify it, work on a .Copy() of the data 100 | // Get is a `relatively expensive` operation; a lookup falls into 3 cases 101 | // 1. the key does not exist: return immediately, no IO 102 | // 2. the key exists and is hit in the memory segment: no IO 103 | // 3. the key exists but misses the memory segment: fall back to searching the disk segments 104 | // since keys are not sorted, every block has to be scanned in order until the key is found, which causes read amplification (e.g. scanning a 2MB datablock just to fetch 10B of data) 105 | // the disk segment search already takes measures to avoid IO where possible, such as checking key existence up front, bloomfilter-accelerated filtering... 106 | if b, err := bucket.Get([]byte("key1")); err != nil { 107 | // ... 108 | } 109 | 110 | // Range iterates over every Key and calls fn on it 111 | // 112 | // Data returned by Range must not be modified in place; if you need to modify it, work on a .Copy() of the data 113 | // Do not call other Bucket APIs inside Range, to avoid deadlocks 114 | if err := bucket.Range(func(key, val Bytes) { 115 | // handle key/value 116 | }); err != nil { /* handle err */ } 117 | 118 | // FastRange copies the memory segment elements and then iterates over every Key, calling fn on it 119 | // 120 | // This avoids holding the lock for long periods and blocking writes, at the cost of some extra memory 121 | // Data returned by FastRange must not be modified in place; if you need to modify it, work on a .Copy() of the data 122 | // Do not call other Bucket APIs inside FastRange, to avoid deadlocks 123 | if err := bucket.FastRange(func(key, val Bytes) { 124 | // handle key/value 125 | }); err != nil { /* handle err */ } 126 | 127 | // bucket.Count() // Count returns the number of Keys in the Bucket 128 | ``` 129 | 130 | ## Benchmark 131 | 132 | grogudb is not designed for the `Get` operation, so no extreme benchmark is run for it (see the design doc for details). 133 | 134 | * Benchmark machine: **2019 MacBook Pro 12C/16G**. 135 | * Benchmarked DBs: [grogudb](https://github.com/chenjiandongx/grogudb), [leveldb](https://github.com/syndtr/goleveldb) and [badger](https://github.com/dgraph-io/badger). 136 | * Benchmark project: [chenjiandongx/grogudb-benchmark](https://github.com/chenjiandongx/grogudb-benchmark) 137 | 138 | **Except for the Get API, nearly all operations outperform badger/leveldb.** 139 | 140 | Iter: 10k, Bucket: 100 => 1M key 141 | 142 | ```docs 143 | Storage: grogudb Op: PutUnique Elapsed: 1.587680726s Ops: 629849.555785/s 144 | Storage: leveldb Op: PutUnique Elapsed: 3.207660071s Ops: 311753.732586/s 145 | Storage: badger Op: PutUnique Elapsed: 3.713279852s Ops: 269303.699117/s 146 | Storage: grogudb Op: PutDuplicate Elapsed: 809.645321ms Ops: 1235108.724849/s 147 | Storage: leveldb Op: PutDuplicate Elapsed: 3.344680804s Ops: 298982.192502/s 148 | Storage: badger Op: PutDuplicate Elapsed: 3.809289718s Ops: 262516.131360/s 149 | Storage: grogudb Op: PutIf Elapsed: 179.746041ms Ops: 5563404.870764/s 150 | Storage: leveldb Op: PutIf Elapsed: 512.160806ms Ops: 1952511.766392/s 151 | Storage: badger Op: PutIf Elapsed: 1.063730519s Ops: 940087.721597/s 152 | Storage: grogudb Op: Has Elapsed: 79.718185ms Ops: 12544189.258699/s 153 | Storage: leveldb Op: Has Elapsed: 1.188825549s Ops: 841166.309759/s 154 | Storage: badger Op: Has Elapsed: 1.443558895s Ops: 692732.387618/s 155 | Storage: grogudb Op: Del Elapsed: 1.25951208s Ops: 793958.244529/s 156 | Storage: leveldb Op: Del Elapsed: 3.471029382s Ops: 288098.972940/s 157 | Storage: badger Op: Del Elapsed: 4.524956978s Ops: 220996.576291/s 158 | Storage: grogudb Op: Range Elapsed: 81.139301ms Ops: 12.324484/s 159 | Storage: leveldb Op: Range Elapsed: 71.821588ms Ops: 13.923390/s 160 | Storage: badger Op: Range Elapsed: 295.666737ms Ops: 3.382186/s 161 | Storage: grogudb Op: Get Elapsed: 26.561270284s Ops: 37648.801782/s 162 | Storage: leveldb Op: Get Elapsed: 1.080395935s Ops: 925586.599879/s 163 | Storage: badger Op: Get Elapsed: 1.423728937s Ops: 702380.891483/s 164 | ``` 165 | 166 | Iter: 100k, Bucket: 100 => 10M key (without Get) 167 | 168 | ```docs 169 | Storage: grogudb Op: PutUnique Elapsed: 16.423032579s Ops: 608900.941522/s 170 | Storage: leveldb Op: PutUnique Elapsed: 51.516953146s Ops: 194110.858452/s 171 | Storage: badger Op: PutUnique Elapsed:
42.421363992s Ops: 235730.279721/s 172 | Storage: grogudb Op: PutDuplicate Elapsed: 8.815478924s Ops: 1134368.318070/s 173 | Storage: leveldb Op: PutDuplicate Elapsed: 39.615313747s Ops: 252427.636037/s 174 | Storage: badger Op: PutDuplicate Elapsed: 47.31107471s Ops: 211367.001517/s 175 | Storage: grogudb Op: PutIf Elapsed: 2.299923889s Ops: 4347969.968844/s 176 | Storage: leveldb Op: PutIf Elapsed: 5.870490731s Ops: 1703435.105892/s 177 | Storage: badger Op: PutIf Elapsed: 15.958825217s Ops: 626612.539709/s 178 | Storage: grogudb Op: Has Elapsed: 850.056456ms Ops: 11763924.536325/s 179 | Storage: leveldb Op: Has Elapsed: 19.188154981s Ops: 521154.848390/s 180 | Storage: badger Op: Has Elapsed: 22.721393642s Ops: 440113.848541/s 181 | Storage: grogudb Op: Del Elapsed: 12.924122561s Ops: 773746.918044/s 182 | Storage: leveldb Op: Del Elapsed: 44.487984603s Ops: 224779.793673/s 183 | Storage: badger Op: Del Elapsed: 44.315291044s Ops: 225655.744652/s 184 | Storage: grogudb Op: Range Elapsed: 524.850653ms Ops: 1.905304/s 185 | Storage: leveldb Op: Range Elapsed: 1.177148523s Ops: 0.849510/s 186 | Storage: badger Op: Range Elapsed: 3.15658723s Ops: 0.316798/s 187 | ``` 188 | 189 | ## Contribution 190 | 191 | **PRs always welcome.** 192 | 193 | Developers interested in this project are welcome to join the development and discussion. 194 | 195 | ## License 196 | 197 | Apache License v2 [©chenjiandongx](https://github.com/chenjiandongx) 198 | -------------------------------------------------------------------------------- /bucket.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License.
13 | 14 | package grogudb 15 | 16 | import ( 17 | "errors" 18 | 19 | "github.com/chenjiandongx/grogudb/pkg/codec" 20 | "github.com/chenjiandongx/grogudb/pkg/uint64set" 21 | ) 22 | 23 | var ( 24 | ErrEmtpyRecordKey = errors.New("grogudb/bucket: empty record key") 25 | ErrEmtpyRecordValue = errors.New("grogudb/bucket: empty record value") 26 | ) 27 | 28 | func validateRecordKV(k, v []byte) error { 29 | if len(k) == 0 { 30 | return ErrEmtpyRecordKey 31 | } 32 | if len(v) == 0 { 33 | return ErrEmtpyRecordValue 34 | } 35 | return nil 36 | } 37 | 38 | func validateRecordKey(k []byte) error { 39 | if len(k) == 0 { 40 | return ErrEmtpyRecordKey 41 | } 42 | return nil 43 | } 44 | 45 | // Bucket 是为了对存储 Key/Value 做 shard 相当于数据库中的 Table 46 | // Bucket 中包含了 memorySegment 处理实时写入 同时反向持有 *DB 实例的 diskSegment 列表 47 | type Bucket struct { 48 | name string 49 | keys *uint64set.Set 50 | head *memorySegment 51 | statistics *statistics 52 | getIterReleaser func() *iterReleaser 53 | } 54 | 55 | // Count 返回 Bucket Keys 数量 56 | func (b *Bucket) Count() int { 57 | return b.keys.Count() 58 | } 59 | 60 | // Clear 清空 Bucket 所有 key 61 | func (b *Bucket) Clear() { 62 | b.statistics.clear.Add(1) 63 | b.keys.Clear() 64 | b.head.Clear() 65 | } 66 | 67 | // Has 判断 Key 是否存在 68 | func (b *Bucket) Has(key []byte) bool { 69 | if len(key) == 0 { 70 | return false 71 | } 72 | 73 | return b.keys.Has(codec.HashKey(key)) 74 | } 75 | 76 | // PutIf 当 Key 不存在的时候设置 Key/Value Key 存在时不做操作 77 | func (b *Bucket) PutIf(key, val []byte) error { 78 | if err := validateRecordKV(key, val); err != nil { 79 | return err 80 | } 81 | b.head.PutIf(key, val) 82 | return nil 83 | } 84 | 85 | // Put 新增 Key/Value 记录 86 | func (b *Bucket) Put(key, val []byte) error { 87 | if err := validateRecordKV(key, val); err != nil { 88 | return err 89 | } 90 | 91 | b.statistics.put.Add(1) 92 | b.head.Put(key, val) 93 | return nil 94 | } 95 | 96 | // Del 删除指定 Key 97 | func (b *Bucket) Del(key []byte) error { 98 | if err := validateRecordKey(key); err != nil { 99 | return err 100 | } 101 | 102 | b.statistics.del.Add(1) 103 | b.head.Del(key) 104 | return nil 105 | } 106 | 107 | // Get 返回指定 Key 对应的 Value 108 | // 109 | // Get 返回的数据不允许直接修改,如果有修改需求 请使用 .Copy() 复制后的数据 110 | // Get 是一个开销`相对高昂`的操作,查询 key 有 3 种情况 111 | // 1. key 不存在,直接返回,无 IO 112 | // 2. key 存在,在 memory segment 中检索,key 命中,无 IO 113 | // 3. key 存在,在 memory segment 未命中,退避到 disk segment 检索 114 | // 由于 key 是没有排序的,因此必须按序扫描所有的 block 直至找到,此时会有读放大的情况(比如为了查找 10B 数据而扫描了 2MB 的 datablock) 115 | // 同时 disk segment 的搜索已经做了一些措施来尽量避免陷入 IO,如提前判断 key 是否存在,bloomfilter 加速过滤... 116 | // 117 | // Note: 118 | // 1. 
尝试过使用 LRU 来缓存热区的部分 datablock,但在随机查询的时候,这种方式频繁的换入换出反而带来额外的开销 119 | // 因为几 MB 的 buffer 区几乎每次都 miss 了,难以满足 GB+ 的数据的查询需求 120 | func (b *Bucket) Get(key []byte) (Bytes, error) { 121 | if err := validateRecordKey(key); err != nil { 122 | return nil, err 123 | } 124 | 125 | if !b.keys.Has(codec.HashKey(key)) { 126 | return nil, nil 127 | } 128 | 129 | var val []byte 130 | var err error 131 | 132 | ir := b.getIterReleaser() // 锁定 disk segments 快照 133 | defer ir.release() 134 | 135 | // 优先从 memory segment 检索 136 | val, ok := b.head.Get(key) 137 | if ok { 138 | return val, nil 139 | } 140 | 141 | ir.iter(func(seg *diskSegment) bool { 142 | val, err = seg.Get(b.name, key) 143 | if err != nil { 144 | return true 145 | } 146 | 147 | // val 不为空代表已经检索到 需要退出循环 148 | return val != nil 149 | }) 150 | 151 | return val, err 152 | } 153 | 154 | // Range 遍历每个 Key 并执行 fn 方法 155 | // 156 | // Range 返回的数据不允许直接修改 如果有修改需求 请使用 .Copy() 复制后的数据 157 | // 请勿在 Range 内调用 Bucket 其他 API 避免死锁 158 | func (b *Bucket) Range(fn func(key, val Bytes)) error { 159 | return b.bucketRange(false, fn) 160 | } 161 | 162 | // FastRange 拷贝 memory segment 元素并遍历每个 Key 并执行 fn 方法 163 | // 164 | // 避免长期占用锁影响写入 但同时会带来一定的内存开销 165 | // Range 返回的数据不允许直接修改 如果有修改需求 请使用 .Copy() 复制后的数据 166 | // 请勿在 Range 内调用 Bucket 其他 API 避免死锁 167 | // 168 | // TODO(optimize): 考虑使用 tempfile 代替内存拷贝 牺牲 IO 来减少内存使用 169 | func (b *Bucket) FastRange(fn func(key, val Bytes)) error { 170 | return b.bucketRange(true, fn) 171 | } 172 | 173 | func (b *Bucket) bucketRange(ifCopy bool, fn func(key, val Bytes)) error { 174 | visited := uint64set.NewSet() 175 | deleted := uint64set.NewSet() 176 | pass := func(flag codec.Flag, key uint64) bool { 177 | // 记录删除的 key 178 | if flag == codec.FlagDel && !deleted.Has(key) { 179 | deleted.Insert(key) 180 | } 181 | // 如果 key 被删除 无须再访问 182 | if deleted.Has(key) { 183 | return false 184 | } 185 | 186 | if visited.Has(key) { 187 | return false 188 | } 189 | visited.Insert(key) 190 | return true 191 | } 192 | 193 | // 优先遍历 memory segment 然后再是 disk segment 194 | // 遍历是取当刻快照 195 | var err error 196 | 197 | ir := b.getIterReleaser() // 锁定 disk segments 快照 198 | defer ir.release() 199 | 200 | b.head.Range(ifCopy, fn, pass) 201 | ir.iter(func(seg *diskSegment) bool { 202 | err = seg.Range(b.name, func(flag codec.Flag, key, val []byte, n int) bool { 203 | fn(key, val) 204 | return false 205 | }, pass) 206 | 207 | // 错误则退出后续 disk segment 迭代 208 | return err != nil 209 | }) 210 | 211 | return err 212 | } 213 | -------------------------------------------------------------------------------- /bucket_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package grogudb 15 | 16 | import ( 17 | "fmt" 18 | "os" 19 | "sync" 20 | "sync/atomic" 21 | "testing" 22 | "time" 23 | 24 | "github.com/chenjiandongx/logger" 25 | "github.com/stretchr/testify/require" 26 | 27 | "github.com/chenjiandongx/grogudb/pkg/logx" 28 | ) 29 | 30 | const ( 31 | KB = 1024 32 | MB = 1024 * KB 33 | ) 34 | 35 | func removeDir(dir string) { 36 | if err := os.RemoveAll(dir); err != nil { 37 | panic(err) 38 | } 39 | } 40 | 41 | func makeTmpDir() string { 42 | dir, err := os.MkdirTemp("", "grogudb-test") 43 | // fmt.Println("mkdir:", dir) 44 | if err != nil { 45 | panic(err) 46 | } 47 | return dir 48 | } 49 | 50 | func keyNum(i int) []byte { 51 | return []byte(fmt.Sprintf("key%d", i)) 52 | } 53 | 54 | func valNum(i int) []byte { 55 | return []byte(fmt.Sprintf("val%d", i)) 56 | } 57 | 58 | func bucketNum(i int) string { 59 | return fmt.Sprintf("bucket%d", i) 60 | } 61 | 62 | func runGrogudbTest(t require.TestingT, opts *Options, open, reOpen func(t require.TestingT, db *DB)) { 63 | l := logger.New(logger.Options{ 64 | Stdout: true, 65 | ConsoleMode: true, 66 | Level: logger.ErrorLevel, 67 | }) 68 | logx.SetLogger(l) 69 | 70 | dir := makeTmpDir() 71 | defer removeDir(dir) 72 | 73 | db, err := Open(dir, opts) 74 | require.NoError(t, err) 75 | open(t, db) 76 | require.NoError(t, db.Close()) 77 | 78 | if reOpen != nil { 79 | db, err = Open(dir, opts) 80 | require.NoError(t, err) 81 | reOpen(t, db) 82 | require.NoError(t, db.Close()) 83 | } 84 | } 85 | 86 | func runParallel(n int, f func(int)) { 87 | wg := sync.WaitGroup{} 88 | for i := 0; i < n; i++ { 89 | wg.Add(1) 90 | j := i 91 | go func() { 92 | defer wg.Done() 93 | f(j) 94 | }() 95 | } 96 | wg.Wait() 97 | } 98 | 99 | func bucketPut(n int, bucket *Bucket) { 100 | for i := 0; i < n; i++ { 101 | _ = bucket.Put(keyNum(i), valNum(i)) 102 | } 103 | } 104 | 105 | func bucketPutIf(n int, bucket *Bucket) { 106 | for i := 0; i < n; i++ { 107 | _ = bucket.PutIf(keyNum(i), valNum(i)) 108 | } 109 | } 110 | 111 | func assertGet(t require.TestingT, fn func(b []byte) (Bytes, error), keyN, valN int) { 112 | val, err := fn(keyNum(keyN)) 113 | require.NoError(t, err) 114 | require.Equal(t, string(valNum(valN)), val.String()) 115 | } 116 | 117 | func assertRange(t require.TestingT, bucket *Bucket, iter int) { 118 | j := iter - 1 119 | err := bucket.Range(func(key, val Bytes) { 120 | require.Equal(t, valNum(j), val.B()) 121 | j-- 122 | }) 123 | require.Equal(t, -1, j) 124 | require.NoError(t, err) 125 | } 126 | 127 | func assertFastRange(t require.TestingT, bucket *Bucket, iter int) { 128 | j := iter - 1 129 | err := bucket.FastRange(func(key, val Bytes) { 130 | require.Equal(t, valNum(j), val.B()) 131 | j-- 132 | }) 133 | require.Equal(t, -1, j) 134 | require.NoError(t, err) 135 | } 136 | 137 | const ( 138 | bucketCount = 2 139 | iterCount = 500 140 | maxMemorySize = 10 * KB 141 | ) 142 | 143 | func TestEmpty(t *testing.T) { 144 | dir := makeTmpDir() 145 | defer removeDir(dir) 146 | 147 | db, err := Open(dir, nil) 148 | require.NoError(t, err) 149 | 150 | bucket := db.GetOrCreateBucket("bucket") 151 | 152 | _, err = bucket.Get(nil) 153 | require.Equal(t, ErrEmtpyRecordKey, err) 154 | require.Equal(t, ErrEmtpyRecordKey, bucket.Put(nil, []byte("1"))) 155 | require.Equal(t, ErrEmtpyRecordValue, bucket.Put([]byte("1"), nil)) 156 | require.Equal(t, ErrEmtpyRecordKey, bucket.PutIf(nil, []byte("1"))) 157 | require.Equal(t, ErrEmtpyRecordValue, bucket.PutIf([]byte("1"), nil)) 158 | require.Equal(t, ErrEmtpyRecordKey, bucket.Del(nil)) 159 | } 
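// Usage sketch (added by the editor, not part of the original test suite): a minimal
// end-to-end pass over the Put/PutIf/Has/Get/Del/Range API documented in README.md and
// bucket.go, reusing only the helpers defined in this file (makeTmpDir, removeDir).
// The bucket name "bucket-usage" is arbitrary.
func TestBucketUsageSketch(t *testing.T) {
	dir := makeTmpDir()
	defer removeDir(dir)

	db, err := Open(dir, nil)
	require.NoError(t, err)
	defer db.Close()

	bucket := db.GetOrCreateBucket("bucket-usage")

	// Put writes the record into the memory segment, so Has/Get need no IO here.
	require.NoError(t, bucket.Put([]byte("key1"), []byte("val1")))
	require.True(t, bucket.Has([]byte("key1")))
	require.Equal(t, 1, bucket.Count())

	val, err := bucket.Get([]byte("key1"))
	require.NoError(t, err)
	require.Equal(t, "val1", val.String())

	// PutIf keeps the existing value when the key is already present.
	require.NoError(t, bucket.PutIf([]byte("key1"), []byte("val2")))
	val, err = bucket.Get([]byte("key1"))
	require.NoError(t, err)
	require.Equal(t, "val1", val.String())

	// Range visits every live key once; the passed-in bytes must not be modified in place.
	visited := 0
	require.NoError(t, bucket.Range(func(key, val Bytes) {
		visited++
	}))
	require.Equal(t, 1, visited)

	// After Del the key is gone and Get reports (nil, nil).
	require.NoError(t, bucket.Del([]byte("key1")))
	val, err = bucket.Get([]byte("key1"))
	require.NoError(t, err)
	require.Nil(t, val)
}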
160 | 161 | func TestBucketPutGet(t *testing.T) { 162 | // Key 不重名 Put 正常写入 163 | opt := DefaultOptions() 164 | opt.MaxMemSegmentBytes = maxMemorySize 165 | 166 | t.Run("Open", func(t *testing.T) { 167 | runGrogudbTest(t, &opt, 168 | func(t require.TestingT, db *DB) { 169 | runParallel(bucketCount, func(i int) { 170 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 171 | }) 172 | runParallel(bucketCount, func(i int) { 173 | bucket := db.GetOrCreateBucket(bucketNum(i)) 174 | for j := 0; j < iterCount; j++ { 175 | assertGet(t, bucket.Get, j, j) 176 | } 177 | }) 178 | }, nil) 179 | }) 180 | 181 | t.Run("ReOpen", func(t *testing.T) { 182 | runGrogudbTest(t, &opt, 183 | func(t require.TestingT, db *DB) { 184 | runParallel(bucketCount, func(i int) { 185 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 186 | }) 187 | require.Equal(t, int64(2), db.Stats().DiskSegment) 188 | }, 189 | func(t require.TestingT, db *DB) { 190 | runParallel(bucketCount, func(i int) { 191 | bucket := db.GetOrCreateBucket(bucketNum(i)) 192 | for j := 0; j < iterCount; j++ { 193 | assertGet(t, bucket.Get, j, j) 194 | } 195 | }) 196 | }, 197 | ) 198 | }) 199 | 200 | t.Run("Compat-ReOpen", func(t *testing.T) { 201 | runGrogudbTest(t, &opt, 202 | func(t require.TestingT, db *DB) { 203 | runParallel(bucketCount, func(i int) { 204 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 205 | }) 206 | require.Equal(t, int64(2), db.Stats().DiskSegment) 207 | cpm, err := db.Compact() 208 | require.True(t, cpm) 209 | require.NoError(t, err) 210 | db.Gc() 211 | require.Equal(t, int64(1), db.Stats().DiskSegment) 212 | }, 213 | func(t require.TestingT, db *DB) { 214 | runParallel(bucketCount, func(i int) { 215 | bucket := db.GetOrCreateBucket(bucketNum(i)) 216 | for j := 0; j < iterCount; j++ { 217 | assertGet(t, bucket.Get, j, j) 218 | } 219 | }) 220 | }, 221 | ) 222 | }) 223 | } 224 | 225 | func TestBucketPutIfGet(t *testing.T) { 226 | // Key 不重名 Put/PutIf 行为保持一致 227 | opt := DefaultOptions() 228 | opt.MaxMemSegmentBytes = maxMemorySize 229 | 230 | t.Run("Open", func(t *testing.T) { 231 | runGrogudbTest(t, &opt, 232 | func(t require.TestingT, db *DB) { 233 | runParallel(bucketCount, func(i int) { 234 | bucketPutIf(iterCount, db.GetOrCreateBucket(bucketNum(i))) 235 | }) 236 | runParallel(bucketCount, func(i int) { 237 | bucket := db.GetOrCreateBucket(bucketNum(i)) 238 | for j := 0; j < iterCount; j++ { 239 | assertGet(t, bucket.Get, j, j) 240 | } 241 | }) 242 | }, nil) 243 | }) 244 | 245 | t.Run("ReOpen", func(t *testing.T) { 246 | runGrogudbTest(t, &opt, 247 | func(t require.TestingT, db *DB) { 248 | runParallel(bucketCount, func(i int) { 249 | bucketPutIf(iterCount, db.GetOrCreateBucket(bucketNum(i))) 250 | }) 251 | require.Equal(t, int64(2), db.Stats().DiskSegment) 252 | }, 253 | func(t require.TestingT, db *DB) { 254 | runParallel(bucketCount, func(i int) { 255 | bucket := db.GetOrCreateBucket(bucketNum(i)) 256 | for j := 0; j < iterCount; j++ { 257 | assertGet(t, bucket.Get, j, j) 258 | } 259 | }) 260 | }, 261 | ) 262 | }) 263 | 264 | t.Run("Compact-ReOpen", func(t *testing.T) { 265 | runGrogudbTest(t, &opt, 266 | func(t require.TestingT, db *DB) { 267 | runParallel(bucketCount, func(i int) { 268 | bucketPutIf(iterCount, db.GetOrCreateBucket(bucketNum(i))) 269 | }) 270 | require.Equal(t, int64(2), db.Stats().DiskSegment) 271 | cpm, err := db.Compact() 272 | require.True(t, cpm) 273 | require.NoError(t, err) 274 | db.Gc() 275 | require.Equal(t, int64(1), db.Stats().DiskSegment) 276 | }, 277 | func(t 
require.TestingT, db *DB) { 278 | runParallel(bucketCount, func(i int) { 279 | bucket := db.GetOrCreateBucket(bucketNum(i)) 280 | for j := 0; j < iterCount; j++ { 281 | assertGet(t, bucket.Get, j, j) 282 | } 283 | }) 284 | }, 285 | ) 286 | }) 287 | } 288 | 289 | func TestBucketPutOverwrite(t *testing.T) { 290 | // Key 重名 Put 更新 Val 291 | opt := DefaultOptions() 292 | opt.MaxMemSegmentBytes = maxMemorySize 293 | 294 | t.Run("Open", func(t *testing.T) { 295 | runGrogudbTest(t, &opt, 296 | func(t require.TestingT, db *DB) { 297 | runParallel(bucketCount, func(i int) { 298 | bucket := db.GetOrCreateBucket(bucketNum(i)) 299 | for j := 0; j < iterCount; j++ { 300 | _ = bucket.Put(keyNum(0), valNum(j)) 301 | } 302 | }) 303 | runParallel(bucketCount, func(i int) { 304 | bucket := db.GetOrCreateBucket(bucketNum(i)) 305 | assertGet(t, bucket.Get, 0, iterCount-1) 306 | }) 307 | }, nil) 308 | }) 309 | 310 | t.Run("ReOpen", func(t *testing.T) { 311 | runGrogudbTest(t, &opt, 312 | func(t require.TestingT, db *DB) { 313 | runParallel(bucketCount, func(i int) { 314 | bucket := db.GetOrCreateBucket(bucketNum(i)) 315 | for j := 0; j < iterCount; j++ { 316 | _ = bucket.Put(keyNum(0), valNum(j)) 317 | } 318 | }) 319 | require.Equal(t, int64(0), db.Stats().DiskSegment) 320 | }, 321 | func(t require.TestingT, db *DB) { 322 | runParallel(bucketCount, func(i int) { 323 | bucket := db.GetOrCreateBucket(bucketNum(i)) 324 | assertGet(t, bucket.Get, 0, iterCount-1) 325 | }) 326 | }, 327 | ) 328 | 329 | t.Run("Compact-ReOpen", func(t *testing.T) { 330 | runGrogudbTest(t, &opt, 331 | func(t require.TestingT, db *DB) { 332 | runParallel(bucketCount, func(i int) { 333 | bucket := db.GetOrCreateBucket(bucketNum(i)) 334 | for j := 0; j < iterCount; j++ { 335 | _ = bucket.Put(keyNum(0), valNum(j)) 336 | } 337 | }) 338 | require.Equal(t, int64(0), db.Stats().DiskSegment) 339 | cpm, err := db.Compact() 340 | require.True(t, cpm) 341 | require.NoError(t, err) 342 | db.Gc() 343 | require.Equal(t, int64(0), db.Stats().DiskSegment) 344 | }, 345 | func(t require.TestingT, db *DB) { 346 | runParallel(bucketCount, func(i int) { 347 | bucket := db.GetOrCreateBucket(bucketNum(i)) 348 | assertGet(t, bucket.Get, 0, iterCount-1) 349 | }) 350 | }, 351 | ) 352 | }) 353 | }) 354 | } 355 | 356 | func TestBucketPutIfOverwrite(t *testing.T) { 357 | // Key 重名 PutIf 保持第一个值 358 | opt := DefaultOptions() 359 | opt.MaxMemSegmentBytes = maxMemorySize 360 | 361 | t.Run("Open", func(t *testing.T) { 362 | runGrogudbTest(t, &opt, 363 | func(t require.TestingT, db *DB) { 364 | runParallel(bucketCount, func(i int) { 365 | bucket := db.GetOrCreateBucket(bucketNum(i)) 366 | for j := 0; j < iterCount; j++ { 367 | _ = bucket.PutIf(keyNum(0), valNum(j)) 368 | } 369 | }) 370 | runParallel(bucketCount, func(i int) { 371 | bucket := db.GetOrCreateBucket(bucketNum(i)) 372 | assertGet(t, bucket.Get, 0, 0) 373 | }) 374 | }, nil) 375 | }) 376 | 377 | t.Run("ReOpen", func(t *testing.T) { 378 | runGrogudbTest(t, &opt, 379 | func(t require.TestingT, db *DB) { 380 | runParallel(bucketCount, func(i int) { 381 | bucket := db.GetOrCreateBucket(bucketNum(i)) 382 | for j := 0; j < iterCount; j++ { 383 | _ = bucket.PutIf(keyNum(0), valNum(j)) 384 | } 385 | }) 386 | }, 387 | func(t require.TestingT, db *DB) { 388 | runParallel(bucketCount, func(i int) { 389 | bucket := db.GetOrCreateBucket(bucketNum(i)) 390 | assertGet(t, bucket.Get, 0, 0) 391 | }) 392 | }, 393 | ) 394 | }) 395 | 396 | t.Run("Compact-ReOpen", func(t *testing.T) { 397 | runGrogudbTest(t, &opt, 398 | func(t 
require.TestingT, db *DB) { 399 | runParallel(bucketCount, func(i int) { 400 | bucket := db.GetOrCreateBucket(bucketNum(i)) 401 | for j := 0; j < iterCount; j++ { 402 | _ = bucket.PutIf(keyNum(0), valNum(j)) 403 | } 404 | }) 405 | require.Equal(t, int64(0), db.Stats().DiskSegment) 406 | cpm, err := db.Compact() 407 | require.True(t, cpm) 408 | require.NoError(t, err) 409 | db.Gc() 410 | require.Equal(t, int64(0), db.Stats().DiskSegment) 411 | }, 412 | func(t require.TestingT, db *DB) { 413 | runParallel(bucketCount, func(i int) { 414 | bucket := db.GetOrCreateBucket(bucketNum(i)) 415 | assertGet(t, bucket.Get, 0, 0) 416 | }) 417 | }, 418 | ) 419 | }) 420 | } 421 | 422 | func TestBucketHas(t *testing.T) { 423 | // Key 不重名 Put 正常写入 424 | opt := DefaultOptions() 425 | opt.MaxMemSegmentBytes = maxMemorySize 426 | 427 | t.Run("Open", func(t *testing.T) { 428 | runGrogudbTest(t, &opt, 429 | func(t require.TestingT, db *DB) { 430 | runParallel(bucketCount, func(i int) { 431 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 432 | }) 433 | runParallel(bucketCount, func(i int) { 434 | bucket := db.GetOrCreateBucket(bucketNum(i)) 435 | for j := 0; j < iterCount; j++ { 436 | require.True(t, bucket.Has(keyNum(j))) 437 | } 438 | }) 439 | }, nil) 440 | }) 441 | 442 | t.Run("ReOpen", func(t *testing.T) { 443 | runGrogudbTest(t, &opt, 444 | func(t require.TestingT, db *DB) { 445 | runParallel(bucketCount, func(i int) { 446 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 447 | }) 448 | require.Equal(t, int64(2), db.Stats().DiskSegment) 449 | }, 450 | func(t require.TestingT, db *DB) { 451 | runParallel(bucketCount, func(i int) { 452 | bucket := db.GetOrCreateBucket(bucketNum(i)) 453 | for j := 0; j < iterCount; j++ { 454 | require.True(t, bucket.Has(keyNum(j))) 455 | } 456 | }) 457 | }, 458 | ) 459 | }) 460 | 461 | t.Run("Compact-ReOpen", func(t *testing.T) { 462 | runGrogudbTest(t, &opt, 463 | func(t require.TestingT, db *DB) { 464 | runParallel(bucketCount, func(i int) { 465 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 466 | }) 467 | require.Equal(t, int64(2), db.Stats().DiskSegment) 468 | cpm, err := db.Compact() 469 | require.True(t, cpm) 470 | require.NoError(t, err) 471 | db.Gc() 472 | require.Equal(t, int64(1), db.Stats().DiskSegment) 473 | }, 474 | func(t require.TestingT, db *DB) { 475 | runParallel(bucketCount, func(i int) { 476 | bucket := db.GetOrCreateBucket(bucketNum(i)) 477 | for j := 0; j < iterCount; j++ { 478 | require.True(t, bucket.Has(keyNum(j))) 479 | } 480 | }) 481 | }, 482 | ) 483 | }) 484 | } 485 | 486 | func TestBucketPutCount(t *testing.T) { 487 | // Key 不重名 Put 正常写入 488 | opt := DefaultOptions() 489 | opt.MaxMemSegmentBytes = maxMemorySize 490 | 491 | t.Run("Open", func(t *testing.T) { 492 | runGrogudbTest(t, &opt, 493 | func(t require.TestingT, db *DB) { 494 | runParallel(bucketCount, func(i int) { 495 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 496 | }) 497 | runParallel(bucketCount, func(i int) { 498 | require.Equal(t, iterCount, db.GetOrCreateBucket(bucketNum(i)).Count()) 499 | }) 500 | }, nil) 501 | }) 502 | 503 | t.Run("ReOpen", func(t *testing.T) { 504 | runGrogudbTest(t, &opt, 505 | func(t require.TestingT, db *DB) { 506 | runParallel(bucketCount, func(i int) { 507 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 508 | }) 509 | require.Equal(t, int64(2), db.Stats().DiskSegment) 510 | }, 511 | func(t require.TestingT, db *DB) { 512 | runParallel(bucketCount, func(i int) { 513 | require.Equal(t, iterCount, 
db.GetOrCreateBucket(bucketNum(i)).Count()) 514 | }) 515 | }, 516 | ) 517 | }) 518 | 519 | t.Run("Compact-ReOpen", func(t *testing.T) { 520 | runGrogudbTest(t, &opt, 521 | func(t require.TestingT, db *DB) { 522 | runParallel(bucketCount, func(i int) { 523 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 524 | }) 525 | require.Equal(t, int64(2), db.Stats().DiskSegment) 526 | cpm, err := db.Compact() 527 | require.True(t, cpm) 528 | require.NoError(t, err) 529 | db.Gc() 530 | require.Equal(t, int64(1), db.Stats().DiskSegment) 531 | }, 532 | func(t require.TestingT, db *DB) { 533 | runParallel(bucketCount, func(i int) { 534 | require.Equal(t, iterCount, db.GetOrCreateBucket(bucketNum(i)).Count()) 535 | }) 536 | }, 537 | ) 538 | }) 539 | } 540 | 541 | func TestBucketPutIfCount(t *testing.T) { 542 | // Key 重名 PutIf 保持第一个值 543 | opt := DefaultOptions() 544 | opt.MaxMemSegmentBytes = maxMemorySize 545 | 546 | t.Run("Open", func(t *testing.T) { 547 | runGrogudbTest(t, &opt, 548 | func(t require.TestingT, db *DB) { 549 | runParallel(bucketCount, func(i int) { 550 | bucket := db.GetOrCreateBucket(bucketNum(i)) 551 | for j := 0; j < iterCount; j++ { 552 | _ = bucket.PutIf(keyNum(0), valNum(j)) 553 | } 554 | }) 555 | runParallel(bucketCount, func(i int) { 556 | require.Equal(t, 1, db.GetOrCreateBucket(bucketNum(i)).Count()) 557 | }) 558 | }, nil) 559 | }) 560 | 561 | t.Run("ReOpen", func(t *testing.T) { 562 | runGrogudbTest(t, &opt, 563 | func(t require.TestingT, db *DB) { 564 | runParallel(bucketCount, func(i int) { 565 | bucket := db.GetOrCreateBucket(bucketNum(i)) 566 | for j := 0; j < iterCount; j++ { 567 | _ = bucket.PutIf(keyNum(0), valNum(j)) 568 | } 569 | }) 570 | }, 571 | func(t require.TestingT, db *DB) { 572 | runParallel(bucketCount, func(i int) { 573 | require.Equal(t, 1, db.GetOrCreateBucket(bucketNum(i)).Count()) 574 | }) 575 | }, 576 | ) 577 | }) 578 | 579 | t.Run("Compact-ReOpen", func(t *testing.T) { 580 | runGrogudbTest(t, &opt, 581 | func(t require.TestingT, db *DB) { 582 | runParallel(bucketCount, func(i int) { 583 | bucket := db.GetOrCreateBucket(bucketNum(i)) 584 | for j := 0; j < iterCount; j++ { 585 | _ = bucket.PutIf(keyNum(0), valNum(j)) 586 | } 587 | }) 588 | require.Equal(t, int64(0), db.Stats().DiskSegment) 589 | cpm, err := db.Compact() 590 | require.True(t, cpm) 591 | require.NoError(t, err) 592 | db.Gc() 593 | require.Equal(t, int64(0), db.Stats().DiskSegment) 594 | }, 595 | func(t require.TestingT, db *DB) { 596 | runParallel(bucketCount, func(i int) { 597 | require.Equal(t, 1, db.GetOrCreateBucket(bucketNum(i)).Count()) 598 | }) 599 | }, 600 | ) 601 | }) 602 | } 603 | 604 | func TestBucketPutRange(t *testing.T) { 605 | // Key 不重名 Put 正常写入 606 | opt := DefaultOptions() 607 | opt.MaxMemSegmentBytes = maxMemorySize 608 | 609 | t.Run("Open", func(t *testing.T) { 610 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 611 | runParallel(bucketCount, func(i int) { 612 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 613 | }) 614 | runParallel(bucketCount, func(i int) { 615 | assertRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 616 | }) 617 | }, nil) 618 | }) 619 | 620 | t.Run("ReOpen", func(t *testing.T) { 621 | runGrogudbTest(t, &opt, 622 | func(t require.TestingT, db *DB) { 623 | runParallel(bucketCount, func(i int) { 624 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 625 | }) 626 | require.Equal(t, int64(2), db.Stats().DiskSegment) 627 | }, 628 | func(t require.TestingT, db *DB) { 629 | runParallel(bucketCount, func(i 
int) { 630 | assertRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 631 | }) 632 | }, 633 | ) 634 | }) 635 | 636 | t.Run("Compact-ReOpen", func(t *testing.T) { 637 | runGrogudbTest(t, &opt, 638 | func(t require.TestingT, db *DB) { 639 | runParallel(bucketCount, func(i int) { 640 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 641 | }) 642 | require.Equal(t, int64(2), db.Stats().DiskSegment) 643 | cpm, err := db.Compact() 644 | require.True(t, cpm) 645 | require.NoError(t, err) 646 | 647 | db.Gc() 648 | require.Equal(t, int64(1), db.Stats().DiskSegment) 649 | }, 650 | func(t require.TestingT, db *DB) { 651 | runParallel(bucketCount, func(i int) { 652 | assertRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 653 | }) 654 | }, 655 | ) 656 | }) 657 | 658 | t.Run("Compact-ReOpen FastRange", func(t *testing.T) { 659 | runGrogudbTest(t, &opt, 660 | func(t require.TestingT, db *DB) { 661 | runParallel(bucketCount, func(i int) { 662 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 663 | }) 664 | require.Equal(t, int64(2), db.Stats().DiskSegment) 665 | cpm, err := db.Compact() 666 | require.True(t, cpm) 667 | require.NoError(t, err) 668 | 669 | db.Gc() 670 | require.Equal(t, int64(1), db.Stats().DiskSegment) 671 | }, 672 | func(t require.TestingT, db *DB) { 673 | runParallel(bucketCount, func(i int) { 674 | assertFastRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 675 | }) 676 | }, 677 | ) 678 | }) 679 | } 680 | 681 | func TestBucketPutIfRange(t *testing.T) { 682 | // Key 不重名 Put/PutIf 行为保持一致 683 | opt := DefaultOptions() 684 | opt.MaxMemSegmentBytes = maxMemorySize 685 | 686 | t.Run("Open", func(t *testing.T) { 687 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 688 | runParallel(bucketCount, func(i int) { 689 | bucketPutIf(iterCount, db.GetOrCreateBucket(bucketNum(i))) 690 | }) 691 | runParallel(bucketCount, func(i int) { 692 | assertRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 693 | }) 694 | }, nil) 695 | }) 696 | 697 | t.Run("ReOpen", func(t *testing.T) { 698 | runGrogudbTest(t, &opt, 699 | func(t require.TestingT, db *DB) { 700 | runParallel(bucketCount, func(i int) { 701 | bucketPutIf(iterCount, db.GetOrCreateBucket(bucketNum(i))) 702 | }) 703 | require.Equal(t, int64(2), db.Stats().DiskSegment) 704 | }, 705 | func(t require.TestingT, db *DB) { 706 | runParallel(bucketCount, func(i int) { 707 | assertRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 708 | }) 709 | }, 710 | ) 711 | }) 712 | 713 | t.Run("Compact-ReOpen", func(t *testing.T) { 714 | runGrogudbTest(t, &opt, 715 | func(t require.TestingT, db *DB) { 716 | runParallel(bucketCount, func(i int) { 717 | bucketPutIf(iterCount, db.GetOrCreateBucket(bucketNum(i))) 718 | }) 719 | require.Equal(t, int64(2), db.Stats().DiskSegment) 720 | cpm, err := db.Compact() 721 | require.True(t, cpm) 722 | require.NoError(t, err) 723 | 724 | db.Gc() 725 | require.Equal(t, int64(1), db.Stats().DiskSegment) 726 | }, 727 | func(t require.TestingT, db *DB) { 728 | runParallel(bucketCount, func(i int) { 729 | assertRange(t, db.GetOrCreateBucket(bucketNum(i)), iterCount) 730 | }) 731 | }, 732 | ) 733 | }) 734 | } 735 | 736 | func TestBucketPutRangeOverwrite(t *testing.T) { 737 | // Key 重名 Put 更新 Val 738 | opt := DefaultOptions() 739 | opt.MaxMemSegmentBytes = maxMemorySize 740 | 741 | t.Run("Open", func(t *testing.T) { 742 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 743 | runParallel(bucketCount, func(i int) { 744 | bucket := db.GetOrCreateBucket(bucketNum(i)) 745 | for j 
:= 0; j < iterCount; j++ { 746 | _ = bucket.Put(keyNum(0), valNum(j)) 747 | } 748 | }) 749 | runParallel(bucketCount, func(i int) { 750 | bucket := db.GetOrCreateBucket(bucketNum(i)) 751 | j := 0 752 | err := bucket.Range(func(key, val Bytes) { 753 | j++ 754 | require.Equal(t, valNum(iterCount-1), val.B()) 755 | }) 756 | require.Equal(t, 1, j) 757 | require.NoError(t, err) 758 | }) 759 | }, nil) 760 | }) 761 | 762 | t.Run("ReOpen", func(t *testing.T) { 763 | runGrogudbTest(t, &opt, 764 | func(t require.TestingT, db *DB) { 765 | runParallel(bucketCount, func(i int) { 766 | bucket := db.GetOrCreateBucket(bucketNum(i)) 767 | for j := 0; j < iterCount; j++ { 768 | _ = bucket.Put(keyNum(0), valNum(j)) 769 | } 770 | }) 771 | require.Equal(t, int64(0), db.Stats().DiskSegment) 772 | }, 773 | func(t require.TestingT, db *DB) { 774 | runParallel(bucketCount, func(i int) { 775 | bucket := db.GetOrCreateBucket(bucketNum(i)) 776 | j := 0 777 | err := bucket.Range(func(key, val Bytes) { 778 | j++ 779 | require.Equal(t, valNum(iterCount-1), val.B()) 780 | }) 781 | require.Equal(t, 1, j) 782 | require.NoError(t, err) 783 | }) 784 | }, 785 | ) 786 | }) 787 | 788 | t.Run("Compact-ReOpen", func(t *testing.T) { 789 | runGrogudbTest(t, &opt, 790 | func(t require.TestingT, db *DB) { 791 | runParallel(bucketCount, func(i int) { 792 | bucket := db.GetOrCreateBucket(bucketNum(i)) 793 | for j := 0; j < iterCount; j++ { 794 | _ = bucket.Put(keyNum(0), valNum(j)) 795 | } 796 | }) 797 | require.Equal(t, int64(0), db.Stats().DiskSegment) 798 | cpm, err := db.Compact() 799 | require.True(t, cpm) 800 | require.NoError(t, err) 801 | 802 | db.Gc() 803 | require.Equal(t, int64(0), db.Stats().DiskSegment) 804 | }, 805 | func(t require.TestingT, db *DB) { 806 | runParallel(bucketCount, func(i int) { 807 | bucket := db.GetOrCreateBucket(bucketNum(i)) 808 | j := 0 809 | err := bucket.Range(func(key, val Bytes) { 810 | j++ 811 | require.Equal(t, valNum(iterCount-1), val.B()) 812 | }) 813 | require.Equal(t, 1, j) 814 | require.NoError(t, err) 815 | }) 816 | }, 817 | ) 818 | }) 819 | } 820 | 821 | func TestBucketPutIfRangeOverwrite(t *testing.T) { 822 | // Key 重名 PutIf 保持第一个值 823 | opt := DefaultOptions() 824 | opt.MaxMemSegmentBytes = maxMemorySize 825 | 826 | t.Run("Open", func(t *testing.T) { 827 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 828 | runParallel(bucketCount, func(i int) { 829 | bucket := db.GetOrCreateBucket(bucketNum(i)) 830 | for j := 0; j < iterCount; j++ { 831 | _ = bucket.PutIf(keyNum(0), valNum(j)) 832 | } 833 | }) 834 | runParallel(bucketCount, func(i int) { 835 | bucket := db.GetOrCreateBucket(bucketNum(i)) 836 | j := 0 837 | err := bucket.Range(func(key, val Bytes) { 838 | j++ 839 | require.Equal(t, valNum(0), val.B()) 840 | }) 841 | require.Equal(t, 1, j) 842 | require.NoError(t, err) 843 | }) 844 | }, nil) 845 | }) 846 | 847 | t.Run("ReOpen", func(t *testing.T) { 848 | runGrogudbTest(t, &opt, 849 | func(t require.TestingT, db *DB) { 850 | runParallel(bucketCount, func(i int) { 851 | bucket := db.GetOrCreateBucket(bucketNum(i)) 852 | for j := 0; j < iterCount; j++ { 853 | _ = bucket.PutIf(keyNum(0), valNum(j)) 854 | } 855 | }) 856 | }, 857 | func(t require.TestingT, db *DB) { 858 | runParallel(bucketCount, func(i int) { 859 | bucket := db.GetOrCreateBucket(bucketNum(i)) 860 | j := 0 861 | err := bucket.Range(func(key, val Bytes) { 862 | j++ 863 | require.Equal(t, valNum(0), val.B()) 864 | }) 865 | require.Equal(t, 1, j) 866 | require.NoError(t, err) 867 | }) 868 | }, 869 | ) 870 | }) 871 | 
} 872 | 873 | func TestBucketDel(t *testing.T) { 874 | opt := DefaultOptions() 875 | opt.MaxMemSegmentBytes = maxMemorySize 876 | 877 | t.Run("Open", func(t *testing.T) { 878 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 879 | // Put 880 | runParallel(bucketCount, func(i int) { 881 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 882 | }) 883 | // Del 884 | require.Equal(t, int64(2), db.Stats().DiskSegment) 885 | runParallel(bucketCount, func(i int) { 886 | bucket := db.GetOrCreateBucket(bucketNum(i)) 887 | for j := 0; j < iterCount; j++ { 888 | // 只保留 [20, 30] 889 | if j <= 30 && j >= 20 { 890 | continue 891 | } 892 | _ = bucket.Del(keyNum(j)) 893 | } 894 | }) 895 | // Get 896 | runParallel(bucketCount, func(i int) { 897 | bucket := db.GetOrCreateBucket(bucketNum(i)) 898 | val, err := bucket.Get(keyNum(30)) 899 | require.NoError(t, err) 900 | require.Equal(t, valNum(30), val.B()) 901 | 902 | val, err = bucket.Get(keyNum(31)) 903 | require.NoError(t, err) 904 | require.Nil(t, val) 905 | }) 906 | // Range 907 | runParallel(bucketCount, func(i int) { 908 | bucket := db.GetOrCreateBucket(bucketNum(i)) 909 | j := 30 910 | err := bucket.Range(func(key, val Bytes) { 911 | require.Equal(t, string(keyNum(j)), key.String()) 912 | require.Equal(t, string(valNum(j)), val.String()) 913 | j-- 914 | }) 915 | require.Equal(t, 19, j) 916 | require.NoError(t, err) 917 | }) 918 | }, nil) 919 | }) 920 | 921 | t.Run("ReOpen", func(t *testing.T) { 922 | runGrogudbTest(t, &opt, 923 | func(t require.TestingT, db *DB) { 924 | // Put 925 | runParallel(bucketCount, func(i int) { 926 | bucketPut(iterCount, db.GetOrCreateBucket(bucketNum(i))) 927 | }) 928 | // Del 929 | runParallel(bucketCount, func(i int) { 930 | bucket := db.GetOrCreateBucket(bucketNum(i)) 931 | for j := 0; j < iterCount; j++ { 932 | // 只保留 [20, 30] 933 | if j <= 30 && j >= 20 { 934 | continue 935 | } 936 | _ = bucket.Del(keyNum(j)) 937 | } 938 | }) 939 | require.Equal(t, int64(4), db.Stats().DiskSegment) 940 | cpm, err := db.Compact() 941 | require.True(t, cpm) 942 | require.NoError(t, err) 943 | 944 | db.Gc() 945 | require.Equal(t, int64(1), db.Stats().DiskSegment) 946 | }, 947 | func(t require.TestingT, db *DB) { 948 | // Get 949 | runParallel(bucketCount, func(i int) { 950 | bucket := db.GetOrCreateBucket(bucketNum(i)) 951 | val, err := bucket.Get(keyNum(30)) 952 | require.NoError(t, err) 953 | require.Equal(t, valNum(30), val.B()) 954 | 955 | val, err = bucket.Get(keyNum(31)) 956 | require.NoError(t, err) 957 | require.Nil(t, val) 958 | }) 959 | // Range 960 | runParallel(bucketCount, func(i int) { 961 | bucket := db.GetOrCreateBucket(bucketNum(i)) 962 | j := 30 963 | err := bucket.Range(func(key, val Bytes) { 964 | require.Equal(t, string(keyNum(j)), key.String()) 965 | require.Equal(t, string(valNum(j)), val.String()) 966 | j-- 967 | }) 968 | require.Equal(t, 19, j) 969 | require.NoError(t, err) 970 | }) 971 | }, 972 | ) 973 | }) 974 | } 975 | 976 | func TestBucketClear(t *testing.T) { 977 | opt := DefaultOptions() 978 | opt.MaxMemSegmentBytes = maxMemorySize 979 | 980 | t.Run("Open", func(t *testing.T) { 981 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 982 | runParallel(bucketCount, func(i int) { 983 | bucket := db.GetOrCreateBucket(bucketNum(i)) 984 | bucketPut(iterCount, bucket) 985 | 986 | bucket.Clear() 987 | require.Equal(t, 0, bucket.Count()) 988 | 989 | n := 0 990 | err := bucket.Range(func(key, val Bytes) { 991 | n++ 992 | }) 993 | require.NoError(t, err) 994 | require.Equal(t, 0, n) 995 | }) 
996 | }, nil) 997 | }) 998 | 999 | t.Run("ReOpen", func(t *testing.T) { 1000 | runGrogudbTest(t, &opt, 1001 | func(t require.TestingT, db *DB) { 1002 | runParallel(bucketCount, func(i int) { 1003 | bucket := db.GetOrCreateBucket(bucketNum(i)) 1004 | bucketPut(iterCount, bucket) 1005 | 1006 | bucket.Clear() 1007 | require.Equal(t, 0, bucket.Count()) 1008 | }) 1009 | require.Equal(t, int64(2), db.Stats().DiskSegment) 1010 | cpm, err := db.Compact() 1011 | require.True(t, cpm) 1012 | require.NoError(t, err) 1013 | 1014 | db.Gc() 1015 | require.Equal(t, int64(1), db.Stats().DiskSegment) 1016 | }, 1017 | func(t require.TestingT, db *DB) { 1018 | runParallel(bucketCount, func(i int) { 1019 | require.Len(t, db.Buckets(), 0) 1020 | }) 1021 | require.Equal(t, int64(2), db.Stats().DiskSegment) 1022 | }, 1023 | ) 1024 | }) 1025 | } 1026 | 1027 | func TestBucketClearThenPut(t *testing.T) { 1028 | opt := DefaultOptions() 1029 | opt.MaxMemSegmentBytes = maxMemorySize 1030 | 1031 | t.Run("Open", func(t *testing.T) { 1032 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 1033 | runParallel(bucketCount, func(i int) { 1034 | bucket := db.GetOrCreateBucket(bucketNum(i)) 1035 | bucketPut(iterCount, bucket) 1036 | 1037 | bucket.Clear() 1038 | require.NoError(t, bucket.Put(keyNum(1), valNum(1))) 1039 | require.Equal(t, 1, bucket.Count()) 1040 | 1041 | val, err := bucket.Get(keyNum(1)) 1042 | require.NoError(t, err) 1043 | require.Equal(t, valNum(1), val.B()) 1044 | }) 1045 | }, nil) 1046 | }) 1047 | 1048 | t.Run("ReOpen", func(t *testing.T) { 1049 | runGrogudbTest(t, &opt, 1050 | func(t require.TestingT, db *DB) { 1051 | runParallel(bucketCount, func(i int) { 1052 | bucket := db.GetOrCreateBucket(bucketNum(i)) 1053 | bucketPut(iterCount, bucket) 1054 | bucket.Clear() 1055 | require.NoError(t, bucket.Put(keyNum(1), valNum(1))) 1056 | }) 1057 | }, 1058 | func(t require.TestingT, db *DB) { 1059 | runParallel(bucketCount, func(i int) { 1060 | bucket := db.GetOrCreateBucket(bucketNum(i)) 1061 | require.Equal(t, 1, bucket.Count()) 1062 | val, err := bucket.Get(keyNum(1)) 1063 | require.NoError(t, err) 1064 | require.Equal(t, valNum(1), val.B()) 1065 | }) 1066 | }, 1067 | ) 1068 | }) 1069 | } 1070 | 1071 | // ----- Benchmark ----- 1072 | 1073 | const ( 1074 | benchmarkIter = 100000 // 100k 1075 | benchmarkBuckets = 100 1076 | ) 1077 | 1078 | func BenchmarkBucketPut(b *testing.B) { 1079 | for i := 0; i < b.N; i++ { 1080 | benchmarkBucketPut(b) 1081 | } 1082 | } 1083 | 1084 | func benchmarkBucketPut(b *testing.B) { 1085 | runGrogudbTest(b, nil, func(t require.TestingT, db *DB) { 1086 | start := time.Now() 1087 | wg := sync.WaitGroup{} 1088 | for i := 0; i < benchmarkBuckets; i++ { 1089 | wg.Add(1) 1090 | n := i 1091 | go func() { 1092 | defer wg.Done() 1093 | bucket := db.GetOrCreateBucket(bucketNum(n)) 1094 | for j := 0; j < benchmarkIter; j++ { 1095 | _ = bucket.Put(keyNum(j), valNum(j)) 1096 | } 1097 | }() 1098 | } 1099 | wg.Wait() 1100 | 1101 | since := time.Since(start) 1102 | ops := float64(benchmarkIter*benchmarkBuckets) / since.Seconds() 1103 | b.Logf("Benchmark Put elapsed[%s]: iter=%d, bucket=%d, ops=%f, stat=%+v", since, benchmarkIter, benchmarkBuckets, ops, db.Stats()) 1104 | }, nil) 1105 | } 1106 | 1107 | func BenchmarkBucketPutIf(b *testing.B) { 1108 | for i := 0; i < b.N; i++ { 1109 | benchmarkBucketPutIf(b) 1110 | } 1111 | } 1112 | 1113 | func benchmarkBucketPutIf(b *testing.B) { 1114 | runGrogudbTest(b, nil, func(t require.TestingT, db *DB) { 1115 | start := time.Now() 1116 | wg := 
sync.WaitGroup{} 1117 | for i := 0; i < benchmarkBuckets; i++ { 1118 | wg.Add(1) 1119 | n := i 1120 | go func() { 1121 | defer wg.Done() 1122 | bucket := db.GetOrCreateBucket(bucketNum(n)) 1123 | for j := 0; j < benchmarkIter; j++ { 1124 | _ = bucket.PutIf(keyNum(0), valNum(j)) 1125 | } 1126 | }() 1127 | } 1128 | wg.Wait() 1129 | 1130 | since := time.Since(start) 1131 | ops := float64(benchmarkIter*benchmarkBuckets) / since.Seconds() 1132 | b.Logf("Benchmark PutIf elapsed[%s]: iter=%d, bucket=%d, ops=%f, stat=%+v", since, benchmarkIter, benchmarkBuckets, ops, db.Stats()) 1133 | }, nil) 1134 | } 1135 | 1136 | func BenchmarkBucketRange(b *testing.B) { 1137 | for i := 0; i < b.N; i++ { 1138 | benchmarkBucketRange(b) 1139 | } 1140 | } 1141 | 1142 | func benchmarkBucketRange(b *testing.B) { 1143 | runGrogudbTest(b, nil, func(t require.TestingT, db *DB) { 1144 | wg := sync.WaitGroup{} 1145 | for i := 0; i < benchmarkBuckets; i++ { 1146 | wg.Add(1) 1147 | n := i 1148 | go func() { 1149 | defer wg.Done() 1150 | bucket := db.GetOrCreateBucket(bucketNum(n)) 1151 | for j := 0; j < benchmarkIter; j++ { 1152 | _ = bucket.Put(keyNum(j), valNum(j)) 1153 | } 1154 | }() 1155 | } 1156 | wg.Wait() 1157 | 1158 | start := time.Now() 1159 | wg = sync.WaitGroup{} 1160 | var total atomic.Int64 1161 | for i := 0; i < benchmarkBuckets; i++ { 1162 | wg.Add(1) 1163 | n := i 1164 | go func() { 1165 | defer wg.Done() 1166 | 1167 | bucket := db.GetOrCreateBucket(bucketNum(n)) 1168 | err := bucket.Range(func(key, val Bytes) { 1169 | // drains 1170 | total.Add(1) 1171 | }) 1172 | require.NoError(t, err) 1173 | }() 1174 | } 1175 | wg.Wait() 1176 | 1177 | since := time.Since(start) 1178 | require.Equal(t, int64(benchmarkIter*benchmarkBuckets), total.Load()) 1179 | ops := float64(benchmarkIter*benchmarkBuckets) / since.Seconds() 1180 | b.Logf("Benchmark Range elapsed[%s]: iter=%d, bucket=%d, ops=%f, stat=%+v", since, benchmarkIter, benchmarkBuckets, ops, db.Stats()) 1181 | }, nil) 1182 | } 1183 | 1184 | func BenchmarkBucketHas(b *testing.B) { 1185 | for i := 0; i < b.N; i++ { 1186 | benchmarkBucketHas(b) 1187 | } 1188 | } 1189 | 1190 | func benchmarkBucketHas(b *testing.B) { 1191 | runGrogudbTest(b, nil, func(t require.TestingT, db *DB) { 1192 | wg := sync.WaitGroup{} 1193 | for i := 0; i < benchmarkBuckets; i++ { 1194 | wg.Add(1) 1195 | n := i 1196 | go func() { 1197 | defer wg.Done() 1198 | bucket := db.GetOrCreateBucket(bucketNum(n)) 1199 | for j := 0; j < benchmarkIter; j++ { 1200 | _ = bucket.Put(keyNum(j), valNum(j)) 1201 | } 1202 | }() 1203 | } 1204 | wg.Wait() 1205 | 1206 | start := time.Now() 1207 | var total int 1208 | for i := 0; i < benchmarkBuckets; i++ { 1209 | bucket := db.GetOrCreateBucket(bucketNum(i)) 1210 | for j := 0; j < benchmarkIter; j++ { 1211 | if bucket.Has(keyNum(j)) { 1212 | total++ 1213 | } 1214 | } 1215 | } 1216 | require.Equal(t, benchmarkIter*benchmarkBuckets, total) 1217 | 1218 | since := time.Since(start) 1219 | ops := float64(benchmarkIter*benchmarkBuckets) / since.Seconds() 1220 | b.Logf("Benchmark Has elapsed[%s]: iter=%d, bucket=%d, ops=%f, stat=%+v", since, benchmarkIter, benchmarkBuckets, ops, db.Stats()) 1221 | }, nil) 1222 | } 1223 | 1224 | func BenchmarkBucketDel(b *testing.B) { 1225 | for i := 0; i < b.N; i++ { 1226 | benchmarkBucketDel(b) 1227 | } 1228 | } 1229 | 1230 | func benchmarkBucketDel(b *testing.B) { 1231 | runGrogudbTest(b, nil, func(t require.TestingT, db *DB) { 1232 | wg := sync.WaitGroup{} 1233 | for i := 0; i < benchmarkBuckets; i++ { 1234 | wg.Add(1) 1235 | n := i 
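// n := i pins the loop variable so each goroutine seeds its own bucket
// (required for correctness under pre-Go 1.22 for-loop variable semantics)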
1236 | go func() { 1237 | defer wg.Done() 1238 | bucket := db.GetOrCreateBucket(bucketNum(n)) 1239 | for j := 0; j < benchmarkIter; j++ { 1240 | _ = bucket.Put(keyNum(j), valNum(j)) 1241 | } 1242 | }() 1243 | } 1244 | wg.Wait() 1245 | 1246 | start := time.Now() 1247 | for i := 0; i < benchmarkBuckets; i++ { 1248 | bucket := db.GetOrCreateBucket(bucketNum(i)) 1249 | for j := 0; j < benchmarkIter; j++ { 1250 | _ = bucket.Del(keyNum(j)) 1251 | } 1252 | } 1253 | 1254 | since := time.Since(start) 1255 | ops := float64(benchmarkIter*benchmarkBuckets) / since.Seconds() 1256 | b.Logf("Benchmark Del elapsed[%s]: iter=%d, bucket=%d, ops=%f, stat=%+v", since, benchmarkIter, benchmarkBuckets, ops, db.Stats()) 1257 | }, nil) 1258 | } 1259 | -------------------------------------------------------------------------------- /compaction.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package grogudb 15 | 16 | import ( 17 | "fmt" 18 | "io" 19 | "os" 20 | "sort" 21 | "strconv" 22 | "strings" 23 | "time" 24 | 25 | "github.com/chenjiandongx/grogudb/pkg/buffer" 26 | "github.com/chenjiandongx/grogudb/pkg/codec" 27 | "github.com/chenjiandongx/grogudb/pkg/fsx" 28 | "github.com/chenjiandongx/grogudb/pkg/logx" 29 | "github.com/chenjiandongx/grogudb/pkg/uint64set" 30 | ) 31 | 32 | const chunkSize = 1024 * 1024 * 4 33 | 34 | // compactionPlan compact 计划 描述将 source disk segments 合并至 destination segment 35 | type compactionPlan struct { 36 | src []*diskSegment 37 | dst *diskSegment 38 | } 39 | 40 | // String 格式化 compactionPlan 输出 41 | func (c compactionPlan) String() string { 42 | if len(c.src) <= 0 || c.dst == nil { 43 | return "" 44 | } 45 | 46 | var srcSeqIDs []string 47 | for _, ds := range c.src { 48 | srcSeqIDs = append(srcSeqIDs, strconv.Itoa(int(ds.seqID))) 49 | } 50 | return fmt.Sprintf("src.seqID=[%v] -> dst.seqID=[%d]", strings.Join(srcSeqIDs, ","), c.dst.seqID) 51 | } 52 | 53 | func (db *DB) getBucketClearAt(name string) int64 { 54 | db.memMut.Lock() 55 | defer db.memMut.Unlock() 56 | 57 | v, ok := db.buckets[name] 58 | if !ok { 59 | return 0 60 | } 61 | return v.head.clearAt 62 | } 63 | 64 | // splitDiskSegmentGroups 将传入的 segs 切分为多个分组 65 | func (db *DB) splitDiskSegmentGroups(segs diskSegments) []diskSegments { 66 | count := len(segs) 67 | groups := make([]diskSegments, 0) 68 | group := make(diskSegments, 0) 69 | 70 | var idx int 71 | var size int 72 | for { 73 | if idx >= count { 74 | break 75 | } 76 | 77 | size += segs[idx].size 78 | group = append(group, segs[idx]) 79 | 80 | // 当 group 中文件大小超过 db.opts.MaxDiskSegmentBytes 时切分为一个分组 81 | if size > db.opts.MaxDiskSegmentBytes { 82 | // 补偿回退 不然会丢弃当次循环的 seg 83 | if len(group) > 1 { 84 | idx-- 85 | group = group[:len(group)-1] 86 | } 87 | 88 | // 追加至 groups 中并置空状态 89 | size = 0 90 | groups = append(groups, group) 91 | group = make(diskSegments, 0) 92 | } 93 | idx++ 94 | } 95 | 96 | if 
len(group) > 0 { 97 | groups = append(groups, group) 98 | } 99 | 100 | return groups 101 | } 102 | 103 | // getCompactionPlans 生成 compaction plans 104 | func (db *DB) getCompactionPlans() ([]compactionPlan, error) { 105 | db.diskMut.Lock() 106 | segs := make(diskSegments, 0, len(db.diskSegs)) 107 | for i := range db.diskSegs { 108 | seg := db.diskSegs[i] 109 | seg.incRef() 110 | segs = append(segs, seg) 111 | } 112 | segs.OrderIncrement() // 时间序 113 | db.diskMut.Unlock() 114 | 115 | // 退出前保证 desc reference 116 | defer func() { 117 | for _, seg := range segs { 118 | seg.decRef() 119 | } 120 | }() 121 | 122 | var plans []compactionPlan 123 | groups := db.splitDiskSegmentGroups(segs) 124 | for _, group := range groups { 125 | cmp := newCompactor(db.opts.CompactFragmentation, db.getBucketClearAt, db.bucketsKeys.HasKey) 126 | 127 | // 分组长度为 1 时先判断是否需要 Compact 128 | if len(group) == 1 { 129 | needed, err := cmp.needCompat(group[0]) 130 | if err != nil { 131 | return nil, err 132 | } 133 | if !needed { 134 | continue 135 | } 136 | } 137 | 138 | // 其余情况可以合并多个 segs 139 | group.OrderDecrement() 140 | newSeg, err := cmp.doCompact(group...) 141 | if err != nil { 142 | return nil, err 143 | } 144 | plans = append(plans, compactionPlan{ 145 | src: group, 146 | dst: newSeg, 147 | }) 148 | } 149 | 150 | return plans, nil 151 | } 152 | 153 | // compactor 负责判断并压缩 disk segments 154 | type compactor struct { 155 | fragmentation float64 156 | onExist func(name string, h uint64) bool 157 | bucketPos codec.BucketPos 158 | clearAtFn func(string) int64 159 | visited *uint64set.Sets 160 | deleted *uint64set.Sets 161 | 162 | dataStart uint32 163 | dataEnd uint32 164 | keysStart uint32 165 | keysEnd uint32 166 | } 167 | 168 | // newCompactor 生成并返回 *compactor 实例 169 | func newCompactor(fragmentation float64, clearAtFn func(string) int64, onExist func(string, uint64) bool) *compactor { 170 | return &compactor{ 171 | fragmentation: fragmentation, 172 | onExist: onExist, 173 | visited: uint64set.NewSets(), 174 | deleted: uint64set.NewSets(), 175 | clearAtFn: clearAtFn, 176 | bucketPos: codec.BucketPos{ 177 | Record: map[string]codec.Positions{}, 178 | KeyItem: map[string]codec.Positions{}, 179 | }, 180 | } 181 | } 182 | 183 | // mergeBucketNames 合并多个 disk segments 的 bucket names 并进行排序 184 | func mergeBucketNames(diskSegs ...*diskSegment) []string { 185 | unique := map[string]struct{}{} 186 | for _, diskSeg := range diskSegs { 187 | for name := range diskSeg.pos.Record { 188 | unique[name] = struct{}{} 189 | } 190 | } 191 | names := make([]string, 0, len(unique)) 192 | for name := range unique { 193 | names = append(names, name) 194 | } 195 | sort.Strings(names) 196 | return names 197 | } 198 | 199 | // needCompat 判断是否需要 compact 200 | func (c *compactor) needCompat(diskSeg *diskSegment) (bool, error) { 201 | var deletedBytes int 202 | for bucket := range diskSeg.pos.KeyItem { 203 | // 如果 bucket Clear 的时间大于 disk segment 构建时间 则需要清除所有 keys 204 | if c.clearAtFn(bucket) > diskSeg.seqID { 205 | err := diskSeg.rangeKeys(bucket, func(flag codec.Flag, h uint64, n uint32) { 206 | deletedBytes += int(n) 207 | }) 208 | if err != nil { 209 | return false, err 210 | } 211 | continue 212 | } 213 | 214 | visited := uint64set.NewSet() 215 | deleted := uint64set.NewSet() 216 | err := diskSeg.rangeKeys(bucket, func(flag codec.Flag, h uint64, n uint32) { 217 | // 如果 key 在最新状态的 bucket 中不存在 则删除 218 | if !c.onExist(bucket, h) { 219 | deletedBytes += int(n) 220 | return 221 | } 222 | 223 | // 如果该 key 已删除 则后续相同的 key 都不再保留 224 | if flag == 
codec.FlagDel && !deleted.Has(h) { 225 | deleted.Insert(h) 226 | } 227 | if deleted.Has(h) { 228 | deletedBytes += int(n) 229 | return 230 | } 231 | 232 | // FlagPut: 取最新的 Record 233 | if !visited.Has(h) { 234 | visited.Insert(h) 235 | return 236 | } 237 | deletedBytes += int(n) 238 | }) 239 | if err != nil { 240 | return false, err 241 | } 242 | } 243 | 244 | // 当 deletedBytes 超过一定比例时才具备 Compact 效益 245 | return float64(diskSeg.size)*c.fragmentation < float64(deletedBytes), nil 246 | } 247 | 248 | // doCompact 执行真正的 compact 逻辑 249 | func (c *compactor) doCompact(diskSegs ...*diskSegment) (*diskSegment, error) { 250 | first := 0 // 往更大的 seqID 靠近 251 | 252 | seqID := diskSegs[first].seqID + 1 // 保证文件名不重复 253 | path := diskSegs[first].path 254 | 255 | dataTemp := fsx.DataTmpFilename(seqID, path) 256 | keysTemp := fsx.KeysTmpFilename(seqID, path) 257 | 258 | var err error 259 | defer func() { 260 | if err != nil { 261 | _ = os.RemoveAll(dataTemp) 262 | _ = os.RemoveAll(keysTemp) 263 | } 264 | }() 265 | 266 | var dataF *os.File 267 | if dataF, err = os.OpenFile(dataTemp, fsx.FlagAppend, 0o644); err != nil { 268 | return nil, err 269 | } 270 | defer dataF.Close() 271 | 272 | var keysF *os.File 273 | if keysF, err = os.OpenFile(keysTemp, fsx.FlagAppend, 0o644); err != nil { 274 | return nil, err 275 | } 276 | defer keysF.Close() 277 | 278 | buckets := mergeBucketNames(diskSegs...) 279 | LOOP: 280 | for _, bucket := range buckets { 281 | for _, diskSeg := range diskSegs { 282 | if err = c.compact(dataF, keysF, bucket, diskSeg); err != nil { 283 | break LOOP 284 | } 285 | } 286 | } 287 | 288 | // 判断 Compact 是否出错 289 | if err != nil { 290 | return nil, err 291 | } 292 | 293 | metaBytes := codec.EncodeMetadata(c.bucketPos.AsBucketMetaSlice()) 294 | _, err = dataF.Write(metaBytes) 295 | if err != nil { 296 | return nil, err 297 | } 298 | 299 | bf := c.visited.AsBloomFilter() 300 | bfBytes := bf.Bytes() 301 | if _, err := dataF.Write(bfBytes); err != nil { 302 | return nil, err 303 | } 304 | 305 | footer := codec.Footer{ 306 | DataSize: c.dataStart, 307 | MetaSize: uint32(len(metaBytes)), 308 | BloomFilterCount: uint32(bf.Count()), 309 | BloomFilterSize: uint32(len(bfBytes)), 310 | } 311 | 312 | _, err = dataF.Write(codec.EncodeFooter(footer)) 313 | if err != nil { 314 | return nil, err 315 | } 316 | 317 | diskSeg := newDiskSegment(seqID, int(c.dataEnd), path, bf, c.bucketPos) 318 | diskSeg.clearAtFn = c.clearAtFn 319 | return diskSeg, nil 320 | } 321 | 322 | func (c *compactor) compact(dw, kw io.Writer, bucket string, diskSeg *diskSegment) error { 323 | dataBuf := buffer.Get() 324 | keysBuf := buffer.Get() 325 | defer func() { 326 | buffer.Put(dataBuf) 327 | buffer.Put(keysBuf) 328 | }() 329 | 330 | var err error 331 | visited := c.visited.GetOrCreate(bucket) 332 | deleted := c.deleted.GetOrCreate(bucket) 333 | err = diskSeg.Range(bucket, func(flag codec.Flag, key, val []byte, n int) bool { 334 | // 若 bucket 执行了 Clear 则优先判断是否丢弃 335 | if c.clearAtFn(bucket) > diskSeg.seqID { 336 | return true 337 | } 338 | 339 | h := codec.HashKey(key) 340 | if !c.onExist(bucket, h) { 341 | return false 342 | } 343 | 344 | // 如果该 key 已删除 则后续相同的 key 都不再保留 345 | if flag == codec.FlagDel && !deleted.Has(h) { 346 | deleted.Insert(h) 347 | } 348 | if deleted.Has(h) { 349 | return false 350 | } 351 | 352 | if visited.Has(h) { 353 | return false 354 | } 355 | visited.Insert(h) 356 | 357 | b := codec.EncodeRecord(codec.FlagPut, key, val) 358 | _, _ = dataBuf.Write(b) 359 | c.dataEnd += uint32(len(b)) 360 | 361 | b = 
codec.EncodeKeyEntity(codec.FlagPut, h, uint32(len(b))) 362 | _, _ = keysBuf.Write(b) 363 | c.keysEnd += uint32(len(b)) 364 | 365 | if dataBuf.Len() >= chunkSize { 366 | c.dataEnd += codec.SizeChecksum 367 | if _, err = dw.Write(dataBuf.Frozen()); err != nil { 368 | return true 369 | } 370 | 371 | c.keysEnd += codec.SizeChecksum 372 | if _, err = kw.Write(keysBuf.Frozen()); err != nil { 373 | return true 374 | } 375 | 376 | c.bucketPos.Record[bucket] = append(c.bucketPos.Record[bucket], codec.Position{ 377 | Start: c.dataStart, 378 | End: c.dataEnd, 379 | }) 380 | c.dataStart = c.dataEnd 381 | 382 | c.bucketPos.KeyItem[bucket] = append(c.bucketPos.KeyItem[bucket], codec.Position{ 383 | Start: c.keysStart, 384 | End: c.keysEnd, 385 | }) 386 | c.keysStart = c.keysEnd 387 | 388 | // 置空 buffer 但不回收 389 | dataBuf.Reset() 390 | keysBuf.Reset() 391 | } 392 | return false 393 | }, codec.PassAll()) 394 | 395 | if err != nil { 396 | return err 397 | } 398 | 399 | if dataBuf.Len() > 0 { 400 | c.dataEnd += codec.SizeChecksum 401 | if _, err = dw.Write(dataBuf.Frozen()); err != nil { 402 | return err 403 | } 404 | 405 | c.keysEnd += codec.SizeChecksum 406 | if _, err = kw.Write(keysBuf.Frozen()); err != nil { 407 | return err 408 | } 409 | 410 | c.bucketPos.Record[bucket] = append(c.bucketPos.Record[bucket], codec.Position{ 411 | Start: c.dataStart, 412 | End: c.dataEnd, 413 | }) 414 | c.dataStart = c.dataEnd 415 | 416 | c.bucketPos.KeyItem[bucket] = append(c.bucketPos.KeyItem[bucket], codec.Position{ 417 | Start: c.keysStart, 418 | End: c.keysEnd, 419 | }) 420 | c.keysStart = c.keysEnd 421 | } 422 | 423 | return nil 424 | } 425 | 426 | // removeDiskSegments 移除给定 disk segments 427 | // 调用方需保证线程安全 此处不加锁 428 | func (db *DB) removeDiskSegments(segs ...*diskSegment) { 429 | count := len(segs) 430 | var n int 431 | for { 432 | if n >= count { 433 | break 434 | } 435 | 436 | var index int 437 | for i, seg := range db.diskSegs { 438 | if seg.seqID == segs[n].seqID { 439 | index = i 440 | break 441 | } 442 | } 443 | 444 | db.diskSegs[index].decRef() 445 | db.garbageDiskSegs = append(db.garbageDiskSegs, db.diskSegs[index]) 446 | db.diskSegs = append(db.diskSegs[:index], db.diskSegs[index+1:]...) 447 | n++ 448 | } 449 | } 450 | 451 | // Compact 合并 disk segment 减少磁盘空间占用 452 | // 同一时刻只能有一次 Compact 操作 执行操作返回 true 反之返回 false 453 | // 此 API 用户一般无需手动调用 454 | func (db *DB) Compact() (bool, error) { 455 | var compacted bool 456 | if !db.state.compacting.CompareAndSwap(false, true) { 457 | return compacted, nil 458 | } 459 | 460 | defer db.state.compacting.Store(false) 461 | compacted = true 462 | 463 | start := time.Now() 464 | plans, err := db.getCompactionPlans() 465 | if err != nil { 466 | return compacted, err 467 | } 468 | 469 | // 没有 compaction 计划 提前返回 470 | if len(plans) <= 0 { 471 | return compacted, nil 472 | } 473 | 474 | db.diskMut.Lock() 475 | defer db.diskMut.Unlock() 476 | 477 | for _, plan := range plans { 478 | logx.Infof("get compaction plan %s", plan) 479 | db.removeDiskSegments(plan.src...) 
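// the source segments were detached above and linger in garbageDiskSegs until the
// GC worker sees their refcount reach zero; the merged segment is installed in their place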
480 | if err := plan.dst.Install(); err != nil { 481 | logx.Errorf("install segment failed, SeqID·%d, err=%v", plan.dst.seqID, err) 482 | continue 483 | } 484 | 485 | db.diskSegs = append(db.diskSegs, plan.dst) 486 | db.diskSegs.OrderDecrement() 487 | db.stats.diskSegment.Add(1) 488 | db.stats.compact.Add(1) 489 | } 490 | logx.Infof("compaction operation elapsed %v", time.Since(start)) 491 | return compacted, nil 492 | } 493 | 494 | func (db *DB) evaluateCompact(prev, curr Stats) bool { 495 | // 如果进程正在 rotating 中 不进行操作 496 | if db.state.rotating.Load() { 497 | return false 498 | } 499 | 500 | // 如果存在清除 Bucket 操作 需要进行 compact 501 | if curr.Clear-prev.Clear > 0 { 502 | return true 503 | } 504 | 505 | // keyOp 均没有超过 CompactKeyOpDelta 需要跳过 506 | if int(curr.Del-prev.Del) <= db.opts.CompactKeyOpDelta && int(curr.Put-prev.Put) <= db.opts.CompactKeyOpDelta { 507 | return false 508 | } 509 | 510 | return true 511 | } 512 | 513 | func (db *DB) loopCompact() { 514 | logx.Infof("start compaction worker") 515 | 516 | db.waiting.Inc() 517 | defer db.waiting.Dec() 518 | 519 | ticker := time.NewTicker(db.opts.CompactCheckInterval) 520 | defer ticker.Stop() 521 | 522 | statsCache := db.Stats() 523 | last := time.Now() 524 | 525 | for { 526 | select { 527 | case <-db.ctx.Done(): 528 | return 529 | 530 | case <-ticker.C: 531 | now := time.Now() 532 | stats := db.Stats() 533 | goahead := db.evaluateCompact(statsCache, stats) 534 | force := now.Second()-last.Second() > int(db.opts.CompactForceInterval.Seconds()) 535 | if !goahead && !force { 536 | continue 537 | } 538 | 539 | last = now 540 | statsCache = stats // 跨周期对比 541 | if _, err := db.Compact(); err != nil { 542 | logx.Errorf("compaction failed, err=%v", err) 543 | } 544 | } 545 | } 546 | } 547 | -------------------------------------------------------------------------------- /compaction_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
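// A worked example of the grouping logic exercised below, assuming
// MaxDiskSegmentBytes = 10KB as set in the tests: segments sized [3KB, 2KB, 8KB, 1KB]
// split into the groups [3KB 2KB] and [8KB 1KB], because adding the 8KB segment to
// the first group would push it past the limit.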
13 | 14 | package grogudb 15 | 16 | import ( 17 | "testing" 18 | 19 | "github.com/stretchr/testify/require" 20 | 21 | "github.com/chenjiandongx/grogudb/pkg/codec" 22 | ) 23 | 24 | func TestCompact(t *testing.T) { 25 | t.Run("Compact: 0->0", func(t *testing.T) { 26 | opt := DefaultOptions() 27 | opt.MaxMemSegmentBytes = 100 * KB 28 | testCompact(t, 450, opt, 0, 0) 29 | }) 30 | 31 | t.Run("Compact: 1->1", func(t *testing.T) { 32 | opt := DefaultOptions() 33 | opt.MaxMemSegmentBytes = 10 * KB 34 | testCompact(t, 450, opt, 1, 1) 35 | }) 36 | 37 | t.Run("Compact: 5->1", func(t *testing.T) { 38 | opt := DefaultOptions() 39 | opt.MaxMemSegmentBytes = 10 * KB 40 | testCompact(t, 2000, opt, 5, 1) 41 | }) 42 | 43 | t.Run("Compact: 7->1", func(t *testing.T) { 44 | opt := DefaultOptions() 45 | opt.MaxMemSegmentBytes = 10 * KB 46 | testCompact(t, 3000, opt, 7, 1) 47 | }) 48 | 49 | t.Run("Compact: 7->2", func(t *testing.T) { 50 | opt := DefaultOptions() 51 | opt.MaxMemSegmentBytes = 10 * KB 52 | opt.MaxDiskSegmentBytes = 50 * KB 53 | testCompact(t, 3000, opt, 7, 2) 54 | }) 55 | } 56 | 57 | func testCompact(t *testing.T, iter int, opt Options, beforeSeg, afterSeg int64) { 58 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 59 | bucket := db.GetOrCreateBucket(bucketNum(0)) 60 | bucketPut(iter, bucket) 61 | 62 | require.Equal(t, beforeSeg, db.Stats().DiskSegment) 63 | prevIDs := make([]int64, 0) 64 | for _, seg := range db.diskSegs { 65 | prevIDs = append(prevIDs, seg.seqID) 66 | } 67 | 68 | compacted, err := db.Compact() 69 | require.True(t, compacted) 70 | require.NoError(t, err) 71 | 72 | db.Gc() 73 | require.Equal(t, afterSeg, db.Stats().DiskSegment) 74 | 75 | nextIDs := make([]int64, 0) 76 | for _, seg := range db.diskSegs { 77 | nextIDs = append(nextIDs, seg.seqID) 78 | } 79 | 80 | // [10, 8, 6, 5] -> [11] 81 | // +1 82 | if len(prevIDs) != len(nextIDs) && len(nextIDs) == 1 { 83 | require.Equal(t, prevIDs[0]+1, nextIDs[0]) 84 | } 85 | 86 | for i := 0; i < iter; i++ { 87 | assertGet(t, bucket.Get, i, i) 88 | } 89 | }, nil) 90 | } 91 | 92 | func TestSplitDiskSegmentGroups(t *testing.T) { 93 | t.Run("1 Group", func(t *testing.T) { 94 | opt := DefaultOptions() 95 | opt.MaxDiskSegmentBytes = 10 * KB 96 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 97 | segs := make([]*diskSegment, 0) 98 | segs = append(segs, &diskSegment{seqID: 0, size: 9 * KB}) 99 | ret := db.splitDiskSegmentGroups(segs) 100 | require.Len(t, ret, 1) 101 | require.Equal(t, 9*KB, ret[0][0].size) 102 | }, nil) 103 | }) 104 | 105 | t.Run("2 Groups", func(t *testing.T) { 106 | opt := DefaultOptions() 107 | opt.MaxDiskSegmentBytes = 10 * KB 108 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 109 | segs := make([]*diskSegment, 0) 110 | segs = append(segs, &diskSegment{seqID: 0, size: 9 * KB}) 111 | segs = append(segs, &diskSegment{seqID: 1, size: 2 * KB}) 112 | ret := db.splitDiskSegmentGroups(segs) 113 | require.Len(t, ret, 2) 114 | require.Equal(t, 9*KB, ret[0][0].size) 115 | require.Equal(t, 2*KB, ret[1][0].size) 116 | }, nil) 117 | }) 118 | 119 | t.Run("2 Groups", func(t *testing.T) { 120 | opt := DefaultOptions() 121 | opt.MaxDiskSegmentBytes = 10 * KB 122 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 123 | segs := make([]*diskSegment, 0) 124 | segs = append(segs, &diskSegment{seqID: 0, size: 3 * KB}) 125 | segs = append(segs, &diskSegment{seqID: 1, size: 2 * KB}) 126 | segs = append(segs, &diskSegment{seqID: 2, size: 8 * KB}) 127 | ret := db.splitDiskSegmentGroups(segs) 128 | 
require.Len(t, ret, 2) 129 | require.Equal(t, 3*KB, ret[0][0].size) 130 | require.Equal(t, 2*KB, ret[0][1].size) 131 | require.Equal(t, 8*KB, ret[1][0].size) 132 | }, nil) 133 | }) 134 | 135 | t.Run("2 Groups", func(t *testing.T) { 136 | opt := DefaultOptions() 137 | opt.MaxDiskSegmentBytes = 10 * KB 138 | runGrogudbTest(t, &opt, func(t require.TestingT, db *DB) { 139 | segs := make([]*diskSegment, 0) 140 | segs = append(segs, &diskSegment{seqID: 0, size: 3 * KB}) 141 | segs = append(segs, &diskSegment{seqID: 1, size: 2 * KB}) 142 | segs = append(segs, &diskSegment{seqID: 2, size: 8 * KB}) 143 | segs = append(segs, &diskSegment{seqID: 3, size: 1 * KB}) 144 | ret := db.splitDiskSegmentGroups(segs) 145 | require.Len(t, ret, 2) 146 | require.Equal(t, 3*KB, ret[0][0].size) 147 | require.Equal(t, 2*KB, ret[0][1].size) 148 | require.Equal(t, 8*KB, ret[1][0].size) 149 | require.Equal(t, 1*KB, ret[1][1].size) 150 | }, nil) 151 | }) 152 | } 153 | 154 | func TestMergeBucketNames(t *testing.T) { 155 | segs := []*diskSegment{ 156 | { 157 | pos: codec.BucketPos{ 158 | Record: map[string]codec.Positions{ 159 | "bucket1": {}, 160 | "bucket2": {}, 161 | "bucket5": {}, 162 | }, 163 | }, 164 | }, 165 | { 166 | pos: codec.BucketPos{ 167 | Record: map[string]codec.Positions{ 168 | "bucket1": {}, 169 | "bucket3": {}, 170 | "bucket5": {}, 171 | }, 172 | }, 173 | }, 174 | } 175 | names := mergeBucketNames(segs...) 176 | require.Equal(t, []string{"bucket1", "bucket2", "bucket3", "bucket5"}, names) 177 | } 178 | -------------------------------------------------------------------------------- /control.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # shellcheck disable=SC2006 6 | # shellcheck disable=SC2086 7 | 8 | TEST_COVERAGE_THRESHOLD=70 9 | 10 | function unittest() { 11 | go test -buildmode=pie -parallel=8 ./... -coverprofile coverage.out -covermode count 12 | go tool cover -func coverage.out 13 | echo "Quality Gate: checking test coverage is above threshold ..." 14 | echo "Threshold : $TEST_COVERAGE_THRESHOLD%" 15 | totalCoverage=$(go tool cover -func=coverage.out | grep total | grep -Eo '[0-9]+\.[0-9]+') 16 | echo "Current test coverage : $totalCoverage %" 17 | if (($(echo "$totalCoverage $TEST_COVERAGE_THRESHOLD" | awk '{print ($1 > $2)}'))); then 18 | echo "OK" 19 | else 20 | echo "Current test coverage is below threshold. Please add more unit tests or adjust threshold to a lower value." 21 | echo "FAIL" 22 | exit 1 23 | fi 24 | } 25 | 26 | if [ "$1" == "test" ]; then 27 | unittest 28 | fi 29 | -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
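// A minimal usage sketch (illustrative path and key names; error handling trimmed;
// keys and values are plain []byte, as in the tests):
//
//	db, err := grogudb.Open("/tmp/grogudb-data", nil)
//	if err != nil {
//		panic(err)
//	}
//	defer db.Close()
//
//	bucket := db.GetOrCreateBucket("bucket0")
//	_ = bucket.Put([]byte("key"), []byte("value"))
//	val, err := bucket.Get([]byte("key"))
//	if err == nil && !val.Nil() {
//		_ = val.String()
//	}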
13 | 14 | package grogudb 15 | 16 | import ( 17 | "context" 18 | "os" 19 | "path/filepath" 20 | "sort" 21 | "sync" 22 | "time" 23 | 24 | "github.com/juju/fslock" 25 | "github.com/pkg/errors" 26 | "go.uber.org/multierr" 27 | 28 | "github.com/chenjiandongx/grogudb/pkg/buffer" 29 | "github.com/chenjiandongx/grogudb/pkg/codec" 30 | "github.com/chenjiandongx/grogudb/pkg/fsx" 31 | "github.com/chenjiandongx/grogudb/pkg/logx" 32 | "github.com/chenjiandongx/grogudb/pkg/uint64set" 33 | "github.com/chenjiandongx/grogudb/pkg/wait" 34 | ) 35 | 36 | const ( 37 | lockName = "_lock" 38 | 39 | keysMaxFdHold = 1 40 | dataMaxFdHold = 4 41 | ) 42 | 43 | type operation uint8 44 | 45 | const ( 46 | opPut operation = iota 47 | opPutIf 48 | opDel 49 | ) 50 | 51 | var ( 52 | ErrEmptyDBPath = errors.New("grogudb: empty database path") 53 | ErrClosed = errors.New("grogudb: database closed") 54 | ) 55 | 56 | // Bytes 定义为 []byte 提供了 Copy 方法 57 | type Bytes []byte 58 | 59 | // Copy 复制 bytes 60 | func (b Bytes) Copy() []byte { 61 | if b == nil { 62 | return nil 63 | } 64 | if len(b) == 0 { 65 | return []byte{} 66 | } 67 | 68 | dst := make([]byte, len(b)) 69 | copy(dst, b) 70 | return dst 71 | } 72 | 73 | // B 将 Bytes 转换为 []byte 74 | func (b Bytes) B() []byte { 75 | return b 76 | } 77 | 78 | // Nil 返回 Bytes 是否为 nil 79 | func (b Bytes) Nil() bool { 80 | return b == nil 81 | } 82 | 83 | // String 将 Bytes 转换为 string 84 | func (b Bytes) String() string { 85 | return string(b) 86 | } 87 | 88 | // DB grogudb 实例实现 89 | // 90 | // 管控着所有的 db 操作行为以及 db 的状态管理 91 | type DB struct { 92 | opts *Options 93 | 94 | ctx context.Context 95 | cancel context.CancelFunc 96 | flock *fslock.Lock 97 | 98 | bucketsKeys *uint64set.Sets 99 | markBytesCh chan int64 100 | path string 101 | clearAt map[string]int64 // key is bucket name, val is seqID of segmentFile 102 | 103 | memMut sync.RWMutex 104 | buckets map[string]*Bucket // key is bucket name 105 | memorySegs map[string]*memorySegment // key is bucket name 106 | 107 | diskMut sync.Mutex 108 | diskSegs diskSegments 109 | garbageDiskSegs diskSegments // 待清理的 disk segment 110 | 111 | stats *statistics 112 | state *state 113 | waiting wait.Waiting 114 | } 115 | 116 | // Open 打开一个 db 实例 打开 db 链接时会持有文件锁 避免二次打开 117 | func Open(path string, opts *Options) (*DB, error) { 118 | if path == "" { 119 | return nil, ErrEmptyDBPath 120 | } 121 | 122 | if _, err := os.Stat(path); os.IsNotExist(err) { 123 | if err := os.MkdirAll(path, os.ModePerm); err != nil { 124 | return nil, err 125 | } 126 | } 127 | 128 | if opts == nil { 129 | opts = &Options{} 130 | } 131 | opts.Validate() 132 | logx.Infof("database options: %+v", *opts) 133 | 134 | flock := fslock.New(filepath.Join(path, lockName)) 135 | if err := flock.TryLock(); err != nil { 136 | return nil, err 137 | } 138 | 139 | ctx, cancel := context.WithCancel(context.Background()) 140 | db := &DB{ 141 | ctx: ctx, 142 | cancel: cancel, 143 | flock: flock, 144 | path: path, 145 | opts: opts, 146 | bucketsKeys: uint64set.NewSets(), 147 | buckets: make(map[string]*Bucket), 148 | memorySegs: make(map[string]*memorySegment), 149 | markBytesCh: make(chan int64, 1), 150 | stats: &statistics{}, 151 | state: &state{}, 152 | clearAt: make(map[string]int64), 153 | } 154 | 155 | if err := db.loadDiskSegments(); err != nil { 156 | return nil, err 157 | } 158 | 159 | go wait.Until(ctx, db.loopRotate) 160 | go wait.Until(ctx, db.loopCompact) 161 | go wait.Until(ctx, db.loopGc) 162 | db.waiting.Until(3) 163 | 164 | return db, nil 165 | } 166 | 167 | func (db *DB) loopGc() { 168 
| logx.Infof("start gc worker") 169 | 170 | db.waiting.Inc() 171 | defer db.waiting.Dec() 172 | 173 | ticker := time.NewTicker(db.opts.GcInterval) 174 | defer ticker.Stop() 175 | 176 | for { 177 | select { 178 | case <-db.ctx.Done(): 179 | return 180 | 181 | case <-ticker.C: 182 | db.Gc() 183 | } 184 | } 185 | } 186 | 187 | // Gc 负责清理 handing 状态的 disk segment 188 | // 此 API 用户一般无需手动调用 189 | func (db *DB) Gc() { 190 | db.diskMut.Lock() 191 | defer db.diskMut.Unlock() 192 | 193 | var changed bool 194 | for i := range db.garbageDiskSegs { 195 | seg := db.garbageDiskSegs[i] 196 | if seg.loadRef() > 0 { 197 | continue 198 | } 199 | 200 | changed = true 201 | db.stats.diskSegment.Add(-1) 202 | if err := seg.Remove(); err != nil { 203 | logx.Errorf("remove segment failed, SegID·%d, err=%v", seg.seqID, err) 204 | continue 205 | } 206 | logx.Infof("gc segment, SegID·%d", seg.seqID) 207 | db.garbageDiskSegs[i] = nil // 释放资源 208 | } 209 | 210 | if !changed { 211 | return 212 | } 213 | 214 | nextRound := make(diskSegments, 0) 215 | for i := range db.garbageDiskSegs { 216 | if seg := db.garbageDiskSegs[i]; seg != nil { 217 | nextRound = append(nextRound, seg) 218 | } 219 | } 220 | db.garbageDiskSegs = nextRound 221 | } 222 | 223 | func (db *DB) loopRotate() { 224 | logx.Infof("start rotation worker") 225 | 226 | db.waiting.Inc() 227 | defer db.waiting.Dec() 228 | 229 | for { 230 | select { 231 | case <-db.ctx.Done(): 232 | if err := db.rotate(); err != nil { 233 | logx.Errorf("rotate segment failed, err=%v", err) 234 | } 235 | return 236 | 237 | case n := <-db.markBytesCh: 238 | cur := db.stats.memHoldBytes.Add(n) 239 | if cur < int64(db.opts.MaxMemSegmentBytes) { 240 | continue 241 | } 242 | if err := db.rotate(); err != nil { 243 | logx.Errorf("rotate segment failed, err=%v", err) 244 | } 245 | } 246 | } 247 | } 248 | 249 | func (db *DB) getMemoryBuckets() []string { 250 | db.memMut.RLock() 251 | defer db.memMut.RUnlock() 252 | 253 | names := make([]string, 0, len(db.memorySegs)) 254 | for name, seg := range db.memorySegs { 255 | if seg.Len() > 0 { 256 | names = append(names, name) 257 | } 258 | } 259 | sort.Strings(names) 260 | return names 261 | } 262 | 263 | func (db *DB) rotate() error { 264 | if !db.state.rotating.CompareAndSwap(false, true) { 265 | return nil 266 | } 267 | 268 | defer db.state.rotating.Store(false) 269 | db.stats.rotate.Add(1) 270 | 271 | names := db.getMemoryBuckets() 272 | if len(names) <= 0 { 273 | return nil 274 | } 275 | 276 | dataBuf := buffer.Get() 277 | keysBuf := buffer.Get() 278 | defer func() { 279 | buffer.Put(dataBuf) 280 | buffer.Put(keysBuf) 281 | }() 282 | 283 | var bms codec.BucketMetaSlice 284 | var dataStart, keysStart int 285 | keySets := uint64set.NewSets() 286 | 287 | var total int 288 | for _, name := range names { 289 | db.memMut.Lock() 290 | seg := db.memorySegs[name] 291 | dataBytes, keysBytes, keySet := seg.Flush() 292 | db.memMut.Unlock() 293 | 294 | l := len(dataBytes) + len(keysBytes) 295 | if l <= 0 { 296 | continue 297 | } 298 | keySets.Update(name, keySet) 299 | 300 | total += len(dataBytes) 301 | total -= codec.SizeChecksum 302 | bms = append(bms, codec.BucketMeta{ 303 | Name: name, 304 | RecordPos: []codec.Position{{ 305 | Start: uint32(dataStart), 306 | End: uint32(dataStart + len(dataBytes)), 307 | }}, 308 | KeyEntityPos: []codec.Position{{ 309 | Start: uint32(keysStart), 310 | End: uint32(keysStart + len(keysBytes)), 311 | }}, 312 | }) 313 | 314 | dataStart += len(dataBytes) 315 | keysStart += len(keysBytes) 316 | _, _ = 
dataBuf.Write(dataBytes) 317 | _, _ = keysBuf.Write(keysBytes) 318 | } 319 | 320 | db.stats.memHoldBytes.Add(int64(-total)) 321 | if dataBuf.Len() <= 0 || keysBuf.Len() <= 0 { 322 | return nil 323 | } 324 | 325 | // 使用 UnixNano 作为 seqID 保证单调递增 326 | seqID := time.Now().UnixNano() 327 | 328 | keysPath := fsx.KeysFilename(seqID, db.path) 329 | dataPath := fsx.DataFilename(seqID, db.path) 330 | 331 | var err error 332 | defer func() { 333 | if err != nil { 334 | _ = os.RemoveAll(keysPath) 335 | _ = os.RemoveAll(dataPath) 336 | } 337 | }() 338 | 339 | if err = fsx.WriteFile(keysPath, keysBuf.Bytes()); err != nil { 340 | return err 341 | } 342 | 343 | var dataF *os.File 344 | dataF, err = os.OpenFile(dataPath, fsx.FlagAppend, 0o644) 345 | if err != nil { 346 | return err 347 | } 348 | defer dataF.Close() 349 | 350 | footer := codec.Footer{} 351 | footer.DataSize = uint32(dataBuf.Len()) 352 | if _, err = dataF.Write(dataBuf.Bytes()); err != nil { 353 | return err 354 | } 355 | 356 | metaBytes := codec.EncodeMetadata(bms) 357 | footer.MetaSize = uint32(len(metaBytes)) 358 | if _, err = dataF.Write(metaBytes); err != nil { 359 | return err 360 | } 361 | 362 | bf := keySets.AsBloomFilter() 363 | bfBytes := bf.Bytes() 364 | footer.BloomFilterCount = uint32(keySets.CountAll()) 365 | footer.BloomFilterSize = uint32(len(bf.Bytes())) 366 | if _, err = dataF.Write(bfBytes); err != nil { 367 | return err 368 | } 369 | 370 | footerBytes := codec.EncodeFooter(footer) 371 | if _, err = dataF.Write(footerBytes); err != nil { 372 | return err 373 | } 374 | 375 | diskSeg := newDiskSegment(seqID, dataBuf.Len(), db.path, bf, bms.AsBucketPos()) 376 | diskSeg.clearAtFn = db.getBucketClearAt 377 | 378 | diskSeg.dataCfd, err = fsx.NewCacheFD(dataPath, dataMaxFdHold) 379 | if err != nil { 380 | return err 381 | } 382 | diskSeg.keysCfd, err = fsx.NewCacheFD(keysPath, keysMaxFdHold) 383 | if err != nil { 384 | return err 385 | } 386 | 387 | db.diskMut.Lock() 388 | db.diskSegs = append(db.diskSegs, diskSeg) 389 | db.stats.diskSegment.Add(1) 390 | db.diskSegs.OrderDecrement() 391 | db.diskMut.Unlock() 392 | 393 | logx.Infof("rotate KeyF·%s, DataF·%s", fsx.KeysFilename(seqID), fsx.DataFilename(seqID)) 394 | 395 | return nil 396 | } 397 | 398 | // Buckets 返回当前 db 所有 buckets 名称 399 | func (db *DB) Buckets() []string { 400 | db.memMut.Lock() 401 | defer db.memMut.Unlock() 402 | 403 | buckets := make([]string, 0, len(db.buckets)) 404 | for k := range db.buckets { 405 | buckets = append(buckets, k) 406 | } 407 | sort.Strings(buckets) 408 | return buckets 409 | } 410 | 411 | func (db *DB) getIterReleaser() *iterReleaser { 412 | db.diskMut.Lock() 413 | segs := make([]*diskSegment, 0, len(db.diskSegs)) 414 | for _, seg := range db.diskSegs { 415 | seg.incRef() 416 | segs = append(segs, seg) 417 | } 418 | db.diskMut.Unlock() 419 | 420 | return newDiskSegmentVersion(segs) 421 | } 422 | 423 | // Stats 返回 db 操作统计情况 424 | func (db *DB) Stats() Stats { 425 | return db.stats.Load() 426 | } 427 | 428 | // State 返回 db 状态 429 | func (db *DB) State() State { 430 | return db.state.Load() 431 | } 432 | 433 | // GetOrCreateBucket 获取或创建 Bucket 实例 434 | func (db *DB) GetOrCreateBucket(name string) *Bucket { 435 | db.memMut.RLock() 436 | seg := db.memorySegs[name] 437 | db.memMut.RUnlock() 438 | 439 | if seg != nil { 440 | return db.buckets[name] 441 | } 442 | 443 | db.memMut.Lock() 444 | defer db.memMut.Unlock() 445 | 446 | seg = db.memorySegs[name] 447 | if seg != nil { 448 | return db.buckets[name] 449 | } 450 | 451 | keySet := 
db.bucketsKeys.GetOrCreate(name) 452 | seg = newMemorySegment(name, memSegCallback{ 453 | onRemove: keySet.Remove, 454 | onInsert: keySet.Insert, 455 | onExist: keySet.Has, 456 | onBytes: func(n int64) { 457 | db.markBytesCh <- n 458 | }, 459 | }) 460 | db.stats.memSegment.Add(1) 461 | db.memorySegs[name] = seg 462 | 463 | bucket := &Bucket{ 464 | name: name, 465 | keys: keySet, 466 | head: seg, 467 | statistics: db.stats, 468 | getIterReleaser: db.getIterReleaser, 469 | } 470 | db.buckets[name] = bucket 471 | return bucket 472 | } 473 | 474 | // Close 关闭 db 475 | func (db *DB) Close() error { 476 | if !db.state.closed.CompareAndSwap(false, true) { 477 | return ErrClosed 478 | } 479 | 480 | db.cancel() 481 | db.diskMut.Lock() 482 | var errs []error 483 | for _, seg := range db.diskSegs { 484 | if err := seg.Close(); err != nil { 485 | errs = append(errs, err) 486 | } 487 | } 488 | db.diskMut.Unlock() 489 | db.waiting.Until(0) 490 | 491 | if err := db.flock.Unlock(); err != nil { 492 | errs = append(errs, err) 493 | } 494 | 495 | return multierr.Combine(errs...) 496 | } 497 | -------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package grogudb 15 | 16 | import ( 17 | "testing" 18 | 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | func TestDBOpen(t *testing.T) { 23 | t.Run("Flock", func(t *testing.T) { 24 | dir := makeTmpDir() 25 | defer removeDir(dir) 26 | 27 | _, err := Open(dir, nil) 28 | require.NoError(t, err) 29 | 30 | _, err = Open(dir, nil) 31 | require.Error(t, err) 32 | }) 33 | 34 | t.Run("Empty database path", func(t *testing.T) { 35 | _, err := Open("", nil) 36 | require.Equal(t, ErrEmptyDBPath, err) 37 | }) 38 | 39 | t.Run("Double close", func(t *testing.T) { 40 | dir := makeTmpDir() 41 | defer removeDir(dir) 42 | 43 | db, err := Open(dir, nil) 44 | require.NoError(t, err) 45 | require.NoError(t, db.Close()) 46 | require.Equal(t, ErrClosed, db.Close()) 47 | }) 48 | } 49 | 50 | func TestBucketName(t *testing.T) { 51 | dir := makeTmpDir() 52 | defer removeDir(dir) 53 | 54 | db, err := Open(dir, nil) 55 | require.NoError(t, err) 56 | 57 | db.GetOrCreateBucket("bucket1") 58 | db.GetOrCreateBucket("bucket3") 59 | db.GetOrCreateBucket("bucket2") 60 | 61 | buckets := db.Buckets() 62 | require.Equal(t, []string{"bucket1", "bucket2", "bucket3"}, buckets) 63 | } 64 | 65 | func TestGetMemoryBuckets(t *testing.T) { 66 | dir := makeTmpDir() 67 | defer removeDir(dir) 68 | 69 | db, err := Open(dir, nil) 70 | require.NoError(t, err) 71 | 72 | db.GetOrCreateBucket("bucket1") 73 | db.GetOrCreateBucket("bucket3") 74 | db.GetOrCreateBucket("bucket2") 75 | 76 | buckets := db.getMemoryBuckets() 77 | require.Len(t, buckets, 0) 78 | } 79 | 80 | func TestState(t *testing.T) { 81 | runGrogudbTest(t, nil, func(t require.TestingT, db *DB) { 82 | s := db.State() 83 | require.Zero(t, s) 84 | }, nil) 85 | } 86 | 87 | func TestStats(t *testing.T) { 88 | runGrogudbTest(t, nil, func(t require.TestingT, db *DB) { 89 | s := db.Stats() 90 | require.Zero(t, s) 91 | }, nil) 92 | } 93 | -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | # Design Documention 2 | 3 | grogudb 是一个为高频 Put/Has/Del/Range 操作而设计的持久化 KV 数据库。 4 | 5 | ## Overview 6 | 7 | grogudb 设计初衷是为了顺序读写的场景,在调研了开源社区成熟的持久化 KV 存储后,发现大多数项目都是考虑了更通用的场景,如: 8 | 9 | * 读写均衡 10 | * 批量写 11 | * 事务性 12 | * 断电恢复 13 | * ... 14 | 15 | 没有发现对高频 `Put/Has/Del/Range` 场景优化设计的存储(甚至可以没有 `Get` 操作)。假设 KV DB 也有 Table 的概念,在这里称为 Bucket,那对于 DB 来讲,实际上使用就是一种「垂直写,水平查」的行为。 16 | 17 | **多 Buckets 同时垂直写,单 Bucket 水平读。** 18 | 19 | ```docs 20 | Buckets 21 | ^ 22 | │ . . . . . . . . . . . . . . . . . . . . . . 23 | │ . . . . . . . . . . . . . . . . . . . . . . Range: Bucket01 24 | │ . . . . . . . 25 | │ . . . . . . . . . . . . . . . . . . . ... 26 | │ . . . . . . . . . . . . . . . . . . . . . 27 | │ . . . . . . . . . . . . . . . . . . . . . Range: Bucket99 28 | │ . . . . . . . . . . . . . . . . . Range: BucketN ... 29 | │ . . . . . . . . . . . . . . 30 | │ . . . . . . . . . . . . . . . . . . . ... 31 | │ . . . . . . . . . . . . . . . . . . . . 32 | v 33 | <----------------- KV Record ------------------> 34 | ``` 35 | 36 | 这种用法符合 [LSM-Tree](https://www.wikiwand.com/en/Log-structured_merge-tree) 的设计思路,把**逻辑上相邻**的数据在**物理落盘**上也使其相邻,即顺序读写。这样在减少随机 IO 带来的开销的同时还能利用 [CPU SIMD](https://www.wikiwand.com/en/Single_instruction,_multiple_data) 特性。 37 | 38 | 具体做法是将数据分为热区(memory segment)以及冷区(disk segment)再通过层级 compact 来归档数据,同时利用 WAL 来保证断电恢复,不少**追求吞吐**的 DB 都使用了这种类似的实现方式。 39 | 40 | 但仔细思考一些问题: 41 | 42 | **1. 
WAL 能 100% 保证断电恢复吗?** 43 | 44 | **不是的。** 严格意义上讲,于操作系统而言,并不是每次 `write syscall` 都会将数据落盘,而是会写到内核缓存区,批量写入,除非每次 `write` 之后都调用 `fsync` 刷盘,但这显然非常影响效率。 45 | 46 | **2. 事务性是强要求吗?** 47 | 48 | **看情况。** 取决于业务需求,不一定所有的业务都要求在一个 Txn 里同时进行 write/read 操作,因此可允许提供单操作的原子性即可。 49 | 50 | **3. 读写性能需要同时保证吗?** 51 | 52 | **不一定。** 上层需求决定底层设计,比如业务只需要 `Put/Del/Range` 操作,根本没有 `Get` 要求,这也是写这个项目的缘由。所以可以在设计上牺牲一部分的查询性能来换取写入性能。 53 | 54 | 经过一番思考和取舍后,设计就逐渐明朗,我们要什么? 55 | 56 | 1. 设计简单,可维护性好。 57 | 2. 对 `Write/Scan` 操作进行性能优化。 58 | 3. 不需要严格的数据完整性,允许断电故障。 59 | 4. 操作并发安全,但不提供事务。 60 | 61 | bitcask 提供了一种非常轻巧的设计方案,详见论文 [bitcask-intro](https://riak.com/assets/bitcask-intro.pdf),本项目也是参考该论文实现的。 62 | 63 | ## Key Hash 64 | 65 | 每次写入 record 时会将 key 做 hash(uint64),并将 hash 记录至对应的 bucket 中,即每个 bucket 持有自己的 `key hashmap`。这种做法使得 DB 可以掌握全局所有 bucket keys 情况。 66 | 67 | * 优点:`has` 操作非常快,几乎为 O(1),且没有任何 IO。 68 | * 缺点:`hash map` 会占用一定的内存(map 扩容)。 69 | 70 | ## Segments 71 | 72 | ### Memory Segment 73 | 74 | 数据存储的切割单位为 `segment`,所有数据写入是先落入 `memory segment` 中,待数据写满缓冲区后(默认 8MB)Flush 至磁盘,并归档为 `disk segment`。 75 | 76 | ```docs 77 | + --- Head ----- | --------------- | --------------- | --- | --------------- + 78 | | Memory Segment | Disk Segment(1) | Disk Segment(2) | ... | Disk Segment(N) | 79 | + -------------- | --------------- | --------------- | --- | --------------- + 80 | ``` 81 | 82 | memory segment 由多个 buckets 组成。bucket 是为了 shard 而设计的接口,类比于 MySQL 中的 Table 概念(参考了 bblot 设计)。 83 | 84 | `bucket buffer` 为双向链表设计,使用双向链表而不是切片数组的原因如下: 85 | 86 | 1. 内存优化,没有扩容开销,无需预留 buffer。 87 | 2. 删除操作开销低,解链即可。 88 | 3. 更新数据开销低,元素解引用并重新链至末尾即可。 89 | 90 | 缺点是元素使用指针相链,会带来一定的 GC 扫描开销。 91 | 92 | ```docs 93 | + ------------- + 94 | DoublyLinkedList --> | *head | 95 | | ------------- | 96 | | key/record(1) | 97 | | ------------- | 98 | | key/record(2) | 99 | | ------------- | 100 | | key/record(3) | 101 | | ------------- | 102 | | ....... | 103 | | ------------- | 104 | | key/record(N) | 105 | + ------------- + 106 | ``` 107 | 108 | 当 buffer 满时进行归档,归档操作如下,此过程保证线程安全: 109 | 110 | 1. 反转列表,因为数据是按时间序排列的(oldest -> newest),但读取时是需要从最新数据往回读的(newest -> oldest)。假设 record(1) 先增后删,按序遍历的话是 put -> del,此时可能会误判为该 key 存在。而反转以后,如果确定最新数据已经是删除状态,则可以直接返回。 111 | 2. 对归档数据写入 checksum,保证落盘数据完整性。 112 | 3. 将 memory segment 转换为 disk segment,同时置空 memory segment。 113 | 4. 状态标记。 114 | 115 | memory segment 保证了单个 segment 内不会同时存在两条相同 key 的 record 记录。 116 | 117 | ### Disk Segment 118 | 119 | disk segment 包含两个文件,命名规则为 `data_$segid`,`keys_$data`,前者保存了完整的 record 信息,后者保存了 key 信息。 120 | 121 | *DataFile 布局:* 122 | 123 | #### DataFile 124 | 125 | DataFile 数据以二进制存储,分为 4 块内容。 126 | 127 | ```docs 128 | + ----------- | --------- | ----------- | ----------- + 129 | | DataBlock | MetaBlock | BloomFilter | Footer | 130 | + ----------- | --------- | ----------- | ----------- + 131 | ``` 132 | 133 | 下面逐一介绍。 134 | 135 | **DataBlock** 136 | 137 | DataBlock 由多条 record 一起组成,布局如下: 138 | 139 | ```docs 140 | + --------- + 141 | | Record(1) | 142 | + --------- + 143 | | Record(2) | 144 | + --------- + 145 | | ... 
| 146 | + --------- + 147 | | Record(N) | 148 | + --------- + 149 | | Checksum | 150 | + --------- + 151 | ``` 152 | 153 | *Record 布局:* 154 | 155 | ```docs 156 | + -------------- | -------- | ----------- | --- | ------------- | ----- + 157 | | RecordSize(4B) | Flag(1B) | KeySize(4B) | Key | ValueSize(4B) | Value | 158 | + -------------- | -------- | ----------- | --- | ------------- | ----- + 159 | ``` 160 | 161 | * RecordSize: record 大小,即 [Flag, Value] 区间大小。 162 | * Flag: 数据标识,Put/Del/Tombstone 163 | * KeySize/Key: key 大小以及内容。 164 | * ValueSize/Value: value 大小以及内容。 165 | 166 | **MetaBlock** 167 | 168 | *Metadata 布局:* 169 | 170 | ```docs 171 | + -------------- | ------ | ------------------ | -------------- | ----------------- | ------------ + 172 | | BucketSize(4B) | Bucket | RecordPosCount(4B) | RecordPosBlock | KeyEntityPosBlock | Checksum(4B) | 173 | + -------------- | ------ | ------------------ | -------------- | ----------------- | ------------ + 174 | 175 | RecordPosBlock/KeyEntityPosBlock 176 | 177 | + ----------- + 178 | | Position(1) | 179 | + ----------- + 180 | | Position(2) | 181 | + ----------- + 182 | | ... | 183 | + ----------- + 184 | | Position(N) | 185 | + ----------- + 186 | 187 | Position 188 | 189 | + ----------------- | --------------- + 190 | | PositionStart(4B) | PositionEnd(4B) | 191 | + ----------------- | --------------- + 192 | ``` 193 | 194 | * BucketSize/Bucket: bucket name 大小及其内容。 195 | * RecordPosCount/RecordPosBlock: record position 数量,一个 position 描述了数据块的起始和终止位置。比如单个 bucket 在 disk segment 中有 20M 的数据,那其将会被切割成若干个小块,每个小块对应着各自的 position(读取更高效)。 196 | * KeyEntityPosCount/KeyEntityPosBlock: 同 record pos,只不过描述的是 key 的存储。 197 | * Checksum: 校验码。 198 | 199 | **BloomFilter** 200 | 201 | *BloomFilter 布局:* 202 | 203 | ```docs 204 | + ---------- + 205 | | BloomBytes | 206 | + ---------- + 207 | ``` 208 | 209 | * BloomBytes: 布隆过滤器实现。 210 | 211 | **Footer** 212 | 213 | *Footer 布局:* 214 | 215 | ```docs 216 | + ------------ | ------------ | -------------------- | -------------------- | --------- + 217 | | DataSize(4B) | MetaSize(4B) | BloomFilterCount(4B) | BloomFilterBytes(4B) | Magic(4B) | 218 | + ------------ | ------------ | -------------------- | -------------------- | --------- + 219 | ``` 220 | 221 | * DataSize: 数据块大小。 222 | * MetaSize: 元数据块大小。 223 | * BloomFilterCount: bloomFilter 元素个数。 224 | * BloomFilterBytes: bloomFilter 字节数组。 225 | * Magic: 魔法数。 226 | 227 | #### KeysFile 228 | 229 | Keys 由多个 keyEntity 组成,布局如下: 230 | 231 | ```docs 232 | + ------------ + 233 | | KeyEntity(1) | 234 | + ------------ + 235 | | KeyEntity(2) | 236 | + ------------ + 237 | | ... | 238 | + ------------ + 239 | | KeyEntity(N) | 240 | + ------------ + 241 | ``` 242 | 243 | *KeyEntity 布局:* 244 | 245 | ```docs 246 | + -------- | ----------- | -------------- + 247 | | Flag(1B) | KeyHash(8B) | RecordSize(4B) | 248 | + -------- | ----------- | -------------- + 249 | ``` 250 | 251 | * Flag: 数据标识,Put/Del/Tombstone 252 | * KeyHash: key uint64 hash。 253 | * RecordSize: record 大小。 254 | 255 | 关于 Keys 文件两个主要作用: 256 | 257 | 1. 启动扫描:启动时程序需要扫描所有的 disk segment 文件来还原 DB 关闭前的状态,使用 keys 文件可以减少读取数据量,因为此时并不需要扫描 key/val 具体内容,只要 key hash 即可。 258 | 2. 
Compact:compact 时也需要扫描所有 key,确定哪些 key 需要被合并或者被删除,同时还要判断变更的 record 大小。对于一个 disk segment,如果 compact 后只是将大小从 10M 压缩为 9.5M,那不具备 compact 效益,选择跳过。 259 | 260 | ## Compact 261 | 262 | 在 compact 的处理上,没有完全依照 level-compact,而是采用了一种平铺的方式,所有的 disk segment 都在同一个 level。 263 | 264 | compact 的目的在于压缩磁盘空间,但同时会带来一定的 IO 开销,因此需要选择好 compact 的时机。默认的行为是当 compact 后的 disk segment 体积减小超过 50% 才进行操作。 265 | 266 | compact 过程中对读写没有影响,每个 disk segment 会有引用计数,访问时 `ref++`,结束访问时 `ref--`,当且仅当 `ref == 0` 时才会删除磁盘文件,避免正在遍历的操作受影响。即程序实现了对于 disk segment 的 GC 行为。 267 | 268 | ```docs 269 | | --------------- | --------------- | --------------- | --------------- | 270 | | ref 0 | ref: 2 | ref: 0 | ref 0 | 271 | | disk sgement(1) | disk sgement(2) | disk sgement(3) | disk sgement(4) | 272 | | --------------- | --------------- | --------------- | --------------- | 273 | 274 | ---> Compact operation 275 | 276 | | --------------- | --------------- | --------------- | --------------- | 277 | | ref 0 | ref: 2 | ref: 0 | ref: 0 | 278 | | disk sgement(1) | disk sgement(2) | disk sgement(3) | disk sgement(4) | 279 | | --------------- | --------------- | --------------- | --------------- | 280 | | | | 281 | | ------|-------------------------- | // rollback if compact failed 282 | | | new disk sgement(3) | 283 | | 284 | Hanging: in database view, no more disk segment(2) // lock guarded 285 | 286 | | --------------- | --------------- | --------------- | 287 | | ref 0 | ref: 0 | ref 0 | 288 | | disk sgement(1) | disk sgement(3) | disk sgement(4) | 289 | | --------------- | --------------- | --------------- | 290 | 291 | After range/get opertion, disk segment(2) ref is reduced to 0. 292 | 293 | | --------------- | 294 | | ref: 0 | 295 | | disk segment(2) | GC worker will cleanup disk segment(2), remove all files of it. 296 | | --------------- | 297 | ``` 298 | 299 | compact 操作启动后,会将现有的 disk segment 分成若干各组,分组规则为: 300 | 301 | 1. 单 disk segment 大小不得超过 MaxDiskSegmentBytes。 302 | 2. 相邻 disk segment 如果大小之和小于等于 MaxDiskSegmentBytes 则合并为一个分组。 303 | 304 | ```docs 305 | // If MaxDiskSegmentBytes Option is 20MB. 306 | 307 | | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | 308 | | segment(1) | segment(2) | segment(3) | segment(4) | segment(5) | segment(6) | 309 | | 10MB | 9MB | 18MB | 10MB | 7MB | 2MB | 310 | | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | 311 | 312 | // After grouping 313 | 314 | | Group1 | Group2 | Group3 | 315 | | ----------------------- | ---------- | ------------------------------------ | 316 | | segment(1) segment(2) | segment(3) | segment(4) segment(5) segment(6) | 317 | | 19MB | 18MB | 19MB | 318 | | ----------------------- | ---------- | ------------------------------------ | 319 | 320 | // Compact 321 | 322 | | ---------- | ---------- | ---------- | 323 | | segment(2) | segment(3) | segment(6) | 324 | | 19MB | 18MB | 19MB | 325 | | ---------- | ---------- | ---------- | 326 | ``` 327 | 328 | ## Load 329 | 330 | DB 启动时,如果路径下存在数据文件,会进行扫描和加载,对于 DataFile,解码顺序和编码是相反的。 331 | 332 | 1. 解码 Footer,并校验数据是否合法(Magic 判断)。 333 | 2. 解码 BloomFilter,校验 Checksum 并加载进内存中。 334 | 3. 解码 Metadata,校验 Checksum 并加载进内存中。 335 | 4. 解码 KeysFile,按序将 key 还原进内存中。 336 | 337 | 关于 BloomFilter,为了加速解压和节省内存空间使用,编码时是直接将 BloomFilter 二进制序列化写入 DataFile,解压时读取整片数据。 338 | 339 | ## Performance 340 | 341 | grogudb 把 WAL 当成数据存储来使用,所以写入性能较为优异,写入的操作都是 Append-Only,而且对于磁盘交互部分,均使用了 BufferdIO 的思路,尽量减少 `write syscall` 次数。 342 | 343 | 不同操作性能开销: 344 | 345 | 1. 
Put: 如果某段时间窗口内对某个 key 存在高频的更新行为,实际上只有内存操作(解链然后重新链接到末尾),对比纯粹的 WAL 这种操作可以节约不少的磁盘 IO。 346 | 2. PutIf: 如果 key 已经存在,则是 O(1) 操作,不存在则跟 Put 行为保持一致。 347 | 3. Del: 追加 FlagDel 记录,并从内存中删除该 key 记录,其他跟 Put 操作无异。 348 | 4. Range: 按 block 顺序读取 record,使用 RecordRanger 解析数据流。 349 | 5. Has: O(1) 内存判断。 350 | 6. Clear: 追加 FlagTombstone 记录,从内存中删除该 bucket 所有 key,其他跟 Put 操作无异。 351 | 352 | ## Summary 353 | 354 | 编写 grogudb 主要是出于兴趣,想尝试更深地了解数据库的一些设计哲学。 355 | 356 | 毕竟古人有言,纸上得来终觉浅,绝知此事要躬行。 357 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/chenjiandongx/grogudb 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/bits-and-blooms/bloom/v3 v3.3.1 7 | github.com/cespare/xxhash/v2 v2.2.0 8 | github.com/chenjiandongx/logger v0.2.0 9 | github.com/emirpasic/gods v1.18.1 10 | github.com/juju/fslock v0.0.0-20160525022230-4d5c94c67b4b 11 | github.com/orcaman/concurrent-map/v2 v2.0.1 12 | github.com/pkg/errors v0.9.1 13 | github.com/stretchr/testify v1.8.1 14 | go.uber.org/multierr v1.6.0 15 | ) 16 | 17 | require ( 18 | github.com/bits-and-blooms/bitset v1.3.1 // indirect 19 | github.com/davecgh/go-spew v1.1.1 // indirect 20 | github.com/pmezard/go-difflib v1.0.0 // indirect 21 | go.uber.org/atomic v1.10.0 // indirect 22 | go.uber.org/zap v1.17.0 // indirect 23 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 24 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect 25 | gopkg.in/yaml.v2 v2.4.0 // indirect 26 | gopkg.in/yaml.v3 v3.0.1 // indirect 27 | ) 28 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 3 | github.com/bits-and-blooms/bitset v1.3.1 h1:y+qrlmq3XsWi+xZqSaueaE8ry8Y127iMxlMfqcK8p0g= 4 | github.com/bits-and-blooms/bitset v1.3.1/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= 5 | github.com/bits-and-blooms/bloom/v3 v3.3.1 h1:K2+A19bXT8gJR5mU7y+1yW6hsKfNCjcP2uNfLFKncjQ= 6 | github.com/bits-and-blooms/bloom/v3 v3.3.1/go.mod h1:bhUUknWd5khVbTe4UgMCSiOOVJzr3tMoijSK3WwvW90= 7 | github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 8 | github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 9 | github.com/chenjiandongx/logger v0.2.0 h1:8P62EwgjElg30GtYdP3amN1PIpaIXC7rdido1k0eNZk= 10 | github.com/chenjiandongx/logger v0.2.0/go.mod h1:thpEAwW5FU8iveT36pO+ut6hj9opKgyeDgUV8lWnz7A= 11 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 12 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 13 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 14 | github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= 15 | github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= 16 | github.com/juju/fslock v0.0.0-20160525022230-4d5c94c67b4b h1:FQ7+9fxhyp82ks9vAuyPzG0/vVbWwMwLJ+P6yJI5FN8= 17 | github.com/juju/fslock v0.0.0-20160525022230-4d5c94c67b4b/go.mod h1:HMcgvsgd0Fjj4XXDkbjdmlbI505rUPBs6WBMYg2pXks= 18 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 19 | github.com/kr/pretty v0.2.1/go.mod 
h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 20 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 21 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 22 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 23 | github.com/orcaman/concurrent-map/v2 v2.0.1 h1:jOJ5Pg2w1oeB6PeDurIYf6k9PQ+aTITr/6lP/L/zp6c= 24 | github.com/orcaman/concurrent-map/v2 v2.0.1/go.mod h1:9Eq3TG2oBe5FirmYWQfYO5iH1q0Jv47PLaNK++uCdOM= 25 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 26 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 27 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 28 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 29 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 30 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 31 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 32 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 33 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 34 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 35 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 36 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 37 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 38 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 39 | github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= 40 | github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= 41 | go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= 42 | go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= 43 | go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= 44 | go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= 45 | go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= 46 | go.uber.org/zap v1.17.0 h1:MTjgFu6ZLKvY6Pvaqk97GlxNBuMpV4Hy/3P6tRGlI2U= 47 | go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= 48 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 49 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 50 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 51 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= 52 | gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= 53 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 54 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 55 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 56 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 57 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 58 | 
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 59 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 60 | -------------------------------------------------------------------------------- /load.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package grogudb 15 | 16 | import ( 17 | "bytes" 18 | "os" 19 | "path/filepath" 20 | "sort" 21 | "time" 22 | 23 | "github.com/chenjiandongx/grogudb/pkg/codec" 24 | "github.com/chenjiandongx/grogudb/pkg/fsx" 25 | "github.com/chenjiandongx/grogudb/pkg/logx" 26 | "github.com/chenjiandongx/grogudb/pkg/uint64set" 27 | ) 28 | 29 | type segmentFile struct { 30 | seqID int64 31 | keysFile os.DirEntry 32 | dataFile os.DirEntry 33 | } 34 | 35 | // splitEntries 切分 segmentFiles 相同 seqID keysFile/dataFile 为一组 36 | func splitEntries(entries []os.DirEntry) []segmentFile { 37 | unique := make(map[string]os.DirEntry) 38 | for _, entry := range entries { 39 | // 递归文件夹不处理 40 | if entry.IsDir() { 41 | continue 42 | } 43 | unique[entry.Name()] = entry 44 | } 45 | 46 | segmentFiles := make(map[int64]*segmentFile) 47 | for name, entry := range unique { 48 | // 非法文件名不做处理 49 | prefix, seqID, ok := fsx.ParseFilename(name) 50 | if !ok { 51 | continue 52 | } 53 | 54 | if prefix == fsx.PrefixDataFile { 55 | segmentFiles[seqID] = &segmentFile{ 56 | seqID: seqID, 57 | dataFile: entry, 58 | } 59 | } 60 | } 61 | 62 | for seqID, sf := range segmentFiles { 63 | keysFile, ok := unique[fsx.KeysFilename(seqID)] 64 | if !ok { 65 | delete(segmentFiles, seqID) 66 | continue 67 | } 68 | 69 | sf.keysFile = keysFile 70 | } 71 | 72 | var ret []segmentFile 73 | for _, sf := range segmentFiles { 74 | ret = append(ret, *sf) 75 | } 76 | 77 | // 按时间顺序排序 78 | sort.Slice(ret, func(i, j int) bool { 79 | return ret[i].seqID < ret[j].seqID 80 | }) 81 | return ret 82 | } 83 | 84 | // loadFooter 加载 Footer 85 | func loadFooter(cfd *fsx.CacheFD) (codec.Footer, error) { 86 | var footer codec.Footer 87 | fd, err := cfd.FileDesc() 88 | if err != nil { 89 | return footer, err 90 | } 91 | defer cfd.Reuse(fd) 92 | 93 | offset := cfd.Size() - codec.FooterSize 94 | if offset < 0 { 95 | return footer, codec.ErrInvalidFooterSize 96 | } 97 | 98 | b := make([]byte, codec.FooterSize) 99 | _, err = fd.ReadAt(b, offset) 100 | if err != nil { 101 | return footer, err 102 | } 103 | 104 | return codec.DecodeFooter(b) 105 | } 106 | 107 | // loadMetadata 加载 metadata 108 | func loadMetadata(cfd *fsx.CacheFD, footer codec.Footer) (codec.BucketMetaSlice, error) { 109 | fd, err := cfd.FileDesc() 110 | if err != nil { 111 | return nil, err 112 | } 113 | defer cfd.Reuse(fd) 114 | 115 | b := make([]byte, footer.MetaSize) 116 | _, err = fd.ReadAt(b, int64(footer.PosMetaBlock().Start)) 117 | if err != nil { 118 | return nil, err 119 | } 120 | 121 | return codec.DecodeMetadata(b) 122 | } 123 | 124 | // 
loadBloomFilter 加载 bloomfilter 125 | func loadBloomFilter(cfd *fsx.CacheFD, footer codec.Footer) (uint64set.BloomFilter, error) { 126 | fd, err := cfd.FileDesc() 127 | if err != nil { 128 | return nil, err 129 | } 130 | defer cfd.Reuse(fd) 131 | 132 | b := make([]byte, footer.BloomFilterSize) 133 | _, err = fd.ReadAt(b, int64(footer.PosBloomFilterBlock().Start)) 134 | if err != nil { 135 | return nil, err 136 | } 137 | 138 | return uint64set.LoadBloomFilter(int(footer.BloomFilterCount), b) 139 | } 140 | 141 | func (db *DB) loadDiskSegments() error { 142 | start := time.Now() 143 | entries, err := os.ReadDir(db.path) 144 | if err != nil { 145 | return err 146 | } 147 | segmentFiles := splitEntries(entries) 148 | if len(segmentFiles) <= 0 { 149 | return nil 150 | } 151 | 152 | for _, sf := range segmentFiles { 153 | logx.Infof("load segment KeyF·%s, DataF·%s", sf.keysFile.Name(), sf.dataFile.Name()) 154 | if err := db.loadDiskSegment(db.path, sf); err != nil { 155 | return err 156 | } 157 | } 158 | 159 | // 如果 bucket 中没有任何 key 则表示该 bucket 在此次加载中将被丢弃 160 | var dropBuckets []string 161 | for name, bucket := range db.buckets { 162 | if bucket.keys.Count() <= 0 { 163 | dropBuckets = append(dropBuckets, name) 164 | } 165 | } 166 | for _, name := range dropBuckets { 167 | // 清理内存记录 168 | delete(db.memorySegs, name) 169 | db.stats.memSegment.Add(-1) 170 | 171 | // 清理 buckets 记录 172 | delete(db.buckets, name) 173 | db.bucketsKeys.Remove(name) 174 | logx.Infof("drop bucket %s", name) 175 | } 176 | 177 | logx.Infof("load %d segments elapsed %v", len(db.diskSegs), time.Since(start)) 178 | return nil 179 | } 180 | 181 | // loadKeys 加载 keys 182 | func (db *DB) loadKeys(cfd *fsx.CacheFD, bucketPos codec.BucketPos, seqID int64) error { 183 | fd, err := cfd.FileDesc() 184 | if err != nil { 185 | return err 186 | } 187 | 188 | for bucket, positions := range bucketPos.KeyItem { 189 | bucketKeys := db.bucketsKeys.GetOrCreate(bucket) 190 | for _, pos := range positions { 191 | b := make([]byte, pos.End-pos.Start) 192 | _, err = fd.ReadAt(b, int64(pos.Start)) 193 | if err != nil { 194 | return err 195 | } 196 | 197 | encoded, err := codec.VerifyTailChecksum(b) 198 | if err != nil { 199 | return err 200 | } 201 | 202 | rg := codec.NewKeysRanger(bytes.NewReader(encoded)) 203 | err = rg.Range(func(flag codec.Flag, h uint64, n uint32) { 204 | switch flag { 205 | case codec.FlagDel: 206 | bucketKeys.Remove(h) 207 | case codec.FlagTombstone: 208 | bucketKeys.Clear() 209 | db.clearAt[bucket] = seqID 210 | default: 211 | bucketKeys.Insert(h) 212 | } 213 | }) 214 | if err != nil { 215 | return err 216 | } 217 | } 218 | } 219 | 220 | return nil 221 | } 222 | 223 | func (db *DB) loadDiskSegment(path string, sf segmentFile) error { 224 | dataPath := filepath.Join(path, sf.dataFile.Name()) 225 | dataCfd, err := fsx.NewCacheFD(dataPath, dataMaxFdHold) 226 | if err != nil { 227 | return err 228 | } 229 | 230 | footer, err := loadFooter(dataCfd) 231 | if err != nil { 232 | return err 233 | } 234 | 235 | bms, err := loadMetadata(dataCfd, footer) 236 | if err != nil { 237 | return err 238 | } 239 | bucketPos := bms.AsBucketPos() 240 | 241 | keysPath := filepath.Join(path, sf.keysFile.Name()) 242 | keysCfd, err := fsx.NewCacheFD(keysPath, keysMaxFdHold) 243 | if err != nil { 244 | return err 245 | } 246 | 247 | if err := db.loadKeys(keysCfd, bucketPos, sf.seqID); err != nil { 248 | return err 249 | } 250 | 251 | bf, err := loadBloomFilter(dataCfd, footer) 252 | if err != nil { 253 | return err 254 | } 255 | 256 | diskSeg := 
newDiskSegment(sf.seqID, int(dataCfd.Size()), db.path, bf, bucketPos) 257 | diskSeg.clearAtFn = db.getBucketClearAt 258 | diskSeg.keysCfd = keysCfd 259 | diskSeg.dataCfd = dataCfd 260 | 261 | db.diskSegs = append(db.diskSegs, diskSeg) 262 | db.stats.diskSegment.Add(1) 263 | db.diskSegs.OrderDecrement() 264 | 265 | for _, bucketMeta := range bms { 266 | name := bucketMeta.Name 267 | keySet := db.bucketsKeys.GetOrCreate(name) 268 | if _, ok := db.memorySegs[name]; !ok { 269 | memSeg := newMemorySegment(name, memSegCallback{ 270 | onRemove: keySet.Remove, 271 | onInsert: keySet.Insert, 272 | onExist: keySet.Has, 273 | onBytes: func(n int64) { db.markBytesCh <- n }, 274 | }) 275 | memSeg.clearAt = db.clearAt[name] 276 | db.memorySegs[name] = memSeg 277 | db.stats.memSegment.Add(1) 278 | } 279 | 280 | bucket := &Bucket{ 281 | name: name, 282 | keys: keySet, 283 | head: db.memorySegs[name], 284 | statistics: db.stats, 285 | getIterReleaser: db.getIterReleaser, 286 | } 287 | db.buckets[name] = bucket 288 | } 289 | 290 | return nil 291 | } 292 | -------------------------------------------------------------------------------- /load_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package grogudb 15 | 16 | import ( 17 | "os" 18 | "path/filepath" 19 | "strings" 20 | "testing" 21 | 22 | "github.com/stretchr/testify/require" 23 | 24 | "github.com/chenjiandongx/grogudb/pkg/codec" 25 | "github.com/chenjiandongx/grogudb/pkg/fsx" 26 | ) 27 | 28 | type mockEntity struct { 29 | name string 30 | dir bool 31 | } 32 | 33 | func (e mockEntity) Name() string { 34 | return e.name 35 | } 36 | 37 | func (e mockEntity) IsDir() bool { 38 | return e.dir 39 | } 40 | 41 | func (e mockEntity) Type() os.FileMode { 42 | return os.ModePerm 43 | } 44 | 45 | func (e mockEntity) Info() (os.FileInfo, error) { 46 | return nil, nil 47 | } 48 | 49 | func TestSplitEntries(t *testing.T) { 50 | t.Run("Got Two", func(t *testing.T) { 51 | entities := []os.DirEntry{ 52 | mockEntity{name: "data_0"}, 53 | mockEntity{name: "data_1"}, 54 | mockEntity{name: "keys_0"}, 55 | mockEntity{name: "keys_1"}, 56 | } 57 | 58 | sfs := splitEntries(entities) 59 | require.Len(t, sfs, 2) 60 | require.Equal(t, int64(0), sfs[0].seqID) 61 | require.Equal(t, int64(1), sfs[1].seqID) 62 | }) 63 | 64 | t.Run("Got One", func(t *testing.T) { 65 | entities := []os.DirEntry{ 66 | mockEntity{name: "data_0"}, 67 | mockEntity{name: "data_1"}, 68 | mockEntity{name: "keys_0"}, 69 | mockEntity{name: "keys_2"}, 70 | } 71 | 72 | sfs := splitEntries(entities) 73 | require.Len(t, sfs, 1) 74 | require.Equal(t, int64(0), sfs[0].seqID) 75 | }) 76 | 77 | t.Run("Got Zero", func(t *testing.T) { 78 | entities := []os.DirEntry{ 79 | mockEntity{name: "data_0"}, 80 | mockEntity{name: "data_1"}, 81 | mockEntity{name: "keys_0", dir: true}, 82 | mockEntity{name: "keys_2"}, 83 | } 84 | 85 | sfs := splitEntries(entities) 86 | require.Len(t, sfs, 0) 87 | }) 88 | } 89 | 90 | func getDataFd(t require.TestingT, db *DB) *fsx.CacheFD { 91 | bucket := db.GetOrCreateBucket("bucket0") 92 | require.NoError(t, bucket.Put(keyNum(0), valNum(0))) 93 | require.NoError(t, db.rotate()) 94 | 95 | entities, err := os.ReadDir(db.path) 96 | require.NoError(t, err) 97 | 98 | var dataF os.DirEntry 99 | for _, entity := range entities { 100 | if strings.HasPrefix(entity.Name(), fsx.PrefixDataFile) { 101 | dataF = entity 102 | } 103 | } 104 | 105 | cfd, err := fsx.NewCacheFD(filepath.Join(db.path, dataF.Name()), 1) 106 | require.NoError(t, err) 107 | return cfd 108 | } 109 | 110 | func TestLoadFooter(t *testing.T) { 111 | runGrogudbTest(t, nil, func(t require.TestingT, db *DB) { 112 | cfd := getDataFd(t, db) 113 | footer, err := loadFooter(cfd) 114 | require.NoError(t, err) 115 | require.Equal(t, codec.Footer{ 116 | DataSize: 25, 117 | MetaSize: 35, 118 | BloomFilterCount: 1, 119 | BloomFilterSize: 32, 120 | }, footer) 121 | }, nil) 122 | } 123 | 124 | func TestLoadMetadata(t *testing.T) { 125 | runGrogudbTest(t, nil, func(t require.TestingT, db *DB) { 126 | cfd := getDataFd(t, db) 127 | footer, err := loadFooter(cfd) 128 | require.NoError(t, err) 129 | 130 | meta, err := loadMetadata(cfd, footer) 131 | require.NoError(t, err) 132 | require.Equal(t, codec.BucketMetaSlice{ 133 | { 134 | Name: "bucket0", 135 | RecordPos: codec.Positions{{Start: 0, End: 25}}, 136 | KeyEntityPos: codec.Positions{{Start: 0, End: 17}}, 137 | }, 138 | }, meta) 139 | }, nil) 140 | } 141 | 142 | func TestLoadBloomFilter(t *testing.T) { 143 | runGrogudbTest(t, nil, func(t require.TestingT, db *DB) { 144 | cfd := getDataFd(t, db) 145 | footer, err := loadFooter(cfd) 146 | require.NoError(t, err) 147 | 148 | bf, err := loadBloomFilter(cfd, footer) 149 | require.NoError(t, err) 150 | 151 | 
require.True(t, bf.Test(codec.HashKey(keyNum(0)))) 152 | require.False(t, bf.Test(codec.HashKey(keyNum(1)))) 153 | }, nil) 154 | } 155 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package grogudb 15 | 16 | import "time" 17 | 18 | const ( 19 | defaultCompactCheckInterval = time.Minute 20 | defaultCompactForceInterval = time.Minute * 30 21 | defaultCompactKeyOpDelta = 5 22 | defaultGcInterval = time.Minute 23 | defaultCompactFragmentation = 0.5 24 | defaultMaxMemSegmentBytes = 1024 * 1024 * 8 25 | defaultMaxDiskSegmentBytes = 1024 * 1024 * 50 26 | ) 27 | 28 | // Options 控制 DB 行为的配置项 29 | type Options struct { 30 | // MaxMemSegmentBytes memory segment 最大允许字节 31 | MaxMemSegmentBytes int 32 | 33 | // MaxDiskSegmentBytes disk segment 最大允许字节 34 | MaxDiskSegmentBytes int 35 | 36 | // CompactFragmentation compact 碎片比例 37 | // 即超过 HoldBytes 中需要被删除的字节数超过此比例时才需要 compact 38 | CompactFragmentation float64 39 | 40 | // CompactCheckInterval compact 巡检周期 41 | CompactCheckInterval time.Duration 42 | 43 | // CompactForceInterval 强制 compact 周期 即 compact 兜底行为 确保该周期内一定会执行一次 compact 44 | // CompactForceInterval 必须大于 CompactCheckInterval 45 | // 扫描磁盘是有 IO 开销 force 只是会忽略所有前置判断条件进行扫描 不代表会执行 compact 操作 46 | CompactForceInterval time.Duration 47 | 48 | // CompactKeyOpDelta compact 时会对两个 CompactCheckInterval 周期的 Put/Del 做差值计算 49 | // 超过一定差值才会进行 compact 目的是为了尽量减少 compact 操作 50 | CompactKeyOpDelta int 51 | 52 | // GcInterval gc 巡检周期 53 | GcInterval time.Duration 54 | } 55 | 56 | // DefaultOptions 返回默认 Options 配置 57 | func DefaultOptions() Options { 58 | opt := &Options{} 59 | opt.Validate() 60 | return *opt 61 | } 62 | 63 | // Validate 校验 Options 64 | func (o *Options) Validate() { 65 | if o.MaxMemSegmentBytes <= 0 { 66 | o.MaxMemSegmentBytes = defaultMaxMemSegmentBytes 67 | } 68 | if o.MaxDiskSegmentBytes <= 0 { 69 | o.MaxDiskSegmentBytes = defaultMaxDiskSegmentBytes 70 | } 71 | if o.CompactFragmentation <= 0 { 72 | o.CompactFragmentation = defaultCompactFragmentation 73 | } 74 | if o.CompactCheckInterval <= 0 { 75 | o.CompactCheckInterval = defaultCompactCheckInterval 76 | } 77 | if o.CompactForceInterval <= 0 { 78 | o.CompactForceInterval = defaultCompactForceInterval 79 | } 80 | if o.CompactForceInterval <= o.CompactCheckInterval { 81 | o.CompactForceInterval = o.CompactCheckInterval * 2 82 | } 83 | if o.CompactKeyOpDelta <= 0 { 84 | o.CompactKeyOpDelta = defaultCompactKeyOpDelta 85 | } 86 | if o.GcInterval <= 0 { 87 | o.GcInterval = defaultGcInterval 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /pkg/binaryx/binary.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 
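A minimal usage sketch of how a partially filled Options is normalised by Validate; nothing here goes beyond what options.go defines, and the printed values simply reflect the defaults listed in that file.

```go
package main

import (
	"fmt"

	"github.com/chenjiandongx/grogudb"
)

func main() {
	// Only MaxMemSegmentBytes is set; Validate backfills every unset field
	// with the defaults and keeps CompactForceInterval larger than
	// CompactCheckInterval.
	opts := grogudb.Options{MaxMemSegmentBytes: 4 * 1024 * 1024}
	opts.Validate()

	fmt.Println(opts.MaxDiskSegmentBytes)  // 52428800 (50MB default)
	fmt.Println(opts.CompactCheckInterval) // 1m0s
	fmt.Println(opts.CompactForceInterval) // 30m0s
}
```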
(the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package binaryx 15 | 16 | import "encoding/binary" 17 | 18 | // PutUint32 编码 uint32 19 | func PutUint32(n uint32) []byte { 20 | b := make([]byte, 4) 21 | binary.LittleEndian.PutUint32(b, n) 22 | return b 23 | } 24 | 25 | // Uint32 解码 uint32 26 | func Uint32(b []byte) uint32 { 27 | return binary.LittleEndian.Uint32(b) 28 | } 29 | 30 | // PutUint64 编码 uint64 31 | func PutUint64(n uint64) []byte { 32 | b := make([]byte, 8) 33 | binary.LittleEndian.PutUint64(b, n) 34 | return b 35 | } 36 | 37 | // Uint64 解码 uint64 38 | func Uint64(b []byte) uint64 { 39 | return binary.LittleEndian.Uint64(b) 40 | } 41 | -------------------------------------------------------------------------------- /pkg/binaryx/binary_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package binaryx 15 | 16 | import ( 17 | "testing" 18 | 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | func TestBinary(t *testing.T) { 23 | n := 0x0A 24 | 25 | t.Run("Uint32", func(t *testing.T) { 26 | b := PutUint32(uint32(n)) 27 | require.Equal(t, uint32(n), Uint32(b)) 28 | }) 29 | 30 | t.Run("Uint64", func(t *testing.T) { 31 | b := PutUint64(uint64(n)) 32 | require.Equal(t, uint64(n), Uint64(b)) 33 | }) 34 | } 35 | -------------------------------------------------------------------------------- /pkg/buffer/buffer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package buffer 15 | 16 | import ( 17 | "bytes" 18 | "hash/crc32" 19 | "sync" 20 | 21 | "github.com/chenjiandongx/grogudb/pkg/binaryx" 22 | ) 23 | 24 | var bufPool = sync.Pool{New: func() interface{} { 25 | return &Buffer{ 26 | buf: &bytes.Buffer{}, 27 | } 28 | }} 29 | 30 | // Get 从 Pool 中取 *Buffer 31 | func Get() *Buffer { 32 | return bufPool.Get().(*Buffer) 33 | } 34 | 35 | // Put 将 *Buffer 放置到 Pool 36 | func Put(buf *Buffer) { 37 | buf.Reset() 38 | bufPool.Put(buf) 39 | } 40 | 41 | // Buffer 可复用的 *Buffer 42 | type Buffer struct { 43 | buf *bytes.Buffer 44 | } 45 | 46 | // Len 返回 buffer 长度 47 | func (b *Buffer) Len() int { 48 | return b.buf.Len() 49 | } 50 | 51 | // Writer 将 bs 写入到 buffer 中 52 | func (b *Buffer) Write(bs []byte) (int, error) { 53 | return b.buf.Write(bs) 54 | } 55 | 56 | // Reset 重置 buffer 57 | func (b *Buffer) Reset() { 58 | b.buf.Reset() 59 | } 60 | 61 | // Bytes 返回字节数据 62 | func (b *Buffer) Bytes() []byte { 63 | return b.buf.Bytes() 64 | } 65 | 66 | // Frozen 冻结 buffer 并补充 crc32 checksum 67 | func (b *Buffer) Frozen() []byte { 68 | checksum := crc32.ChecksumIEEE(b.Bytes()) 69 | b.buf.Write(binaryx.PutUint32(checksum)) 70 | return b.buf.Bytes() 71 | } 72 | -------------------------------------------------------------------------------- /pkg/codec/codec.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package codec 15 | 16 | import ( 17 | "bytes" 18 | "errors" 19 | "hash/crc32" 20 | "io" 21 | "sort" 22 | 23 | "github.com/cespare/xxhash/v2" 24 | 25 | "github.com/chenjiandongx/grogudb/pkg/binaryx" 26 | ) 27 | 28 | var ( 29 | ErrInvalidFlagSize = errors.New("grogudb/codec: invalid flag size") 30 | ErrInvalidUint32Size = errors.New("grogudb/codec: invalid uint32 size") 31 | ErrInvalidUint64Size = errors.New("grogudb/codec: invalid uint64 size") 32 | ErrInvalidRecordSize = errors.New("grogudb/codec: invalid record size") 33 | ErrInvalidRecordChecksum = errors.New("grogudb/codec: invalid record checksum") 34 | ErrInvalidRecordKeySize = errors.New("grogudb/codec: invalid record key size") 35 | ErrInvalidRecordValueSize = errors.New("grogudb/codec: invalid record value size") 36 | ErrInvalidMetadataChecksum = errors.New("grogudb/codec: invalid metadata checksum") 37 | ErrInvalidTombstoneChecksum = errors.New("grogudb/codec: invalid tombstone checksum") 38 | ErrInvalidFooterSize = errors.New("grogudb/codec: invalid footer size") 39 | ErrInvalidFooterMagic = errors.New("grogudb/codec: invalid footer magic") 40 | ErrReadPartial = errors.New("grogudb/codec: read partial") 41 | ErrInvalidChecksum = errors.New("grogudb/codec: invalid checksum") 42 | ) 43 | 44 | const ( 45 | magicNumber uint32 = 0xdeadbeef 46 | ) 47 | 48 | const ( 49 | SizeFlag = 1 50 | SizeUint32 = 4 51 | SizeUint64 = 8 52 | SizePosition = 8 53 | SizeChecksum = 4 54 | ) 55 | 56 | // CRC32 计算 b crc32 checksum 57 | func CRC32(b []byte) []byte { 58 | return binaryx.PutUint32(crc32.ChecksumIEEE(b)) 59 | } 60 | 61 | type Flag uint8 62 | 63 | const ( 64 | FlagUnset Flag = iota 65 | FlagPut 66 | FlagDel 67 | FlagTombstone 68 | ) 69 | 70 | type ( 71 | RecordVisitFunc func(flag Flag, key, val []byte, n int) (quit bool) 72 | KeyVisitFunc func(flag Flag, h uint64, n uint32) 73 | PassFunc func(flag Flag, key uint64) bool 74 | ) 75 | 76 | // PassAll PassFunc 放行所有条件 77 | func PassAll() PassFunc { 78 | return func(Flag, uint64) bool { 79 | return true 80 | } 81 | } 82 | 83 | // HashKey hash key 使用 uint64 表示 84 | func HashKey(b []byte) uint64 { 85 | return xxhash.Sum64(b) 86 | } 87 | 88 | const ( 89 | FooterSize = 20 90 | ) 91 | 92 | // Footer 尾部内容描述字节区间 93 | type Footer struct { 94 | DataSize uint32 95 | MetaSize uint32 96 | BloomFilterCount uint32 97 | BloomFilterSize uint32 98 | } 99 | 100 | // PosDataBlock 返回 DataBlock Position 101 | func (f Footer) PosDataBlock() Position { 102 | return Position{ 103 | Start: 0, 104 | End: f.DataSize, 105 | } 106 | } 107 | 108 | // PosMetaBlock 返回 MetaBlock Position 109 | func (f Footer) PosMetaBlock() Position { 110 | return Position{ 111 | Start: f.DataSize, 112 | End: f.DataSize + f.MetaSize, 113 | } 114 | } 115 | 116 | // PosBloomFilterBlock 返回 BloomFilter Position 117 | func (f Footer) PosBloomFilterBlock() Position { 118 | return Position{ 119 | Start: f.DataSize + f.MetaSize, 120 | End: f.BloomFilterSize, 121 | } 122 | } 123 | 124 | // EncodeFooter 编码 Footer 125 | // 126 | // 布局结构如下 127 | // | DataSize | MetaSize | BloomFilterCount | BloomFilterBytes | Magic | 128 | // | 4B | 4B | 4B | 4B | 4B | 129 | func EncodeFooter(footer Footer) []byte { 130 | buf := make([]byte, 0, FooterSize) 131 | buf = append(buf, binaryx.PutUint32(footer.DataSize)...) 132 | buf = append(buf, binaryx.PutUint32(footer.MetaSize)...) 133 | buf = append(buf, binaryx.PutUint32(footer.BloomFilterCount)...) 134 | buf = append(buf, binaryx.PutUint32(footer.BloomFilterSize)...) 
135 | buf = append(buf, binaryx.PutUint32(magicNumber)...) 136 | return buf 137 | } 138 | 139 | // DecodeFooter 解码 Footer 140 | func DecodeFooter(b []byte) (Footer, error) { 141 | if len(b) != FooterSize { 142 | return Footer{}, ErrInvalidFooterSize 143 | } 144 | 145 | if binaryx.Uint32(b[16:]) != magicNumber { 146 | return Footer{}, ErrInvalidFooterMagic 147 | } 148 | 149 | footer := Footer{ 150 | DataSize: binaryx.Uint32(b[:4]), 151 | MetaSize: binaryx.Uint32(b[4:8]), 152 | BloomFilterCount: binaryx.Uint32(b[8:12]), 153 | BloomFilterSize: binaryx.Uint32(b[12:16]), 154 | } 155 | 156 | return footer, nil 157 | } 158 | 159 | // EncodeTombstoneEntity 编码 TombstoneEntity 160 | // 161 | // 布局结构如下 162 | // | RecordSize | BucketSize | Bucket | Nanosecond | Checksum | 163 | // | 4B | 4B | ... | 8B | 4B | 164 | func EncodeTombstoneEntity(name string, nano uint64) []byte { 165 | size := len(name) + 16 166 | 167 | buf := make([]byte, 0, size+SizeUint32) 168 | buf = append(buf, binaryx.PutUint32(uint32(size))...) 169 | buf = append(buf, binaryx.PutUint32(uint32(len(name)))...) 170 | buf = append(buf, []byte(name)...) 171 | buf = append(buf, binaryx.PutUint64(nano)...) 172 | buf = append(buf, CRC32(buf[SizeUint32:])...) 173 | return buf 174 | } 175 | 176 | // DecodeTombstoneEntity 解码 TombstoneEntity 177 | func DecodeTombstoneEntity(b []byte) (string, uint64, error) { 178 | if len(b) <= SizeChecksum { 179 | return "", 0, ErrInvalidTombstoneChecksum 180 | } 181 | 182 | if !bytes.Equal(CRC32(b[:len(b)-SizeChecksum]), b[len(b)-SizeChecksum:]) { 183 | return "", 0, ErrInvalidTombstoneChecksum 184 | } 185 | 186 | buffer := bytes.NewBuffer(b) 187 | 188 | // 解析 Bucket 189 | sizeBytes := buffer.Next(SizeUint32) 190 | if len(sizeBytes) != SizeUint32 { 191 | return "", 0, ErrInvalidUint32Size 192 | } 193 | bucketSize := binaryx.Uint32(sizeBytes) 194 | bucket := buffer.Next(int(bucketSize)) 195 | if len(bucket) != int(bucketSize) { 196 | return "", 0, ErrInvalidRecordKeySize 197 | } 198 | 199 | // 解析 Nanosecond 200 | sizeBytes = buffer.Next(SizeUint64) 201 | if len(sizeBytes) != SizeUint64 { 202 | return "", 0, ErrInvalidUint32Size 203 | } 204 | nano := binaryx.Uint64(sizeBytes) 205 | 206 | return string(bucket), nano, nil 207 | } 208 | 209 | // TombstoneRanger 负责遍历解析 io.Reader Tombstone 数据流 210 | type TombstoneRanger struct { 211 | r io.Reader 212 | } 213 | 214 | // NewTombstoneRanger 生成并返回 *TombstoneRanger 实例 215 | func NewTombstoneRanger(r io.Reader) *TombstoneRanger { 216 | return &TombstoneRanger{r: r} 217 | } 218 | 219 | // Range 遍历所有 TombstoneEntity 并对每个 Entity 执行 fn 220 | func (rg *TombstoneRanger) Range(fn func(bucket string, nano int64)) error { 221 | sizeBytes := make([]byte, SizeUint32) 222 | for { 223 | n, err := rg.r.Read(sizeBytes) 224 | if err != nil { 225 | if err == io.EOF { 226 | return nil 227 | } 228 | return err 229 | } 230 | if n != SizeUint32 { 231 | return err 232 | } 233 | 234 | size := binaryx.Uint32(sizeBytes) 235 | b := make([]byte, size) 236 | n, err = rg.r.Read(b) 237 | if err != nil { 238 | return err 239 | } 240 | if n != int(size) { 241 | return ErrReadPartial 242 | } 243 | 244 | bucket, nano, err := DecodeTombstoneEntity(b) 245 | if err != nil { 246 | return err 247 | } 248 | 249 | fn(bucket, int64(nano)) 250 | } 251 | } 252 | 253 | // EncodeRecord 编码 Record 254 | // 255 | // Record 布局结构如下 256 | // | RecordSize | Flag | KeySize | Key | ValueSize | Value | 257 | // | 4B | 1B | 4B | ... | 4B | ... 
| 258 | func EncodeRecord(flag Flag, key, val []byte) []byte { 259 | size := len(key) + len(val) + 9 260 | 261 | buf := make([]byte, 0, size) 262 | buf = append(buf, binaryx.PutUint32(uint32(size))...) 263 | buf = append(buf, byte(flag)) 264 | buf = append(buf, binaryx.PutUint32(uint32(len(key)))...) 265 | buf = append(buf, key...) 266 | buf = append(buf, binaryx.PutUint32(uint32(len(val)))...) 267 | buf = append(buf, val...) 268 | 269 | return buf 270 | } 271 | 272 | // DecodeRecord memory segment 的 record 是包含了 size header 解析时候需要 offset 273 | func DecodeRecord(b []byte) (Flag, []byte, []byte, error) { 274 | if len(b) < SizeUint32 { 275 | return FlagUnset, nil, nil, ErrInvalidRecordSize 276 | } 277 | return DecodeRecordWithoutSize(b[SizeUint32:]) 278 | } 279 | 280 | // DecodeRecordWithoutSize 解码 Record 281 | func DecodeRecordWithoutSize(b []byte) (Flag, []byte, []byte, error) { 282 | buffer := bytes.NewBuffer(b) 283 | flag, err := buffer.ReadByte() 284 | if err != nil { 285 | return FlagUnset, nil, nil, err 286 | } 287 | 288 | // FlagTombstone 跳过不做解析 289 | if Flag(flag) == FlagTombstone { 290 | return Flag(flag), nil, nil, nil 291 | } 292 | 293 | // 解析 Key 294 | sizeBytes := buffer.Next(SizeUint32) 295 | if len(sizeBytes) != SizeUint32 { 296 | return FlagUnset, nil, nil, ErrInvalidUint32Size 297 | } 298 | keySize := binaryx.Uint32(sizeBytes) 299 | key := buffer.Next(int(keySize)) 300 | if len(key) != int(keySize) { 301 | return FlagUnset, nil, nil, ErrInvalidRecordKeySize 302 | } 303 | 304 | // 解析 Value 305 | sizeBytes = buffer.Next(SizeUint32) 306 | if len(sizeBytes) != SizeUint32 { 307 | return FlagUnset, nil, nil, ErrInvalidUint32Size 308 | } 309 | 310 | var val []byte 311 | valSize := binaryx.Uint32(sizeBytes) 312 | if valSize > 0 { 313 | val = buffer.Next(int(valSize)) 314 | if len(val) != int(valSize) { 315 | return FlagUnset, nil, nil, ErrInvalidRecordValueSize 316 | } 317 | } 318 | 319 | return Flag(flag), key, val, nil 320 | } 321 | 322 | // RecordRanger 负责遍历解析 io.Reader Record 数据流 323 | type RecordRanger struct { 324 | r io.Reader 325 | } 326 | 327 | // NewRecordRanger 生产并返回 *RecordRanger 实例 328 | func NewRecordRanger(r io.Reader) *RecordRanger { 329 | return &RecordRanger{r: r} 330 | } 331 | 332 | func (rg *RecordRanger) next() ([]byte, error) { 333 | sizeBytes := make([]byte, SizeUint32) 334 | n, err := rg.r.Read(sizeBytes) 335 | if err != nil { 336 | return nil, err 337 | } 338 | if n != SizeUint32 { 339 | return nil, ErrInvalidUint32Size 340 | } 341 | 342 | dataSize := binaryx.Uint32(sizeBytes) 343 | data := make([]byte, dataSize) 344 | n, err = rg.r.Read(data) 345 | if err != nil { 346 | return nil, err 347 | } 348 | if n != int(dataSize) { 349 | return nil, ErrInvalidRecordSize 350 | } 351 | 352 | return data, nil 353 | } 354 | 355 | // Range 遍历所有 Records 并对每个 Record 执行 visitFn 356 | func (rg *RecordRanger) Range(visitFn RecordVisitFunc) error { 357 | for { 358 | b, err := rg.next() 359 | if err != nil { 360 | if err == io.EOF { 361 | return nil 362 | } 363 | return err 364 | } 365 | 366 | flag, key, val, err := DecodeRecordWithoutSize(b) 367 | if err != nil { 368 | return err 369 | } 370 | 371 | if flag == FlagTombstone { 372 | return nil 373 | } 374 | 375 | quit := visitFn(flag, key, val, len(b)) 376 | if quit { 377 | return nil 378 | } 379 | } 380 | } 381 | 382 | // EncodeKeyEntity 编码 KeyEntity 383 | // 384 | // 布局如下 385 | // | Flag | KeyHash | RecordSize | 386 | // | 1B | 8B | 4B | 387 | func EncodeKeyEntity(flag Flag, h uint64, l uint32) []byte { 388 | b := 
make([]byte, 0, 1+SizeUint64+SizeUint32) 389 | b = append(b, byte(flag)) 390 | b = append(b, binaryx.PutUint64(h)...) 391 | b = append(b, binaryx.PutUint32(l)...) 392 | return b 393 | } 394 | 395 | // VerifyTailChecksum 校验尾部 checksum 并返回被校验数据 396 | func VerifyTailChecksum(b []byte) ([]byte, error) { 397 | if len(b) < SizeChecksum { 398 | return nil, ErrInvalidChecksum 399 | } 400 | 401 | if !bytes.Equal(CRC32(b[:len(b)-SizeChecksum]), b[len(b)-SizeChecksum:]) { 402 | return nil, ErrInvalidChecksum 403 | } 404 | 405 | return b[:len(b)-SizeChecksum], nil 406 | } 407 | 408 | // KeysRanger 负责遍历解析 io.Reader Keys 数据流 409 | type KeysRanger struct { 410 | r io.Reader 411 | } 412 | 413 | // NewKeysRanger 生成并返回 *KeysRanger 实例 414 | func NewKeysRanger(r io.Reader) *KeysRanger { 415 | return &KeysRanger{r: r} 416 | } 417 | 418 | // Range 遍历所有 HashKeys 并对每个 Key 执行 fn 419 | func (rg *KeysRanger) Range(fn func(flag Flag, h uint64, n uint32)) error { 420 | flagBytes := make([]byte, SizeFlag) 421 | hashBytes := make([]byte, SizeUint64) 422 | lenBytes := make([]byte, SizeUint32) 423 | 424 | for { 425 | n, err := rg.r.Read(flagBytes) 426 | if err != nil { 427 | if err == io.EOF { 428 | return nil 429 | } 430 | return err 431 | } 432 | if n != SizeFlag { 433 | return ErrInvalidFlagSize 434 | } 435 | flag := Flag(flagBytes[0]) 436 | 437 | n, err = rg.r.Read(hashBytes) 438 | if err != nil { 439 | return err 440 | } 441 | if n != SizeUint64 { 442 | return ErrInvalidUint64Size 443 | } 444 | h := binaryx.Uint64(hashBytes) 445 | 446 | n, err = rg.r.Read(lenBytes) 447 | if err != nil { 448 | return err 449 | } 450 | if n != SizeUint32 { 451 | return ErrInvalidUint32Size 452 | } 453 | l := binaryx.Uint32(lenBytes) 454 | fn(flag, h, l) 455 | } 456 | } 457 | 458 | // Position 表示索引字节位置 459 | type Position struct { 460 | Start uint32 461 | End uint32 462 | } 463 | 464 | // BucketMeta Bucket 元数据信息 包含 bucket 名称以及索引字节位置 465 | type BucketMeta struct { 466 | Name string 467 | RecordPos []Position 468 | KeyEntityPos []Position 469 | } 470 | 471 | // BucketMetaSlice BucketMeta 列表 472 | type BucketMetaSlice []BucketMeta 473 | 474 | // AsBucketPos 将 BucketMetaSlice 转换成 BucketPos 475 | func (bms BucketMetaSlice) AsBucketPos() BucketPos { 476 | recordPos := make(map[string]Positions) 477 | for _, bm := range bms { 478 | recordPos[bm.Name] = bm.RecordPos 479 | } 480 | keyEntityPos := make(map[string]Positions) 481 | for _, bm := range bms { 482 | keyEntityPos[bm.Name] = bm.KeyEntityPos 483 | } 484 | 485 | return BucketPos{ 486 | Record: recordPos, 487 | KeyItem: keyEntityPos, 488 | } 489 | } 490 | 491 | type Positions []Position 492 | 493 | // MaxRange 取 Positions 最大区间 494 | func (ps Positions) MaxRange() uint32 { 495 | var max uint32 496 | for _, pos := range ps { 497 | delta := pos.End - pos.Start 498 | if delta > max { 499 | max = delta 500 | } 501 | } 502 | return max 503 | } 504 | 505 | // BucketPos Bucket 索引字节位置信息 506 | type BucketPos struct { 507 | Record map[string]Positions // Record Position 信息 508 | KeyItem map[string]Positions // KeyItem Position 信息 509 | } 510 | 511 | // AsBucketMetaSlice 将 AsBucketMetaSlice 转换成 BucketMetaSlice 512 | func (bp BucketPos) AsBucketMetaSlice() BucketMetaSlice { 513 | bms := make(BucketMetaSlice, 0, len(bp.Record)) 514 | names := make([]string, 0, len(bp.Record)) 515 | for name := range bp.Record { 516 | names = append(names, name) 517 | } 518 | sort.Strings(names) 519 | 520 | for _, name := range names { 521 | bms = append(bms, BucketMeta{ 522 | Name: name, 523 | RecordPos: bp.Record[name], 
524 | KeyEntityPos: bp.KeyItem[name], 525 | }) 526 | } 527 | return bms 528 | } 529 | 530 | // EncodeMetadata 编码 Metadata 531 | // 532 | // 布局如下 533 | // | BucketSize | Bucket | RecordPosCount | RecordPosBlock | KeyEntityPosBlock | Checksum | 534 | // | 4B | .. | 4B | ... | ... | 4B | 535 | // 536 | // RecordPosBlock/KeyEntityPosBlock 布局 537 | // | PositionStart | PositionEnd | 538 | // | 4B | 4B | 539 | func EncodeMetadata(bms BucketMetaSlice) []byte { 540 | var size int 541 | for _, bucket := range bms { 542 | size += SizeUint32 * 2 543 | size += len(bucket.Name) 544 | size += len(bucket.RecordPos) * SizePosition 545 | size += len(bucket.KeyEntityPos) * SizePosition 546 | } 547 | 548 | buf := make([]byte, 0, size+SizeUint32) 549 | for _, bucket := range bms { 550 | // Bucket Name 551 | buf = append(buf, binaryx.PutUint32(uint32(len(bucket.Name)))...) 552 | buf = append(buf, []byte(bucket.Name)...) 553 | 554 | // Positions 555 | buf = append(buf, binaryx.PutUint32(uint32(len(bucket.RecordPos)))...) 556 | for _, pos := range bucket.RecordPos { 557 | buf = append(buf, binaryx.PutUint32(pos.Start)...) 558 | buf = append(buf, binaryx.PutUint32(pos.End)...) 559 | } 560 | for _, pos := range bucket.KeyEntityPos { 561 | buf = append(buf, binaryx.PutUint32(pos.Start)...) 562 | buf = append(buf, binaryx.PutUint32(pos.End)...) 563 | } 564 | } 565 | 566 | buf = append(buf, CRC32(buf[:])...) 567 | return buf 568 | } 569 | 570 | // DecodeMetadata 解码 Metadata 571 | func DecodeMetadata(b []byte) (BucketMetaSlice, error) { 572 | if len(b) < SizeChecksum { 573 | return nil, ErrInvalidMetadataChecksum 574 | } 575 | 576 | if !bytes.Equal(CRC32(b[:len(b)-SizeChecksum]), b[len(b)-SizeChecksum:]) { 577 | return nil, ErrInvalidMetadataChecksum 578 | } 579 | 580 | b = b[:len(b)-SizeChecksum] 581 | 582 | var bms BucketMetaSlice 583 | buffer := bytes.NewBuffer(b) 584 | sizeBytes := make([]byte, SizeUint32) 585 | for { 586 | // read bucket size 587 | n, err := buffer.Read(sizeBytes) 588 | if err != nil { 589 | if err == io.EOF { 590 | break 591 | } 592 | return nil, err 593 | } 594 | if n != SizeUint32 { 595 | return nil, ErrInvalidUint32Size 596 | } 597 | 598 | // read bucket name 599 | bucketSize := binaryx.Uint32(sizeBytes) 600 | bucketName := make([]byte, bucketSize) 601 | n, err = buffer.Read(bucketName) 602 | if err != nil { 603 | return nil, err 604 | } 605 | if n != int(bucketSize) { 606 | return nil, ErrReadPartial 607 | } 608 | 609 | // read position count 610 | n, err = buffer.Read(sizeBytes) 611 | if err != nil { 612 | return nil, err 613 | } 614 | if n != SizeUint32 { 615 | return nil, ErrInvalidUint32Size 616 | } 617 | 618 | // read positions 619 | posCnt := int(binaryx.Uint32(sizeBytes)) 620 | recordPos := make([]Position, 0, posCnt) 621 | for i := 0; i < posCnt; i++ { 622 | n, err = buffer.Read(sizeBytes) 623 | if err != nil { 624 | return nil, err 625 | } 626 | if n != SizeUint32 { 627 | return nil, ErrInvalidUint32Size 628 | } 629 | start := binaryx.Uint32(sizeBytes) 630 | 631 | n, err = buffer.Read(sizeBytes) 632 | if err != nil { 633 | return nil, err 634 | } 635 | if n != SizeUint32 { 636 | return nil, ErrInvalidUint32Size 637 | } 638 | end := binaryx.Uint32(sizeBytes) 639 | 640 | recordPos = append(recordPos, Position{ 641 | Start: start, 642 | End: end, 643 | }) 644 | } 645 | 646 | keyEntityPos := make([]Position, 0, posCnt) 647 | for i := 0; i < posCnt; i++ { 648 | n, err = buffer.Read(sizeBytes) 649 | if err != nil { 650 | return nil, err 651 | } 652 | if n != SizeUint32 { 653 | return nil, 
ErrInvalidUint32Size 654 | } 655 | start := binaryx.Uint32(sizeBytes) 656 | 657 | n, err = buffer.Read(sizeBytes) 658 | if err != nil { 659 | return nil, err 660 | } 661 | if n != SizeUint32 { 662 | return nil, ErrInvalidUint32Size 663 | } 664 | end := binaryx.Uint32(sizeBytes) 665 | 666 | keyEntityPos = append(keyEntityPos, Position{ 667 | Start: start, 668 | End: end, 669 | }) 670 | } 671 | 672 | bms = append(bms, BucketMeta{ 673 | Name: string(bucketName), 674 | RecordPos: recordPos, 675 | KeyEntityPos: keyEntityPos, 676 | }) 677 | } 678 | 679 | return bms, nil 680 | } 681 | -------------------------------------------------------------------------------- /pkg/codec/codec_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package codec 15 | 16 | import ( 17 | "bytes" 18 | "testing" 19 | 20 | "github.com/stretchr/testify/require" 21 | 22 | "github.com/chenjiandongx/grogudb/pkg/buffer" 23 | ) 24 | 25 | func TestTombstoneCodec(t *testing.T) { 26 | key1 := "tombstone1" 27 | key2 := "tombstone2" 28 | 29 | t.Run("Tombstone", func(t *testing.T) { 30 | b := EncodeTombstoneEntity(key1, 1000) 31 | name, nano, err := DecodeTombstoneEntity(b[SizeUint32:]) 32 | require.NoError(t, err) 33 | require.Equal(t, name, "tombstone1") 34 | require.Equal(t, uint64(1000), nano) 35 | }) 36 | 37 | t.Run("Invalid checksum", func(t *testing.T) { 38 | b := EncodeTombstoneEntity(key1, 1000) 39 | name, nano, err := DecodeTombstoneEntity(b) 40 | require.Error(t, err, ErrInvalidRecordChecksum) 41 | require.Zero(t, name) 42 | require.Zero(t, nano) 43 | }) 44 | 45 | t.Run("Encode-Ranger", func(t *testing.T) { 46 | var buf []byte 47 | buf = append(buf, EncodeTombstoneEntity(key1, 1000)...) 48 | buf = append(buf, EncodeTombstoneEntity(key2, 2000)...) 
49 | 50 | idx := 0 51 | rg := NewTombstoneRanger(bytes.NewReader(buf)) 52 | err := rg.Range(func(bucket string, nano int64) { 53 | switch idx { 54 | case 0: 55 | require.Equal(t, bucket, key1) 56 | require.Equal(t, nano, int64(1000)) 57 | case 1: 58 | require.Equal(t, bucket, key2) 59 | require.Equal(t, nano, int64(2000)) 60 | } 61 | idx++ 62 | }) 63 | require.Equal(t, 2, idx) 64 | require.NoError(t, err) 65 | }) 66 | } 67 | 68 | func TestRecordCodec(t *testing.T) { 69 | key := []byte("grogu_bucket_test_key") 70 | val := []byte("grogu_bucket_test_val") 71 | 72 | t.Run("DecodeRecord invalid uint32 size", func(t *testing.T) { 73 | flag, k, v, err := DecodeRecord(make([]byte, 3)) 74 | require.Error(t, err, ErrInvalidUint32Size) 75 | require.Equal(t, FlagUnset, flag) 76 | require.Nil(t, k) 77 | require.Nil(t, v) 78 | }) 79 | 80 | t.Run("Encode-Ranger", func(t *testing.T) { 81 | b := EncodeRecord(FlagPut, key, val) 82 | buf := bytes.NewBuffer(b) 83 | rg := NewRecordRanger(buf) 84 | err := rg.Range(func(flag Flag, k, v []byte, n int) (quit bool) { 85 | require.Equal(t, FlagPut, flag) 86 | require.Equal(t, key, k) 87 | require.Equal(t, val, v) 88 | return false 89 | }) 90 | require.NoError(t, err) 91 | }) 92 | 93 | t.Run("DecodeRecord-Put", func(t *testing.T) { 94 | b := EncodeRecord(FlagPut, key, val) 95 | flag, k, v, err := DecodeRecord(b) 96 | require.NoError(t, err) 97 | require.Equal(t, FlagPut, flag) 98 | require.Equal(t, key, k) 99 | require.Equal(t, val, v) 100 | }) 101 | 102 | t.Run("DecodeRecord-Del", func(t *testing.T) { 103 | b := EncodeRecord(FlagDel, key, nil) 104 | flag, k, v, err := DecodeRecord(b) 105 | require.NoError(t, err) 106 | require.Equal(t, FlagDel, flag) 107 | require.Equal(t, key, k) 108 | require.Nil(t, v) 109 | }) 110 | 111 | t.Run("EncodeRecord-Tombstone", func(t *testing.T) { 112 | b := EncodeRecord(FlagTombstone, nil, nil) 113 | flag, k, v, err := DecodeRecord(b) 114 | require.NoError(t, err) 115 | require.Equal(t, FlagTombstone, flag) 116 | require.Nil(t, k) 117 | require.Nil(t, v) 118 | }) 119 | } 120 | 121 | func TestKeyEntityCodec(t *testing.T) { 122 | t.Run("Encode invalid checksum", func(t *testing.T) { 123 | buf, err := VerifyTailChecksum(make([]byte, 3)) 124 | require.Equal(t, ErrInvalidChecksum, err) 125 | require.Zero(t, buf) 126 | }) 127 | 128 | t.Run("Encode-KeyBlock", func(t *testing.T) { 129 | buf := buffer.Get() 130 | defer buffer.Put(buf) 131 | 132 | _, _ = buf.Write(EncodeKeyEntity(FlagPut, 1, 100)) 133 | _, _ = buf.Write(EncodeKeyEntity(FlagPut, 2, 200)) 134 | _, err := VerifyTailChecksum(buf.Frozen()) 135 | require.NoError(t, err) 136 | }) 137 | 138 | t.Run("Encode-Ranger not skip del", func(t *testing.T) { 139 | buf := buffer.Get() 140 | defer buffer.Put(buf) 141 | 142 | _, _ = buf.Write(EncodeKeyEntity(FlagPut, 1, 100)) 143 | _, _ = buf.Write(EncodeKeyEntity(FlagDel, 2, 200)) 144 | b, err := VerifyTailChecksum(buf.Frozen()) 145 | require.NoError(t, err) 146 | 147 | idx := 0 148 | rg := NewKeysRanger(bytes.NewReader(b)) 149 | err = rg.Range(func(flag Flag, h uint64, n uint32) { 150 | switch idx { 151 | case 0: 152 | require.Equal(t, h, uint64(1)) 153 | require.Equal(t, n, uint32(100)) 154 | case 1: 155 | require.Equal(t, h, uint64(2)) 156 | require.Equal(t, n, uint32(200)) 157 | } 158 | idx++ 159 | }) 160 | require.Equal(t, 2, idx) 161 | require.NoError(t, err) 162 | }) 163 | } 164 | 165 | func TestFooterCodec(t *testing.T) { 166 | t.Run("Encode Footer", func(t *testing.T) { 167 | footer := Footer{ 168 | DataSize: 1, 169 | MetaSize: 2, 170 | 
BloomFilterCount: 3, 171 | BloomFilterSize: 4, 172 | } 173 | b := EncodeFooter(footer) 174 | require.Equal(t, 20, len(b)) 175 | 176 | f, err := DecodeFooter(b) 177 | require.NoError(t, err) 178 | require.Equal(t, footer, f) 179 | }) 180 | 181 | t.Run("Footer invalid size", func(t *testing.T) { 182 | b := make([]byte, 19) 183 | _, err := DecodeFooter(b) 184 | require.Equal(t, ErrInvalidFooterSize, err) 185 | }) 186 | 187 | t.Run("Footer invalid magic", func(t *testing.T) { 188 | b := make([]byte, 20) 189 | _, err := DecodeFooter(b) 190 | require.Equal(t, ErrInvalidFooterMagic, err) 191 | }) 192 | } 193 | 194 | func TestMetadataCodec(t *testing.T) { 195 | t.Run("Encode Metadata", func(t *testing.T) { 196 | var bms BucketMetaSlice 197 | bms = append(bms, BucketMeta{ 198 | Name: "bucket1", 199 | RecordPos: []Position{ 200 | {Start: 10, End: 20}, 201 | {Start: 20, End: 30}, 202 | {Start: 30, End: 40}, 203 | }, 204 | KeyEntityPos: []Position{ 205 | {Start: 10, End: 20}, 206 | {Start: 20, End: 30}, 207 | {Start: 30, End: 40}, 208 | }, 209 | }) 210 | bms = append(bms, BucketMeta{ 211 | Name: "bucket2", 212 | RecordPos: []Position{ 213 | {Start: 40, End: 50}, 214 | {Start: 50, End: 60}, 215 | {Start: 60, End: 70}, 216 | }, 217 | KeyEntityPos: []Position{ 218 | {Start: 40, End: 50}, 219 | {Start: 50, End: 60}, 220 | {Start: 60, End: 70}, 221 | }, 222 | }) 223 | 224 | b := EncodeMetadata(bms) 225 | data, err := DecodeMetadata(b) 226 | require.NoError(t, err) 227 | require.Equal(t, bms, data) 228 | }) 229 | 230 | t.Run("Metadata invalid checksum", func(t *testing.T) { 231 | b := make([]byte, 8) 232 | _, err := DecodeMetadata(b) 233 | require.Equal(t, ErrInvalidMetadataChecksum, err) 234 | }) 235 | } 236 | -------------------------------------------------------------------------------- /pkg/fsx/fs.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package fsx 15 | 16 | import ( 17 | "errors" 18 | "fmt" 19 | "io" 20 | "os" 21 | "path/filepath" 22 | "strconv" 23 | "strings" 24 | 25 | "go.uber.org/multierr" 26 | 27 | "github.com/chenjiandongx/grogudb/pkg/rescue" 28 | ) 29 | 30 | const ( 31 | FlagAppend = os.O_APPEND | os.O_CREATE | os.O_WRONLY 32 | ) 33 | 34 | // FileDesc 文件句柄接口定义 35 | type FileDesc interface { 36 | io.ReaderAt 37 | io.ReadCloser 38 | io.Seeker 39 | Size() int64 40 | } 41 | 42 | var _ FileDesc = (*fileDesc)(nil) 43 | 44 | type fileDesc struct { 45 | f *os.File 46 | size int64 47 | } 48 | 49 | func (fd *fileDesc) Size() int64 { 50 | return fd.size 51 | } 52 | 53 | func (fd *fileDesc) Seek(offset int64, whence int) (int64, error) { 54 | return fd.f.Seek(offset, whence) 55 | } 56 | 57 | func (fd *fileDesc) Read(b []byte) (int, error) { 58 | return fd.f.Read(b) 59 | } 60 | 61 | func (fd *fileDesc) ReadAt(b []byte, off int64) (int, error) { 62 | return fd.f.ReadAt(b, off) 63 | } 64 | 65 | func (fd *fileDesc) Close() error { 66 | return fd.f.Close() 67 | } 68 | 69 | // CacheFD 可缓存 fd 70 | type CacheFD struct { 71 | path string 72 | size int64 73 | cache chan FileDesc 74 | closed chan struct{} 75 | } 76 | 77 | // NewCacheFD 生成并返回 *CacheFD 实例 78 | func NewCacheFD(path string, n int) (*CacheFD, error) { 79 | if n <= 0 { 80 | return nil, errors.New("negative fd cache count") 81 | } 82 | 83 | f, err := os.Open(path) 84 | if err != nil { 85 | return nil, err 86 | } 87 | defer f.Close() 88 | 89 | info, err := f.Stat() 90 | if err != nil { 91 | return nil, err 92 | } 93 | 94 | return &CacheFD{ 95 | path: path, 96 | size: info.Size(), 97 | cache: make(chan FileDesc, n), 98 | closed: make(chan struct{}, 1), 99 | }, nil 100 | } 101 | 102 | // Path 返回 fd path 103 | func (fd *CacheFD) Path() string { 104 | return fd.path 105 | } 106 | 107 | // Size 返回 fd 持有文件大小 108 | func (fd *CacheFD) Size() int64 { 109 | return fd.size 110 | } 111 | 112 | // Close 关闭并清理 fd 113 | func (fd *CacheFD) Close() error { 114 | close(fd.cache) 115 | 116 | var errs []error 117 | for r := range fd.cache { 118 | errs = append(errs, r.Close()) 119 | } 120 | return multierr.Combine(errs...) 121 | } 122 | 123 | // FileDesc 返回 Fd 124 | func (fd *CacheFD) FileDesc() (FileDesc, error) { 125 | select { 126 | case r := <-fd.cache: 127 | return r, nil 128 | default: 129 | } 130 | 131 | f, err := os.Open(fd.path) 132 | if err != nil { 133 | return nil, err 134 | } 135 | return &fileDesc{f: f, size: fd.size}, nil 136 | } 137 | 138 | // Reuse 复用 fd 139 | func (fd *CacheFD) Reuse(f FileDesc) { 140 | defer rescue.HandleCrash() 141 | 142 | _, err := f.Seek(0, io.SeekStart) 143 | if err != nil { 144 | _ = f.Close() 145 | return 146 | } 147 | 148 | select { 149 | case <-fd.closed: 150 | _ = f.Close() 151 | return 152 | default: 153 | } 154 | 155 | select { 156 | case fd.cache <- f: 157 | default: 158 | _ = f.Close() 159 | } 160 | } 161 | 162 | const ( 163 | PrefixDataFile = "data" 164 | PrefixKeysFile = "keys" 165 | ) 166 | 167 | // DataFilename Data 文件名称 168 | func DataFilename(seqID int64, path ...string) string { 169 | var p []string 170 | p = append(p, path...) 171 | p = append(p, fmt.Sprintf("%s_%d", PrefixDataFile, seqID)) 172 | return filepath.Join(p...) 173 | } 174 | 175 | // DataTmpFilename 临时 Data 文件名称 176 | func DataTmpFilename(seqID int64, path ...string) string { 177 | return DataFilename(seqID, path...) 
+ ".tmp" 178 | } 179 | 180 | // KeysFilename Keys 文件名称 181 | func KeysFilename(seqID int64, path ...string) string { 182 | var p []string 183 | p = append(p, path...) 184 | p = append(p, fmt.Sprintf("%s_%d", PrefixKeysFile, seqID)) 185 | return filepath.Join(p...) 186 | } 187 | 188 | // KeysTmpFilename 临时 Keys 文件名称 189 | func KeysTmpFilename(seqID int64, path ...string) string { 190 | return KeysFilename(seqID, path...) + ".tmp" 191 | } 192 | 193 | // ParseFilename 解析文件名 194 | func ParseFilename(s string) (string, int64, bool) { 195 | parts := strings.Split(s, "_") 196 | if len(parts) != 2 { 197 | return "", 0, false 198 | } 199 | 200 | switch parts[0] { 201 | case PrefixDataFile, PrefixKeysFile: 202 | default: 203 | return "", 0, false 204 | } 205 | 206 | seqID, err := strconv.Atoi(parts[1]) 207 | if err != nil { 208 | return "", 0, false 209 | } 210 | 211 | return parts[0], int64(seqID), true 212 | } 213 | 214 | // WriteFile 创建文件 p 并写入数据 b 215 | func WriteFile(p string, b []byte) error { 216 | f, err := os.Create(p) 217 | if err != nil { 218 | return err 219 | } 220 | defer f.Close() 221 | 222 | if _, err := f.Write(b); err != nil { 223 | return err 224 | } 225 | 226 | return nil 227 | } 228 | -------------------------------------------------------------------------------- /pkg/logx/logger.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package logx 15 | 16 | import ( 17 | "github.com/chenjiandongx/logger" 18 | ) 19 | 20 | // Logger 接口定义 21 | type Logger interface { 22 | Infof(format string, v ...interface{}) 23 | Errorf(format string, v ...interface{}) 24 | } 25 | 26 | var logf Logger = logger.New(logger.Options{ 27 | Stdout: true, 28 | ConsoleMode: true, 29 | Skip: 2, 30 | Level: logger.ErrorLevel, 31 | }) 32 | 33 | // SetLogger 设置自定义 logger 可设置为 nil 34 | func SetLogger(logger Logger) { 35 | logf = logger 36 | } 37 | 38 | func Infof(format string, v ...interface{}) { 39 | if logf == nil { 40 | return 41 | } 42 | logf.Infof(format, v...) 43 | } 44 | 45 | func Errorf(format string, v ...interface{}) { 46 | if logf == nil { 47 | return 48 | } 49 | logf.Errorf(format, v...) 50 | } 51 | -------------------------------------------------------------------------------- /pkg/rescue/rescue.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
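A short sketch of the borrow/return pattern around `fsx.CacheFD` defined above, mirroring how load.go uses it: FileDesc either pops a cached descriptor or opens a fresh one, and Reuse seeks it back to the start and returns it to the cache. The file name `data_0` is only a placeholder for an existing data file; error handling is deliberately minimal.

```go
package main

import (
	"fmt"

	"github.com/chenjiandongx/grogudb/pkg/fsx"
)

func main() {
	// Hold at most 2 cached descriptors for this file (placeholder path).
	cfd, err := fsx.NewCacheFD("data_0", 2)
	if err != nil {
		panic(err)
	}
	defer cfd.Close()

	fd, err := cfd.FileDesc() // borrow a descriptor
	if err != nil {
		panic(err)
	}
	defer cfd.Reuse(fd) // rewind and hand it back to the cache

	b := make([]byte, 4)
	if _, err := fd.ReadAt(b, 0); err != nil {
		fmt.Println("read:", err)
	}
}
```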
11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package rescue 15 | 16 | import ( 17 | "runtime" 18 | 19 | "github.com/chenjiandongx/grogudb/pkg/logx" 20 | ) 21 | 22 | func logPanic(r interface{}) { 23 | const size = 64 << 10 24 | stacktrace := make([]byte, size) 25 | stacktrace = stacktrace[:runtime.Stack(stacktrace, false)] 26 | if _, ok := r.(string); ok { 27 | logx.Errorf("Observed a panic: %s\n%s", r, stacktrace) 28 | } else { 29 | logx.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace) 30 | } 31 | } 32 | 33 | // HandleCrash 处理 panic 事件 34 | func HandleCrash() { 35 | if r := recover(); r != nil { 36 | logPanic(r) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pkg/slice/slice.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package slice 15 | 16 | import ( 17 | "sync" 18 | 19 | dll "github.com/emirpasic/gods/lists/doublylinkedlist" 20 | 21 | "github.com/chenjiandongx/grogudb/pkg/codec" 22 | ) 23 | 24 | type ID struct { 25 | Flag uint8 26 | Hash uint64 27 | } 28 | 29 | // Slice 内部使用的是双向链表的数据结构(尽量减少额外内存分配) 30 | // 对外表现是 Slice 的行为 31 | type Slice struct { 32 | mut sync.RWMutex 33 | length int 34 | list *dll.List 35 | pos map[ID]uint32 // 省吃俭用 4 个字节 36 | } 37 | 38 | // New 创建一个新的 Slice 实例 39 | func New() *Slice { 40 | return &Slice{ 41 | list: dll.New(), 42 | pos: make(map[ID]uint32), 43 | } 44 | } 45 | 46 | // Append 追加 b 至 Slice 47 | func (s *Slice) Append(id ID, b []byte) int { 48 | s.mut.Lock() 49 | defer s.mut.Unlock() 50 | 51 | var l int 52 | 53 | cur, ok := s.pos[id] 54 | if ok { 55 | obj, _ := s.list.Get(int(cur)) 56 | val := obj.([]byte) 57 | l -= len(val) 58 | s.list.Remove(int(cur)) 59 | s.list.Append(b) 60 | s.pos[id] = uint32(s.list.Size() - 1) 61 | } else { 62 | s.pos[id] = uint32(s.list.Size()) 63 | s.list.Append(b) 64 | } 65 | 66 | l += len(b) 67 | s.length += l 68 | 69 | return l 70 | } 71 | 72 | // Len 返回 Slice Bytes 总长度 73 | func (s *Slice) Len() int { 74 | s.mut.RLock() 75 | defer s.mut.RUnlock() 76 | 77 | return s.length 78 | } 79 | 80 | // Count 返回 Slice 元素个数 81 | func (s *Slice) Count() int { 82 | s.mut.RLock() 83 | defer s.mut.RUnlock() 84 | 85 | return s.list.Size() 86 | } 87 | 88 | // FrozenReverse 重置 Slice 返回倒序的 []byte 同时会补充 crc32 checksum 89 | func (s *Slice) FrozenReverse() []byte { 90 | s.mut.Lock() 91 | defer s.mut.Unlock() 92 | 93 | dst := make([]byte, 0, s.length+codec.SizeChecksum) 94 | it := s.list.Iterator() 95 | for it.End(); it.Prev(); { 96 | obj := it.Value() 97 | val := obj.([]byte) 98 | dst = append(dst, val...) 99 | } 100 | 101 | checksum := codec.CRC32(dst[:]) 102 | dst = append(dst, checksum...) 
103 | 104 | s.reset() 105 | return dst 106 | } 107 | 108 | // Frozen 重置 Slice 返回正序的 []byte 同时会补充 crc32 checksum 109 | func (s *Slice) Frozen() []byte { 110 | s.mut.Lock() 111 | defer s.mut.Unlock() 112 | 113 | dst := make([]byte, 0, s.length+codec.SizeChecksum) 114 | it := s.list.Iterator() 115 | for it.Begin(); it.Next(); { 116 | obj := it.Value() 117 | val := obj.([]byte) 118 | dst = append(dst, val...) 119 | } 120 | 121 | checksum := codec.CRC32(dst[:]) 122 | dst = append(dst, checksum...) 123 | 124 | s.reset() 125 | return dst 126 | } 127 | 128 | func (s *Slice) reset() { 129 | s.list.Clear() 130 | s.length = 0 131 | s.pos = make(map[ID]uint32) 132 | } 133 | 134 | // ForEach 遍历每一个 Item 并执行 visitFn 函数 135 | func (s *Slice) ForEach(visitFn func(b []byte) bool) { 136 | s.mut.RLock() 137 | defer s.mut.RUnlock() 138 | 139 | it := s.list.Iterator() 140 | for it.End(); it.Prev(); { 141 | obj := it.Value() 142 | val := obj.([]byte) 143 | if visitFn(val) { 144 | break 145 | } 146 | } 147 | } 148 | 149 | // CopyForEach 复制遍历每一个 Item 并执行 visitFn 函数 150 | // 避免锁占用太长时间 影响写入 151 | func (s *Slice) CopyForEach(visitFn func(b []byte) bool) { 152 | s.mut.RLock() 153 | bs := make([][]byte, 0, s.Count()) 154 | it := s.list.Iterator() 155 | for it.End(); it.Prev(); { 156 | obj := it.Value() 157 | val := obj.([]byte) 158 | bs = append(bs, val) 159 | } 160 | s.mut.RUnlock() 161 | 162 | for _, item := range bs { 163 | if visitFn(item) { 164 | break 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /pkg/slice/slice_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
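Before the tests, a minimal usage sketch of the slice package defined above: Append de-duplicates by ID (a later Append with the same ID replaces the earlier payload and moves it to the tail), and Frozen/FrozenReverse drain the list into a single byte slice followed by a CRC32 checksum, then reset the slice. Only identifiers visible in slice.go are used; the 4-byte trailer size is inferred from the tests, which strip len(b)-4.

package main

import (
	"fmt"

	"github.com/chenjiandongx/grogudb/pkg/slice"
)

func main() {
	s := slice.New()
	s.Append(slice.ID{Flag: 0, Hash: 1}, []byte("foo"))
	s.Append(slice.ID{Flag: 0, Hash: 2}, []byte("bar"))
	s.Append(slice.ID{Flag: 0, Hash: 1}, []byte("qux")) // same ID: replaces "foo" and moves to the tail

	fmt.Println(s.Count(), s.Len()) // 2 6

	b := s.Frozen()         // payload in list order plus a 4-byte crc32 trailer
	payload := b[:len(b)-4] // strip the checksum, as the tests do
	fmt.Printf("%s\n", payload) // barqux
	fmt.Println(s.Count())      // 0, Frozen resets the slice
}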
13 | 14 | package slice 15 | 16 | import ( 17 | "testing" 18 | 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | func TestSlice(t *testing.T) { 23 | t.Run("Append unique", func(t *testing.T) { 24 | slice := New() 25 | slice.Append(ID{0, 1}, []byte("foo")) 26 | slice.Append(ID{0, 2}, []byte("baz")) 27 | require.Equal(t, 2, slice.Count()) 28 | require.Equal(t, 6, slice.Len()) 29 | 30 | idx := 0 31 | slice.ForEach(func(b []byte) bool { 32 | switch idx { 33 | case 0: 34 | require.Equal(t, []byte("baz"), b) 35 | case 1: 36 | require.Equal(t, []byte("foo"), b) 37 | } 38 | idx++ 39 | return false 40 | }) 41 | }) 42 | 43 | t.Run("Append duplicated", func(t *testing.T) { 44 | slice := New() 45 | slice.Append(ID{0, 1}, []byte("foo")) 46 | slice.Append(ID{0, 1}, []byte("baz")) 47 | require.Equal(t, 1, slice.Count()) 48 | require.Equal(t, 3, slice.Len()) 49 | 50 | slice.ForEach(func(b []byte) bool { 51 | require.Equal(t, []byte("baz"), b) 52 | return false 53 | }) 54 | }) 55 | 56 | t.Run("Frozen", func(t *testing.T) { 57 | slice := New() 58 | slice.Append(ID{0, 1}, []byte("foo")) 59 | slice.Append(ID{0, 2}, []byte("baz")) 60 | 61 | b := slice.Frozen() 62 | require.Equal(t, []byte("foobaz"), b[:len(b)-4]) 63 | }) 64 | 65 | t.Run("FrozenReverse", func(t *testing.T) { 66 | slice := New() 67 | slice.Append(ID{0, 1}, []byte("foo")) 68 | slice.Append(ID{0, 2}, []byte("baz")) 69 | 70 | b := slice.FrozenReverse() 71 | require.Equal(t, []byte("bazfoo"), b[:len(b)-4]) 72 | }) 73 | } 74 | -------------------------------------------------------------------------------- /pkg/uint64set/bloomfilter.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package uint64set 15 | 16 | import ( 17 | "bytes" 18 | 19 | "github.com/bits-and-blooms/bloom/v3" 20 | 21 | "github.com/chenjiandongx/grogudb/pkg/binaryx" 22 | ) 23 | 24 | const ( 25 | defaultFalsePositiveRate = 0.001 26 | ) 27 | 28 | // BloomFilter BloomFilter 定义 29 | // 使用尽量少的内存快速判断 key 是否在 disk segment 中 30 | type BloomFilter interface { 31 | Test(k uint64) bool 32 | Bytes() []byte 33 | Count() int 34 | } 35 | 36 | type bloomFilter struct { 37 | n int 38 | filter *bloom.BloomFilter 39 | } 40 | 41 | // Test 测试 k 是否存在 42 | func (b *bloomFilter) Test(k uint64) bool { 43 | return b.filter.Test(binaryx.PutUint64(k)) 44 | } 45 | 46 | // Count 返回元素个数 47 | func (b *bloomFilter) Count() int { 48 | return b.n 49 | } 50 | 51 | // Bytes 返回字节数组 52 | func (b *bloomFilter) Bytes() []byte { 53 | buf := &bytes.Buffer{} 54 | _, _ = b.filter.WriteTo(buf) 55 | return buf.Bytes() 56 | } 57 | 58 | // NewBloomFilterFromSets 将 *Sets 转换为 BloomFilter 59 | func NewBloomFilterFromSets(sets *Sets) BloomFilter { 60 | n := sets.CountAll() 61 | filter := bloom.NewWithEstimates(uint(n), defaultFalsePositiveRate) 62 | 63 | sets.IterAllKeys(func(k uint64) { 64 | filter.Add(binaryx.PutUint64(k)) 65 | }) 66 | 67 | return &bloomFilter{ 68 | n: n, 69 | filter: filter, 70 | } 71 | } 72 | 73 | // LoadBloomFilter 读取字节数组并转换为 BloomFilter 74 | func LoadBloomFilter(n int, b []byte) (BloomFilter, error) { 75 | buf := bytes.NewBuffer(b) 76 | 77 | filter := bloom.NewWithEstimates(uint(n), defaultFalsePositiveRate) 78 | _, err := filter.ReadFrom(buf) 79 | if err != nil { 80 | return nil, err 81 | } 82 | return &bloomFilter{ 83 | n: n, 84 | filter: filter, 85 | }, nil 86 | } 87 | -------------------------------------------------------------------------------- /pkg/uint64set/bloomfilter_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
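Before the tests below, a short sketch of the intended round trip for the bloom filter defined above, using only the exported API of this package: build a filter from a Sets snapshot, persist Bytes() together with Count(), and later rebuild it with LoadBloomFilter, which needs the element count to size the filter with the same 0.001 false-positive target.

package main

import (
	"fmt"

	"github.com/chenjiandongx/grogudb/pkg/uint64set"
)

func main() {
	sets := uint64set.NewSets()
	set := sets.GetOrCreate("bucket0")
	for i := uint64(0); i < 1000; i++ {
		set.Insert(i)
	}

	bf := sets.AsBloomFilter()       // equivalent to NewBloomFilterFromSets(sets)
	raw, n := bf.Bytes(), bf.Count() // both must be persisted to restore the filter later

	loaded, err := uint64set.LoadBloomFilter(n, raw)
	if err != nil {
		panic(err)
	}
	fmt.Println(loaded.Test(42))        // true: inserted keys always hit
	fmt.Println(loaded.Test(9_999_999)) // false with ~99.9% probability
}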
13 | 14 | package uint64set 15 | 16 | import ( 17 | "testing" 18 | 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | func TestBloomFilterFromSets(t *testing.T) { 23 | const N = 10000 24 | 25 | sets := NewSets() 26 | set := sets.GetOrCreate("set1") 27 | for i := 0; i < N; i++ { 28 | set.Insert(uint64(i)) 29 | } 30 | 31 | bf := NewBloomFilterFromSets(sets) 32 | var hint, miss int 33 | for i := 0; i < N*2; i++ { 34 | if bf.Test(uint64(i)) { 35 | hint++ 36 | } else { 37 | miss++ 38 | } 39 | } 40 | 41 | require.Equal(t, 10007, hint) 42 | require.Equal(t, 9993, miss) 43 | require.Equal(t, N, bf.Count()) 44 | } 45 | 46 | func TestLoadBloomFilter(t *testing.T) { 47 | const N = 10000 48 | 49 | sets := NewSets() 50 | set := sets.GetOrCreate("set1") 51 | for i := 0; i < N; i++ { 52 | set.Insert(uint64(i)) 53 | } 54 | 55 | bf := NewBloomFilterFromSets(sets) 56 | b := bf.Bytes() 57 | require.Equal(t, 18000, len(b)) 58 | 59 | loaded, err := LoadBloomFilter(N, b) 60 | require.NoError(t, err) 61 | 62 | var hint, miss int 63 | for i := 0; i < N*2; i++ { 64 | if loaded.Test(uint64(i)) { 65 | hint++ 66 | } else { 67 | miss++ 68 | } 69 | } 70 | 71 | require.Equal(t, 10007, hint) 72 | require.Equal(t, 9993, miss) 73 | require.Equal(t, N, bf.Count()) 74 | } 75 | -------------------------------------------------------------------------------- /pkg/uint64set/uint64set.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
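The exact counts asserted in the tests above (10007 hits, 9993 misses) are not arbitrary: the first 10,000 probes are keys that were inserted and always test positive, while the extra 10,000 absent keys collide at roughly the 0.001 false-positive target, about 10 expected; because the underlying filter hashes deterministically, the observed count is a stable 7. A back-of-the-envelope check:

package main

import "fmt"

func main() {
	const (
		inserted = 10000 // keys added to the filter
		absent   = 10000 // extra keys probed but never added
		fpRate   = 0.001 // defaultFalsePositiveRate in bloomfilter.go
	)
	expectedHits := inserted + int(fpRate*absent)
	fmt.Println(expectedHits) // 10010 expected; the test observes 10007
}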
13 | 14 | package uint64set 15 | 16 | import ( 17 | "sync" 18 | 19 | cmap "github.com/orcaman/concurrent-map/v2" 20 | ) 21 | 22 | // Set 是线程安全的 uint64 集合 23 | type Set struct { 24 | set cmap.ConcurrentMap[uint64, struct{}] 25 | } 26 | 27 | // NewSet 生成并返回 Set 实例 28 | func NewSet() *Set { 29 | var shard uint64 = 32 30 | set := cmap.NewWithCustomShardingFunction[uint64, struct{}](func(key uint64) uint32 { 31 | return uint32(key % shard) 32 | }) 33 | return &Set{set: set} 34 | } 35 | 36 | // Count 返回集合元素个数 37 | func (s *Set) Count() int { 38 | return s.set.Count() 39 | } 40 | 41 | // Insert 新增元素 42 | func (s *Set) Insert(n uint64) { 43 | s.set.Set(n, struct{}{}) 44 | } 45 | 46 | // Remove 移除元素 47 | func (s *Set) Remove(n uint64) { 48 | s.set.Remove(n) 49 | } 50 | 51 | // Clear 清除所有元素 52 | func (s *Set) Clear() { 53 | s.set.Clear() 54 | } 55 | 56 | // Has 判断元素是否存在 57 | func (s *Set) Has(n uint64) bool { 58 | return s.set.Has(n) 59 | } 60 | 61 | // Keys 返回元素列表 62 | func (s *Set) Keys() []uint64 { 63 | return s.set.Keys() 64 | } 65 | 66 | // Merge 合并 *Set 67 | func (s *Set) Merge(set *Set) { 68 | if set == nil { 69 | return 70 | } 71 | for _, key := range set.set.Keys() { 72 | s.Insert(key) 73 | } 74 | } 75 | 76 | // Sets 管理着多个 *Set 示例 77 | type Sets struct { 78 | mut sync.RWMutex 79 | sets map[string]*Set 80 | } 81 | 82 | // NewSets 生成并返回 *Sets 实例 83 | func NewSets() *Sets { 84 | return &Sets{sets: make(map[string]*Set)} 85 | } 86 | 87 | // AsBloomFilter 将 Sets 转换为 BloomFilter 88 | func (ss *Sets) AsBloomFilter() BloomFilter { 89 | return NewBloomFilterFromSets(ss) 90 | } 91 | 92 | // HasKey 判断 Sets[name] 中是否存在 key 93 | func (ss *Sets) HasKey(name string, key uint64) bool { 94 | ss.mut.RLock() 95 | defer ss.mut.RUnlock() 96 | 97 | set, ok := ss.sets[name] 98 | if !ok { 99 | return false 100 | } 101 | return set.Has(key) 102 | } 103 | 104 | // Remove 删除指定 name 的 Set 实例 105 | func (ss *Sets) Remove(name string) { 106 | ss.mut.Lock() 107 | defer ss.mut.Unlock() 108 | 109 | delete(ss.sets, name) 110 | } 111 | 112 | // Count 返回指定 name 的 Set 元素个数 113 | func (ss *Sets) Count(name string) int { 114 | ss.mut.RLock() 115 | defer ss.mut.RUnlock() 116 | 117 | set, ok := ss.sets[name] 118 | if !ok { 119 | return 0 120 | } 121 | return set.Count() 122 | } 123 | 124 | // CountAll 返回 Sets 中所有元素个数 125 | func (ss *Sets) CountAll() int { 126 | ss.mut.RLock() 127 | defer ss.mut.RUnlock() 128 | 129 | var total int 130 | for _, set := range ss.sets { 131 | total += set.Count() 132 | } 133 | return total 134 | } 135 | 136 | // IterAllKeys 遍历所有的 key 并执行 fn 方法 137 | func (ss *Sets) IterAllKeys(fn func(k uint64)) { 138 | ss.mut.RLock() 139 | defer ss.mut.RUnlock() 140 | 141 | for _, set := range ss.sets { 142 | for _, k := range set.Keys() { 143 | fn(k) 144 | } 145 | } 146 | } 147 | 148 | // Update 更新 Set 149 | func (ss *Sets) Update(name string, set *Set) { 150 | ss.mut.Lock() 151 | defer ss.mut.Unlock() 152 | 153 | ss.sets[name] = set 154 | } 155 | 156 | // Get 获取 Set 157 | func (ss *Sets) Get(name string) *Set { 158 | ss.mut.RLock() 159 | defer ss.mut.RUnlock() 160 | 161 | return ss.sets[name] 162 | } 163 | 164 | // GetOrCreate 获取或创建 Set 实例 165 | func (ss *Sets) GetOrCreate(name string) *Set { 166 | var set *Set 167 | ss.mut.RLock() 168 | set = ss.sets[name] 169 | ss.mut.RUnlock() 170 | 171 | if set != nil { 172 | return set 173 | } 174 | 175 | ss.mut.Lock() 176 | defer ss.mut.Unlock() 177 | 178 | if set = ss.sets[name]; set != nil { 179 | return set 180 | } 181 | 182 | set = NewSet() 183 | ss.sets[name] = set 184 | 
return set 185 | } 186 | -------------------------------------------------------------------------------- /pkg/wait/wait.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | package wait 15 | 16 | import ( 17 | "context" 18 | "sync/atomic" 19 | "time" 20 | 21 | "github.com/chenjiandongx/grogudb/pkg/rescue" 22 | ) 23 | 24 | // Until 持续运行直至 ctx cancel 25 | func Until(ctx context.Context, f func()) { 26 | UntilPeriod(ctx, f, 0) 27 | } 28 | 29 | // UntilPeriod 持续运行直至 ctx cancel 30 | // 每次 quit 等待 period 31 | func UntilPeriod(ctx context.Context, f func(), period time.Duration) { 32 | for { 33 | select { 34 | case <-ctx.Done(): 35 | return 36 | default: 37 | } 38 | 39 | func() { 40 | defer rescue.HandleCrash() 41 | f() 42 | }() 43 | time.Sleep(period) 44 | } 45 | } 46 | 47 | // Waiting 实现类似 sync.WaitGroup 功能 不过允许等待指定 n 个信号量 48 | type Waiting struct { 49 | n atomic.Int64 50 | } 51 | 52 | // Inc 增加信号量 53 | func (w *Waiting) Inc() { 54 | w.n.Add(1) 55 | } 56 | 57 | // Dec 较少信号量 58 | func (w *Waiting) Dec() { 59 | w.n.Add(-1) 60 | } 61 | 62 | // Until 等待 n 个信号量 63 | func (w *Waiting) Until(n int) { 64 | for { 65 | if w.n.Load() == int64(n) { 66 | break 67 | } 68 | time.Sleep(time.Millisecond * 10) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /segment.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
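A small sketch of how the wait helpers above compose: UntilPeriod keeps re-running a function (recovering from panics) until the context is cancelled, and Waiting lets a caller block until the expected number of goroutines has checked in. Only the exported API shown in wait.go is used; the worker body and timings are illustrative.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/chenjiandongx/grogudb/pkg/wait"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())

	var w wait.Waiting
	ticks := 0

	go func() {
		w.Inc() // signal that the worker has started
		wait.UntilPeriod(ctx, func() { ticks++ }, 10*time.Millisecond)
		w.Dec() // signal that the worker has exited
	}()

	w.Until(1)                        // wait for the worker to start
	time.Sleep(50 * time.Millisecond) // let it run a few periods
	cancel()
	w.Until(0) // wait for the worker to exit

	fmt.Println("ticks observed:", ticks)
}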
13 | 14 | package grogudb 15 | 16 | import ( 17 | "bytes" 18 | "io" 19 | "os" 20 | "sort" 21 | "sync" 22 | "sync/atomic" 23 | "time" 24 | 25 | "go.uber.org/multierr" 26 | 27 | "github.com/chenjiandongx/grogudb/pkg/codec" 28 | "github.com/chenjiandongx/grogudb/pkg/fsx" 29 | "github.com/chenjiandongx/grogudb/pkg/slice" 30 | "github.com/chenjiandongx/grogudb/pkg/uint64set" 31 | ) 32 | 33 | // memSegCallback memory segment 回调函数列表 34 | type memSegCallback struct { 35 | onRemove func(h uint64) 36 | onInsert func(h uint64) 37 | onExist func(h uint64) bool 38 | onBytes func(n int64) 39 | } 40 | 41 | // memorySegment 代表着内存 segment 即类似 LSM-Tree 中的 level0 42 | // 43 | // 所有数据优先写入到 memory segment 再定期 flush 到磁盘 44 | type memorySegment struct { 45 | name string 46 | mut sync.RWMutex 47 | keySet *uint64set.Set 48 | cb memSegCallback 49 | clearAt int64 50 | dataBytes *slice.Slice 51 | keysBytes *slice.Slice 52 | } 53 | 54 | func newMemorySegment(name string, cb memSegCallback) *memorySegment { 55 | return &memorySegment{ 56 | name: name, 57 | dataBytes: slice.New(), 58 | keysBytes: slice.New(), 59 | keySet: uint64set.NewSet(), 60 | cb: cb, 61 | } 62 | } 63 | 64 | // Len 返回 dataBytes 长度 65 | func (ms *memorySegment) Len() int { 66 | return ms.dataBytes.Len() 67 | } 68 | 69 | // Flush 冻结 ByteSlice 同时将 keys 置空 70 | func (ms *memorySegment) Flush() ([]byte, []byte, *uint64set.Set) { 71 | ms.mut.Lock() 72 | defer ms.mut.Unlock() 73 | 74 | keySet := ms.keySet 75 | ms.keySet = uint64set.NewSet() 76 | 77 | return ms.dataBytes.FrozenReverse(), ms.keysBytes.Frozen(), keySet 78 | } 79 | 80 | // Clear 清空 keys 81 | func (ms *memorySegment) Clear() { 82 | ms.mut.Lock() 83 | defer ms.mut.Unlock() 84 | 85 | sid := slice.ID{Flag: uint8(codec.FlagTombstone)} 86 | encodedKey := codec.EncodeKeyEntity(codec.FlagTombstone, 0, 0) 87 | ms.keysBytes.Append(sid, encodedKey) 88 | 89 | encodedRecord := codec.EncodeRecord(codec.FlagTombstone, nil, nil) 90 | ms.dataBytes.Append(sid, encodedRecord) 91 | ms.keySet.Clear() 92 | 93 | nano := time.Now().UnixNano() 94 | ms.clearAt = nano 95 | } 96 | 97 | func (ms *memorySegment) append(op operation, key, val []byte) { 98 | h := codec.HashKey(key) 99 | exist := ms.cb.onExist(h) 100 | if (op == opPutIf && exist) || (op == opDel && !exist) { 101 | return 102 | } 103 | 104 | flag := codec.FlagPut 105 | if op == opDel { 106 | flag = codec.FlagDel 107 | } 108 | 109 | encodedRecord := codec.EncodeRecord(flag, key, val) 110 | l := int64(len(encodedRecord)) 111 | 112 | encodedKey := codec.EncodeKeyEntity(flag, h, uint32(l)) 113 | 114 | sid := slice.ID{Flag: uint8(flag), Hash: h} 115 | var delta int 116 | 117 | ms.mut.Lock() 118 | switch op { 119 | case opPut: 120 | ms.keySet.Insert(h) 121 | ms.cb.onInsert(h) 122 | delta = ms.dataBytes.Append(sid, encodedRecord) 123 | ms.keysBytes.Append(sid, encodedKey) 124 | 125 | case opPutIf: 126 | if !ms.cb.onExist(h) { 127 | ms.keySet.Insert(h) 128 | ms.cb.onInsert(h) 129 | delta = ms.dataBytes.Append(sid, encodedRecord) 130 | ms.keysBytes.Append(sid, encodedKey) 131 | } 132 | 133 | case opDel: 134 | if ms.cb.onExist(h) { 135 | ms.keySet.Remove(h) 136 | ms.cb.onRemove(h) 137 | delta = ms.dataBytes.Append(sid, encodedRecord) 138 | ms.keysBytes.Append(sid, encodedKey) 139 | } 140 | } 141 | ms.mut.Unlock() 142 | ms.cb.onBytes(int64(delta)) 143 | } 144 | 145 | // PutIf 当 Key 不存在的时候设置 Key/Value Key 存在时不做操作 146 | func (ms *memorySegment) PutIf(key, val []byte) { 147 | ms.append(opPutIf, key, val) 148 | } 149 | 150 | // Put 新增 Key/Value 记录 151 | func (ms 
*memorySegment) Put(key, val []byte) { 152 | ms.append(opPut, key, val) 153 | } 154 | 155 | // Del 删除指定 Key 156 | func (ms *memorySegment) Del(key []byte) { 157 | ms.append(opDel, key, nil) 158 | } 159 | 160 | // Range 遍历每个 Key 并执行 fn 方法 161 | // pass 用于判断是否跳出 range 循环 162 | func (ms *memorySegment) Range(ifCopy bool, fn func(key, val Bytes), pass codec.PassFunc) { 163 | ms.mut.RLock() 164 | defer ms.mut.RUnlock() 165 | 166 | rangeFn := ms.dataBytes.ForEach 167 | if ifCopy { 168 | rangeFn = ms.dataBytes.CopyForEach 169 | } 170 | 171 | rangeFn(func(b []byte) bool { 172 | flag, key, val, err := codec.DecodeRecord(b) 173 | if err != nil { 174 | return true 175 | } 176 | 177 | if flag == codec.FlagTombstone { 178 | return true 179 | } 180 | if pass(flag, codec.HashKey(key)) { 181 | fn(key, val) 182 | } 183 | return false 184 | }) 185 | } 186 | 187 | // Get 返回指定 Key 对应的 Value 188 | func (ms *memorySegment) Get(key []byte) ([]byte, bool) { 189 | ms.mut.RLock() 190 | defer ms.mut.RUnlock() 191 | 192 | h := codec.HashKey(key) 193 | if !ms.keySet.Has(h) { 194 | return nil, false 195 | } 196 | 197 | var value []byte 198 | var found bool 199 | ms.dataBytes.ForEach(func(b []byte) bool { 200 | flag, k, v, err := codec.DecodeRecord(b) 201 | if err != nil { 202 | return true 203 | } 204 | 205 | if flag == codec.FlagTombstone { 206 | return true 207 | } 208 | if flag != codec.FlagDel && bytes.Equal(k, key) { 209 | value = v 210 | found = true 211 | return true 212 | } 213 | return false 214 | }) 215 | 216 | return value, found 217 | } 218 | 219 | const initRef = 1 220 | 221 | // diskSegment 磁盘分段 持有一个顺序写入的磁盘块的所有数据信息 222 | // 223 | // diskSegment 采用引用计数的方式来提高并发读写能力 即 ref == 0 时 gc 线程会负责将其清理 224 | // 每次有读操作时 ref++ 操作完毕后 ref-- 225 | type diskSegment struct { 226 | seqID int64 227 | ref int64 228 | size int 229 | path string 230 | dataCfd *fsx.CacheFD 231 | keysCfd *fsx.CacheFD 232 | bf uint64set.BloomFilter 233 | pos codec.BucketPos 234 | clearAtFn func(string) int64 235 | } 236 | 237 | type diskSegments []*diskSegment 238 | 239 | // OrderIncrement 正序排序 disk segment list 240 | func (dss diskSegments) OrderIncrement() { 241 | sort.Slice(dss, func(i, j int) bool { 242 | return dss[i].seqID < dss[j].seqID 243 | }) 244 | } 245 | 246 | // OrderDecrement 倒序排序 disk segment list 247 | func (dss diskSegments) OrderDecrement() { 248 | sort.Slice(dss, func(i, j int) bool { 249 | return dss[i].seqID > dss[j].seqID 250 | }) 251 | } 252 | 253 | func newDiskSegment(seqID int64, size int, path string, bf uint64set.BloomFilter, meta codec.BucketPos) *diskSegment { 254 | return &diskSegment{ 255 | ref: initRef, 256 | seqID: seqID, 257 | size: size, 258 | path: path, 259 | bf: bf, 260 | pos: meta, 261 | } 262 | } 263 | 264 | func (ds *diskSegment) loadRef() int64 { 265 | return atomic.LoadInt64(&ds.ref) 266 | } 267 | 268 | func (ds *diskSegment) incRef() { 269 | atomic.AddInt64(&ds.ref, 1) 270 | } 271 | 272 | func (ds *diskSegment) decRef() { 273 | if n := atomic.AddInt64(&ds.ref, -1); n < 0 { 274 | panic("BUG: ref must not be negative") 275 | } 276 | } 277 | 278 | // Install 装载 disk segment 移除 tmp 文件并生成 fd 缓存 279 | func (ds *diskSegment) Install() error { 280 | var errs []error 281 | 282 | keysFile := fsx.KeysFilename(ds.seqID, ds.path) 283 | keysTemp := fsx.KeysTmpFilename(ds.seqID, ds.path) 284 | if err := os.Rename(keysTemp, keysFile); err != nil { 285 | errs = append(errs, err) 286 | } 287 | 288 | dataFile := fsx.DataFilename(ds.seqID, ds.path) 289 | dataTemp := fsx.DataTmpFilename(ds.seqID, ds.path) 290 | if err := 
os.Rename(dataTemp, dataFile); err != nil { 291 | errs = append(errs, err) 292 | } 293 | 294 | var err error 295 | if ds.dataCfd == nil { 296 | if ds.dataCfd, err = fsx.NewCacheFD(dataFile, dataMaxFdHold); err != nil { 297 | errs = append(errs, err) 298 | } 299 | } 300 | if ds.keysCfd == nil { 301 | if ds.keysCfd, err = fsx.NewCacheFD(keysFile, keysMaxFdHold); err != nil { 302 | errs = append(errs, err) 303 | } 304 | } 305 | return multierr.Combine(errs...) 306 | } 307 | 308 | // Close 关闭持有的 fd 309 | func (ds *diskSegment) Close() error { 310 | return ds.dataCfd.Close() 311 | } 312 | 313 | // Remove 删除磁盘文件 314 | func (ds *diskSegment) Remove() error { 315 | return multierr.Combine( 316 | os.Remove(fsx.DataFilename(ds.seqID, ds.path)), 317 | os.Remove(fsx.KeysFilename(ds.seqID, ds.path)), 318 | ) 319 | } 320 | 321 | // Get 返回指定 Key 对应的 Value 322 | func (ds *diskSegment) Get(name string, key []byte) ([]byte, error) { 323 | ds.incRef() 324 | defer ds.decRef() 325 | 326 | // bucket 执行过 clear 不再检索 327 | if ds.clearAtFn(name) > ds.seqID { 328 | return nil, nil 329 | } 330 | 331 | // 该 seg 种不存在制定 bucket 332 | positions, ok := ds.pos.Record[name] 333 | if !ok { 334 | return nil, nil 335 | } 336 | 337 | // bloomfilter 判定 338 | h := codec.HashKey(key) 339 | if !ds.bf.Test(h) { 340 | return nil, nil 341 | } 342 | 343 | excepted := key 344 | reader, err := ds.dataCfd.FileDesc() 345 | if err != nil { 346 | return nil, err 347 | } 348 | defer ds.dataCfd.Reuse(reader) 349 | 350 | b := make([]byte, positions.MaxRange()) 351 | var value []byte 352 | var found bool 353 | 354 | for _, position := range positions { 355 | l := position.End - position.Start 356 | n, err := reader.ReadAt(b[:l], int64(position.Start)) 357 | if err != nil { 358 | return nil, err 359 | } 360 | if n != int(l) { 361 | return nil, codec.ErrReadPartial 362 | } 363 | 364 | encoded, err := codec.VerifyTailChecksum(b[:n]) 365 | if err != nil { 366 | return nil, err 367 | } 368 | 369 | rg := codec.NewRecordRanger(bytes.NewReader(encoded)) 370 | err = rg.Range(func(flag codec.Flag, key, val []byte, n int) bool { 371 | if bytes.Equal(excepted, key) { 372 | value = val 373 | found = true 374 | } 375 | return found 376 | }) 377 | 378 | if err != nil { 379 | return nil, err 380 | } 381 | 382 | if found { 383 | break 384 | } 385 | } 386 | return value, nil 387 | } 388 | 389 | // Range 遍历每个 Key 并执行 fn 方法 390 | // pass 用于判断是否跳出 range 循环 391 | func (ds *diskSegment) Range(name string, visitFn codec.RecordVisitFunc, passFn codec.PassFunc) error { 392 | ds.incRef() 393 | defer ds.decRef() 394 | 395 | if ds.clearAtFn(name) > ds.seqID { 396 | return nil 397 | } 398 | positions, ok := ds.pos.Record[name] 399 | if !ok { 400 | return nil 401 | } 402 | 403 | reader, err := ds.dataCfd.FileDesc() 404 | if err != nil { 405 | return err 406 | } 407 | defer ds.dataCfd.Reuse(reader) 408 | 409 | b := make([]byte, positions.MaxRange()) 410 | for _, position := range positions { 411 | l := position.End - position.Start 412 | n, err := reader.ReadAt(b[:l], int64(position.Start)) 413 | if err != nil { 414 | return err 415 | } 416 | if n != int(l) { 417 | return codec.ErrReadPartial 418 | } 419 | 420 | encoded, err := codec.VerifyTailChecksum(b[:n]) 421 | if err != nil { 422 | return err 423 | } 424 | 425 | rg := codec.NewRecordRanger(bytes.NewReader(encoded)) 426 | err = rg.Range(func(flag codec.Flag, key, val []byte, n int) bool { 427 | if passFn(flag, codec.HashKey(key)) { 428 | visitFn(flag, key, val, n) 429 | } 430 | return false 431 | }) 432 | 433 | if err != 
nil { 434 | return err 435 | } 436 | } 437 | 438 | return nil 439 | } 440 | 441 | // rangeKeys 只有在 Compact 过程中使用到 需要遍历每一个 key 442 | func (ds *diskSegment) rangeKeys(name string, visitFn codec.KeyVisitFunc) error { 443 | ds.incRef() 444 | defer ds.decRef() 445 | 446 | positions, ok := ds.pos.KeyItem[name] 447 | if !ok { 448 | return nil 449 | } 450 | 451 | reader, err := ds.keysCfd.FileDesc() 452 | if err != nil { 453 | return err 454 | } 455 | defer ds.keysCfd.Reuse(reader) 456 | 457 | b := make([]byte, positions.MaxRange()) 458 | for _, position := range positions { 459 | l := position.End - position.Start 460 | n, err := reader.ReadAt(b[:l], int64(position.Start)) 461 | if err != nil { 462 | if err == io.EOF { 463 | break 464 | } 465 | return err 466 | } 467 | if n != int(l) { 468 | return codec.ErrReadPartial 469 | } 470 | 471 | encoded, err := codec.VerifyTailChecksum(b[:n]) 472 | if err != nil { 473 | return err 474 | } 475 | 476 | rg := codec.NewKeysRanger(bytes.NewReader(encoded)) 477 | err = rg.Range(func(flag codec.Flag, h uint64, n uint32) { 478 | visitFn(flag, h, n) 479 | }) 480 | if err != nil { 481 | return err 482 | } 483 | } 484 | 485 | return nil 486 | } 487 | 488 | type iterReleaser struct { 489 | segs []*diskSegment 490 | } 491 | 492 | func newDiskSegmentVersion(segs []*diskSegment) *iterReleaser { 493 | return &iterReleaser{segs: segs} 494 | } 495 | 496 | func (dsv *iterReleaser) iter(fn func(*diskSegment) bool) { 497 | for _, seg := range dsv.segs { 498 | quit := fn(seg) 499 | if quit { 500 | return 501 | } 502 | } 503 | } 504 | 505 | func (dsv *iterReleaser) release() { 506 | for _, seg := range dsv.segs { 507 | seg.decRef() 508 | } 509 | } 510 | -------------------------------------------------------------------------------- /status.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The grogudb Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
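The comments on diskSegment above describe a read-side contract: every reader bumps the segment's reference count before touching its files, releases it when done, and a GC pass may reclaim the segment only once the count has dropped back to zero. The sketch below is not grogudb's internal API; it is a standalone illustration of that incRef/decRef discipline using a hypothetical seg type.

package main

import (
	"fmt"
	"sync/atomic"
)

// seg is a stand-in for a reference-counted disk segment (hypothetical, for illustration only).
type seg struct {
	id  int64
	ref atomic.Int64
}

func newSeg(id int64) *seg {
	s := &seg{id: id}
	s.ref.Store(1) // initRef: the owning segment list holds one reference
	return s
}

func (s *seg) acquire() { s.ref.Add(1) }

func (s *seg) release() {
	if s.ref.Add(-1) < 0 {
		panic("BUG: ref must not be negative")
	}
}

// read models Get/Range: hold a reference for the duration of the read.
func (s *seg) read(fn func()) {
	s.acquire()
	defer s.release()
	fn()
}

func main() {
	s := newSeg(1)

	s.read(func() { fmt.Println("reading segment", s.id) })

	// A compaction/GC pass drops the owner reference; the segment becomes
	// collectable only when no reader still holds it (ref falls to 0).
	s.release()
	fmt.Println("collectable:", s.ref.Load() == 0)
}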
13 | 14 | package grogudb 15 | 16 | import ( 17 | "sync/atomic" 18 | ) 19 | 20 | type statistics struct { 21 | put atomic.Uint64 22 | del atomic.Uint64 23 | gc atomic.Uint64 24 | clear atomic.Uint64 25 | compact atomic.Uint64 26 | rotate atomic.Uint64 27 | memHoldBytes atomic.Int64 28 | memSegment atomic.Int64 29 | diskSegment atomic.Int64 30 | } 31 | 32 | // Stats DB 操作统计 33 | type Stats struct { 34 | Put uint64 35 | Del uint64 36 | Gc uint64 37 | Clear uint64 38 | Compact uint64 39 | Rotate uint64 40 | MemHoldBytes int64 41 | MemSegment int64 42 | DiskSegment int64 43 | } 44 | 45 | func (s *statistics) Load() Stats { 46 | return Stats{ 47 | Put: s.put.Load(), 48 | Del: s.del.Load(), 49 | Gc: s.gc.Load(), 50 | Clear: s.clear.Load(), 51 | Compact: s.compact.Load(), 52 | Rotate: s.rotate.Load(), 53 | MemHoldBytes: s.memHoldBytes.Load(), 54 | MemSegment: s.memSegment.Load(), 55 | DiskSegment: s.diskSegment.Load(), 56 | } 57 | } 58 | 59 | type state struct { 60 | rotating atomic.Bool 61 | compacting atomic.Bool 62 | gc atomic.Bool 63 | closed atomic.Bool 64 | } 65 | 66 | // State DB 状态描述 67 | type State struct { 68 | Rotating bool 69 | Compacting bool 70 | Gc bool 71 | Closed bool 72 | } 73 | 74 | func (s *state) Load() State { 75 | return State{ 76 | Rotating: s.rotating.Load(), 77 | Compacting: s.compacting.Load(), 78 | Gc: s.gc.Load(), 79 | Closed: s.closed.Load(), 80 | } 81 | } 82 | --------------------------------------------------------------------------------
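To close, a standalone illustration of the snapshot pattern status.go uses (not grogudb's internal type): counters are atomic fields updated lock-free from any goroutine, and Load copies each field into an exported value struct. The copy is atomic per field, not across fields, so a snapshot taken while writers are active may mix counts from slightly different instants.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// counters mirrors the shape of the statistics struct above (illustration only).
type counters struct {
	put atomic.Uint64
	del atomic.Uint64
}

type snapshot struct {
	Put, Del uint64
}

// Load reads each field atomically and returns a plain-value snapshot.
func (c *counters) Load() snapshot {
	return snapshot{Put: c.put.Load(), Del: c.del.Load()}
}

func main() {
	var c counters
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				c.put.Add(1)
				c.del.Add(1)
			}
		}()
	}
	wg.Wait()
	fmt.Printf("%+v\n", c.Load()) // {Put:4000 Del:4000}
}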