├── .github └── workflows │ └── build.yml ├── LICENSE ├── README.md ├── encoding.go ├── encoding_test.go ├── fbptree.go ├── fbptree_test.go ├── go.mod ├── iterator.go ├── pager.go ├── pager_test.go ├── records.go ├── records_test.go └── storage.go /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | 15 | - name: Set up Go 16 | uses: actions/setup-go@v2 17 | with: 18 | go-version: 1.16 19 | 20 | - name: Build 21 | run: go build -v . 22 | 23 | - name: Test 24 | run: go test -v . -race -cover -coverprofile=coverage.txt 25 | 26 | - name: Upload coverage report 27 | uses: codecov/codecov-action@v2 28 | with: 29 | file: ./coverage.txt 30 | fail_ci_if_error: true 31 | verbose: true -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Dmytro Krasun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 21 | OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **fbp**tree 2 | 3 | [![Build](https://github.com/krasun/fbptree/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/krasun/fbptree/actions/workflows/build.yml) 4 | [![codecov](https://codecov.io/gh/krasun/fbptree/branch/main/graph/badge.svg?token=8NU6LR4FQD)](https://codecov.io/gh/krasun/fbptree) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/krasun/fbptree)](https://goreportcard.com/report/github.com/krasun/fbptree) 6 | [![GoDoc](https://godoc.org/https://godoc.org/github.com/krasun/fbptree?status.svg)](https://godoc.org/github.com/krasun/fbptree) 7 | 8 | `fbptree` is a persistent key-value storage engine based on [B+ tree](https://en.wikipedia.org/wiki/B%2B_tree) with byte-slice keys and values. 9 | 10 | ## Installation 11 | 12 | To install, run: 13 | 14 | ``` 15 | go get github.com/krasun/fbptree 16 | ``` 17 | 18 | ## Usage 19 | 20 | An example of usage: 21 | 22 | ```go 23 | package fbptree_test 24 | 25 | import ( 26 | "fmt" 27 | "io/ioutil" 28 | "os" 29 | "path" 30 | 31 | "github.com/krasun/fbptree" 32 | ) 33 | 34 | func Example() { 35 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 36 | if err != nil { 37 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 38 | } 39 | defer func() { 40 | if err := os.RemoveAll(dbDir); err != nil { 41 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 42 | } 43 | }() 44 | 45 | dbPath := path.Join(dbDir, "sample.data") 46 | 47 | tree, err := fbptree.Open(dbPath, fbptree.PageSize(4096), fbptree.Order(500)) 48 | if err != nil { 49 | panic(fmt.Errorf("failed to open B+ tree %s: %w", dbDir, err)) 50 | } 51 | 52 | _, _, err = tree.Put([]byte("Hi!"), []byte("Hello world, B+ tree!")) 53 | if err != nil { 54 | panic(fmt.Errorf("failed to put: %w", err)) 55 | } 56 | 57 | _, _, err = tree.Put([]byte("Does it override key?"), []byte("No!")) 58 | if err != nil { 59 | panic(fmt.Errorf("failed to put: %w", err)) 60 | } 61 | 62 | _, _, err = tree.Put([]byte("Does it override key?"), []byte("Yes, absolutely! The key has been overridden.")) 63 | if err != nil { 64 | panic(fmt.Errorf("failed to put: %w", err)) 65 | } 66 | 67 | if err := tree.Close(); err != nil { 68 | panic(fmt.Errorf("failed to close: %w", err)) 69 | } 70 | 71 | tree, err = fbptree.Open(dbPath, fbptree.PageSize(4096), fbptree.Order(500)) 72 | if err != nil { 73 | panic(fmt.Errorf("failed to open B+ tree %s: %w", dbDir, err)) 74 | } 75 | 76 | value, ok, err := tree.Get([]byte("Hi!")) 77 | if err != nil { 78 | panic(fmt.Errorf("failed to get value: %w", err)) 79 | } 80 | if !ok { 81 | fmt.Println("failed to find value") 82 | } 83 | 84 | fmt.Println(string(value)) 85 | 86 | value, ok, err = tree.Get([]byte("Does it override key?")) 87 | if err != nil { 88 | panic(fmt.Errorf("failed to get value: %w", err)) 89 | } 90 | if !ok { 91 | fmt.Println("failed to find value") 92 | } 93 | 94 | if err := tree.Close(); err != nil { 95 | panic(fmt.Errorf("failed to close: %w", err)) 96 | } 97 | 98 | fmt.Println(string(value)) 99 | // Output: 100 | // Hello world, B+ tree! 101 | // Yes, absolutely! The key has been overridden. 102 | } 103 | ``` 104 | 105 | ## Tests 106 | 107 | Run tests with: 108 | 109 | ``` 110 | $ go test . 111 | ok github.com/krasun/fbptree 0.679s 112 | ``` 113 | 114 | ## License 115 | 116 | **fbp**tree is released under [the MIT license](LICENSE). 117 | -------------------------------------------------------------------------------- /encoding.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "encoding/binary" 5 | ) 6 | 7 | func decodeUint16(data []byte) uint16 { 8 | return binary.BigEndian.Uint16(data) 9 | } 10 | 11 | func encodeUint16(v uint16) []byte { 12 | var data [2]byte 13 | binary.BigEndian.PutUint16(data[:], v) 14 | 15 | return data[:] 16 | } 17 | 18 | func decodeUint32(data []byte) uint32 { 19 | return binary.BigEndian.Uint32(data) 20 | } 21 | 22 | func encodeUint32(v uint32) []byte { 23 | var data [4]byte 24 | binary.BigEndian.PutUint32(data[:], v) 25 | 26 | return data[:] 27 | } 28 | 29 | func encodeBool(v bool) []byte { 30 | var data [1]byte 31 | if v { 32 | data[0] = 1 33 | } 34 | 35 | return data[:] 36 | } 37 | 38 | func decodeBool(data []byte) bool { 39 | return data[0] == 1 40 | } 41 | 42 | func encodeNode(node *node) []byte { 43 | data := make([]byte, 0) 44 | 45 | data = append(data, encodeUint32(node.id)...) 46 | data = append(data, encodeUint32(node.parentID)...) 47 | data = append(data, encodeBool(node.leaf)...) 48 | data = append(data, encodeUint16(uint16(node.keyNum))...) 49 | data = append(data, encodeUint16(uint16(len(node.keys)))...) 50 | 51 | for _, key := range node.keys { 52 | if key == nil { 53 | break 54 | } 55 | 56 | data = append(data, encodeUint16(uint16(len(key)))...) 57 | data = append(data, key...) 58 | } 59 | 60 | pointerNum := node.keyNum 61 | if !node.leaf { 62 | pointerNum += 1 63 | } 64 | 65 | data = append(data, encodeUint16(uint16(pointerNum))...) 66 | data = append(data, encodeUint16(uint16(len(node.pointers)))...) 67 | for i := 0; i < pointerNum; i++ { 68 | pointer := node.pointers[i] 69 | if pointer.isNodeID() { 70 | data = append(data, 0) 71 | data = append(data, encodeUint32(pointer.asNodeID())...) 72 | } else if pointer.isValue() { 73 | data = append(data, 1) 74 | data = append(data, encodeUint16(uint16(len(pointer.asValue())))...) 75 | data = append(data, pointer.asValue()...) 76 | } 77 | } 78 | 79 | var nextID uint32 80 | if node.next() != nil { 81 | nextID = node.next().asNodeID() 82 | data = append(data, encodeBool(true)...) 83 | data = append(data, encodeUint32(nextID)...) 84 | } else { 85 | data = append(data, encodeBool(false)...) 86 | data = append(data, 0) 87 | } 88 | 89 | return data 90 | } 91 | 92 | func decodeNode(data []byte) (*node, error) { 93 | position := 0 94 | nodeID := decodeUint32(data[position : position+4]) 95 | position += 4 96 | parentID := decodeUint32(data[position : position+4]) 97 | position += 4 98 | leaf := decodeBool(data[position : position+1]) 99 | position += 1 100 | 101 | keyNum := decodeUint16(data[position : position+2]) 102 | position += 2 103 | keyLen := int(decodeUint16(data[position : position+2])) 104 | position += 2 105 | keys := make([][]byte, keyLen) 106 | for k := 0; k < int(keyNum); k++ { 107 | keySize := int(decodeUint16(data[position : position+2])) 108 | position += 2 109 | 110 | key := data[position : position+keySize] 111 | keys[k] = key 112 | position += keySize 113 | } 114 | 115 | pointerNum := decodeUint16(data[position : position+2]) 116 | position += 2 117 | pointerLen := int(decodeUint16(data[position : position+2])) 118 | position += 2 119 | pointers := make([]*pointer, pointerLen) 120 | for p := 0; p < int(pointerNum); p++ { 121 | if data[position] == 0 { 122 | position += 1 123 | // nodeID 124 | 125 | nodeID := decodeUint32(data[position : position+4]) 126 | position += 4 127 | 128 | pointers[p] = &pointer{nodeID} 129 | } else if data[position] == 1 { 130 | position += 1 131 | // value 132 | 133 | valueSize := int(decodeUint16(data[position : position+2])) 134 | position += 2 135 | 136 | value := data[position : position+valueSize] 137 | position += valueSize 138 | 139 | pointers[p] = &pointer{value} 140 | } 141 | } 142 | 143 | n := &node{ 144 | nodeID, 145 | leaf, 146 | parentID, 147 | keys, 148 | int(keyNum), 149 | pointers, 150 | } 151 | 152 | hasNextID := decodeBool(data[position : position+1]) 153 | position += 1 154 | 155 | if hasNextID { 156 | nextID := decodeUint32(data[position : position+4]) 157 | n.setNext(&pointer{nextID}) 158 | } 159 | 160 | return n, nil 161 | } 162 | 163 | func encodeTreeMetadata(metadata *treeMetadata) []byte { 164 | var data [14]byte 165 | 166 | copy(data[0:2], encodeUint16(metadata.order)) 167 | copy(data[2:6], encodeUint32(metadata.rootID)) 168 | copy(data[6:10], encodeUint32(metadata.leftmostID)) 169 | copy(data[10:14], encodeUint32(metadata.size)) 170 | 171 | return data[:] 172 | } 173 | 174 | func decodeTreeMetadata(data []byte) (*treeMetadata, error) { 175 | return &treeMetadata{ 176 | order: decodeUint16(data[0:2]), 177 | rootID: decodeUint32(data[2:6]), 178 | leftmostID: decodeUint32(data[6:10]), 179 | size: decodeUint32(data[10:14]), 180 | }, nil 181 | } 182 | -------------------------------------------------------------------------------- /encoding_test.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestEncodeDecodeTreeMetadata(t *testing.T) { 9 | treeMetadata := &treeMetadata{ 10 | order: 542, 11 | rootID: 12, 12 | leftmostID: 42, 13 | } 14 | 15 | decoded, err := decodeTreeMetadata(encodeTreeMetadata(treeMetadata)) 16 | if err != nil { 17 | t.Fatalf("failed to decode node: %s", err) 18 | } 19 | 20 | if !reflect.DeepEqual(treeMetadata, decoded) { 21 | t.Fatalf("tree metadata %v != decoded tree metadata %v", treeMetadata, decoded) 22 | } 23 | } 24 | 25 | func TestEncodeDecodeNode(t *testing.T) { 26 | node := &node{ 27 | id: 42, 28 | leaf: true, 29 | parentID: 75, 30 | keys: [][]byte{ 31 | {1, 2, 3, 4}, 32 | {5, 6, 7, 8}, 33 | nil, 34 | }, 35 | pointers: []*pointer{ 36 | {uint32(42)}, 37 | {[]byte{1, 2, 3, 4}}, 38 | {uint32(17)}, 39 | }, 40 | keyNum: 2, 41 | } 42 | 43 | decoded, err := decodeNode(encodeNode(node)) 44 | if err != nil { 45 | t.Fatalf("failed to decode node: %s", err) 46 | } 47 | 48 | if !reflect.DeepEqual(node, decoded) { 49 | t.Fatalf("node %v != decoded node %v", node, decoded) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /fbptree.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "math" 7 | "os" 8 | ) 9 | 10 | const defaultOrder = 500 11 | 12 | const maxKeySize = math.MaxUint16 13 | const maxValueSize = math.MaxUint16 14 | const maxTreeSize = math.MaxUint32 15 | 16 | // the limit for the B+ tree order, must be less than math.MaxUint16 17 | const maxOrder = 1000 18 | 19 | // FBPTree represents B+ tree store in the file. 20 | type FBPTree struct { 21 | order int 22 | 23 | storage *storage 24 | 25 | metadata *treeMetadata 26 | 27 | // minimum allowed number of keys in the tree ceil(order/2)-1 28 | minKeyNum int 29 | } 30 | 31 | type treeMetadata struct { 32 | order uint16 33 | rootID uint32 34 | leftmostID uint32 35 | size uint32 36 | } 37 | 38 | type config struct { 39 | order uint16 40 | pageSize uint16 41 | } 42 | 43 | // Order option specifies the order of the B+ tree, between 3 and 1000. 44 | func Order(order int) func(*config) error { 45 | return func(c *config) error { 46 | if order < 3 { 47 | return fmt.Errorf("order must be >= 3") 48 | } 49 | 50 | if order > maxOrder { 51 | return fmt.Errorf("order must be <= %d", maxOrder) 52 | } 53 | 54 | c.order = uint16(order) 55 | 56 | return nil 57 | } 58 | } 59 | 60 | // PageSize option specifies the page size for the B+ tree file. 61 | func PageSize(pageSize int) func(*config) error { 62 | return func(t *config) error { 63 | if pageSize < minPageSize { 64 | return fmt.Errorf("page size must be greater than or equal to %d", minPageSize) 65 | } 66 | 67 | if pageSize > maxPageSize { 68 | return fmt.Errorf("page size must not be greater than %d", maxPageSize) 69 | } 70 | 71 | t.pageSize = uint16(pageSize) 72 | 73 | return nil 74 | } 75 | } 76 | 77 | // Open opens an existent B+ tree or creates a new file. 78 | func Open(path string, options ...func(*config) error) (*FBPTree, error) { 79 | defaultPageSize := os.Getpagesize() 80 | if defaultPageSize > maxPageSize { 81 | defaultPageSize = maxPageSize 82 | } 83 | 84 | cfg := &config{pageSize: uint16(defaultPageSize), order: defaultOrder} 85 | for _, option := range options { 86 | err := option(cfg) 87 | if err != nil { 88 | return nil, err 89 | } 90 | } 91 | 92 | storage, err := newStorage(path, cfg.pageSize) 93 | if err != nil { 94 | return nil, fmt.Errorf("failed to initialize the storage: %w", err) 95 | } 96 | 97 | metadata, err := storage.loadMetadata() 98 | if err != nil { 99 | return nil, fmt.Errorf("failed to load the metadata: %w", err) 100 | } 101 | 102 | if metadata != nil && metadata.order != cfg.order { 103 | return nil, fmt.Errorf("the tree was created with %d order, but the new order value is given %d", metadata.order, cfg.order) 104 | } 105 | 106 | minKeyNum := ceil(int(cfg.order), 2) - 1 107 | 108 | return &FBPTree{storage: storage, order: int(cfg.order), metadata: metadata, minKeyNum: minKeyNum}, nil 109 | } 110 | 111 | // node reprents a node in the B+ tree. 112 | type node struct { 113 | id uint32 114 | 115 | // true for leaf node and root without children 116 | // and false for internal node and root with children 117 | leaf bool 118 | parentID uint32 119 | 120 | // Real key number is stored under the keyNum. 121 | keys [][]byte 122 | keyNum int 123 | 124 | // Leaf nodes can point to the value, 125 | // but internal nodes point to the nodes. So 126 | // to save space, we can use pointers abstraction. 127 | // The size of pointers equals to the size of keys + 1. 128 | // In the leaf node, the last pointers element points to 129 | // the next leaf node. 130 | pointers []*pointer 131 | } 132 | 133 | // pointer wraps the node or the value. 134 | type pointer struct { 135 | value interface{} 136 | } 137 | 138 | func (p *pointer) isNodeID() bool { 139 | _, ok := p.value.(uint32) 140 | 141 | return ok 142 | } 143 | 144 | func (p *pointer) isValue() bool { 145 | _, ok := p.value.([]byte) 146 | 147 | return ok 148 | } 149 | 150 | // asNode returns a node ID. 151 | func (p *pointer) asNodeID() uint32 { 152 | return p.value.(uint32) 153 | } 154 | 155 | // asValue returns a asValue instance of the value. 156 | func (p *pointer) asValue() []byte { 157 | return p.value.([]byte) 158 | } 159 | 160 | // Get return the value by the key. Returns true if the 161 | // key exists. 162 | func (t *FBPTree) Get(key []byte) ([]byte, bool, error) { 163 | if t.metadata == nil { 164 | return nil, false, nil 165 | } 166 | 167 | leaf, err := t.findLeaf(key) 168 | if err != nil { 169 | return nil, false, fmt.Errorf("failed to find leaf: %w", err) 170 | } 171 | 172 | for i := 0; i < leaf.keyNum; i++ { 173 | if compare(key, leaf.keys[i]) == 0 { 174 | return leaf.pointers[i].asValue(), true, nil 175 | } 176 | } 177 | 178 | return nil, false, nil 179 | } 180 | 181 | // findLeaf finds a leaf that might contain the key. 182 | func (t *FBPTree) findLeaf(key []byte) (*node, error) { 183 | root, err := t.storage.loadNodeByID(t.metadata.rootID) 184 | if err != nil { 185 | return nil, fmt.Errorf("failed to load root node: %w", err) 186 | } 187 | 188 | current := root 189 | for !current.leaf { 190 | position := 0 191 | for position < current.keyNum { 192 | if less(key, current.keys[position]) { 193 | break 194 | } else { 195 | position += 1 196 | } 197 | } 198 | 199 | nextID := current.pointers[position].asNodeID() 200 | nextNode, err := t.storage.loadNodeByID(nextID) 201 | if err != nil { 202 | return nil, fmt.Errorf("failed to load next node %d: %w", nextID, err) 203 | } 204 | 205 | current = nextNode 206 | } 207 | 208 | return current, nil 209 | } 210 | 211 | // Put puts the key and the value into the tree. Returns true if the 212 | // key already exists and anyway overwrites it. 213 | func (t *FBPTree) Put(key, value []byte) ([]byte, bool, error) { 214 | if len(key) > maxKeySize { 215 | return nil, false, fmt.Errorf("maximum key size is %d, but received %d", maxKeySize, len(key)) 216 | } else if len(value) > maxValueSize { 217 | return nil, false, fmt.Errorf("maximum value size is %d, but received %d", maxValueSize, len(value)) 218 | } else if t.metadata != nil && t.metadata.size >= maxTreeSize { 219 | return nil, false, fmt.Errorf("maximum tree size is reached: %d", maxTreeSize) 220 | } 221 | 222 | if t.metadata == nil { 223 | err := t.initializeRoot(key, value) 224 | if err != nil { 225 | return nil, false, fmt.Errorf("failed to initialize root: %w", err) 226 | } 227 | 228 | return nil, false, nil 229 | } 230 | 231 | leaf, err := t.findLeaf(key) 232 | if err != nil { 233 | return nil, false, fmt.Errorf("failed to find leaf: %w", err) 234 | } 235 | 236 | oldValue, overridden, err := t.putIntoLeaf(leaf, key, value) 237 | if err != nil { 238 | return nil, false, fmt.Errorf("failed to put into the leaf %d: %w", leaf.id, err) 239 | } 240 | 241 | return oldValue, overridden, nil 242 | } 243 | 244 | // initializeRoot initializes root in the empty tree. 245 | func (t *FBPTree) initializeRoot(key, value []byte) error { 246 | newNodeID, err := t.storage.newNode() 247 | if err != nil { 248 | return fmt.Errorf("failed to instantiate new node: %w", err) 249 | } 250 | 251 | // new tree 252 | keys := make([][]byte, t.order-1) 253 | keys[0] = copyBytes(key) 254 | 255 | pointers := make([]*pointer, t.order) 256 | pointers[0] = &pointer{value} 257 | 258 | rootNode := &node{ 259 | id: newNodeID, 260 | leaf: true, 261 | parentID: 0, 262 | keys: keys, 263 | keyNum: 1, 264 | pointers: pointers, 265 | } 266 | 267 | err = t.storage.updateNodeByID(newNodeID, rootNode) 268 | if err != nil { 269 | return fmt.Errorf("failed to store root node: %w", err) 270 | } 271 | 272 | err = t.updateMetadata(newNodeID, newNodeID, 1) 273 | if err != nil { 274 | return fmt.Errorf("failed to update metadata: %w", err) 275 | } 276 | 277 | return nil 278 | } 279 | 280 | func (t *FBPTree) updateMetadata(rootID, leftmostID, size uint32) error { 281 | if t.metadata == nil { 282 | // initialization 283 | t.metadata = new(treeMetadata) 284 | t.metadata.order = uint16(t.order) 285 | } 286 | 287 | t.metadata.rootID = rootID 288 | t.metadata.leftmostID = leftmostID 289 | t.metadata.size = size 290 | 291 | err := t.storage.updateMetadata(t.metadata) 292 | if err != nil { 293 | return fmt.Errorf("failed to store metadata: %w", err) 294 | } 295 | 296 | return nil 297 | } 298 | 299 | func (t *FBPTree) deleteMetadata() error { 300 | t.metadata = nil 301 | 302 | err := t.storage.deleteMetadata() 303 | if err != nil { 304 | return fmt.Errorf("failed to delete metadata: %w", err) 305 | } 306 | 307 | return nil 308 | } 309 | 310 | // putIntoNewRoot creates new root, inserts left and right entries 311 | // and updates the tree. 312 | func (t *FBPTree) putIntoNewRoot(key []byte, l, r *node) error { 313 | newNodeID, err := t.storage.newNode() 314 | if err != nil { 315 | return fmt.Errorf("failed to instantiate new node: %w", err) 316 | } 317 | 318 | // new root 319 | newRoot := &node{ 320 | id: newNodeID, 321 | leaf: false, 322 | keys: make([][]byte, t.order-1), 323 | pointers: make([]*pointer, t.order), 324 | parentID: 0, 325 | keyNum: 1, // we are going to put just one key 326 | } 327 | 328 | newRoot.keys[0] = key 329 | newRoot.pointers[0] = &pointer{l.id} 330 | newRoot.pointers[1] = &pointer{r.id} 331 | 332 | err = t.storage.updateNodeByID(newNodeID, newRoot) 333 | if err != nil { 334 | return fmt.Errorf("failed to update node by ID %d: %w", newNodeID, err) 335 | } 336 | 337 | l.parentID = newNodeID 338 | err = t.storage.updateNodeByID(l.id, l) 339 | if err != nil { 340 | return fmt.Errorf("failed to update left node %d: %w", l.id, err) 341 | } 342 | 343 | r.parentID = newNodeID 344 | err = t.storage.updateNodeByID(r.id, r) 345 | if err != nil { 346 | return fmt.Errorf("failed to update right node %d: %w", r.id, err) 347 | } 348 | 349 | err = t.updateRootID(newNodeID) 350 | if err != nil { 351 | return fmt.Errorf("failed to update root ID to %d: %w", newNodeID, err) 352 | } 353 | 354 | return nil 355 | } 356 | 357 | func (t *FBPTree) updateSize(size uint32) error { 358 | return t.updateMetadata(t.metadata.rootID, t.metadata.leftmostID, size) 359 | } 360 | 361 | func (t *FBPTree) updateRootID(rootID uint32) error { 362 | var leftmostID uint32 363 | if t.metadata != nil { 364 | leftmostID = t.metadata.leftmostID 365 | } 366 | 367 | return t.updateMetadata(rootID, leftmostID, t.metadata.size) 368 | } 369 | 370 | // putIntoLeaf puts key and value into the node. 371 | func (t *FBPTree) putIntoLeaf(n *node, k, v []byte) ([]byte, bool, error) { 372 | insertPos := 0 373 | for insertPos < n.keyNum { 374 | cmp := compare(k, n.keys[insertPos]) 375 | if cmp == 0 { 376 | // found the exact match 377 | oldValue := n.pointers[insertPos].overrideValue(v) 378 | 379 | err := t.storage.updateNodeByID(n.id, n) 380 | if err != nil { 381 | return nil, false, fmt.Errorf("failed to update the node %d: %w", n.id, err) 382 | } 383 | 384 | return oldValue, true, nil 385 | } else if cmp < 0 { 386 | // found the insert position, 387 | // can break the loop 388 | break 389 | } 390 | 391 | insertPos++ 392 | } 393 | 394 | // if we did not find the same key, we continue to insert 395 | if n.keyNum < len(n.keys) { 396 | // if the node is not full 397 | 398 | // shift the keys and pointers 399 | for j := n.keyNum; j > insertPos; j-- { 400 | n.keys[j] = n.keys[j-1] 401 | n.pointers[j] = n.pointers[j-1] 402 | } 403 | 404 | // insert 405 | n.keys[insertPos] = k 406 | n.pointers[insertPos] = &pointer{v} 407 | // and update key num 408 | n.keyNum++ 409 | 410 | err := t.storage.updateNodeByID(n.id, n) 411 | if err != nil { 412 | return nil, false, fmt.Errorf("failed to update the node %d: %w", n.id, err) 413 | } 414 | } else { 415 | // if the node is full 416 | var parentNode *node 417 | if n.parentID != 0 { 418 | p, err := t.storage.loadNodeByID(n.parentID) 419 | if err != nil { 420 | return nil, false, fmt.Errorf("failed to load parent node %d: %w", n.parentID, err) 421 | } 422 | 423 | parentNode = p 424 | } 425 | parent := parentNode 426 | 427 | left, right, err := t.putIntoLeafAndSplit(n, insertPos, k, v) 428 | if err != nil { 429 | return nil, false, fmt.Errorf("failed to split the node %d: %w", n.id, err) 430 | } 431 | 432 | insertKey := right.keys[0] 433 | for left != nil && right != nil { 434 | if parent == nil { 435 | t.putIntoNewRoot(insertKey, left, right) 436 | break 437 | } else { 438 | if parent.keyNum < len(parent.keys) { 439 | // if the parent is not full 440 | err := t.putIntoParent(parent, insertKey, left, right) 441 | if err != nil { 442 | return nil, false, fmt.Errorf("failed to put into the parent: %w", err) 443 | } 444 | 445 | break 446 | } else { 447 | // if the parent is full 448 | // split parent, insert into the new parent and continue 449 | insertKey, left, right, err = t.putIntoParentAndSplit(parent, insertKey, left, right) 450 | if err != nil { 451 | return nil, false, fmt.Errorf("failed to put into the parent and split: %w", err) 452 | } 453 | } 454 | } 455 | 456 | var parentParentNode *node 457 | if parent.parentID != 0 { 458 | p, err := t.storage.loadNodeByID(parent.parentID) 459 | if err != nil { 460 | return nil, false, fmt.Errorf("failed to load the parent of the parent node %d: %w", parent.parentID, err) 461 | } 462 | 463 | parentParentNode = p 464 | } 465 | 466 | parent = parentParentNode 467 | } 468 | } 469 | 470 | t.metadata.size++ 471 | err := t.updateSize(t.metadata.size) 472 | if err != nil { 473 | return nil, false, fmt.Errorf("failed to update the tree size to %d: %w", t.metadata.size, err) 474 | } 475 | 476 | return nil, false, nil 477 | } 478 | 479 | // putIntoParent puts the node into the parent and update the left and the right 480 | // pointers. 481 | func (t *FBPTree) putIntoParent(parent *node, k []byte, l, r *node) error { 482 | insertPos := 0 483 | for insertPos < parent.keyNum { 484 | if less(k, parent.keys[insertPos]) { 485 | // found the insert position, 486 | // can break the loop 487 | break 488 | } 489 | 490 | insertPos++ 491 | } 492 | 493 | // shift the keys and pointers 494 | parent.pointers[parent.keyNum+1] = parent.pointers[parent.keyNum] 495 | for j := parent.keyNum; j > insertPos; j-- { 496 | parent.keys[j] = parent.keys[j-1] 497 | parent.pointers[j] = parent.pointers[j-1] 498 | } 499 | 500 | // insert 501 | parent.keys[insertPos] = k 502 | parent.pointers[insertPos] = &pointer{l.id} 503 | parent.pointers[insertPos+1] = &pointer{r.id} 504 | // and update key num 505 | parent.keyNum++ 506 | 507 | err := t.storage.updateNodeByID(parent.id, parent) 508 | if err != nil { 509 | return fmt.Errorf("failed to update parent node %d: %w", parent.id, err) 510 | } 511 | 512 | l.parentID = parent.id 513 | err = t.storage.updateNodeByID(l.id, l) 514 | if err != nil { 515 | return fmt.Errorf("failed to update left node %d: %w", l.id, err) 516 | } 517 | 518 | r.parentID = parent.id 519 | err = t.storage.updateNodeByID(r.id, r) 520 | if err != nil { 521 | return fmt.Errorf("failed to update right node %d: %w", r.id, err) 522 | } 523 | 524 | return nil 525 | } 526 | 527 | // putIntoParentAndSplit puts key in the parent, splits the node and returns the splitten 528 | // nodes with all fixed pointers. 529 | func (t *FBPTree) putIntoParentAndSplit(parent *node, k []byte, l, r *node) ([]byte, *node, *node, error) { 530 | insertPos := 0 531 | for insertPos < parent.keyNum { 532 | if less(k, parent.keys[insertPos]) { 533 | // found the insert position, 534 | // can break the loop 535 | break 536 | } 537 | 538 | insertPos++ 539 | } 540 | 541 | newNodeID, err := t.storage.newNode() 542 | if err != nil { 543 | return nil, nil, nil, fmt.Errorf("failed to instantiate new node: %w", err) 544 | } 545 | 546 | right := &node{ 547 | id: newNodeID, 548 | leaf: false, 549 | keys: make([][]byte, t.order-1), 550 | keyNum: 0, 551 | pointers: make([]*pointer, t.order), 552 | parentID: 0, 553 | } 554 | 555 | middlePos := ceil(len(parent.keys), 2) 556 | copyFrom := middlePos 557 | if insertPos < middlePos { 558 | // since the elements will be shifted 559 | copyFrom -= 1 560 | } 561 | 562 | copy(right.keys, parent.keys[copyFrom:]) 563 | copy(right.pointers, parent.pointers[copyFrom:]) 564 | // copy the pointer to the next node 565 | right.keyNum = len(right.keys) - copyFrom 566 | 567 | // the given node becomes the left node 568 | left := parent 569 | left.keyNum = copyFrom 570 | // clean up keys and pointers 571 | for i := len(left.keys) - 1; i >= copyFrom; i-- { 572 | left.keys[i] = nil 573 | left.pointers[i+1] = nil 574 | } 575 | 576 | insertNode := left 577 | if insertPos >= middlePos { 578 | insertNode = right 579 | insertPos -= middlePos 580 | } 581 | 582 | // insert into the node 583 | insertNode.pointers[insertNode.keyNum+1] = insertNode.pointers[insertNode.keyNum] 584 | for j := insertNode.keyNum; j > insertPos; j-- { 585 | insertNode.keys[j] = insertNode.keys[j-1] 586 | insertNode.pointers[j] = insertNode.pointers[j-1] 587 | } 588 | 589 | insertNode.keys[insertPos] = k 590 | insertNode.pointers[insertPos] = &pointer{l.id} 591 | insertNode.pointers[insertPos+1] = &pointer{r.id} 592 | insertNode.keyNum++ 593 | 594 | l.parentID = insertNode.id 595 | err = t.storage.updateNodeByID(l.id, l) 596 | if err != nil { 597 | return nil, nil, nil, fmt.Errorf("failed to update the l node %d: %w", parent.id, err) 598 | } 599 | 600 | r.parentID = insertNode.id 601 | err = t.storage.updateNodeByID(r.id, r) 602 | if err != nil { 603 | return nil, nil, nil, fmt.Errorf("failed to update the r node %d: %w", right.id, err) 604 | } 605 | 606 | middleKey := right.keys[0] 607 | 608 | // clean up the right node 609 | for i := 1; i < right.keyNum; i++ { 610 | right.keys[i-1] = right.keys[i] 611 | right.pointers[i-1] = right.pointers[i] 612 | } 613 | right.pointers[right.keyNum-1] = right.pointers[right.keyNum] 614 | right.pointers[right.keyNum] = nil 615 | right.keys[right.keyNum-1] = nil 616 | right.keyNum-- 617 | 618 | // update the pointers 619 | for _, p := range left.pointers { 620 | if p != nil { 621 | nodeID := p.asNodeID() 622 | node, err := t.storage.loadNodeByID(nodeID) 623 | if err != nil { 624 | return nil, nil, nil, fmt.Errorf("failed to load the node by id %d: %w", nodeID, err) 625 | } 626 | 627 | if node.parentID == left.id { 628 | continue 629 | } 630 | 631 | node.parentID = left.id 632 | err = t.storage.updateNodeByID(node.id, node) 633 | if err != nil { 634 | return nil, nil, nil, fmt.Errorf("failed to update node by id %d: %w", node.id, err) 635 | } 636 | } 637 | } 638 | 639 | for _, p := range right.pointers { 640 | if p != nil { 641 | nodeID := p.asNodeID() 642 | node, err := t.storage.loadNodeByID(nodeID) 643 | if err != nil { 644 | return nil, nil, nil, fmt.Errorf("failed to load the node by id %d: %w", nodeID, err) 645 | } 646 | 647 | if node.parentID == right.id { 648 | continue 649 | } 650 | 651 | node.parentID = right.id 652 | err = t.storage.updateNodeByID(node.id, node) 653 | if err != nil { 654 | return nil, nil, nil, fmt.Errorf("failed to update node by id %d: %w", node.id, err) 655 | } 656 | } 657 | } 658 | 659 | err = t.storage.updateNodeByID(parent.id, parent) 660 | if err != nil { 661 | return nil, nil, nil, fmt.Errorf("failed to update the right node %d: %w", right.id, err) 662 | } 663 | err = t.storage.updateNodeByID(right.id, right) 664 | if err != nil { 665 | return nil, nil, nil, fmt.Errorf("failed to update the right node %d: %w", right.id, err) 666 | } 667 | err = t.storage.updateNodeByID(left.id, left) 668 | if err != nil { 669 | return nil, nil, nil, fmt.Errorf("failed to update the left node %d: %w", left.id, err) 670 | } 671 | 672 | return middleKey, left, right, nil 673 | } 674 | 675 | // putIntoLeafAndSplit puts the new key and splits the node into the left and right nodes 676 | // and returns the left and the right nodes. 677 | // The given node becomes left node. 678 | // The tree is right-biased, so the first element in 679 | // the right node is the "middle" key. 680 | func (t *FBPTree) putIntoLeafAndSplit(n *node, insertPos int, k, v []byte) (*node, *node, error) { 681 | newNodeID, err := t.storage.newNode() 682 | if err != nil { 683 | return nil, nil, fmt.Errorf("failed to instantiate new node: %w", err) 684 | } 685 | 686 | right := &node{ 687 | id: newNodeID, 688 | leaf: true, 689 | keys: make([][]byte, t.order-1), 690 | keyNum: 0, 691 | pointers: make([]*pointer, t.order), 692 | parentID: 0, 693 | } 694 | 695 | middlePos := ceil(len(n.keys), 2) 696 | copyFrom := middlePos 697 | if insertPos < middlePos { 698 | // since the elements will be shifted 699 | copyFrom -= 1 700 | } 701 | 702 | copy(right.keys, n.keys[copyFrom:]) 703 | copy(right.pointers, n.pointers[copyFrom:len(n.pointers)-1]) 704 | 705 | // copy the pointer to the next node 706 | right.setNext(n.next()) 707 | right.keyNum = len(right.keys) - copyFrom 708 | 709 | // the given node becomes the left node 710 | left := n 711 | left.parentID = 0 712 | left.keyNum = copyFrom 713 | // clean up keys and pointers 714 | for i := len(left.keys) - 1; i >= copyFrom; i-- { 715 | left.keys[i] = nil 716 | left.pointers[i] = nil 717 | } 718 | left.setNext(&pointer{right.id}) 719 | 720 | insertNode := left 721 | if insertPos >= middlePos { 722 | insertNode = right 723 | // normalize insert position 724 | insertPos -= middlePos 725 | } 726 | 727 | // insert into the node 728 | insertNode.insertAt(insertPos, k, insertPos, &pointer{v}) 729 | 730 | err = t.storage.updateNodeByID(right.id, right) 731 | if err != nil { 732 | return nil, nil, fmt.Errorf("failed to update the right node %d: %w", right.id, err) 733 | } 734 | 735 | err = t.storage.updateNodeByID(left.id, left) 736 | if err != nil { 737 | return nil, nil, fmt.Errorf("failed to update the left node %d: %w", left.id, err) 738 | } 739 | 740 | return left, right, nil 741 | } 742 | 743 | // insertAt inserts the specified key and pointer at the specified position. 744 | // Only works with leaf nodes. 745 | func (n *node) insertAt(keyPosition int, key []byte, pointerPosition int, pointer *pointer) { 746 | for j := n.keyNum; j > keyPosition; j-- { 747 | n.keys[j] = n.keys[j-1] 748 | } 749 | 750 | pointerNum := n.keyNum 751 | if !n.leaf { 752 | pointerNum += 1 753 | } 754 | 755 | for j := pointerNum; j > pointerPosition; j-- { 756 | n.pointers[j] = n.pointers[j-1] 757 | } 758 | 759 | n.keys[keyPosition] = key 760 | n.pointers[pointerPosition] = pointer 761 | n.keyNum++ 762 | } 763 | 764 | // overrideValue overrides the value 765 | func (p *pointer) overrideValue(newValue []byte) []byte { 766 | oldValue := p.value.([]byte) 767 | p.value = newValue 768 | 769 | return oldValue 770 | } 771 | 772 | // setNext sets the "next" pointer (the last pointer) to the next node. Only relevant 773 | // for the leaf nodes. 774 | func (n *node) setNext(p *pointer) { 775 | n.pointers[len(n.pointers)-1] = p 776 | } 777 | 778 | // next returns the pointer to the next leaf node. Only relevant 779 | // for the leaf nodes. 780 | func (n *node) next() *pointer { 781 | return n.pointers[len(n.pointers)-1] 782 | } 783 | 784 | // Delete deletes the value by the key. Returns true if the 785 | // key exists. 786 | func (t *FBPTree) Delete(key []byte) ([]byte, bool, error) { 787 | if t.metadata == nil { 788 | return nil, false, nil 789 | } 790 | 791 | leaf, err := t.findLeaf(key) 792 | if err != nil { 793 | return nil, false, fmt.Errorf("failed to find the leaf: %w", err) 794 | } 795 | 796 | value, deleted, err := t.deleteAtLeafAndRebalance(leaf, key) 797 | if err != nil { 798 | return nil, false, fmt.Errorf("failed to delete and rebalance: %w", err) 799 | } 800 | 801 | if !deleted { 802 | return nil, false, nil 803 | } 804 | 805 | if t.metadata != nil { 806 | t.metadata.size-- 807 | err = t.updateSize(t.metadata.size) 808 | if err != nil { 809 | return nil, false, fmt.Errorf("failed to update the tree size to %d: %w", t.metadata.size, err) 810 | } 811 | } 812 | 813 | return value, true, nil 814 | } 815 | 816 | // deleteAtLeafAndRebalance deletes the key from the given node and rebalances it. 817 | func (t *FBPTree) deleteAtLeafAndRebalance(n *node, key []byte) ([]byte, bool, error) { 818 | keyPos := n.keyPosition(key) 819 | if keyPos == -1 { 820 | return nil, false, nil 821 | } 822 | 823 | value := n.pointers[keyPos].asValue() 824 | n.deleteAt(keyPos, keyPos) 825 | err := t.storage.updateNodeByID(n.id, n) 826 | if err != nil { 827 | return nil, false, fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 828 | } 829 | 830 | if n.parentID == 0 { 831 | if n.keyNum == 0 { 832 | // remove the root (as leaf) 833 | err := t.storage.deleteNodeByID(n.id) 834 | if err != nil { 835 | return nil, false, fmt.Errorf("failed to delete the node by id %d: %w", n.id, err) 836 | } 837 | 838 | err = t.deleteMetadata() 839 | if err != nil { 840 | return nil, false, fmt.Errorf("failed to delete the metadata: %w", err) 841 | } 842 | } else { 843 | // update the root 844 | err := t.storage.updateNodeByID(n.id, n) 845 | if err != nil { 846 | return nil, false, fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 847 | } 848 | } 849 | 850 | return value, true, nil 851 | } 852 | 853 | if n.keyNum < t.minKeyNum { 854 | err := t.rebalanceFromLeafNode(n) 855 | if err != nil { 856 | return nil, false, fmt.Errorf("failed to rebalance from the leaf node: %w", err) 857 | } 858 | } 859 | 860 | err = t.removeFromIndex(key) 861 | if err != nil { 862 | return nil, false, fmt.Errorf("failed to remove the key from the index: %w", err) 863 | } 864 | 865 | return value, true, nil 866 | } 867 | 868 | // deleteAt deletes the entry at the position and shifts 869 | // the keys and the pointers. 870 | func (n *node) deleteAt(keyPosition int, pointerPosition int) { 871 | // shift the keys 872 | for j := keyPosition; j < n.keyNum-1; j++ { 873 | n.keys[j] = n.keys[j+1] 874 | } 875 | n.keys[n.keyNum-1] = nil 876 | 877 | pointerNum := n.keyNum 878 | if !n.leaf { 879 | pointerNum++ 880 | } 881 | // shift the pointers 882 | for j := pointerPosition; j < pointerNum-1; j++ { 883 | n.pointers[j] = n.pointers[j+1] 884 | } 885 | n.pointers[pointerNum-1] = nil 886 | 887 | n.keyNum-- 888 | } 889 | 890 | // removeFromIndex searches the key in the index (internal nodes and if finds it changes to 891 | // the leftmost key in the right subtree. 892 | func (t *FBPTree) removeFromIndex(key []byte) error { 893 | root, err := t.storage.loadNodeByID(t.metadata.rootID) 894 | if err != nil { 895 | return fmt.Errorf("failed to load the root node %d: %w", t.metadata.rootID, err) 896 | } 897 | 898 | current := root 899 | for !current.leaf { 900 | // until the leaf is reached 901 | 902 | position := 0 903 | for position < current.keyNum { 904 | cmp := compare(key, current.keys[position]) 905 | if cmp < 0 { 906 | break 907 | } else if cmp > 0 { 908 | position += 1 909 | } else if cmp == 0 { 910 | // the key is found in the index 911 | // take the right sub-tree and find the leftmost key 912 | // and update the key 913 | nodeID := current.pointers[position+1].asNodeID() 914 | leftmostKey, err := t.findLeftmostKey(nodeID) 915 | if err != nil { 916 | return fmt.Errorf("failed to find the leftmost key for %d: %w", nodeID, err) 917 | } 918 | current.keys[position] = leftmostKey 919 | 920 | err = t.storage.updateNodeByID(current.id, current) 921 | if err != nil { 922 | return fmt.Errorf("failed to update the node %d: %w", current.id, err) 923 | } 924 | } 925 | } 926 | 927 | nextNodeID := current.pointers[position].asNodeID() 928 | nextNode, err := t.storage.loadNodeByID(nextNodeID) 929 | if err != nil { 930 | return fmt.Errorf("failed to load the next node node %d: %w", nextNodeID, err) 931 | } 932 | 933 | current = nextNode 934 | } 935 | 936 | return nil 937 | } 938 | 939 | // findLeftmostKey returns the leftmost key for the node. 940 | func (t *FBPTree) findLeftmostKey(nodeID uint32) ([]byte, error) { 941 | node, err := t.storage.loadNodeByID(nodeID) 942 | if err != nil { 943 | return nil, fmt.Errorf("failed to load the node by id %d: %w", nodeID, err) 944 | } 945 | 946 | current := node 947 | for !current.leaf { 948 | nextID := current.pointers[0].asNodeID() 949 | nextNode, err := t.storage.loadNodeByID(nextID) 950 | if err != nil { 951 | return nil, fmt.Errorf("failed to load the next node by id %d: %w", nextID, err) 952 | } 953 | 954 | current = nextNode 955 | } 956 | 957 | return current.keys[0], nil 958 | } 959 | 960 | // keyPosition returns the position of the key, but -1 if it is not present. 961 | func (n *node) keyPosition(key []byte) int { 962 | keyPosition := 0 963 | for ; keyPosition < n.keyNum; keyPosition++ { 964 | if compare(key, n.keys[keyPosition]) == 0 { 965 | return keyPosition 966 | } 967 | } 968 | 969 | return -1 970 | } 971 | 972 | // rebalanceFromLeafNode starts rebalancing the tree from the leaf node. 973 | func (t *FBPTree) rebalanceFromLeafNode(n *node) error { 974 | parent, err := t.storage.loadNodeByID(n.parentID) 975 | if err != nil { 976 | return fmt.Errorf("failed to load the parent node by id %d: %w", n.parentID, err) 977 | } 978 | 979 | pointerPositionInParent := parent.pointerPositionOf(n) 980 | keyPositionInParent := pointerPositionInParent - 1 981 | if keyPositionInParent < 0 { 982 | keyPositionInParent = 0 983 | } 984 | 985 | // trying to borrow for the leaf from any sibling 986 | 987 | // check left sibling 988 | leftSiblingPosition := pointerPositionInParent - 1 989 | var leftSibling *node 990 | if leftSiblingPosition >= 0 { 991 | // if left sibling exists 992 | leftSiblingID := parent.pointers[leftSiblingPosition].asNodeID() 993 | ls, err := t.storage.loadNodeByID(leftSiblingID) 994 | if err != nil { 995 | return fmt.Errorf("failed to load the left sibling node by id %d: %w", leftSiblingID, err) 996 | } 997 | leftSibling = ls 998 | 999 | if leftSibling.keyNum > t.minKeyNum { 1000 | // borrow from the left sibling 1001 | n.insertAt(0, leftSibling.keys[leftSibling.keyNum-1], 0, leftSibling.pointers[leftSibling.keyNum-1]) 1002 | leftSibling.deleteAt(leftSibling.keyNum-1, leftSibling.keyNum-1) 1003 | parent.keys[keyPositionInParent] = n.keys[0] 1004 | 1005 | err = t.storage.updateNodeByID(n.id, n) 1006 | if err != nil { 1007 | return fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 1008 | } 1009 | err = t.storage.updateNodeByID(leftSibling.id, leftSibling) 1010 | if err != nil { 1011 | return fmt.Errorf("failed to update the left sibling node by id %d: %w", leftSibling.id, err) 1012 | } 1013 | err = t.storage.updateNodeByID(parent.id, parent) 1014 | if err != nil { 1015 | return fmt.Errorf("failed to update the parent node by id %d: %w", parent.id, err) 1016 | } 1017 | 1018 | return nil 1019 | } 1020 | } 1021 | 1022 | rightSiblingPosition := pointerPositionInParent + 1 1023 | var rightSibling *node 1024 | if rightSiblingPosition < parent.keyNum+1 { 1025 | // if right sibling exists 1026 | rightSiblingID := parent.pointers[rightSiblingPosition].asNodeID() 1027 | rs, err := t.storage.loadNodeByID(rightSiblingID) 1028 | if err != nil { 1029 | return fmt.Errorf("failed to load the right sibling node by id %d: %w", rightSiblingID, err) 1030 | } 1031 | rightSibling = rs 1032 | 1033 | if rightSibling.keyNum > t.minKeyNum { 1034 | // borrow from the right sibling 1035 | n.append(rightSibling.keys[0], rightSibling.pointers[0], t.storage) 1036 | rightSibling.deleteAt(0, 0) 1037 | parent.keys[rightSiblingPosition-1] = rightSibling.keys[0] 1038 | 1039 | err := t.storage.updateNodeByID(n.id, n) 1040 | if err != nil { 1041 | return fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 1042 | } 1043 | err = t.storage.updateNodeByID(rightSibling.id, rightSibling) 1044 | if err != nil { 1045 | return fmt.Errorf("failed to update the right sibling node by id %d: %w", rightSibling.id, err) 1046 | } 1047 | err = t.storage.updateNodeByID(parent.id, parent) 1048 | if err != nil { 1049 | return fmt.Errorf("failed to update the parent node by id %d: %w", parent.id, err) 1050 | } 1051 | 1052 | return nil 1053 | } 1054 | } 1055 | 1056 | // if we could borrow, we would borrow 1057 | // so, we just take the first available sibling and merge with it 1058 | // and the remove the navigator key and appropriate pointer 1059 | 1060 | // merge nodes and remove the "navigator" key and appropriate 1061 | if leftSibling != nil { 1062 | err := leftSibling.copyFromRight(n, t.storage) 1063 | if err != nil { 1064 | return fmt.Errorf("failed to copy to the left sibling %d: %w", rightSibling.id, err) 1065 | } 1066 | parent.deleteAt(keyPositionInParent, pointerPositionInParent) 1067 | 1068 | err = t.storage.updateNodeByID(leftSibling.id, leftSibling) 1069 | if err != nil { 1070 | return fmt.Errorf("failed to update the left sibling node by id %d: %w", parent.id, err) 1071 | } 1072 | err = t.storage.updateNodeByID(parent.id, parent) 1073 | if err != nil { 1074 | return fmt.Errorf("failed to update the parent node by id %d: %w", parent.id, err) 1075 | } 1076 | } else if rightSibling != nil { 1077 | err := n.copyFromRight(rightSibling, t.storage) 1078 | if err != nil { 1079 | return fmt.Errorf("failed to copy from the right sibling %d: %w", rightSibling.id, err) 1080 | } 1081 | parent.deleteAt(keyPositionInParent, rightSiblingPosition) 1082 | 1083 | err = t.storage.updateNodeByID(n.id, n) 1084 | if err != nil { 1085 | return fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 1086 | } 1087 | err = t.storage.updateNodeByID(parent.id, parent) 1088 | if err != nil { 1089 | return fmt.Errorf("failed to update the parent node by id %d: %w", parent.id, err) 1090 | } 1091 | } 1092 | 1093 | err = t.rebalanceParentNode(parent) 1094 | if err != nil { 1095 | return fmt.Errorf("failed to rebalance the parent node %d: %w", parent.id, err) 1096 | } 1097 | 1098 | return nil 1099 | } 1100 | 1101 | // rebalanceInternalNode rebalances the tree from the internal node. It expects that 1102 | func (t *FBPTree) rebalanceParentNode(n *node) error { 1103 | if n.parentID == 0 { 1104 | if n.keyNum == 0 { 1105 | rootID := n.pointers[0].asNodeID() 1106 | 1107 | root, err := t.storage.loadNodeByID(rootID) 1108 | if err != nil { 1109 | return fmt.Errorf("failed to load the root node by id %d", rootID) 1110 | } 1111 | 1112 | root.parentID = 0 1113 | 1114 | err = t.storage.updateNodeByID(rootID, root) 1115 | if err != nil { 1116 | return fmt.Errorf("failed to update the root node %d: %w", rootID, err) 1117 | } 1118 | 1119 | err = t.updateRootID(rootID) 1120 | if err != nil { 1121 | return fmt.Errorf("failed to update the root id to %d", rootID) 1122 | } 1123 | } 1124 | 1125 | return nil 1126 | } 1127 | 1128 | if n.keyNum >= t.minKeyNum { 1129 | // balanced 1130 | return nil 1131 | } 1132 | 1133 | parent, err := t.storage.loadNodeByID(n.parentID) 1134 | if err != nil { 1135 | return fmt.Errorf("failed to load parent node %d: %w", n.parentID, err) 1136 | } 1137 | 1138 | pointerPositionInParent := parent.pointerPositionOf(n) 1139 | keyPositionInParent := pointerPositionInParent - 1 1140 | if keyPositionInParent < 0 { 1141 | keyPositionInParent = 0 1142 | } 1143 | 1144 | // trying to borrow for the internal node from any sibling 1145 | 1146 | // check left sibling 1147 | leftSiblingPosition := pointerPositionInParent - 1 1148 | var leftSibling *node 1149 | if leftSiblingPosition >= 0 { 1150 | leftSiblingID := parent.pointers[leftSiblingPosition].asNodeID() 1151 | // if left sibling exists 1152 | ls, err := t.storage.loadNodeByID(leftSiblingID) 1153 | if err != nil { 1154 | return fmt.Errorf("failed to load the left sibling %d: %w", leftSiblingID, err) 1155 | } 1156 | leftSibling = ls 1157 | 1158 | if leftSibling.keyNum > t.minKeyNum { 1159 | splitKey := parent.keys[keyPositionInParent] 1160 | 1161 | // borrow from the left sibling 1162 | childID := leftSibling.pointers[leftSibling.keyNum].asNodeID() 1163 | child, err := t.storage.loadNodeByID(childID) 1164 | if err != nil { 1165 | return fmt.Errorf("failed to load the child node %d for the left sibling %d: %w", childID, leftSiblingID, err) 1166 | } 1167 | 1168 | child.parentID = n.id 1169 | 1170 | err = t.storage.updateNodeByID(child.id, child) 1171 | if err != nil { 1172 | return fmt.Errorf("failed to update the child node %d for the left sibling %d: %w", childID, leftSiblingID, err) 1173 | } 1174 | 1175 | n.insertAt(0, splitKey, 0, leftSibling.pointers[leftSibling.keyNum]) 1176 | 1177 | parent.keys[keyPositionInParent] = leftSibling.keys[leftSibling.keyNum-1] 1178 | leftSibling.deleteAt(leftSibling.keyNum-1, leftSibling.keyNum) 1179 | 1180 | err = t.storage.updateNodeByID(n.id, n) 1181 | if err != nil { 1182 | return fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 1183 | } 1184 | 1185 | err = t.storage.updateNodeByID(parent.id, parent) 1186 | if err != nil { 1187 | return fmt.Errorf("failed to update the parent node %d: %w", parent.id, err) 1188 | } 1189 | err = t.storage.updateNodeByID(leftSibling.id, leftSibling) 1190 | if err != nil { 1191 | return fmt.Errorf("failed to update the left sibling %d: %w", leftSibling.id, err) 1192 | } 1193 | 1194 | return nil 1195 | } 1196 | } 1197 | 1198 | rightSiblingPosition := pointerPositionInParent + 1 1199 | var rightSibling *node 1200 | if rightSiblingPosition < parent.keyNum+1 { 1201 | // if right sibling exists 1202 | rightSiblingID := parent.pointers[rightSiblingPosition].asNodeID() 1203 | rs, err := t.storage.loadNodeByID(rightSiblingID) 1204 | if err != nil { 1205 | return fmt.Errorf("failed to load the right sibling id %d: %w", rightSiblingID, err) 1206 | } 1207 | rightSibling = rs 1208 | 1209 | if rightSibling.keyNum > t.minKeyNum { 1210 | splitKeyPosition := rightSiblingPosition - 1 1211 | splitKey := parent.keys[splitKeyPosition] 1212 | 1213 | // borrow from the right sibling 1214 | err := n.append(splitKey, rightSibling.pointers[0], t.storage) 1215 | if err != nil { 1216 | return fmt.Errorf("failed to append to node %d: %w", n.id, err) 1217 | } 1218 | 1219 | parent.keys[splitKeyPosition] = rightSibling.keys[0] 1220 | rightSibling.deleteAt(0, 0) 1221 | 1222 | err = t.storage.updateNodeByID(n.id, n) 1223 | if err != nil { 1224 | return fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 1225 | } 1226 | err = t.storage.updateNodeByID(parent.id, parent) 1227 | if err != nil { 1228 | return fmt.Errorf("failed to update the parent node %d: %w", parent.id, err) 1229 | } 1230 | err = t.storage.updateNodeByID(rightSibling.id, rightSibling) 1231 | if err != nil { 1232 | return fmt.Errorf("failed to update the right sibling %d: %w", rightSibling.id, err) 1233 | } 1234 | 1235 | return nil 1236 | } 1237 | } 1238 | 1239 | // if we could borrow, we would borrow 1240 | // so, we just take the first available sibling and merge with it 1241 | if leftSibling != nil { 1242 | splitKey := parent.keys[keyPositionInParent] 1243 | 1244 | // incorporate the split key from parent for the merging 1245 | leftSibling.keys[leftSibling.keyNum] = splitKey 1246 | leftSibling.keyNum++ 1247 | 1248 | err := leftSibling.copyFromRight(n, t.storage) 1249 | if err != nil { 1250 | return fmt.Errorf("failed to copy from to left sibling %d: %w", leftSibling.id, err) 1251 | } 1252 | err = t.storage.updateNodeByID(leftSibling.id, leftSibling) 1253 | if err != nil { 1254 | return fmt.Errorf("failed to update the left sibling by id %d: %w", leftSibling.id, err) 1255 | } 1256 | 1257 | parent.deleteAt(keyPositionInParent, pointerPositionInParent) 1258 | err = t.storage.updateNodeByID(parent.id, parent) 1259 | if err != nil { 1260 | return fmt.Errorf("failed to update the parent node %d: %w", parent.id, err) 1261 | } 1262 | } else if rightSibling != nil { 1263 | splitKey := parent.keys[keyPositionInParent] 1264 | 1265 | n.keys[n.keyNum] = splitKey 1266 | n.keyNum++ 1267 | 1268 | err = n.copyFromRight(rightSibling, t.storage) 1269 | if err != nil { 1270 | return fmt.Errorf("failed to copy from the right sibling %d: %w", rightSibling.id, err) 1271 | } 1272 | 1273 | err = t.storage.updateNodeByID(n.id, n) 1274 | if err != nil { 1275 | return fmt.Errorf("failed to update the node by id %d: %w", n.id, err) 1276 | } 1277 | 1278 | parent.deleteAt(keyPositionInParent, rightSiblingPosition) 1279 | err = t.storage.updateNodeByID(parent.id, parent) 1280 | if err != nil { 1281 | return fmt.Errorf("failed to update the parent node %d: %w", parent.id, err) 1282 | } 1283 | } 1284 | 1285 | err = t.rebalanceParentNode(parent) 1286 | if err != nil { 1287 | return fmt.Errorf("failed to rebalance the parent node %d: %w", parent.id, err) 1288 | } 1289 | 1290 | return nil 1291 | } 1292 | 1293 | // append apppends key and the pointer to the node 1294 | func (n *node) append(key []byte, p *pointer, storage *storage) error { 1295 | keyPosition := n.keyNum 1296 | pointerPosition := n.keyNum 1297 | if !n.leaf && n.pointers[pointerPosition] != nil { 1298 | pointerPosition++ 1299 | } 1300 | 1301 | n.keys[keyPosition] = key 1302 | n.pointers[pointerPosition] = p 1303 | n.keyNum++ 1304 | 1305 | if !n.leaf { 1306 | childID := p.asNodeID() 1307 | child, err := storage.loadNodeByID(childID) 1308 | if err != nil { 1309 | return fmt.Errorf("failed load the child node %d: %w", childID, err) 1310 | } 1311 | 1312 | child.parentID = n.id 1313 | 1314 | err = storage.updateNodeByID(childID, child) 1315 | if err != nil { 1316 | return fmt.Errorf("failed to update the child node %d: %w", childID, err) 1317 | } 1318 | } 1319 | 1320 | return nil 1321 | } 1322 | 1323 | // copyFromRight copies the keys and the pointer from the given node. 1324 | func (n *node) copyFromRight(from *node, storage *storage) error { 1325 | for i := 0; i < from.keyNum; i++ { 1326 | err := n.append(from.keys[i], from.pointers[i], storage) 1327 | if err != nil { 1328 | return fmt.Errorf("failed to append to %d: %w", n.id, err) 1329 | } 1330 | } 1331 | 1332 | if n.leaf { 1333 | n.setNext(from.next()) 1334 | 1335 | err := storage.updateNodeByID(n.id, n) 1336 | if err != nil { 1337 | return fmt.Errorf("failed to update the node %d: %w", n.id, err) 1338 | } 1339 | } else { 1340 | n.pointers[n.keyNum] = from.pointers[from.keyNum] 1341 | 1342 | childID := n.pointers[n.keyNum].asNodeID() 1343 | child, err := storage.loadNodeByID(childID) 1344 | if err != nil { 1345 | return fmt.Errorf("failed to load the child node %d: %w", childID, err) 1346 | } 1347 | 1348 | child.parentID = n.id 1349 | 1350 | err = storage.updateNodeByID(child.id, child) 1351 | if err != nil { 1352 | return fmt.Errorf("failed to update the parent for the child node %d: %w", childID, err) 1353 | } 1354 | } 1355 | 1356 | return nil 1357 | } 1358 | 1359 | // pointerPositionOf finds the pointer position of the given node. 1360 | // Returns -1 if it is not found. 1361 | func (n *node) pointerPositionOf(x *node) int { 1362 | for position, pointer := range n.pointers { 1363 | if pointer == nil { 1364 | // reached the end 1365 | break 1366 | } 1367 | 1368 | if pointer.asNodeID() == x.id { 1369 | return position 1370 | } 1371 | } 1372 | 1373 | // pointer not found 1374 | return -1 1375 | } 1376 | 1377 | // ForEach traverses tree in ascending key order. 1378 | func (t *FBPTree) ForEach(action func(key []byte, value []byte)) error { 1379 | it, err := t.Iterator() 1380 | if err != nil { 1381 | return fmt.Errorf("failed to initialize iterator: %w", err) 1382 | } 1383 | 1384 | for it := it; it.HasNext(); { 1385 | key, value, err := it.Next() 1386 | if err != nil { 1387 | return fmt.Errorf("failed to advance to the next element: %w", err) 1388 | } 1389 | 1390 | action(key, value) 1391 | } 1392 | 1393 | return nil 1394 | } 1395 | 1396 | // Size return the size of the tree. 1397 | func (t *FBPTree) Size() int { 1398 | if t.metadata != nil { 1399 | return int(t.metadata.size) 1400 | } 1401 | 1402 | return 0 1403 | } 1404 | 1405 | // Close closes the tree and free the underlying resources. 1406 | func (t *FBPTree) Close() error { 1407 | if err := t.storage.close(); err != nil { 1408 | return fmt.Errorf("failed to close the storage: %w", err) 1409 | } 1410 | 1411 | return nil 1412 | } 1413 | 1414 | func compare(x, y []byte) int { 1415 | return bytes.Compare(x, y) 1416 | } 1417 | 1418 | func less(x, y []byte) bool { 1419 | return compare(x, y) < 0 1420 | } 1421 | 1422 | func copyBytes(s []byte) []byte { 1423 | c := make([]byte, len(s)) 1424 | copy(c, s) 1425 | 1426 | return c 1427 | } 1428 | 1429 | func ceil(x, y int) int { 1430 | d := (x / y) 1431 | if x%y == 0 { 1432 | return d 1433 | } 1434 | 1435 | return d + 1 1436 | } 1437 | -------------------------------------------------------------------------------- /fbptree_test.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | "io/ioutil" 8 | "math/rand" 9 | "os" 10 | "path" 11 | "reflect" 12 | "sort" 13 | "time" 14 | 15 | "testing" 16 | ) 17 | 18 | func Example() { 19 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 20 | if err != nil { 21 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 22 | } 23 | defer func() { 24 | if err := os.RemoveAll(dbDir); err != nil { 25 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 26 | } 27 | }() 28 | 29 | dbPath := path.Join(dbDir, "sample.data") 30 | 31 | tree, err := Open(dbPath, PageSize(4096), Order(500)) 32 | if err != nil { 33 | panic(fmt.Errorf("failed to open B+ tree %s: %w", dbDir, err)) 34 | } 35 | 36 | _, _, err = tree.Put([]byte("Hi!"), []byte("Hello world, B+ tree!")) 37 | if err != nil { 38 | panic(fmt.Errorf("failed to put: %w", err)) 39 | } 40 | 41 | _, _, err = tree.Put([]byte("Does it override key?"), []byte("No!")) 42 | if err != nil { 43 | panic(fmt.Errorf("failed to put: %w", err)) 44 | } 45 | 46 | _, _, err = tree.Put([]byte("Does it override key?"), []byte("Yes, absolutely! The key has been overridden.")) 47 | if err != nil { 48 | panic(fmt.Errorf("failed to put: %w", err)) 49 | } 50 | 51 | if err := tree.Close(); err != nil { 52 | panic(fmt.Errorf("failed to close: %w", err)) 53 | } 54 | 55 | tree, err = Open(dbPath, PageSize(4096), Order(500)) 56 | if err != nil { 57 | panic(fmt.Errorf("failed to open B+ tree %s: %w", dbDir, err)) 58 | } 59 | 60 | value, ok, err := tree.Get([]byte("Hi!")) 61 | if err != nil { 62 | panic(fmt.Errorf("failed to get value: %w", err)) 63 | } 64 | if !ok { 65 | fmt.Println("failed to find value") 66 | } 67 | 68 | fmt.Println(string(value)) 69 | 70 | value, ok, err = tree.Get([]byte("Does it override key?")) 71 | if err != nil { 72 | panic(fmt.Errorf("failed to get value: %w", err)) 73 | } 74 | if !ok { 75 | fmt.Println("failed to find value") 76 | } 77 | 78 | if err := tree.Close(); err != nil { 79 | panic(fmt.Errorf("failed to close: %w", err)) 80 | } 81 | 82 | fmt.Println(string(value)) 83 | // Output: 84 | // Hello world, B+ tree! 85 | // Yes, absolutely! The key has been overridden. 86 | } 87 | 88 | func TestOrderError(t *testing.T) { 89 | _, err := Open("somepath", Order(2)) 90 | if err == nil { 91 | t.Fatal("must return an error, but it does not") 92 | } 93 | } 94 | 95 | var treeCases = []struct { 96 | key byte 97 | value string 98 | }{ 99 | {11, "11"}, 100 | {18, "18"}, 101 | {7, "7"}, 102 | {15, "15"}, 103 | {0, "0"}, 104 | {16, "16"}, 105 | {14, "14"}, 106 | {33, "33"}, 107 | {25, "25"}, 108 | {42, "42"}, 109 | {60, "60"}, 110 | {2, "2"}, 111 | {1, "1"}, 112 | {74, "74"}, 113 | } 114 | 115 | func TestNew(t *testing.T) { 116 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 117 | if err != nil { 118 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 119 | } 120 | defer func() { 121 | if err := os.RemoveAll(dbDir); err != nil { 122 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 123 | } 124 | }() 125 | 126 | dbPath := path.Join(dbDir, "sample.data") 127 | 128 | tree, _ := Open(dbPath) 129 | if tree == nil { 130 | t.Fatal("expected new *BPTree instance, but got nil") 131 | } 132 | } 133 | 134 | func TestPutAndGet(t *testing.T) { 135 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 136 | if err != nil { 137 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 138 | } 139 | defer func() { 140 | if err := os.RemoveAll(dbDir); err != nil { 141 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 142 | } 143 | }() 144 | 145 | for order := 3; order <= 7; order++ { 146 | dbPath := path.Join(dbDir, fmt.Sprintf("sample_%d.data", order)) 147 | 148 | tree, err := Open(dbPath, PageSize(4096), Order(order)) 149 | if err != nil { 150 | t.Fatalf("failed to open B+ tree %s: %s", dbDir, err) 151 | } 152 | 153 | for _, c := range treeCases { 154 | prev, exists, err := tree.Put([]byte{c.key}, []byte(c.value)) 155 | if err != nil { 156 | t.Fatalf("failed to put key %v: %s", c.key, err) 157 | } 158 | if prev != nil { 159 | t.Fatalf("the key already exists %v", c.key) 160 | } 161 | if exists { 162 | t.Fatalf("the key already exists %v", c.key) 163 | } 164 | } 165 | 166 | if err := tree.Close(); err != nil { 167 | t.Fatalf("failed to close: %s", err) 168 | } 169 | 170 | tree, err = Open(dbPath, PageSize(4096), Order(order)) 171 | if err != nil { 172 | panic(fmt.Errorf("failed to open B+ tree %s: %w", dbDir, err)) 173 | } 174 | 175 | for _, c := range treeCases { 176 | value, ok, err := tree.Get([]byte{c.key}) 177 | if err != nil { 178 | t.Fatalf("failed to get key %v: %s", c.key, err) 179 | } 180 | if !ok { 181 | t.Fatalf("failed to get value by key %d", c.key) 182 | } 183 | 184 | if string(value) != c.value { 185 | t.Fatalf("expected to get value %s fo key %d, but got %s", c.value, c.key, string(value)) 186 | } 187 | } 188 | } 189 | } 190 | 191 | func TestNil(t *testing.T) { 192 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 193 | if err != nil { 194 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 195 | } 196 | defer func() { 197 | if err := os.RemoveAll(dbDir); err != nil { 198 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 199 | } 200 | }() 201 | 202 | dbPath := path.Join(dbDir, "sample.data") 203 | 204 | tree, _ := Open(dbPath) 205 | if tree == nil { 206 | t.Fatal("expected new *BPTree instance, but got nil") 207 | } 208 | 209 | tree.Put(nil, []byte{1}) 210 | 211 | _, ok, _ := tree.Get(nil) 212 | if !ok { 213 | t.Fatalf("key nil is not found") 214 | } 215 | } 216 | 217 | func TestPutOverrides(t *testing.T) { 218 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 219 | if err != nil { 220 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 221 | } 222 | defer func() { 223 | if err := os.RemoveAll(dbDir); err != nil { 224 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 225 | } 226 | }() 227 | 228 | dbPath := path.Join(dbDir, "sample.data") 229 | 230 | tree, _ := Open(dbPath) 231 | if tree == nil { 232 | t.Fatal("expected new *BPTree instance, but got nil") 233 | } 234 | 235 | prev, exists, err := tree.Put([]byte{1}, []byte{1}) 236 | if err != nil { 237 | t.Fatalf("failed to put key: %s", err) 238 | } 239 | if prev != nil { 240 | t.Fatal("previous value must be nil for the new key") 241 | } 242 | if exists { 243 | t.Fatal("previous value must be nil for the new key") 244 | } 245 | 246 | prev, exists, err = tree.Put([]byte{1}, []byte{2}) 247 | if err != nil { 248 | t.Fatalf("failed to put key: %s", err) 249 | } 250 | if !bytes.Equal(prev, []byte{1}) { 251 | t.Fatalf("previous value must be %v, but got %v", []byte{1}, prev) 252 | } 253 | if !exists { 254 | t.Fatalf("exists must be true for key %v", []byte{1}) 255 | } 256 | 257 | value, ok, err := tree.Get([]byte{1}) 258 | if err != nil { 259 | t.Fatalf("failed to get key: %s", err) 260 | } 261 | if !ok { 262 | t.Fatalf("key %d is not found, but must be overridden", 1) 263 | } 264 | 265 | if !bytes.Equal(value, []byte{2}) { 266 | t.Fatalf("key %d is not overridden", 1) 267 | } 268 | } 269 | 270 | func TestGetForNonExistentValue(t *testing.T) { 271 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 272 | if err != nil { 273 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 274 | } 275 | defer func() { 276 | if err := os.RemoveAll(dbDir); err != nil { 277 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 278 | } 279 | }() 280 | 281 | dbPath := path.Join(dbDir, "sample.data") 282 | 283 | tree, err := Open(dbPath) 284 | if err != nil { 285 | t.Fatalf("failed to open tree: %s", err) 286 | } 287 | 288 | for _, c := range treeCases { 289 | tree.Put([]byte{c.key}, []byte(c.value)) 290 | } 291 | 292 | value, ok, err := tree.Get([]byte{230}) 293 | if err != nil { 294 | t.Fatalf("failed to get key: %s", err) 295 | } 296 | if value != nil { 297 | t.Fatalf("expected value to be nil, but got %s", value) 298 | } 299 | if ok { 300 | t.Fatalf("expected ok to be false, but got %v", ok) 301 | } 302 | } 303 | 304 | func TestGetForEmptyTree(t *testing.T) { 305 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 306 | if err != nil { 307 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 308 | } 309 | defer func() { 310 | if err := os.RemoveAll(dbDir); err != nil { 311 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 312 | } 313 | }() 314 | 315 | dbPath := path.Join(dbDir, "sample.data") 316 | tree, err := Open(dbPath) 317 | if err != nil { 318 | t.Fatalf("failed to open tree: %s", err) 319 | } 320 | 321 | value, ok, err := tree.Get([]byte{1}) 322 | if err != nil { 323 | t.Fatalf("failed to get key: %s", err) 324 | } 325 | if value != nil { 326 | t.Fatalf("expected value to be nil, but got %s", value) 327 | } 328 | if ok { 329 | t.Fatalf("expected ok to be false, but got %v", ok) 330 | } 331 | } 332 | 333 | func TestForEach(t *testing.T) { 334 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 335 | if err != nil { 336 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 337 | } 338 | defer func() { 339 | if err := os.RemoveAll(dbDir); err != nil { 340 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 341 | } 342 | }() 343 | 344 | dbPath := path.Join(dbDir, "sample.data") 345 | tree, err := Open(dbPath) 346 | if err != nil { 347 | t.Fatalf("failed to open tree: %s", err) 348 | } 349 | 350 | for _, c := range treeCases { 351 | tree.Put([]byte{c.key}, []byte(c.value)) 352 | } 353 | 354 | actual := make([]byte, 0) 355 | tree.ForEach(func(key []byte, value []byte) { 356 | actual = append(actual, key...) 357 | }) 358 | 359 | isSorted := sort.SliceIsSorted(actual, func(i, j int) bool { 360 | return actual[i] < actual[j] 361 | }) 362 | if !isSorted { 363 | t.Fatalf("each does not traverse in sorted order, produced result: %s", actual) 364 | } 365 | 366 | expected := make([]byte, 0) 367 | for _, c := range treeCases { 368 | expected = append(expected, c.key) 369 | } 370 | sort.Slice(expected, func(i, j int) bool { 371 | return expected[i] < expected[j] 372 | }) 373 | 374 | if !reflect.DeepEqual(expected, actual) { 375 | t.Fatalf("%v != %v", expected, actual) 376 | } 377 | } 378 | 379 | func TestForEachForEmptyTree(t *testing.T) { 380 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 381 | if err != nil { 382 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 383 | } 384 | defer func() { 385 | if err := os.RemoveAll(dbDir); err != nil { 386 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 387 | } 388 | }() 389 | 390 | dbPath := path.Join(dbDir, "sample.data") 391 | tree, err := Open(dbPath) 392 | if err != nil { 393 | t.Fatalf("failed to open tree: %s", err) 394 | } 395 | 396 | tree.ForEach(func(key []byte, value []byte) { 397 | t.Fatal("call is not expected") 398 | }) 399 | } 400 | 401 | func TestKeyOrder(t *testing.T) { 402 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 403 | if err != nil { 404 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 405 | } 406 | defer func() { 407 | if err := os.RemoveAll(dbDir); err != nil { 408 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 409 | } 410 | }() 411 | 412 | dbPath := path.Join(dbDir, "sample.data") 413 | tree, err := Open(dbPath) 414 | if err != nil { 415 | t.Fatalf("failed to open tree: %s", err) 416 | } 417 | 418 | for _, c := range treeCases { 419 | tree.Put([]byte{c.key}, []byte(c.value)) 420 | } 421 | 422 | keys := make([]byte, 0) 423 | tree.ForEach(func(key, value []byte) { 424 | keys = append(keys, key[0]) 425 | }) 426 | 427 | isSorted := sort.SliceIsSorted(keys, func(i, j int) bool { 428 | return keys[i] < keys[j] 429 | }) 430 | if len(keys) == 0 { 431 | t.Fatal("keys are empty") 432 | } 433 | if !isSorted { 434 | t.Fatal("keys are empty keys are not sorted") 435 | } 436 | } 437 | 438 | func TestPutAndGetRandomized(t *testing.T) { 439 | r := rand.New(rand.NewSource(time.Now().Unix())) 440 | size := 10000 441 | keys := r.Perm(size) 442 | 443 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 444 | if err != nil { 445 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 446 | } 447 | defer func() { 448 | if err := os.RemoveAll(dbDir); err != nil { 449 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 450 | } 451 | }() 452 | 453 | for order := 3; order <= 7; order++ { 454 | dbPath := path.Join(dbDir, fmt.Sprintf("sample_%d.data", order)) 455 | tree, err := Open(dbPath, Order(order)) 456 | if err != nil { 457 | t.Fatalf("failed to open tree: %s", err) 458 | } 459 | 460 | for i, k := range keys { 461 | key := make([]byte, 4) 462 | binary.LittleEndian.PutUint32(key, uint32(k)) 463 | value := make([]byte, 4) 464 | binary.LittleEndian.PutUint32(value, uint32(i)) 465 | 466 | prev, exists, _ := tree.Put(key, value) 467 | if prev != nil { 468 | t.Fatalf("the key already exists %v", k) 469 | } 470 | if exists { 471 | t.Fatalf("the key already exists %v", k) 472 | } 473 | } 474 | tree.Close() 475 | 476 | tree, err = Open(dbPath, Order(order)) 477 | if err != nil { 478 | t.Fatalf("failed to open tree: %s", err) 479 | } 480 | 481 | for i, k := range keys { 482 | expectedValue := uint32(i) 483 | key := make([]byte, 4) 484 | binary.LittleEndian.PutUint32(key, uint32(k)) 485 | 486 | v, ok, _ := tree.Get(key) 487 | if !ok { 488 | t.Fatalf("failed to get value by key %d, tree size = %d, order = %d", k, tree.Size(), order) 489 | } 490 | 491 | actualValue := binary.LittleEndian.Uint32(v) 492 | if expectedValue != actualValue { 493 | t.Fatalf("expected to get value %d fo key %d, but got %d", expectedValue, k, actualValue) 494 | } 495 | } 496 | } 497 | } 498 | 499 | func TestPutAndDeleteRandomized(t *testing.T) { 500 | r := rand.New(rand.NewSource(time.Now().Unix())) 501 | size := 10000 502 | keys := r.Perm(size) 503 | 504 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 505 | if err != nil { 506 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 507 | } 508 | defer func() { 509 | if err := os.RemoveAll(dbDir); err != nil { 510 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 511 | } 512 | }() 513 | 514 | for order := 3; order <= 7; order++ { 515 | dbPath := path.Join(dbDir, fmt.Sprintf("sample_%d.data", order)) 516 | tree, _ := Open(dbPath, Order(order)) 517 | if err != nil { 518 | t.Fatalf("failed to open tree: %s", err) 519 | } 520 | 521 | for i, k := range keys { 522 | key := make([]byte, 4) 523 | binary.LittleEndian.PutUint32(key, uint32(k)) 524 | value := make([]byte, 4) 525 | binary.LittleEndian.PutUint32(value, uint32(i)) 526 | 527 | prev, exists, _ := tree.Put(key, value) 528 | if prev != nil { 529 | t.Fatalf("the key already exists %v", k) 530 | } 531 | if exists { 532 | t.Fatalf("the key already exists %v", k) 533 | } 534 | } 535 | 536 | tree.Close() 537 | 538 | tree, err := Open(dbPath, Order(order)) 539 | if err != nil { 540 | t.Fatalf("failed to open tree: %s", err) 541 | } 542 | 543 | for i, k := range keys { 544 | expectedValue := uint32(i) 545 | key := make([]byte, 4) 546 | binary.LittleEndian.PutUint32(key, uint32(k)) 547 | 548 | v, ok, err := tree.Delete(key) 549 | if err != nil { 550 | t.Fatalf("failed to delete value by key %d, tree size = %d, order = %d: %s", k, tree.Size(), order, err) 551 | } 552 | 553 | if !ok { 554 | t.Fatalf("failed to delete value by key %d, tree size = %d, order = %d", k, tree.Size(), order) 555 | } 556 | 557 | actualValue := binary.LittleEndian.Uint32(v) 558 | if expectedValue != actualValue { 559 | t.Fatalf("expected to delete value %d by key %d, and got %d", expectedValue, k, actualValue) 560 | } 561 | } 562 | } 563 | } 564 | 565 | func TestDeleteFromEmptyTree(t *testing.T) { 566 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 567 | if err != nil { 568 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 569 | } 570 | defer func() { 571 | if err := os.RemoveAll(dbDir); err != nil { 572 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 573 | } 574 | }() 575 | 576 | dbPath := path.Join(dbDir, "sample.data") 577 | tree, err := Open(dbPath, Order(3)) 578 | if err != nil { 579 | t.Fatalf("failed to open tree: %s", err) 580 | } 581 | 582 | value, deleted, _ := tree.Delete([]byte{1}) 583 | if deleted { 584 | t.Fatalf("key %d is deleted, but should not, order %d", 1, 3) 585 | } 586 | if value != nil { 587 | t.Fatalf("value for key %d is not nil: %v", 1, value) 588 | } 589 | } 590 | 591 | func TestDeleteNonExistentElement(t *testing.T) { 592 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 593 | if err != nil { 594 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 595 | } 596 | defer func() { 597 | if err := os.RemoveAll(dbDir); err != nil { 598 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 599 | } 600 | }() 601 | 602 | dbPath := path.Join(dbDir, "sample.data") 603 | tree, err := Open(dbPath) 604 | if err != nil { 605 | t.Fatalf("failed to open tree: %s", err) 606 | } 607 | 608 | tree.Put([]byte{1}, []byte{2}) 609 | tree.Put([]byte{2}, []byte{2}) 610 | tree.Put([]byte{3}, []byte{3}) 611 | 612 | value, deleted, _ := tree.Delete([]byte{4}) 613 | if deleted { 614 | t.Fatalf("key %d is deleted, but should not, order %d", 4, 3) 615 | } 616 | if value != nil { 617 | t.Fatalf("value for key %d is not nil: %v", 4, value) 618 | } 619 | } 620 | 621 | func TestSize(t *testing.T) { 622 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 623 | if err != nil { 624 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 625 | } 626 | defer func() { 627 | if err := os.RemoveAll(dbDir); err != nil { 628 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 629 | } 630 | }() 631 | 632 | dbPath := path.Join(dbDir, "sample.data") 633 | 634 | expected := 0 635 | for _, c := range treeCases { 636 | tree, err := Open(dbPath, Order(3)) 637 | if err != nil { 638 | t.Fatalf("failed to open tree: %s", err) 639 | } 640 | 641 | if expected != tree.Size() { 642 | t.Fatalf("actual size %d is not equal to expected size %d", tree.Size(), expected) 643 | } 644 | 645 | tree.Put([]byte{c.key}, []byte(c.value)) 646 | expected++ 647 | 648 | tree.Close() 649 | } 650 | 651 | tree, err := Open(dbPath, Order(3)) 652 | if err != nil { 653 | t.Fatalf("failed to open tree: %s", err) 654 | } 655 | 656 | if expected != tree.Size() { 657 | t.Fatalf("actual size %d is not equal to expected size %d", tree.Size(), expected) 658 | } 659 | } 660 | 661 | func TestDeleteMergingThreeTimes(t *testing.T) { 662 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 663 | if err != nil { 664 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 665 | } 666 | defer func() { 667 | if err := os.RemoveAll(dbDir); err != nil { 668 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 669 | } 670 | }() 671 | 672 | dbPath := path.Join(dbDir, "sample.data") 673 | tree, err := Open(dbPath, Order(3)) 674 | if err != nil { 675 | t.Fatalf("failed to open tree: %s", err) 676 | } 677 | 678 | keys := []byte{7, 8, 4, 3, 2, 6, 11, 9, 10, 1, 12, 0, 5} 679 | for _, v := range keys { 680 | tree.Put([]byte{v}, []byte{v}) 681 | } 682 | 683 | for _, k := range keys { 684 | value, deleted, _ := tree.Delete([]byte{k}) 685 | if !deleted { 686 | t.Fatalf("key %d is not deleted, order %d", k, 3) 687 | } 688 | if value == nil { 689 | t.Fatalf("value for key %d is nil: %v", k, value) 690 | } 691 | } 692 | } 693 | 694 | func TestDelete(t *testing.T) { 695 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 696 | if err != nil { 697 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 698 | } 699 | defer func() { 700 | if err := os.RemoveAll(dbDir); err != nil { 701 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 702 | } 703 | }() 704 | 705 | for order := 3; order <= 7; order++ { 706 | dbPath := path.Join(dbDir, fmt.Sprintf("sample_%d.data", order)) 707 | tree, err := Open(dbPath, Order(order)) 708 | if err != nil { 709 | t.Fatalf("failed to open tree: %s", err) 710 | } 711 | 712 | for _, c := range treeCases { 713 | tree.Put([]byte{c.key}, []byte(c.value)) 714 | } 715 | 716 | tree.Close() 717 | 718 | tree, _ = Open(dbPath, Order(order)) 719 | if err != nil { 720 | t.Fatalf("failed to open tree: %s", err) 721 | } 722 | 723 | expectedSize := len(treeCases) 724 | for _, c := range treeCases { 725 | value, deleted, err := tree.Delete([]byte{c.key}) 726 | expectedSize-- 727 | 728 | if err != nil { 729 | t.Fatalf("failed to delete key %d: %s", c.key, err) 730 | } 731 | if !deleted { 732 | t.Fatalf("key %d is not deleted, order %d", c.key, order) 733 | } 734 | if value == nil { 735 | t.Fatalf("value for key %d is nil: %v", c.key, value) 736 | } 737 | if expectedSize != tree.Size() { 738 | t.Fatalf("the expected size != actual: %d != %d", expectedSize, tree.Size()) 739 | } 740 | } 741 | } 742 | } 743 | 744 | func TestForEachAfterDeletion(t *testing.T) { 745 | dbDir, err := ioutil.TempDir(os.TempDir(), "example") 746 | if err != nil { 747 | panic(fmt.Errorf("failed to create %s: %w", dbDir, err)) 748 | } 749 | defer func() { 750 | if err := os.RemoveAll(dbDir); err != nil { 751 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 752 | } 753 | }() 754 | 755 | dbPath := path.Join(dbDir, "sample.data") 756 | tree, err := Open(dbPath, Order(3)) 757 | if err != nil { 758 | t.Fatalf("failed to open tree: %s", err) 759 | } 760 | 761 | keys := []byte{7, 8, 4, 3, 2, 6, 11, 9, 10, 1, 12, 0, 5} 762 | for _, v := range keys { 763 | tree.Put([]byte{v}, []byte{v}) 764 | } 765 | 766 | for i, k := range keys { 767 | value, deleted, _ := tree.Delete([]byte{k}) 768 | if !deleted { 769 | t.Fatalf("key %d is not deleted, order %d", k, 3) 770 | } 771 | if value == nil { 772 | t.Fatalf("value for key %d is nil: %v", k, value) 773 | } 774 | 775 | actual := make([]byte, 0) 776 | tree.ForEach(func(key []byte, value []byte) { 777 | actual = append(actual, key...) 778 | }) 779 | 780 | isSorted := sort.SliceIsSorted(actual, func(i, j int) bool { 781 | return actual[i] < actual[j] 782 | }) 783 | if !isSorted { 784 | t.Fatalf("each does not traverse in sorted order, produced result: %s", actual) 785 | } 786 | 787 | expected := make([]byte, 0) 788 | for j, k := range keys { 789 | if j > i { 790 | expected = append(expected, k) 791 | } 792 | } 793 | sort.Slice(expected, func(i, j int) bool { 794 | return expected[i] < expected[j] 795 | }) 796 | 797 | if !reflect.DeepEqual(expected, actual) { 798 | t.Fatalf("%v != %v for key %d (%d)", expected, actual, k, i) 799 | } 800 | } 801 | } 802 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/krasun/fbptree 2 | 3 | go 1.16 4 | -------------------------------------------------------------------------------- /iterator.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import "fmt" 4 | 5 | // Iterator returns a stateful Iterator for traversing the tree 6 | // in ascending key order. 7 | type Iterator struct { 8 | next *node 9 | i int 10 | storage *storage 11 | } 12 | 13 | // Iterator returns a stateful iterator that traverses the tree 14 | // in ascending key order. 15 | func (t *FBPTree) Iterator() (*Iterator, error) { 16 | if t.metadata == nil { 17 | return &Iterator{nil, 0, t.storage}, nil 18 | } 19 | 20 | next, err := t.storage.loadNodeByID(t.metadata.leftmostID) 21 | if err != nil { 22 | return nil, fmt.Errorf("failed to load the leftmost node %d: %w", t.metadata.leftmostID, err) 23 | } 24 | 25 | return &Iterator{next, 0, t.storage}, nil 26 | } 27 | 28 | // HasNext returns true if there is a next element to retrive. 29 | func (it *Iterator) HasNext() bool { 30 | return it.next != nil && it.i < it.next.keyNum 31 | } 32 | 33 | // Next returns a key and a value at the current position of the iteration 34 | // and advances the iterator. 35 | // Caution! Next panics if called on the nil element. 36 | func (it *Iterator) Next() ([]byte, []byte, error) { 37 | if !it.HasNext() { 38 | // to sleep well 39 | return nil, nil, fmt.Errorf("there is no next node") 40 | } 41 | 42 | key, value := it.next.keys[it.i], it.next.pointers[it.i].asValue() 43 | 44 | it.i++ 45 | if it.i == it.next.keyNum { 46 | nextPointer := it.next.next() 47 | if nextPointer != nil { 48 | nodeID := nextPointer.asNodeID() 49 | next, err := it.storage.loadNodeByID(nodeID) 50 | if err != nil { 51 | return nil, nil, fmt.Errorf("failed to load the next node: %w", err) 52 | } 53 | 54 | it.next = next 55 | } else { 56 | it.next = nil 57 | } 58 | 59 | it.i = 0 60 | } 61 | 62 | return key, value, nil 63 | } 64 | -------------------------------------------------------------------------------- /pager.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "io/fs" 7 | "math" 8 | "os" 9 | ) 10 | 11 | // for mocking the filesystem 12 | var openFile = os.OpenFile 13 | 14 | const minPageSize = 32 15 | const maxPageSize = math.MaxUint16 16 | 17 | // the size of the first metadata block in the file, 18 | // reserved for different needs 19 | const metadataSize = 1000 20 | const customMetadataPosition = 500 21 | 22 | // the id of the first free page 23 | const firstFreePageId = uint32(1) 24 | const pageIdSize = 4 // uint32 25 | 26 | // pager is an abstaction over the file that represents the file 27 | // as a set of pages. The file is splitten into 28 | // the pages with the fixed size, usually 4096 bytes. 29 | type pager struct { 30 | file randomAccessFile 31 | pageSize uint16 32 | 33 | // id is any free page that can be used 34 | // and the value is free page container 35 | isFreePage map[uint32]*freePage 36 | // the pointer to the last free page 37 | lastFreePage *freePage 38 | 39 | // last page id is last created page id 40 | // it can be free or used - it does not matter 41 | lastPageId uint32 42 | 43 | freePages map[uint32]*freePage 44 | // key is the id of the page and the value is the id of the previous page 45 | prevPageIds map[uint32]uint32 46 | 47 | metadata *metadata 48 | } 49 | 50 | type metadata struct { 51 | pageSize uint16 52 | 53 | custom []byte 54 | } 55 | 56 | type freePage struct { 57 | pageId uint32 58 | ids map[uint32]struct{} 59 | // 0 if does not exist 60 | nextPageId uint32 61 | } 62 | 63 | func (p *freePage) copy() *freePage { 64 | newIds := make(map[uint32]struct{}) 65 | for key, value := range p.ids { 66 | newIds[key] = value 67 | } 68 | 69 | return &freePage{ 70 | p.pageId, 71 | newIds, 72 | p.nextPageId, 73 | } 74 | } 75 | 76 | type randomAccessFile interface { 77 | io.ReaderAt 78 | io.WriterAt 79 | io.Closer 80 | 81 | Sync() error 82 | Stat() (fs.FileInfo, error) 83 | Truncate(size int64) error 84 | } 85 | 86 | // newPager instantiates new pager for the given file. If the file exists, 87 | func openPager(path string, pageSize uint16) (*pager, error) { 88 | file, err := openFile(path, os.O_RDWR|os.O_CREATE, 0600) 89 | if err != nil { 90 | return nil, fmt.Errorf("failed to open %s: %w", path, err) 91 | } 92 | 93 | pager, err := newPager(file, pageSize) 94 | if err != nil { 95 | file.Close() 96 | 97 | return nil, fmt.Errorf("failed to instantiate the pager: %w", err) 98 | } 99 | 100 | return pager, nil 101 | } 102 | 103 | // newPager instantiates new pager for the given file. If the file exists, 104 | // it opens the file and reads its metadata and checks invariants, otherwise 105 | // it creates a new file and populates it with the metadata. 106 | func newPager(file randomAccessFile, pageSize uint16) (*pager, error) { 107 | if pageSize < minPageSize { 108 | return nil, fmt.Errorf("page size must be greater than or equal to %d", minPageSize) 109 | } 110 | 111 | info, err := file.Stat() 112 | if err != nil { 113 | return nil, fmt.Errorf("failed to stat the file: %w", err) 114 | } 115 | 116 | size := info.Size() 117 | if size == 0 { 118 | // initialize free pages block and metadata block 119 | p := &pager{file, pageSize, make(map[uint32]*freePage), nil, 0, make(map[uint32]*freePage), make(map[uint32]uint32), &metadata{pageSize, nil}} 120 | if err := writeMetadata(p.file, p.metadata); err != nil { 121 | return nil, fmt.Errorf("failed to initialize metadata: %w", err) 122 | } 123 | 124 | if err := initializeFreePages(p); err != nil { 125 | return nil, fmt.Errorf("failed to initialize free pages: %w", err) 126 | } 127 | 128 | if err := p.flush(); err != nil { 129 | return nil, fmt.Errorf("failed to flush initialization changes: %w", err) 130 | } 131 | 132 | return p, nil 133 | } 134 | 135 | metadata, err := readMetadata(file) 136 | if err != nil { 137 | return nil, fmt.Errorf("failed to read metadata: %w", err) 138 | } 139 | 140 | if metadata.pageSize != pageSize { 141 | return nil, fmt.Errorf("the file was created with page size %d, but given page size is %d", metadata.pageSize, pageSize) 142 | } 143 | 144 | isFreePage, lastFreePage, freePages, prevPageIds, err := readFreePages(file, pageSize) 145 | if err != nil { 146 | return nil, fmt.Errorf("failed to read free pages: %w", err) 147 | } 148 | 149 | used := (size - metadataSize) 150 | lastPageId := uint32(0) 151 | if used > 0 { 152 | lastPageId = uint32(used / int64(pageSize)) 153 | } 154 | 155 | return &pager{file, pageSize, isFreePage, lastFreePage, lastPageId, freePages, prevPageIds, metadata}, nil 156 | } 157 | 158 | func writeMetadata(w io.WriterAt, metadata *metadata) error { 159 | data := encodeMetadata(metadata) 160 | if n, err := w.WriteAt(data, 0); err != nil { 161 | return fmt.Errorf("failed to write the metadata to the file: %w", err) 162 | } else if n < len(data) { 163 | return fmt.Errorf("failed to write all the data to the file, wrote %d bytes: %w", n, err) 164 | } 165 | 166 | return nil 167 | } 168 | 169 | func initializeFreePages(p *pager) error { 170 | pageId, err := p.new() 171 | if err != nil { 172 | return fmt.Errorf("failed to instantiate new page: %w", err) 173 | } 174 | 175 | if pageId != firstFreePageId { 176 | return fmt.Errorf("expected new page id to be %d for the new file, but got %d", firstFreePageId, pageId) 177 | } 178 | 179 | ids := make(map[uint32]struct{}) 180 | freePage := &freePage{pageId, ids, 0} 181 | p.lastFreePage = freePage 182 | p.freePages[pageId] = freePage 183 | 184 | return nil 185 | } 186 | 187 | // readFreePages reads and initializes the list of free pages. 188 | func readFreePages(r io.ReaderAt, pageSize uint16) (map[uint32]*freePage, *freePage, map[uint32]*freePage, map[uint32]uint32, error) { 189 | isFreePage := make(map[uint32]*freePage) 190 | freePages := make(map[uint32]*freePage) 191 | prevPageIds := make(map[uint32]uint32) 192 | 193 | var prevPageId uint32 194 | freePageId := firstFreePageId 195 | var lastFreePage *freePage 196 | for freePageId != 0 { 197 | freePage, err := readFreePage(r, freePageId, pageSize) 198 | if err != nil { 199 | return nil, nil, nil, nil, fmt.Errorf("failed to read free page: %w", err) 200 | } 201 | 202 | for id := range freePage.ids { 203 | isFreePage[id] = freePage 204 | } 205 | freePages[freePageId] = freePage 206 | 207 | if prevPageId != 0 { 208 | prevPageIds[freePageId] = prevPageId 209 | } 210 | prevPageId = freePageId 211 | 212 | lastFreePage = freePage 213 | freePageId = freePage.nextPageId 214 | } 215 | 216 | return isFreePage, lastFreePage, freePages, prevPageIds, nil 217 | } 218 | 219 | func readFreePage(r io.ReaderAt, pageId uint32, pageSize uint16) (*freePage, error) { 220 | data, err := readPage(r, pageId, pageSize) 221 | if err != nil { 222 | return nil, fmt.Errorf("failed to read page %d: %w", pageId, err) 223 | } 224 | 225 | freePage, err := decodeFreePage(pageId, data) 226 | if err != nil { 227 | return nil, fmt.Errorf("failed to decode free page: %w", err) 228 | } 229 | 230 | return freePage, nil 231 | } 232 | 233 | func decodeFreePage(pageId uint32, data []byte) (*freePage, error) { 234 | pageIdNum := (len(data) - pageIdSize) / pageIdSize 235 | freePages := make(map[uint32]struct{}) 236 | for i := 0; i < pageIdNum; i++ { 237 | from, to := i*pageIdSize, i*pageIdSize+pageIdSize 238 | pageId := decodeUint32(data[from:to]) 239 | if pageId == 0 { 240 | break 241 | } 242 | 243 | freePages[pageId] = struct{}{} 244 | } 245 | 246 | nextPageId := decodeUint32(data[len(data)-pageIdSize:]) 247 | 248 | return &freePage{pageId, freePages, nextPageId}, nil 249 | } 250 | 251 | // reads and decodes metadata from the specified file. 252 | func readMetadata(r io.ReaderAt) (*metadata, error) { 253 | data := make([]byte, metadataSize) 254 | if read, err := r.ReadAt(data[:], 0); err != nil { 255 | return nil, fmt.Errorf("failed to read metadata from the file: %w", err) 256 | } else if read != metadataSize { 257 | return nil, fmt.Errorf("failed to read metadata from the file: read %d bytes, but must %d", read, metadataSize) 258 | } 259 | 260 | m, err := decodeMetadata(data) 261 | if err != nil { 262 | return nil, fmt.Errorf("failed to decode metadata: %w", err) 263 | } 264 | 265 | return m, nil 266 | } 267 | 268 | func encodeMetadata(m *metadata) []byte { 269 | data := make([]byte, metadataSize) 270 | 271 | d := encodeUint16(m.pageSize) 272 | copy(data[0:len(d)], d) 273 | 274 | if len(m.custom) != 0 { 275 | s := encodeUint16(uint16(len(m.custom))) 276 | copy(data[customMetadataPosition:customMetadataPosition+len(s)], s) 277 | copy(data[customMetadataPosition+len(s):], m.custom) 278 | } 279 | 280 | return data 281 | } 282 | 283 | // decodes and returns metadata from the given byte slice. 284 | func decodeMetadata(data []byte) (*metadata, error) { 285 | // the first block is the page size, encoded as uint16 286 | pageSize := decodeUint16(data[0:2]) 287 | 288 | customMetadataSize := decodeUint16(data[customMetadataPosition : customMetadataPosition+2]) 289 | var customMetadata []byte = nil 290 | if customMetadataSize != 0 { 291 | customMetadata = data[customMetadataPosition+2 : customMetadataPosition+2+customMetadataSize] 292 | } 293 | 294 | return &metadata{pageSize: pageSize, custom: customMetadata}, nil 295 | } 296 | 297 | // newPage returns an identifier of the page that is free 298 | // and can be used for write. 299 | func (p *pager) new() (uint32, error) { 300 | if len(p.isFreePage) > 0 { 301 | for freePageId := range p.isFreePage { 302 | freePage := p.isFreePage[freePageId] 303 | delete(freePage.ids, freePageId) 304 | 305 | data := encodeFreePage(freePage, p.pageSize) 306 | if err := writePage(p.file, freePage.pageId, data, p.pageSize); err != nil { 307 | freePage.ids[freePageId] = struct{}{} 308 | return 0, fmt.Errorf("failed to update the free page: %w", err) 309 | } 310 | 311 | delete(p.isFreePage, freePageId) 312 | 313 | return freePageId, nil 314 | } 315 | } 316 | 317 | offset := int64((p.lastPageId)*uint32(p.pageSize)) + metadataSize 318 | data := make([]byte, p.pageSize) 319 | if n, err := p.file.WriteAt(data, offset); err != nil { 320 | return 0, fmt.Errorf("failed to write empty block: %w", err) 321 | } else if n < int(p.pageSize) { 322 | return 0, fmt.Errorf("failed to write all bytes of the empty block, wrote only %d bytes", n) 323 | } 324 | 325 | p.lastPageId++ 326 | 327 | return p.lastPageId, nil 328 | } 329 | 330 | // writeCustomMetadata writes custom metadata into the metadata section of the file. 331 | func (p *pager) writeCustomMetadata(data []byte) error { 332 | maxCustomMetadataLen := (metadataSize - customMetadataPosition) 333 | if len(data) > maxCustomMetadataLen { 334 | return fmt.Errorf("custom metadata must be less than %d bytes", maxCustomMetadataLen) 335 | } 336 | 337 | p.metadata.custom = data 338 | 339 | err := writeMetadata(p.file, p.metadata) 340 | if err != nil { 341 | return fmt.Errorf("failed to write metadata: %w", err) 342 | } 343 | 344 | return nil 345 | } 346 | 347 | // writeMetadata reads custom metadata from the metadata section of the file. 348 | func (p *pager) readCustomMetadata() ([]byte, error) { 349 | metadata, err := readMetadata(p.file) 350 | if err != nil { 351 | return nil, fmt.Errorf("failed to read metadata: %w", err) 352 | } 353 | 354 | return metadata.custom, nil 355 | } 356 | 357 | func (p *pager) isFree(pageId uint32) bool { 358 | _, isFreePage := p.isFreePage[pageId] 359 | 360 | return isFreePage 361 | } 362 | 363 | // free marks the page as free and the page can be reused. 364 | func (p *pager) free(pageId uint32) error { 365 | if p.isFree(pageId) { 366 | return fmt.Errorf("the page is already free") 367 | } 368 | 369 | if (len(p.lastFreePage.ids)*pageIdSize + pageIdSize) < int(p.pageSize) { 370 | // update the page that contains the free pages 371 | p.lastFreePage.ids[pageId] = struct{}{} 372 | data := encodeFreePage(p.lastFreePage, p.pageSize) 373 | if err := writePage(p.file, p.lastFreePage.pageId, data, p.pageSize); err != nil { 374 | // revert the changes 375 | delete(p.lastFreePage.ids, pageId) 376 | 377 | return fmt.Errorf("failed to update the last free page: %w", err) 378 | } 379 | 380 | p.isFreePage[pageId] = p.lastFreePage 381 | } else { 382 | // if there is not enough space for the free page list 383 | newPageId, err := p.new() 384 | if err != nil { 385 | return fmt.Errorf("failed to instantiate new page: %w", err) 386 | } 387 | 388 | newIds := make(map[uint32]struct{}) 389 | newIds[pageId] = struct{}{} 390 | newFreePage := &freePage{newPageId, newIds, 0} 391 | 392 | data := encodeFreePage(newFreePage, p.pageSize) 393 | if err := writePage(p.file, newPageId, data, p.pageSize); err != nil { 394 | return fmt.Errorf("failed to write the new free page: %w", err) 395 | } 396 | 397 | p.lastFreePage.nextPageId = newPageId 398 | data = encodeFreePage(p.lastFreePage, p.pageSize) 399 | if err := writePage(p.file, p.lastFreePage.pageId, data, p.pageSize); err != nil { 400 | // revert the changes 401 | p.lastFreePage.nextPageId = 0 402 | 403 | return fmt.Errorf("failed to update the last free page: %w", err) 404 | } 405 | 406 | p.prevPageIds[newPageId] = p.lastFreePage.pageId 407 | p.lastFreePage = newFreePage 408 | p.isFreePage[pageId] = newFreePage 409 | p.freePages[newPageId] = newFreePage 410 | } 411 | 412 | return nil 413 | } 414 | 415 | // encodeFreePage encodes free page identifiers into the chunks of byte slices. 416 | func encodeFreePage(page *freePage, pageSize uint16) []byte { 417 | data := make([]byte, pageSize) 418 | copy(data[len(data)-pageIdSize:], encodeUint32(page.nextPageId)) 419 | 420 | i := 0 421 | for freePageId := range page.ids { 422 | copy(data[i:], encodeUint32(freePageId)) 423 | i += pageIdSize 424 | } 425 | 426 | return data 427 | } 428 | 429 | // read reads the page contents by the page identifier and returns 430 | // its contents. 431 | func (p *pager) read(pageId uint32) ([]byte, error) { 432 | if p.isFree(pageId) { 433 | return nil, fmt.Errorf("page %d does not exist or free", pageId) 434 | } 435 | 436 | return readPage(p.file, pageId, p.pageSize) 437 | } 438 | 439 | func writePage(w io.WriterAt, pageId uint32, data []byte, pageSize uint16) error { 440 | offset := int64(metadataSize + (pageId-1)*uint32(pageSize)) 441 | 442 | if n, err := w.WriteAt(data, offset); err != nil { 443 | return fmt.Errorf("failed to write the page: %w", err) 444 | } else if n != len(data) { 445 | return fmt.Errorf("failed to write %d bytes, wrote %d", len(data), n) 446 | } 447 | 448 | return nil 449 | } 450 | 451 | func readPage(r io.ReaderAt, pageId uint32, pageSize uint16) ([]byte, error) { 452 | offset := int64(metadataSize + (pageId-1)*uint32(pageSize)) 453 | data := make([]byte, pageSize) 454 | if n, err := r.ReadAt(data, offset); err != nil { 455 | return nil, fmt.Errorf("failed to read the page data: %w", err) 456 | } else if n != int(pageSize) { 457 | return nil, fmt.Errorf("failed to read %d bytes, read %d", pageSize, n) 458 | } 459 | 460 | return data, nil 461 | } 462 | 463 | // write writes the page content. 464 | func (p *pager) write(pageId uint32, data []byte) error { 465 | if p.isFree(pageId) { 466 | return fmt.Errorf("page %d does not exist or free", pageId) 467 | } 468 | 469 | if len(data) != int(p.pageSize) { 470 | return fmt.Errorf("data length %d is greater than the page size %d", len(data), p.pageSize) 471 | } 472 | 473 | return writePage(p.file, pageId, data, p.pageSize) 474 | } 475 | 476 | // compact removes the free pages that are placed at the end of file and 477 | // if the free page lists does not contains any free page, it frees the free page list. 478 | func (p *pager) compact() error { 479 | newLastPageId := p.lastPageId 480 | removeFreePageIds := make([]uint32, 0) 481 | removeFreePages := make(map[uint32]*freePage) 482 | // the copy of free pages to be updated 483 | updateFreePages := make(map[uint32]*freePage) 484 | for pageId := p.lastPageId; pageId > firstFreePageId; pageId-- { 485 | if p.isFree(pageId) { 486 | removeFreePageIds = append(removeFreePageIds, pageId) 487 | 488 | freePage := p.isFreePage[pageId] 489 | updatePage, ok := updateFreePages[freePage.pageId] 490 | if !ok { 491 | updatePage = freePage.copy() 492 | updateFreePages[updatePage.pageId] = updatePage 493 | } 494 | delete(updatePage.ids, pageId) 495 | 496 | newLastPageId = pageId - 1 497 | } else if p.canDeleteFreePage(pageId) { 498 | freePage := p.freePages[pageId] 499 | removeFreePages[pageId] = freePage 500 | 501 | if prevPageId, ok := p.prevPageIds[pageId]; ok { 502 | prevPage := p.freePages[prevPageId] 503 | updatePage, ok := updateFreePages[prevPageId] 504 | if !ok { 505 | updatePage = prevPage.copy() 506 | updateFreePages[prevPageId] = updatePage 507 | } 508 | updatePage.nextPageId = freePage.nextPageId 509 | } 510 | 511 | newLastPageId = pageId - 1 512 | } else { 513 | break 514 | } 515 | } 516 | 517 | // update free pages and last free page id 518 | freeBytes := int64(len(removeFreePages)+len(removeFreePageIds)) * int64(p.pageSize) 519 | if freeBytes == 0 { 520 | return nil 521 | } 522 | 523 | stat, err := p.file.Stat() 524 | if err != nil { 525 | return fmt.Errorf("failed to get the file size: %w", err) 526 | } 527 | 528 | newSize := stat.Size() - freeBytes 529 | err = p.file.Truncate(newSize) 530 | if err != nil { 531 | return fmt.Errorf("failed to truncate the file: %w", err) 532 | } 533 | 534 | for pageId := range removeFreePages { 535 | delete(updateFreePages, pageId) 536 | } 537 | for pageId, updatePage := range updateFreePages { 538 | data := encodeFreePage(updatePage, p.pageSize) 539 | if err := writePage(p.file, pageId, data, p.pageSize); err != nil { 540 | return fmt.Errorf("failed to update the free page: %w", err) 541 | } 542 | } 543 | 544 | for pageId, updateFreePage := range updateFreePages { 545 | freePage := p.freePages[pageId] 546 | freePage.pageId = updateFreePage.pageId 547 | freePage.ids = updateFreePage.ids 548 | freePage.nextPageId = updateFreePage.nextPageId 549 | } 550 | for _, removeId := range removeFreePageIds { 551 | delete(p.isFreePage, removeId) 552 | } 553 | for pageId, removePage := range removeFreePages { 554 | if p.lastFreePage == removePage { 555 | p.lastFreePage = p.freePages[p.prevPageIds[removePage.pageId]] 556 | } 557 | 558 | delete(p.prevPageIds, pageId) 559 | delete(p.freePages, pageId) 560 | } 561 | 562 | p.lastPageId = newLastPageId 563 | 564 | return nil 565 | } 566 | 567 | // canDeleteFreePage checks if the page is a free page list container 568 | // and if all the pages in the container are free. 569 | func (p *pager) canDeleteFreePage(pageId uint32) bool { 570 | freePage, isFreePage := p.freePages[pageId] 571 | if !isFreePage { 572 | return false 573 | } 574 | 575 | for id := range freePage.ids { 576 | if _, isFree := p.isFreePage[id]; !isFree { 577 | return false 578 | } 579 | } 580 | 581 | return true 582 | } 583 | 584 | // flush flushes all the changes of the file to the persistent disk. 585 | func (p *pager) flush() error { 586 | if err := p.file.Sync(); err != nil { 587 | return fmt.Errorf("failed to sync file: %w", err) 588 | } 589 | 590 | return nil 591 | } 592 | 593 | // close flushes the changes and closes all underlying resources. 594 | func (p *pager) close() error { 595 | if err := p.file.Sync(); err != nil { 596 | return fmt.Errorf("failed to sync file: %w", err) 597 | } 598 | 599 | if err := p.file.Close(); err != nil { 600 | return fmt.Errorf("failed to close the file: %w", err) 601 | } 602 | 603 | return nil 604 | } 605 | -------------------------------------------------------------------------------- /pager_test.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path" 9 | "testing" 10 | ) 11 | 12 | func TestNewPagerInitializesProperly(t *testing.T) { 13 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 14 | defer func() { 15 | if err := os.RemoveAll(dbDir); err != nil { 16 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 17 | } 18 | }() 19 | 20 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 21 | if err != nil { 22 | t.Fatalf("failed to initialize the pager: %s", err) 23 | } 24 | defer p.close() 25 | 26 | if len(p.isFreePage) != 0 { 27 | t.Fatalf("expected free pages size is 0, but got %d", len(p.isFreePage)) 28 | } 29 | 30 | if p.lastPageId != firstFreePageId { 31 | t.Fatalf("expected last page id == 1, but got %d", p.lastPageId) 32 | } 33 | 34 | if p.pageSize != 4096 { 35 | t.Fatalf("expected page size to be %d, but got %d", 4006, p.pageSize) 36 | } 37 | } 38 | 39 | func TestNewPage(t *testing.T) { 40 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 41 | defer func() { 42 | if err := os.RemoveAll(dbDir); err != nil { 43 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 44 | } 45 | }() 46 | 47 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 48 | if err != nil { 49 | t.Fatalf("failed to initialize the pager: %s", err) 50 | } 51 | defer p.close() 52 | 53 | newPageId, err := p.new() 54 | if err != nil { 55 | t.Fatalf("failed to new page: %s", err) 56 | } 57 | 58 | if newPageId <= firstFreePageId { 59 | t.Fatalf("new page id must be >= %d:", firstFreePageId) 60 | } 61 | 62 | _, exists := p.isFreePage[newPageId] 63 | if exists { 64 | t.Fatalf("new page id must not be in the free page list") 65 | } 66 | 67 | stat, err := p.file.Stat() 68 | if err != nil { 69 | t.Fatalf("failed to stat file: %s", err) 70 | } 71 | 72 | // metadata + free page + new page 73 | expectedSize := metadataSize + 4096*2 74 | if stat.Size() != int64(expectedSize) { 75 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 76 | } 77 | } 78 | 79 | func TestDeleteFreeSparseFile(t *testing.T) { 80 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 81 | defer func() { 82 | if err := os.RemoveAll(dbDir); err != nil { 83 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 84 | } 85 | }() 86 | 87 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 88 | if err != nil { 89 | t.Fatalf("failed to initialize the pager: %s", err) 90 | } 91 | defer p.close() 92 | 93 | freePageId, err := p.new() 94 | if err != nil { 95 | t.Fatalf("failed to new page: %s", err) 96 | } 97 | 98 | _, err = p.new() 99 | if err != nil { 100 | t.Fatalf("failed to new page: %s", err) 101 | } 102 | 103 | err = p.free(freePageId) 104 | if err != nil { 105 | t.Fatalf("failed to free page: %s", err) 106 | } 107 | 108 | _, exists := p.isFreePage[freePageId] 109 | if !exists { 110 | t.Fatalf("new page id must be in the free page list") 111 | } 112 | 113 | stat, err := p.file.Stat() 114 | if err != nil { 115 | t.Fatalf("failed to stat file: %s", err) 116 | } 117 | 118 | // metadata + free page + 2 new pages, but the file is sparse now 119 | expectedSize := metadataSize + 4096*3 120 | if stat.Size() != int64(expectedSize) { 121 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 122 | } 123 | } 124 | 125 | func TestDeleteFree(t *testing.T) { 126 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 127 | defer func() { 128 | if err := os.RemoveAll(dbDir); err != nil { 129 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 130 | } 131 | }() 132 | 133 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 134 | if err != nil { 135 | t.Fatalf("failed to initialize the pager: %s", err) 136 | } 137 | 138 | _, err = p.new() 139 | if err != nil { 140 | t.Fatalf("failed to new page: %s", err) 141 | } 142 | 143 | freePageId, err := p.new() 144 | if err != nil { 145 | t.Fatalf("failed to new page: %s", err) 146 | } 147 | 148 | _, err = p.new() 149 | if err != nil { 150 | t.Fatalf("failed to new page: %s", err) 151 | } 152 | 153 | err = p.free(freePageId) 154 | if err != nil { 155 | t.Fatalf("failed to free page: %s", err) 156 | } 157 | 158 | if !p.isFree(freePageId) { 159 | t.Fatalf("new page id must be in the free page list") 160 | } 161 | 162 | stat, err := p.file.Stat() 163 | if err != nil { 164 | t.Fatalf("failed to stat file: %s", err) 165 | } 166 | 167 | // metadata + free page + 3 new pages, but the file is sparse now 168 | expectedSize := metadataSize + 4096*4 169 | if stat.Size() != int64(expectedSize) { 170 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 171 | } 172 | 173 | p.close() 174 | 175 | p, err = openPager(path.Join(dbDir, "test.db"), 4096) 176 | if err != nil { 177 | t.Fatalf("failed to initialize the pager: %s", err) 178 | } 179 | 180 | if !p.isFree(freePageId) { 181 | t.Fatalf("new page id must be in the free page list") 182 | } 183 | } 184 | 185 | func TestNewAfterFreeUsesFreePage(t *testing.T) { 186 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 187 | defer func() { 188 | if err := os.RemoveAll(dbDir); err != nil { 189 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 190 | } 191 | }() 192 | 193 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 194 | if err != nil { 195 | t.Fatalf("failed to initialize the pager: %s", err) 196 | } 197 | defer p.close() 198 | 199 | freePageId, err := p.new() 200 | if err != nil { 201 | t.Fatalf("failed to new page: %s", err) 202 | } 203 | 204 | err = p.free(freePageId) 205 | if err != nil { 206 | t.Fatalf("failed to free page: %s", err) 207 | } 208 | 209 | newPageId, err := p.new() 210 | if err != nil { 211 | t.Fatalf("failed to new page: %s", err) 212 | } 213 | 214 | if newPageId != freePageId { 215 | t.Fatalf("new page id must be equal to free page id %d, but got %d", freePageId, newPageId) 216 | } 217 | 218 | _, exists := p.isFreePage[newPageId] 219 | if exists { 220 | t.Fatalf("new page id must not be in the free page list") 221 | } 222 | 223 | stat, err := p.file.Stat() 224 | if err != nil { 225 | t.Fatalf("failed to stat file: %s", err) 226 | } 227 | 228 | // metadata + free page + 1 new page 229 | expectedSize := metadataSize + 4096*2 230 | if stat.Size() != int64(expectedSize) { 231 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 232 | } 233 | } 234 | 235 | func TestFreePageSplitting(t *testing.T) { 236 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 237 | defer func() { 238 | if err := os.RemoveAll(dbDir); err != nil { 239 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 240 | } 241 | }() 242 | 243 | var pageSize uint16 = 4096 244 | p, err := openPager(path.Join(dbDir, "test.db"), pageSize) 245 | if err != nil { 246 | t.Fatalf("failed to initialize the pager: %s", err) 247 | } 248 | defer p.close() 249 | 250 | iterations := int((pageSize / pageIdSize) + 1) 251 | ids := make([]uint32, 0) 252 | for i := 0; i <= iterations; i++ { 253 | freePageId, err := p.new() 254 | if err != nil { 255 | t.Fatalf("failed to new page: %s", err) 256 | } 257 | 258 | ids = append(ids, freePageId) 259 | } 260 | 261 | var lastFreePageId uint32 262 | for _, freePageId := range ids { 263 | err = p.free(freePageId) 264 | if err != nil { 265 | t.Fatalf("failed to free page: %s", err) 266 | } 267 | 268 | lastFreePageId = freePageId 269 | } 270 | 271 | stat, err := p.file.Stat() 272 | if err != nil { 273 | t.Fatalf("failed to stat file: %s", err) 274 | } 275 | 276 | // metadata + iterations + 2 free pages 277 | expectedSize := metadataSize + 4096*(iterations+2) 278 | if stat.Size() != int64(expectedSize) { 279 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 280 | } 281 | 282 | p.close() 283 | 284 | p, err = openPager(path.Join(dbDir, "test.db"), pageSize) 285 | if err != nil { 286 | t.Fatalf("failed to initialize the pager: %s", err) 287 | } 288 | 289 | if !p.isFree(lastFreePageId) { 290 | t.Fatalf("new page id must be in the free page list") 291 | } 292 | } 293 | 294 | func TestReadAndWrite(t *testing.T) { 295 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 296 | defer func() { 297 | if err := os.RemoveAll(dbDir); err != nil { 298 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 299 | } 300 | }() 301 | 302 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 303 | if err != nil { 304 | t.Fatalf("failed to initialize the pager: %s", err) 305 | } 306 | defer p.close() 307 | 308 | newPageId, err := p.new() 309 | if err != nil { 310 | t.Fatalf("failed to new page: %s", err) 311 | } 312 | 313 | var writtenData [4096]byte 314 | // some random data 315 | writtenData[0] = 1 316 | writtenData[2] = 3 317 | writtenData[1023] = 10 318 | writtenData[2034] = 0xAE 319 | 320 | err = p.write(newPageId, writtenData[:]) 321 | if err != nil { 322 | t.Fatalf("failed to write the page: %s", err) 323 | } 324 | 325 | stat, err := p.file.Stat() 326 | if err != nil { 327 | t.Fatalf("failed to stat file: %s", err) 328 | } 329 | 330 | // metadata + free page + new page 331 | expectedSize := metadataSize + 4096*2 332 | if stat.Size() != int64(expectedSize) { 333 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 334 | } 335 | 336 | err = p.close() 337 | if err != nil { 338 | t.Fatalf("failed to close the pager: %s", err) 339 | } 340 | 341 | p, err = openPager(path.Join(dbDir, "test.db"), 4096) 342 | if err != nil { 343 | t.Fatalf("failed to initialize the pager: %s", err) 344 | } 345 | defer p.close() 346 | 347 | readData, err := p.read(newPageId) 348 | if err != nil { 349 | t.Fatalf("failed to read the data: %s", err) 350 | } 351 | 352 | if !bytes.Equal(writtenData[:], readData) { 353 | t.Fatalf("the written data is not equal to the read data") 354 | } 355 | } 356 | 357 | func TestReadNonExistentPageError(t *testing.T) { 358 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 359 | defer func() { 360 | if err := os.RemoveAll(dbDir); err != nil { 361 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 362 | } 363 | }() 364 | 365 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 366 | if err != nil { 367 | t.Fatalf("failed to initialize the pager: %s", err) 368 | } 369 | defer p.close() 370 | 371 | _, err = p.read(10) 372 | if err == nil { 373 | t.Fatal("must return an error for nonexistent page") 374 | } 375 | } 376 | 377 | func TestReadFreePageError(t *testing.T) { 378 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 379 | defer func() { 380 | if err := os.RemoveAll(dbDir); err != nil { 381 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 382 | } 383 | }() 384 | 385 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 386 | if err != nil { 387 | t.Fatalf("failed to initialize the pager: %s", err) 388 | } 389 | defer p.close() 390 | 391 | newPageId, err := p.new() 392 | if err != nil { 393 | t.Fatalf("failed to instantiate new page: %s", err) 394 | } 395 | err = p.free(newPageId) 396 | if err != nil { 397 | t.Fatalf("failed to free new page: %s", err) 398 | } 399 | 400 | _, err = p.read(newPageId) 401 | if err == nil { 402 | t.Fatal("must return an error for free page") 403 | } 404 | } 405 | 406 | func TestCreatedWithDifferentPageSize(t *testing.T) { 407 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 408 | defer func() { 409 | if err := os.RemoveAll(dbDir); err != nil { 410 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 411 | } 412 | }() 413 | 414 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 415 | if err != nil { 416 | t.Fatalf("failed to initialize the pager: %s", err) 417 | } 418 | defer p.close() 419 | 420 | _, err = openPager(path.Join(dbDir, "test.db"), 2000) 421 | if err == nil { 422 | t.Fatal("must return an error for the different page size") 423 | } 424 | } 425 | 426 | func TestReadPageInTruncatedFileError(t *testing.T) { 427 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 428 | defer func() { 429 | if err := os.RemoveAll(dbDir); err != nil { 430 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 431 | } 432 | }() 433 | 434 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 435 | if err != nil { 436 | t.Fatalf("failed to initialize the pager: %s", err) 437 | } 438 | defer p.close() 439 | 440 | newPageId, err := p.new() 441 | if err != nil { 442 | t.Fatalf("failed to instantiate new page: %s", err) 443 | } 444 | 445 | var data [4096]byte 446 | // some random data 447 | data[0] = 10 448 | data[2] = 30 449 | data[3017] = 25 450 | 451 | err = p.write(newPageId, data[:]) 452 | if err != nil { 453 | t.Fatalf("failed to write the page: %s", err) 454 | } 455 | 456 | // truncate file 457 | f, err := os.OpenFile(path.Join(dbDir, "test.db"), os.O_RDWR|os.O_CREATE, 0600) 458 | if err != nil { 459 | t.Fatalf("failed to open the file: %s", err) 460 | } 461 | 462 | info, err := f.Stat() 463 | if err != nil { 464 | t.Fatalf("failed to stat the file: %s", err) 465 | } 466 | 467 | err = f.Truncate(info.Size() - 1) 468 | if err != nil { 469 | t.Fatalf("failed to truncate the file: %s", err) 470 | } 471 | 472 | f.Close() 473 | 474 | _, err = p.read(newPageId) 475 | if err == nil { 476 | t.Fatal("must return an error for reading page in the truncated file") 477 | } 478 | } 479 | 480 | func TestFreeAlreadyFreePageError(t *testing.T) { 481 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 482 | defer func() { 483 | if err := os.RemoveAll(dbDir); err != nil { 484 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 485 | } 486 | }() 487 | 488 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 489 | if err != nil { 490 | t.Fatalf("failed to initialize the pager: %s", err) 491 | } 492 | defer p.close() 493 | 494 | freePageId, err := p.new() 495 | if err != nil { 496 | t.Fatalf("failed to new page: %s", err) 497 | } 498 | 499 | err = p.free(freePageId) 500 | if err != nil { 501 | t.Fatalf("failed to free page: %s", err) 502 | } 503 | 504 | err = p.free(freePageId) 505 | if err == nil { 506 | t.Fatal("must return an error for freeing the same page twice") 507 | } 508 | } 509 | 510 | func TestWriteToFreePageError(t *testing.T) { 511 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 512 | defer func() { 513 | if err := os.RemoveAll(dbDir); err != nil { 514 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 515 | } 516 | }() 517 | 518 | p, err := openPager(path.Join(dbDir, "test.db"), 4096) 519 | if err != nil { 520 | t.Fatalf("failed to initialize the pager: %s", err) 521 | } 522 | defer p.close() 523 | 524 | newPageId, err := p.new() 525 | if err != nil { 526 | t.Fatalf("failed to instantiate new page: %s", err) 527 | } 528 | 529 | err = p.free(newPageId) 530 | if err != nil { 531 | t.Fatalf("failed to free page: %s", err) 532 | } 533 | 534 | var data [4096]byte 535 | // some random data 536 | data[0] = 10 537 | data[2] = 30 538 | data[3017] = 25 539 | 540 | err = p.write(newPageId, data[:]) 541 | if err == nil { 542 | t.Fatal("must return an error for writing into the free page") 543 | } 544 | } 545 | 546 | func TestOpenPagerReturnsAnError(t *testing.T) { 547 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 548 | defer func() { 549 | if err := os.RemoveAll(dbDir); err != nil { 550 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 551 | } 552 | }() 553 | 554 | prevOpenFileFunc := openFile 555 | defer func() { 556 | if r := recover(); r == nil { 557 | openFile = prevOpenFileFunc 558 | } 559 | }() 560 | 561 | openFile = func(name string, flag int, perm os.FileMode) (*os.File, error) { 562 | return nil, fmt.Errorf("some error") 563 | } 564 | 565 | _, err := openPager(path.Join(dbDir, "test.db"), 4096) 566 | openFile = prevOpenFileFunc 567 | 568 | if err == nil { 569 | t.Fatal("must return the error for opening file with error") 570 | } 571 | } 572 | 573 | func TestErrorOnStat(t *testing.T) { 574 | mockedFile := newMockedFile() 575 | mockedFile.setErrorOnStat(fmt.Errorf("some error")) 576 | 577 | _, err := newPager(mockedFile, 4096) 578 | if err == nil { 579 | t.Fatal("must return the error for stat") 580 | } 581 | } 582 | 583 | func TestCompactFreesAllPagesAndFreePageListItself(t *testing.T) { 584 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 585 | defer func() { 586 | if err := os.RemoveAll(dbDir); err != nil { 587 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 588 | } 589 | }() 590 | 591 | var pageSize uint16 = 4096 592 | p, err := openPager(path.Join(dbDir, "test.db"), pageSize) 593 | if err != nil { 594 | t.Fatalf("failed to initialize the pager: %s", err) 595 | } 596 | defer p.close() 597 | 598 | iterations := int((pageSize / pageIdSize) + 1) 599 | ids := make([]uint32, 0) 600 | for i := 0; i <= iterations; i++ { 601 | freePageId, err := p.new() 602 | if err != nil { 603 | t.Fatalf("failed to new page: %s", err) 604 | } 605 | 606 | ids = append(ids, freePageId) 607 | } 608 | 609 | for _, freePageId := range ids { 610 | err = p.free(freePageId) 611 | if err != nil { 612 | t.Fatalf("failed to free page: %s", err) 613 | } 614 | } 615 | 616 | stat, err := p.file.Stat() 617 | if err != nil { 618 | t.Fatalf("failed to stat file: %s", err) 619 | } 620 | 621 | // metadata + iterations + 2 free pages 622 | expectedSize := metadataSize + 4096*(iterations+2) 623 | if stat.Size() != int64(expectedSize) { 624 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 625 | } 626 | 627 | p.close() 628 | 629 | p, err = openPager(path.Join(dbDir, "test.db"), pageSize) 630 | if err != nil { 631 | t.Fatalf("failed to initialize the pager: %s", err) 632 | } 633 | 634 | err = p.compact() 635 | if err != nil { 636 | t.Fatalf("failed to compact: %s", err) 637 | } 638 | 639 | err = p.flush() 640 | if err != nil { 641 | t.Fatalf("failed to flush: %s", err) 642 | } 643 | 644 | stat, err = p.file.Stat() 645 | if err != nil { 646 | t.Fatalf("failed to stat file: %s", err) 647 | } 648 | 649 | // metadata + 1 free page container 650 | expectedSize = metadataSize + int(pageSize) 651 | if stat.Size() != int64(expectedSize) { 652 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 653 | } 654 | } 655 | 656 | func TestCompactReadWriteAfterCompact(t *testing.T) { 657 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 658 | defer func() { 659 | if err := os.RemoveAll(dbDir); err != nil { 660 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 661 | } 662 | }() 663 | 664 | var pageSize uint16 = 4096 665 | p, err := openPager(path.Join(dbDir, "test.db"), pageSize) 666 | if err != nil { 667 | t.Fatalf("failed to initialize the pager: %s", err) 668 | } 669 | defer p.close() 670 | 671 | iterations := int((pageSize / pageIdSize) + 1) 672 | ids := make([]uint32, 0) 673 | for i := 0; i <= iterations; i++ { 674 | freePageId, err := p.new() 675 | if err != nil { 676 | t.Fatalf("failed to new page: %s", err) 677 | } 678 | 679 | ids = append(ids, freePageId) 680 | } 681 | 682 | for _, freePageId := range ids { 683 | err = p.free(freePageId) 684 | if err != nil { 685 | t.Fatalf("failed to free page: %s", err) 686 | } 687 | } 688 | 689 | stat, err := p.file.Stat() 690 | if err != nil { 691 | t.Fatalf("failed to stat file: %s", err) 692 | } 693 | 694 | // metadata + iterations + 2 free pages 695 | expectedSize := metadataSize + int(pageSize)*(iterations+2) 696 | if stat.Size() != int64(expectedSize) { 697 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 698 | } 699 | 700 | err = p.close() 701 | if err != nil { 702 | t.Fatalf("failed to close: %s", err) 703 | } 704 | 705 | p, err = openPager(path.Join(dbDir, "test.db"), pageSize) 706 | if err != nil { 707 | t.Fatalf("failed to initialize the pager: %s", err) 708 | } 709 | 710 | err = p.compact() 711 | if err != nil { 712 | t.Fatalf("failed to compact: %s", err) 713 | } 714 | 715 | err = p.flush() 716 | if err != nil { 717 | t.Fatalf("failed to flush: %s", err) 718 | } 719 | 720 | err = p.close() 721 | if err != nil { 722 | t.Fatalf("failed to close: %s", err) 723 | } 724 | 725 | p, err = openPager(path.Join(dbDir, "test.db"), pageSize) 726 | if err != nil { 727 | t.Fatalf("failed to initialize the pager: %s", err) 728 | } 729 | 730 | newPageId, err := p.new() 731 | if err != nil { 732 | t.Fatalf("failed to new page: %s", err) 733 | } 734 | 735 | var writtenData [4096]byte 736 | // some random data 737 | writtenData[0] = 1 738 | writtenData[2] = 3 739 | writtenData[1023] = 10 740 | writtenData[2034] = 0xAE 741 | 742 | err = p.write(newPageId, writtenData[:]) 743 | if err != nil { 744 | t.Fatalf("failed to write the page: %s", err) 745 | } 746 | 747 | stat, err = p.file.Stat() 748 | if err != nil { 749 | t.Fatalf("failed to stat file: %s", err) 750 | } 751 | 752 | // metadata + free page + new page 753 | expectedSize = metadataSize + int(pageSize)*2 754 | if stat.Size() != int64(expectedSize) { 755 | t.Fatalf("expected file size %d, but got %d", expectedSize, stat.Size()) 756 | } 757 | 758 | err = p.close() 759 | if err != nil { 760 | t.Fatalf("failed to close the pager: %s", err) 761 | } 762 | 763 | p, err = openPager(path.Join(dbDir, "test.db"), pageSize) 764 | if err != nil { 765 | t.Fatalf("failed to initialize the pager: %s", err) 766 | } 767 | defer p.close() 768 | 769 | readData, err := p.read(newPageId) 770 | if err != nil { 771 | t.Fatalf("failed to read the data: %s", err) 772 | } 773 | 774 | if !bytes.Equal(writtenData[:], readData) { 775 | t.Fatalf("the written data is not equal to the read data") 776 | } 777 | } 778 | 779 | type mockedFile struct { 780 | randomAccessFile 781 | 782 | errorOnStat error 783 | } 784 | 785 | func newMockedFile() *mockedFile { 786 | return new(mockedFile) 787 | } 788 | 789 | func (f *mockedFile) setErrorOnStat(errorOnStat error) { 790 | f.errorOnStat = errorOnStat 791 | } 792 | 793 | func (f *mockedFile) Stat() (os.FileInfo, error) { 794 | return nil, f.errorOnStat 795 | } 796 | -------------------------------------------------------------------------------- /records.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | const maxRecordSize = math.MaxUint32 9 | 10 | // records is an abstraction over the pages that 11 | // allows to gather pages into the records of the variable size. 12 | type records struct { 13 | pager *pager 14 | } 15 | 16 | // newRecords instantiates new instance of the records. 17 | func newRecords(pager *pager) *records { 18 | return &records{pager} 19 | } 20 | 21 | // new instantiates new record and returns its identifier or error. 22 | func (r *records) new() (uint32, error) { 23 | newPageId, err := r.pager.new() 24 | if err != nil { 25 | return 0, fmt.Errorf("failed to instantiate the first block page: %w", err) 26 | } 27 | 28 | return newPageId, nil 29 | } 30 | 31 | // write writes record and accepts variable data length, in case if data 32 | // length is larger than page size, it will require more pages and update them. 33 | func (r *records) write(recordId uint32, data []byte) error { 34 | recordSize := len(data) 35 | if recordSize >= maxRecordSize { 36 | return fmt.Errorf("the record size must be less than %d", maxRecordSize) 37 | } 38 | 39 | pageData, err := r.pager.read(recordId) 40 | if err != nil { 41 | return fmt.Errorf("failed to read the initial record page %d: %w", recordId, err) 42 | } 43 | nextId := nextRecordId(pageData) 44 | 45 | freeNextPage := true 46 | writeSize := recordSize 47 | if recordSize > (len(pageData) - 16) { 48 | freeNextPage = false 49 | writeSize = len(pageData) - 16 50 | } 51 | written := writeSize 52 | 53 | if freeNextPage { 54 | clearNextRecordId(pageData) 55 | } 56 | 57 | copy(pageData[8:16], encodeUint32(uint32(recordSize))) 58 | copy(pageData[16:], data[0:writeSize]) 59 | 60 | var newPageId uint32 61 | if nextId == 0 && written < recordSize { 62 | newPageId, err = r.pager.new() 63 | if err != nil { 64 | return fmt.Errorf("failed to initialize new page: %w", err) 65 | } 66 | 67 | setNextRecordId(pageData, newPageId) 68 | } 69 | 70 | if err := r.pager.write(recordId, pageData); err != nil { 71 | return fmt.Errorf("failed to write the page data for page %d: %w", recordId, err) 72 | } 73 | 74 | for nextId != 0 { 75 | pageId := nextId 76 | pageData, err := r.pager.read(pageId) 77 | if err != nil { 78 | return fmt.Errorf("failed to read page %d: %w", nextId, err) 79 | } 80 | 81 | nextId = nextRecordId(pageData) 82 | if freeNextPage { 83 | if err := r.pager.free(pageId); err != nil { 84 | return fmt.Errorf("failed to free page %d: %w", pageId, err) 85 | } 86 | 87 | continue 88 | } 89 | 90 | if written < recordSize { 91 | toWrite := recordSize - written 92 | if toWrite > (len(pageData) - 8) { 93 | toWrite = len(pageData) - 8 94 | } 95 | 96 | copy(pageData[8:], data[written:written+toWrite]) 97 | written += toWrite 98 | } 99 | 100 | freeNextPage = written >= recordSize 101 | if freeNextPage { 102 | clearNextRecordId(pageData) 103 | } 104 | 105 | if nextId == 0 && written < recordSize { 106 | newPageId, err = r.pager.new() 107 | if err != nil { 108 | return fmt.Errorf("failed to initialize new page: %w", err) 109 | } 110 | 111 | setNextRecordId(pageData, newPageId) 112 | } 113 | 114 | if err := r.pager.write(pageId, pageData); err != nil { 115 | return fmt.Errorf("failed to write page %d: %w", pageId, err) 116 | } 117 | } 118 | 119 | for written < recordSize { 120 | pageId := newPageId 121 | pageData := make([]byte, r.pager.pageSize) 122 | 123 | toWrite := recordSize - written 124 | if toWrite > (len(pageData) - 8) { 125 | toWrite = len(pageData) - 8 126 | } 127 | 128 | copy(pageData[8:], data[written:written+toWrite]) 129 | written += toWrite 130 | 131 | if written < recordSize { 132 | newPageId, err = r.pager.new() 133 | if err != nil { 134 | return fmt.Errorf("failed to initialize new page: %w", err) 135 | } 136 | 137 | setNextRecordId(pageData, newPageId) 138 | } 139 | 140 | if err := r.pager.write(pageId, pageData); err != nil { 141 | return fmt.Errorf("failed to write page %d: %w", newPageId, err) 142 | } 143 | } 144 | 145 | return nil 146 | } 147 | 148 | func reset(data []byte) { 149 | for i := 0; i < len(data); i++ { 150 | data[i] = 0 151 | } 152 | } 153 | 154 | // Free frees all pages used by the record. 155 | func (r *records) free(recordId uint32) error { 156 | nextId := recordId 157 | for nextId != 0 { 158 | pageId := nextId 159 | data, err := r.pager.read(pageId) 160 | if err != nil { 161 | return fmt.Errorf("failed to read record page %d: %w", pageId, err) 162 | } 163 | nextId = nextRecordId(data) 164 | 165 | err = r.pager.free(pageId) 166 | if err != nil { 167 | return fmt.Errorf("failed to free page %d: %w", pageId, err) 168 | } 169 | } 170 | 171 | return nil 172 | } 173 | 174 | // read reads all the data in the record pages and returns it. It is not aligned 175 | // to the page size. 176 | func (r *records) read(recordId uint32) ([]byte, error) { 177 | data, err := r.pager.read(recordId) 178 | if err != nil { 179 | return nil, fmt.Errorf("failed to read initial record page: %w", err) 180 | } 181 | 182 | recordData := make([]byte, recordSize(data)) 183 | copy(recordData, data[16:]) 184 | for nextId, pageCount := nextRecordId(data), 1; nextId != 0; nextId, pageCount = nextRecordId(data), pageCount+1 { 185 | data, err = r.pager.read(nextId) 186 | if err != nil { 187 | return nil, fmt.Errorf("failed to read page %d: %w", nextId, err) 188 | } 189 | 190 | from := pageCount*(int(r.pager.pageSize)-8) - 8 191 | copy(recordData[from:], data[8:]) 192 | } 193 | 194 | return recordData, nil 195 | } 196 | 197 | func setNextRecordId(pageData []byte, nextId uint32) { 198 | copy(pageData[0:8], encodeUint32(nextId)) 199 | } 200 | 201 | func clearNextRecordId(pageData []byte) { 202 | reset(pageData[0:8]) 203 | } 204 | 205 | func recordSize(pageData []byte) uint32 { 206 | return decodeUint32(pageData[8:16]) 207 | } 208 | 209 | func nextRecordId(pageData []byte) uint32 { 210 | return decodeUint32(pageData[0:8]) 211 | } 212 | -------------------------------------------------------------------------------- /records_test.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path" 9 | "testing" 10 | ) 11 | 12 | func TestWriteLargerThanOnePageWithNewPages(t *testing.T) { 13 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 14 | defer func() { 15 | if err := os.RemoveAll(dbDir); err != nil { 16 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 17 | } 18 | }() 19 | 20 | p, err := openPager(path.Join(dbDir, "test.db"), 32) 21 | if err != nil { 22 | t.Fatalf("failed to initialize the pager: %s", err) 23 | } 24 | defer p.close() 25 | 26 | r := newRecords(p) 27 | newRecordId, err := r.new() 28 | if err != nil { 29 | t.Fatalf("failed to new record: %s", err) 30 | } 31 | 32 | writeData := make([]byte, 100) 33 | for i := 0; i < len(writeData); i++ { 34 | writeData[i] = byte(i % 256) 35 | } 36 | 37 | err = r.write(newRecordId, writeData) 38 | if err != nil { 39 | t.Fatalf("failed to write the record: %s", err) 40 | } 41 | 42 | err = p.close() 43 | if err != nil { 44 | t.Fatalf("failed to close the pager: %s", err) 45 | } 46 | 47 | p, err = openPager(path.Join(dbDir, "test.db"), 32) 48 | if err != nil { 49 | t.Fatalf("failed to initialize the pager: %s", err) 50 | } 51 | 52 | r = newRecords(p) 53 | readData, err := r.read(newRecordId) 54 | if err != nil { 55 | t.Fatalf("failed to read the data: %s", err) 56 | } 57 | 58 | if !bytes.Equal(writeData, readData) { 59 | t.Fatalf("the written data is not equal to the read data") 60 | } 61 | } 62 | 63 | func TestFreeLargerThanOnePage(t *testing.T) { 64 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 65 | defer func() { 66 | if err := os.RemoveAll(dbDir); err != nil { 67 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 68 | } 69 | }() 70 | 71 | p, err := openPager(path.Join(dbDir, "test.db"), 32) 72 | if err != nil { 73 | t.Fatalf("failed to initialize the pager: %s", err) 74 | } 75 | defer p.close() 76 | 77 | r := newRecords(p) 78 | newRecordId, err := r.new() 79 | if err != nil { 80 | t.Fatalf("failed to new record: %s", err) 81 | } 82 | 83 | writeData := make([]byte, 100) 84 | for i := 0; i < len(writeData); i++ { 85 | writeData[i] = byte(i % 256) 86 | } 87 | 88 | err = r.write(newRecordId, writeData) 89 | if err != nil { 90 | t.Fatalf("failed to write the record: %s", err) 91 | } 92 | 93 | err = p.close() 94 | if err != nil { 95 | t.Fatalf("failed to close the pager: %s", err) 96 | } 97 | 98 | p, err = openPager(path.Join(dbDir, "test.db"), 32) 99 | if err != nil { 100 | t.Fatalf("failed to initialize the pager: %s", err) 101 | } 102 | 103 | r = newRecords(p) 104 | 105 | err = r.free(newRecordId) 106 | if err != nil { 107 | t.Fatalf("failed to free the record: %s", err) 108 | } 109 | 110 | err = p.close() 111 | if err != nil { 112 | t.Fatalf("failed to close the pager: %s", err) 113 | } 114 | 115 | p, err = openPager(path.Join(dbDir, "test.db"), 32) 116 | if err != nil { 117 | t.Fatalf("failed to initialize the pager: %s", err) 118 | } 119 | 120 | if len(p.isFreePage) < 5 { 121 | t.Fatalf("must have at least 3 pages, but has %d", len(p.isFreePage)) 122 | } 123 | 124 | err = p.close() 125 | if err != nil { 126 | t.Fatalf("failed to close the pager: %s", err) 127 | } 128 | } 129 | 130 | func TestWriteLargerThanOnePageRewritesWithLargerData(t *testing.T) { 131 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 132 | defer func() { 133 | if err := os.RemoveAll(dbDir); err != nil { 134 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 135 | } 136 | }() 137 | 138 | p, err := openPager(path.Join(dbDir, "test.db"), 32) 139 | if err != nil { 140 | t.Fatalf("failed to initialize the pager: %s", err) 141 | } 142 | defer p.close() 143 | 144 | r := newRecords(p) 145 | newRecordId, err := r.new() 146 | if err != nil { 147 | t.Fatalf("failed to new record: %s", err) 148 | } 149 | 150 | writeData := make([]byte, 100) 151 | for i := 0; i < len(writeData); i++ { 152 | writeData[i] = byte(i % 200) 153 | } 154 | 155 | err = r.write(newRecordId, writeData) 156 | if err != nil { 157 | t.Fatalf("failed to record the page: %s", err) 158 | } 159 | 160 | writeData = make([]byte, 200) 161 | for i := 0; i < len(writeData); i++ { 162 | writeData[i] = byte((i + 1) % 150) 163 | } 164 | 165 | err = r.write(newRecordId, writeData) 166 | if err != nil { 167 | t.Fatalf("failed to record the page: %s", err) 168 | } 169 | 170 | err = p.close() 171 | if err != nil { 172 | t.Fatalf("failed to close the pager: %s", err) 173 | } 174 | 175 | p, err = openPager(path.Join(dbDir, "test.db"), 32) 176 | if err != nil { 177 | t.Fatalf("failed to initialize the pager: %s", err) 178 | } 179 | 180 | r = newRecords(p) 181 | readData, err := r.read(newRecordId) 182 | if err != nil { 183 | t.Fatalf("failed to read the data: %s", err) 184 | } 185 | 186 | if !bytes.Equal(writeData, readData) { 187 | t.Fatalf("the written data is not equal to the read data") 188 | } 189 | } 190 | 191 | func TestWriteLargerThanOnePageRewritesWithLessData(t *testing.T) { 192 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 193 | defer func() { 194 | if err := os.RemoveAll(dbDir); err != nil { 195 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 196 | } 197 | }() 198 | 199 | p, err := openPager(path.Join(dbDir, "test.db"), 32) 200 | if err != nil { 201 | t.Fatalf("failed to initialize the pager: %s", err) 202 | } 203 | defer p.close() 204 | 205 | r := newRecords(p) 206 | newRecordId, err := r.new() 207 | if err != nil { 208 | t.Fatalf("failed to new record: %s", err) 209 | } 210 | 211 | writeData := make([]byte, 200) 212 | for i := 0; i < len(writeData); i++ { 213 | writeData[i] = byte(i % 200) 214 | } 215 | 216 | err = r.write(newRecordId, writeData) 217 | if err != nil { 218 | t.Fatalf("failed to record the page: %s", err) 219 | } 220 | 221 | writeData = make([]byte, 100) 222 | for i := 0; i < len(writeData); i++ { 223 | writeData[i] = byte((i + 1) % 150) 224 | } 225 | 226 | err = r.write(newRecordId, writeData) 227 | if err != nil { 228 | t.Fatalf("failed to record the page: %s", err) 229 | } 230 | 231 | err = p.close() 232 | if err != nil { 233 | t.Fatalf("failed to close the pager: %s", err) 234 | } 235 | 236 | p, err = openPager(path.Join(dbDir, "test.db"), 32) 237 | if err != nil { 238 | t.Fatalf("failed to initialize the pager: %s", err) 239 | } 240 | 241 | r = newRecords(p) 242 | readData, err := r.read(newRecordId) 243 | if err != nil { 244 | t.Fatalf("failed to read the data: %s", err) 245 | } 246 | 247 | if !bytes.Equal(writeData, readData) { 248 | t.Fatalf("the written data is not equal to the read data") 249 | } 250 | } 251 | 252 | func TestWriteTwoPagesAndRewriteWithOnePage(t *testing.T) { 253 | dbDir, _ := ioutil.TempDir(os.TempDir(), "example") 254 | defer func() { 255 | if err := os.RemoveAll(dbDir); err != nil { 256 | panic(fmt.Errorf("failed to remove %s: %w", dbDir, err)) 257 | } 258 | }() 259 | 260 | p, err := openPager(path.Join(dbDir, "test.db"), 32) 261 | if err != nil { 262 | t.Fatalf("failed to initialize the pager: %s", err) 263 | } 264 | defer p.close() 265 | 266 | r := newRecords(p) 267 | newRecordId, err := r.new() 268 | if err != nil { 269 | t.Fatalf("failed to new record: %s", err) 270 | } 271 | 272 | writeData := make([]byte, 40) 273 | for i := 0; i < len(writeData); i++ { 274 | writeData[i] = byte(i % 200) 275 | } 276 | 277 | err = r.write(newRecordId, writeData) 278 | if err != nil { 279 | t.Fatalf("failed to record the page: %s", err) 280 | } 281 | 282 | writeData = make([]byte, 10) 283 | for i := 0; i < len(writeData); i++ { 284 | writeData[i] = byte((i + 1) % 150) 285 | } 286 | 287 | err = r.write(newRecordId, writeData) 288 | if err != nil { 289 | t.Fatalf("failed to write the record: %s", err) 290 | } 291 | 292 | err = p.close() 293 | if err != nil { 294 | t.Fatalf("failed to close the pager: %s", err) 295 | } 296 | 297 | p, err = openPager(path.Join(dbDir, "test.db"), 32) 298 | if err != nil { 299 | t.Fatalf("failed to initialize the pager: %s", err) 300 | } 301 | 302 | r = newRecords(p) 303 | readData, err := r.read(newRecordId) 304 | if err != nil { 305 | t.Fatalf("failed to read the data: %s", err) 306 | } 307 | 308 | if !bytes.Equal(writeData, readData) { 309 | t.Fatalf("the written data is not equal to the read data") 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /storage.go: -------------------------------------------------------------------------------- 1 | package fbptree 2 | 3 | import "fmt" 4 | 5 | // storage an abstraction over the storing mechanism. 6 | type storage struct { 7 | pager *pager 8 | records *records 9 | } 10 | 11 | func newStorage(path string, pageSize uint16) (*storage, error) { 12 | pager, err := openPager(path, pageSize) 13 | if err != nil { 14 | return nil, fmt.Errorf("failed to instantiate the pager: %w", err) 15 | } 16 | 17 | return &storage{pager: pager, records: newRecords(pager)}, nil 18 | } 19 | 20 | func (s *storage) loadMetadata() (*treeMetadata, error) { 21 | data, err := s.pager.readCustomMetadata() 22 | if err != nil { 23 | return nil, fmt.Errorf("failed to read metadata: %w", err) 24 | } 25 | 26 | if data == nil { 27 | return nil, nil 28 | } 29 | 30 | metadata, err := decodeTreeMetadata(data) 31 | if err != nil { 32 | return nil, fmt.Errorf("failed to decode tree metadata: %w", err) 33 | } 34 | 35 | return metadata, nil 36 | } 37 | 38 | func (s *storage) updateMetadata(metadata *treeMetadata) error { 39 | data := encodeTreeMetadata(metadata) 40 | err := s.pager.writeCustomMetadata(data) 41 | if err != nil { 42 | return fmt.Errorf("failed to write metadata: %w", err) 43 | } 44 | 45 | return nil 46 | } 47 | 48 | func (s *storage) deleteMetadata() error { 49 | var empty [0]byte 50 | err := s.pager.writeCustomMetadata(empty[:]) 51 | if err != nil { 52 | return fmt.Errorf("failed to write metadata: %w", err) 53 | } 54 | 55 | return nil 56 | } 57 | 58 | func (s *storage) newNode() (uint32, error) { 59 | recordID, err := s.records.new() 60 | if err != nil { 61 | return 0, fmt.Errorf("failed to instantiate new record: %w", err) 62 | } 63 | 64 | return recordID, nil 65 | } 66 | 67 | func (s *storage) updateNodeByID(nodeID uint32, node *node) error { 68 | data := encodeNode(node) 69 | err := s.records.write(nodeID, data) 70 | 71 | if err != nil { 72 | return fmt.Errorf("failed to write the record %d: %w", nodeID, err) 73 | } 74 | 75 | return nil 76 | } 77 | 78 | func (s *storage) loadNodeByID(nodeID uint32) (*node, error) { 79 | data, err := s.records.read(nodeID) 80 | if err != nil { 81 | return nil, fmt.Errorf("failed to read record %d: %w", nodeID, err) 82 | } 83 | 84 | node, err := decodeNode(data) 85 | if err != nil { 86 | return nil, fmt.Errorf("failed to decode record %d: %w", nodeID, err) 87 | } 88 | 89 | return node, nil 90 | } 91 | 92 | func (s *storage) deleteNodeByID(nodeID uint32) error { 93 | err := s.records.free(nodeID) 94 | if err != nil { 95 | return fmt.Errorf("failed to free the record %d: %w", nodeID, err) 96 | } 97 | 98 | return nil 99 | } 100 | 101 | // Close closes the tree and free the underlying resources. 102 | func (s *storage) close() error { 103 | if err := s.pager.close(); err != nil { 104 | return fmt.Errorf("failed to close the pager: %w", err) 105 | } 106 | 107 | return nil 108 | } 109 | --------------------------------------------------------------------------------