├── .gitignore ├── Part 1 ├── const.go ├── main.go ├── dal.go └── freelist.go ├── Part 2 ├── const.go ├── meta.go ├── main.go ├── freelist.go └── dal.go ├── Part 3 ├── const.go ├── mainTest ├── main.go ├── meta.go ├── freelist.go ├── dal.go └── node.go ├── Part 4 ├── const.go ├── main.go ├── meta.go ├── freelist.go ├── collection.go ├── dal.go └── node.go ├── Part 5 ├── const.go ├── main.go ├── meta.go ├── freelist.go ├── collection.go ├── dal.go └── node.go ├── Part 6 ├── const.go ├── db.go ├── meta.go ├── tx.go ├── freelist.go ├── dal.go ├── collection.go └── node.go ├── README.md └── Part 7 ├── const.go ├── db.go ├── main.go ├── meta.go ├── freelist.go ├── tx.go ├── dal.go ├── collection.go └── node.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.so 4 | .idea 5 | .vscode -------------------------------------------------------------------------------- /Part 1/const.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const ( 4 | pageNumSize = 8 5 | ) 6 | -------------------------------------------------------------------------------- /Part 2/const.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const ( 4 | pageNumSize = 8 5 | ) 6 | -------------------------------------------------------------------------------- /Part 3/const.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const ( 4 | pageNumSize = 8 5 | ) 6 | -------------------------------------------------------------------------------- /Part 4/const.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const ( 4 | nodeHeaderSize = 3 5 | pageNumSize = 8 6 | ) 7 | -------------------------------------------------------------------------------- /Part 5/const.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | const ( 4 | nodeHeaderSize = 3 5 | pageNumSize = 8 6 | ) 7 | -------------------------------------------------------------------------------- /Part 3/mainTest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amit-davidson/Building-a-NoSQL-database-from-zero/HEAD/Part 3/mainTest -------------------------------------------------------------------------------- /Part 6/const.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "errors" 4 | 5 | const ( 6 | nodeHeaderSize = 3 7 | pageNumSize = 8 8 | ) 9 | 10 | var writeInsideReadTxErr = errors.New("can't perform a write operation inside a read transaction") 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repo accompanies my blog on writing a database in Go. 
2 | You can checkout the completed repo of the [database](https://github.com/amit-davidson/LibraDB) and the [blog post](https://betterprogramming.pub/build-a-nosql-database-from-the-scratch-in-1000-lines-of-code-8ed1c15ed924) 3 | -------------------------------------------------------------------------------- /Part 7/const.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "errors" 4 | 5 | const ( 6 | magicNumberSize = 4 7 | counterSize = 4 8 | nodeHeaderSize = 3 9 | 10 | collectionSize = 16 11 | pageNumSize = 8 12 | ) 13 | 14 | var writeInsideReadTxErr = errors.New("can't perform a write operation inside a read transaction") -------------------------------------------------------------------------------- /Part 1/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "os" 4 | 5 | func main() { 6 | // initialize db 7 | dal, _ := newDal("db.db", os.Getpagesize()) 8 | 9 | // create a new page 10 | p := dal.allocateEmptyPage() 11 | p.num = dal.getNextPage() 12 | copy(p.data[:], "data") 13 | 14 | // commit it 15 | _ = dal.writePage(p) 16 | } 17 | -------------------------------------------------------------------------------- /Part 3/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func main() { 6 | dal, _ := newDal("./mainTest") 7 | 8 | node, _ := dal.getNode(dal.root) 9 | node.dal = dal 10 | index, containingNode, _ := node.findKey([]byte("Key1")) 11 | res := containingNode.items[index] 12 | 13 | fmt.Printf("key is: %s, value is: %s\n", res.key, res.value) 14 | _ = dal.close() 15 | } 16 | -------------------------------------------------------------------------------- /Part 2/meta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | const ( 6 | metaPageNum = 0 7 | 
) 8 | 9 | // meta is the meta page of the db 10 | type meta struct { 11 | freelistPage pgnum 12 | } 13 | 14 | func newEmptyMeta() *meta { 15 | return &meta{} 16 | } 17 | 18 | func (m *meta) serialize(buf []byte) { 19 | pos := 0 20 | 21 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.freelistPage)) 22 | pos += pageNumSize 23 | } 24 | 25 | func (m *meta) deserialize(buf []byte) { 26 | pos := 0 27 | m.freelistPage = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 28 | pos += pageNumSize 29 | } 30 | -------------------------------------------------------------------------------- /Part 6/db.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "sync" 6 | ) 7 | 8 | type DB struct { 9 | rwlock sync.RWMutex // Allows only one writer at a time 10 | *dal 11 | } 12 | 13 | func Open(path string, options *Options) (*DB, error) { 14 | options.pageSize = os.Getpagesize() 15 | dal, err := newDal(path, options) 16 | if err != nil { 17 | return nil, err 18 | } 19 | 20 | db := &DB{ 21 | sync.RWMutex{}, 22 | dal, 23 | } 24 | 25 | return db, nil 26 | } 27 | 28 | func (db *DB) Close() error { 29 | return db.close() 30 | } 31 | 32 | func (db *DB) ReadTx() *tx { 33 | db.rwlock.RLock() 34 | return newTx(db, false) 35 | } 36 | 37 | func (db *DB) WriteTx() *tx { 38 | db.rwlock.Lock() 39 | return newTx(db, true) 40 | } -------------------------------------------------------------------------------- /Part 7/db.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "sync" 6 | ) 7 | 8 | type DB struct { 9 | rwlock sync.RWMutex // Allows only one writer at a time 10 | *dal 11 | } 12 | 13 | func Open(path string, options *Options) (*DB, error) { 14 | options.pageSize = os.Getpagesize() 15 | dal, err := newDal(path, options) 16 | if err != nil { 17 | return nil, err 18 | } 19 | 20 | db := &DB{ 21 | sync.RWMutex{}, 22 | dal, 23 | } 24 | 25 | 
return db, nil 26 | } 27 | 28 | func (db *DB) Close() error { 29 | return db.close() 30 | } 31 | 32 | func (db *DB) ReadTx() *tx { 33 | db.rwlock.RLock() 34 | return newTx(db, false) 35 | } 36 | 37 | func (db *DB) WriteTx() *tx { 38 | db.rwlock.Lock() 39 | return newTx(db, true) 40 | } -------------------------------------------------------------------------------- /Part 4/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | func main() { 9 | options := &Options{ 10 | pageSize: os.Getpagesize(), 11 | MinFillPercent: 0.0125, 12 | MaxFillPercent: 0.025, 13 | } 14 | dal, _ := newDal("./mainTest", options) 15 | 16 | c := newCollection([]byte("collection1"), dal.root) 17 | c.dal = dal 18 | 19 | _ = c.Put([]byte("Key1"), []byte("Value1")) 20 | _ = c.Put([]byte("Key2"), []byte("Value2")) 21 | _ = c.Put([]byte("Key3"), []byte("Value3")) 22 | _ = c.Put([]byte("Key4"), []byte("Value4")) 23 | _ = c.Put([]byte("Key5"), []byte("Value5")) 24 | _ = c.Put([]byte("Key6"), []byte("Value6")) 25 | item, _ := c.Find([]byte("Key1")) 26 | 27 | fmt.Printf("key is: %s, value is: %s\n", item.key, item.value) 28 | _ = dal.close() 29 | } 30 | -------------------------------------------------------------------------------- /Part 7/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func main() { 6 | db, _ := Open("Demo7", &Options{MinFillPercent: 0.5, MaxFillPercent: 1.0}) 7 | 8 | tx := db.WriteTx() 9 | collectionName := "Demo7Collection" 10 | createdCollection, _ := tx.CreateCollection([]byte(collectionName)) 11 | 12 | newKey := []byte("key0") 13 | newVal := []byte("value0") 14 | _ = createdCollection.Put(newKey, newVal) 15 | 16 | _ = tx.Commit() 17 | _ = db.Close() 18 | 19 | db, _ = Open("Demo7", &Options{MinFillPercent: 0.5, MaxFillPercent: 1.0}) 20 | tx = db.ReadTx() 21 | createdCollection, _ = 
tx.GetCollection([]byte(collectionName)) 22 | 23 | item, _ := createdCollection.Find(newKey) 24 | 25 | _ = tx.Commit() 26 | _ = db.Close() 27 | 28 | fmt.Printf("key is: %s, value is: %s\n", item.key, item.value) 29 | } 30 | -------------------------------------------------------------------------------- /Part 2/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | // initialize db 5 | dal, _ := newDal("db.db") 6 | 7 | // create a new page 8 | p := dal.allocateEmptyPage() 9 | p.num = dal.getNextPage() 10 | copy(p.data[:], "data") 11 | 12 | // commit it 13 | _ = dal.writePage(p) 14 | _, _ = dal.writeFreelist() 15 | 16 | // Close the db 17 | _ = dal.close() 18 | 19 | // We expect the freelist state was saved, so we write to 20 | // page number 3 and not overwrite the one at number 2 21 | dal, _ = newDal("db.db") 22 | p = dal.allocateEmptyPage() 23 | p.num = dal.getNextPage() 24 | copy(p.data[:], "data2") 25 | _ = dal.writePage(p) 26 | 27 | // Create a page and free it so the released pages will be updated 28 | pageNum := dal.getNextPage() 29 | dal.releasePage(pageNum) 30 | 31 | // commit it 32 | _, _ = dal.writeFreelist() 33 | } 34 | -------------------------------------------------------------------------------- /Part 5/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | func main() { 9 | options := &Options{ 10 | pageSize: os.Getpagesize(), 11 | MinFillPercent: 0.0125, 12 | MaxFillPercent: 0.025, 13 | } 14 | dal, _ := newDal("./mainTest", options) 15 | 16 | c := newCollection([]byte("collection1"), dal.root) 17 | c.dal = dal 18 | 19 | _ = c.Put([]byte("Key1"), []byte("Value1")) 20 | _ = c.Put([]byte("Key2"), []byte("Value2")) 21 | _ = c.Put([]byte("Key3"), []byte("Value3")) 22 | _ = c.Put([]byte("Key4"), []byte("Value4")) 23 | _ = c.Put([]byte("Key5"), []byte("Value5")) 24 | _ = 
c.Put([]byte("Key6"), []byte("Value6")) 25 | item, _ := c.Find([]byte("Key1")) 26 | 27 | fmt.Printf("key is: %s, value is: %s\n", item.key, item.value) 28 | 29 | _ = c.Remove([]byte("Key1")) 30 | item, _ = c.Find([]byte("Key1")) 31 | 32 | dal.writeFreelist() 33 | fmt.Printf("item is: %+v\n", item) 34 | _ = dal.close() 35 | } 36 | -------------------------------------------------------------------------------- /Part 3/meta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | const ( 6 | metaPageNum = 0 7 | ) 8 | 9 | // meta is the meta page of the db 10 | type meta struct { 11 | // The database has a root collection that holds all the collections in the database. It is called root and the 12 | // root property of meta holds page number containing the root of collections collection. The keys are the 13 | // collections names and the values are the page number of the root of each collection. Then, once the collection 14 | // and the root page are located, a search inside a collection can be made. 
15 | root pgnum 16 | freelistPage pgnum 17 | } 18 | 19 | func newEmptyMeta() *meta { 20 | return &meta{} 21 | } 22 | 23 | func (m *meta) serialize(buf []byte) { 24 | pos := 0 25 | 26 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.root)) 27 | pos += pageNumSize 28 | 29 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.freelistPage)) 30 | pos += pageNumSize 31 | } 32 | 33 | func (m *meta) deserialize(buf []byte) { 34 | pos := 0 35 | 36 | m.root = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 37 | pos += pageNumSize 38 | 39 | m.freelistPage = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 40 | pos += pageNumSize 41 | } 42 | -------------------------------------------------------------------------------- /Part 4/meta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | const ( 6 | metaPageNum = 0 7 | ) 8 | 9 | // meta is the meta page of the db 10 | type meta struct { 11 | // The database has a root collection that holds all the collections in the database. It is called root and the 12 | // root property of meta holds page number containing the root of collections collection. The keys are the 13 | // collections names and the values are the page number of the root of each collection. Then, once the collection 14 | // and the root page are located, a search inside a collection can be made. 
15 | root pgnum 16 | freelistPage pgnum 17 | } 18 | 19 | func newEmptyMeta() *meta { 20 | return &meta{} 21 | } 22 | 23 | func (m *meta) serialize(buf []byte) { 24 | pos := 0 25 | 26 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.root)) 27 | pos += pageNumSize 28 | 29 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.freelistPage)) 30 | pos += pageNumSize 31 | } 32 | 33 | func (m *meta) deserialize(buf []byte) { 34 | pos := 0 35 | 36 | m.root = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 37 | pos += pageNumSize 38 | 39 | m.freelistPage = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 40 | pos += pageNumSize 41 | } 42 | -------------------------------------------------------------------------------- /Part 5/meta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | const ( 6 | metaPageNum = 0 7 | ) 8 | 9 | // meta is the meta page of the db 10 | type meta struct { 11 | // The database has a root collection that holds all the collections in the database. It is called root and the 12 | // root property of meta holds page number containing the root of collections collection. The keys are the 13 | // collections names and the values are the page number of the root of each collection. Then, once the collection 14 | // and the root page are located, a search inside a collection can be made. 
15 | root pgnum 16 | freelistPage pgnum 17 | } 18 | 19 | func newEmptyMeta() *meta { 20 | return &meta{} 21 | } 22 | 23 | func (m *meta) serialize(buf []byte) { 24 | pos := 0 25 | 26 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.root)) 27 | pos += pageNumSize 28 | 29 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.freelistPage)) 30 | pos += pageNumSize 31 | } 32 | 33 | func (m *meta) deserialize(buf []byte) { 34 | pos := 0 35 | 36 | m.root = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 37 | pos += pageNumSize 38 | 39 | m.freelistPage = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 40 | pos += pageNumSize 41 | } 42 | -------------------------------------------------------------------------------- /Part 6/meta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | const ( 6 | metaPageNum = 0 7 | ) 8 | 9 | // meta is the meta page of the db 10 | type meta struct { 11 | // The database has a root collection that holds all the collections in the database. It is called root and the 12 | // root property of meta holds page number containing the root of collections collection. The keys are the 13 | // collections names and the values are the page number of the root of each collection. Then, once the collection 14 | // and the root page are located, a search inside a collection can be made. 
15 | root pgnum 16 | freelistPage pgnum 17 | } 18 | 19 | func newEmptyMeta() *meta { 20 | return &meta{} 21 | } 22 | 23 | func (m *meta) serialize(buf []byte) { 24 | pos := 0 25 | 26 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.root)) 27 | pos += pageNumSize 28 | 29 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.freelistPage)) 30 | pos += pageNumSize 31 | } 32 | 33 | func (m *meta) deserialize(buf []byte) { 34 | pos := 0 35 | 36 | m.root = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 37 | pos += pageNumSize 38 | 39 | m.freelistPage = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 40 | pos += pageNumSize 41 | } 42 | -------------------------------------------------------------------------------- /Part 1/dal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | type pgnum uint64 9 | 10 | type page struct { 11 | num pgnum 12 | data []byte 13 | } 14 | 15 | type dal struct { 16 | file *os.File 17 | pageSize int 18 | 19 | *freelist 20 | } 21 | 22 | func newDal(path string, pageSize int) (*dal, error) { 23 | file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 24 | if err != nil { 25 | return nil, err 26 | } 27 | dal := &dal{ 28 | file, 29 | pageSize, 30 | newFreelist(), 31 | } 32 | return dal, nil 33 | } 34 | 35 | func (d *dal) close() error { 36 | if d.file != nil { 37 | err := d.file.Close() 38 | if err != nil { 39 | return fmt.Errorf("could not close file: %s", err) 40 | } 41 | d.file = nil 42 | } 43 | 44 | return nil 45 | } 46 | 47 | func (d *dal) allocateEmptyPage() *page { 48 | return &page{ 49 | data: make([]byte, d.pageSize, d.pageSize), 50 | } 51 | } 52 | 53 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 54 | p := d.allocateEmptyPage() 55 | 56 | offset := int(pageNum) * d.pageSize 57 | _, err := d.file.ReadAt(p.data, int64(offset)) 58 | if err != nil { 59 | return nil, err 60 | } 61 | return p, err 62 | } 63 | 64 | func (d *dal) 
writePage(p *page) error { 65 | offset := int64(p.num) * int64(d.pageSize) 66 | _, err := d.file.WriteAt(p.data, offset) 67 | return err 68 | } -------------------------------------------------------------------------------- /Part 7/meta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | const ( 6 | magicNumber uint32 = 0xD00DB00D 7 | metaPageNum = 0 8 | ) 9 | 10 | // meta is the meta page of the db 11 | type meta struct { 12 | // The database has a root collection that holds all the collections in the database. It is called root and the 13 | // root property of meta holds page number containing the root of collections collection. The keys are the 14 | // collections names and the values are the page number of the root of each collection. Then, once the collection 15 | // and the root page are located, a search inside a collection can be made. 16 | root pgnum 17 | freelistPage pgnum 18 | } 19 | 20 | func newEmptyMeta() *meta { 21 | return &meta{} 22 | } 23 | 24 | func (m *meta) serialize(buf []byte) { 25 | pos := 0 26 | binary.LittleEndian.PutUint32(buf[pos:], magicNumber) 27 | pos += magicNumberSize 28 | 29 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.root)) 30 | pos += pageNumSize 31 | 32 | binary.LittleEndian.PutUint64(buf[pos:], uint64(m.freelistPage)) 33 | pos += pageNumSize 34 | } 35 | 36 | func (m *meta) deserialize(buf []byte) { 37 | pos := 0 38 | magicNumberRes := binary.LittleEndian.Uint32(buf[pos:]) 39 | pos += magicNumberSize 40 | 41 | if magicNumberRes != magicNumber { 42 | panic("The file is not a libra db file") 43 | } 44 | 45 | m.root = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 46 | pos += pageNumSize 47 | 48 | m.freelistPage = pgnum(binary.LittleEndian.Uint64(buf[pos:])) 49 | pos += pageNumSize 50 | } 51 | -------------------------------------------------------------------------------- /Part 6/tx.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | type tx struct { 4 | dirtyNodes map[pgnum]*Node 5 | pagesToDelete []pgnum 6 | 7 | // new pages allocated during the transaction. They will be released if rollback is called. 8 | allocatedPageNums []pgnum 9 | 10 | write bool 11 | 12 | db *DB 13 | } 14 | 15 | func newTx(db *DB, write bool) *tx { 16 | return &tx{ 17 | map[pgnum]*Node{}, 18 | make([]pgnum, 0), 19 | make([]pgnum, 0), 20 | write, 21 | db, 22 | } 23 | } 24 | 25 | func (tx *tx) newNode(items []*Item, childNodes []pgnum) *Node { 26 | node := NewEmptyNode() 27 | node.items = items 28 | node.childNodes = childNodes 29 | node.pageNum = tx.db.getNextPage() 30 | node.tx = tx 31 | 32 | node.tx.allocatedPageNums = append(node.tx.allocatedPageNums, node.pageNum) 33 | return node 34 | } 35 | 36 | func (tx *tx) getNode(pageNum pgnum) (*Node, error) { 37 | if node, ok := tx.dirtyNodes[pageNum]; ok { 38 | return node, nil 39 | } 40 | 41 | node, err := tx.db.getNode(pageNum) 42 | if err != nil { 43 | return nil, err 44 | } 45 | node.tx = tx 46 | return node, nil 47 | } 48 | 49 | func (tx *tx) writeNode(node *Node) *Node { 50 | tx.dirtyNodes[node.pageNum] = node 51 | node.tx = tx 52 | return node 53 | } 54 | 55 | func (tx *tx) deleteNode(node *Node) { 56 | tx.pagesToDelete = append(tx.pagesToDelete, node.pageNum) 57 | } 58 | 59 | func (tx *tx) Rollback() { 60 | if !tx.write { 61 | tx.db.rwlock.RUnlock() 62 | return 63 | } 64 | 65 | tx.dirtyNodes = nil 66 | tx.pagesToDelete = nil 67 | for _, pageNum := range tx.allocatedPageNums { 68 | tx.db.freelist.releasePage(pageNum) 69 | } 70 | tx.allocatedPageNums = nil 71 | tx.db.rwlock.Unlock() 72 | } 73 | 74 | func (tx *tx) Commit() error { 75 | if !tx.write { 76 | tx.db.rwlock.RUnlock() 77 | return nil 78 | } 79 | 80 | for _, node := range tx.dirtyNodes { 81 | _, err := tx.db.writeNode(node) 82 | if err != nil { 83 | return err 84 | } 85 | } 86 | 87 | for _, pageNum := range 
tx.pagesToDelete { 88 | tx.db.deleteNode(pageNum) 89 | } 90 | _, err := tx.db.writeFreelist() 91 | if err != nil { 92 | return err 93 | } 94 | 95 | tx.dirtyNodes = nil 96 | tx.pagesToDelete = nil 97 | tx.allocatedPageNums = nil 98 | tx.db.rwlock.Unlock() 99 | return nil 100 | } -------------------------------------------------------------------------------- /Part 2/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 
27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize(buf []byte) []byte { 43 | pos := 0 44 | 45 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 46 | pos += 2 47 | 48 | // released pages count 49 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 50 | pos += 2 51 | 52 | for _, page := range fr.releasedPages { 53 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 54 | pos += pageNumSize 55 | 56 | } 57 | return buf 58 | } 59 | 60 | func (fr *freelist) deserialize(buf []byte) { 61 | pos := 0 62 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 63 | pos += 2 64 | 65 | // released pages count 66 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 67 | pos += 2 68 | 69 | for i := 0; i < releasedPagesCount; i++ { 70 | fr.releasedPages = append(fr.releasedPages, pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 71 | pos += pageNumSize 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Part 3/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. 
releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize(buf []byte) []byte { 43 | pos := 0 44 | 45 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 46 | pos += 2 47 | 48 | // released pages count 49 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 50 | pos += 2 51 | 52 | for _, page := range fr.releasedPages { 53 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 54 | pos += pageNumSize 55 | 56 | } 57 | return buf 58 | } 59 | 60 | func (fr *freelist) deserialize(buf []byte) { 61 | pos := 0 62 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 63 | pos += 2 64 | 65 | // released pages count 66 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 67 | pos += 2 68 | 69 | for i := 0; i < releasedPagesCount; i++ { 70 | fr.releasedPages = append(fr.releasedPages, 
pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 71 | pos += pageNumSize 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Part 4/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 
27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize(buf []byte) []byte { 43 | pos := 0 44 | 45 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 46 | pos += 2 47 | 48 | // released pages count 49 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 50 | pos += 2 51 | 52 | for _, page := range fr.releasedPages { 53 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 54 | pos += pageNumSize 55 | 56 | } 57 | return buf 58 | } 59 | 60 | func (fr *freelist) deserialize(buf []byte) { 61 | pos := 0 62 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 63 | pos += 2 64 | 65 | // released pages count 66 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 67 | pos += 2 68 | 69 | for i := 0; i < releasedPagesCount; i++ { 70 | fr.releasedPages = append(fr.releasedPages, pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 71 | pos += pageNumSize 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Part 5/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. 
releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize(buf []byte) []byte { 43 | pos := 0 44 | 45 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 46 | pos += 2 47 | 48 | // released pages count 49 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 50 | pos += 2 51 | 52 | for _, page := range fr.releasedPages { 53 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 54 | pos += pageNumSize 55 | 56 | } 57 | return buf 58 | } 59 | 60 | func (fr *freelist) deserialize(buf []byte) { 61 | pos := 0 62 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 63 | pos += 2 64 | 65 | // released pages count 66 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 67 | pos += 2 68 | 69 | for i := 0; i < releasedPagesCount; i++ { 70 | fr.releasedPages = append(fr.releasedPages, 
pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 71 | pos += pageNumSize 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Part 6/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 
27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize(buf []byte) []byte { 43 | pos := 0 44 | 45 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 46 | pos += 2 47 | 48 | // released pages count 49 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 50 | pos += 2 51 | 52 | for _, page := range fr.releasedPages { 53 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 54 | pos += pageNumSize 55 | 56 | } 57 | return buf 58 | } 59 | 60 | func (fr *freelist) deserialize(buf []byte) { 61 | pos := 0 62 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 63 | pos += 2 64 | 65 | // released pages count 66 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 67 | pos += 2 68 | 69 | for i := 0; i < releasedPagesCount; i++ { 70 | fr.releasedPages = append(fr.releasedPages, pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 71 | pos += pageNumSize 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Part 7/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. 
releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize(buf []byte) []byte { 43 | pos := 0 44 | 45 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 46 | pos += 2 47 | 48 | // released pages count 49 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 50 | pos += 2 51 | 52 | for _, page := range fr.releasedPages { 53 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 54 | pos += pageNumSize 55 | 56 | } 57 | return buf 58 | } 59 | 60 | func (fr *freelist) deserialize(buf []byte) { 61 | pos := 0 62 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 63 | pos += 2 64 | 65 | // released pages count 66 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 67 | pos += 2 68 | 69 | for i := 0; i < releasedPagesCount; i++ { 70 | fr.releasedPages = append(fr.releasedPages, 
pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 71 | pos += pageNumSize 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Part 1/freelist.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "encoding/binary" 4 | 5 | // metaPage is the maximum pgnum that is used by the db for its own purposes. For now, only page 0 is used as the 6 | // header page. It means all other page numbers can be used. 7 | const metaPage = 0 8 | 9 | // freelist manages the manages free and used pages. 10 | type freelist struct { 11 | // maxPage holds the latest page num allocated. releasedPages holds all the ids that were released during 12 | // delete. New page ids are first given from the releasedPageIDs to avoid growing the file. If it's empty, then 13 | // maxPage is incremented and a new page is created thus increasing the file size. 14 | maxPage pgnum 15 | releasedPages []pgnum 16 | } 17 | 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | maxPage: metaPage, 21 | releasedPages: []pgnum{}, 22 | } 23 | } 24 | 25 | // getNextPage returns page ids for writing New page ids are first given from the releasedPageIDs to avoid growing 26 | // the file. If it's empty, then maxPage is incremented and a new page is created thus increasing the file size. 
27 | func (fr *freelist) getNextPage() pgnum { 28 | if len(fr.releasedPages) != 0 { 29 | // Take the last element and remove it from the list 30 | pageID := fr.releasedPages[len(fr.releasedPages)-1] 31 | fr.releasedPages = fr.releasedPages[:len(fr.releasedPages)-1] 32 | return pageID 33 | } 34 | fr.maxPage += 1 35 | return fr.maxPage 36 | } 37 | 38 | func (fr *freelist) releasePage(page pgnum) { 39 | fr.releasedPages = append(fr.releasedPages, page) 40 | } 41 | 42 | func (fr *freelist) serialize() []byte { 43 | buf := make([]byte, 4+pageNumSize*len(fr.releasedPages)) 44 | pos := 0 45 | 46 | binary.LittleEndian.PutUint16(buf[pos:], uint16(fr.maxPage)) 47 | pos += 2 48 | 49 | // released pages count 50 | binary.LittleEndian.PutUint16(buf[pos:], uint16(len(fr.releasedPages))) 51 | pos += 2 52 | 53 | for _, page := range fr.releasedPages { 54 | binary.LittleEndian.PutUint64(buf[pos:], uint64(page)) 55 | pos += pageNumSize 56 | 57 | } 58 | return buf 59 | } 60 | 61 | func (fr *freelist) deserialize(buf []byte) { 62 | pos := 0 63 | fr.maxPage = pgnum(binary.LittleEndian.Uint16(buf[pos:])) 64 | pos += 2 65 | 66 | // released pages count 67 | releasedPagesCount := int(binary.LittleEndian.Uint16(buf[pos:])) 68 | pos += 2 69 | 70 | for i := 0; i < releasedPagesCount; i++ { 71 | fr.releasedPages = append(fr.releasedPages, pgnum(binary.LittleEndian.Uint64(buf[pos:]))) 72 | pos += pageNumSize 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /Part 2/dal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type pgnum uint64 10 | 11 | type page struct { 12 | num pgnum 13 | data []byte 14 | } 15 | 16 | type dal struct { 17 | file *os.File 18 | pageSize int 19 | 20 | *meta 21 | *freelist 22 | } 23 | 24 | func newDal(path string) (*dal, error) { 25 | dal := &dal{ 26 | meta: newEmptyMeta(), 27 | pageSize: os.Getpagesize(), 
28 | 	} 29 | 30 | 	// exist 31 | 	if _, err := os.Stat(path); err == nil { 32 | 		dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 33 | 		if err != nil { 34 | 			_ = dal.close() 35 | 			return nil, err 36 | 		} 37 | 38 | 		meta, err := dal.readMeta() 39 | 		if err != nil { 40 | 			return nil, err 41 | 		} 42 | 		dal.meta = meta 43 | 44 | 		freelist, err := dal.readFreelist() 45 | 		if err != nil { 46 | 			return nil, err 47 | 		} 48 | 		dal.freelist = freelist 49 | 		// doesn't exist 50 | 	} else if errors.Is(err, os.ErrNotExist) { 51 | 		// init freelist 52 | 		dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 53 | 		if err != nil { 54 | 			_ = dal.close() 55 | 			return nil, err 56 | 		} 57 | 58 | 		dal.freelist = newFreelist() 59 | 		dal.freelistPage = dal.getNextPage() 60 | 		_, err := dal.writeFreelist() 61 | 		if err != nil { 62 | 			return nil, err 63 | 		} 64 | 65 | 		// write meta page; propagate a failed write instead of silently dropping the error 66 | 		if _, err = dal.writeMeta(dal.meta); err != nil { return nil, err } 67 | 	} else { 68 | 		return nil, err 69 | 	} 70 | 	return dal, nil 71 | } 72 | 73 | func (d *dal) close() error { 74 | 	if d.file != nil { 75 | 		err := d.file.Close() 76 | 		if err != nil { 77 | 			return fmt.Errorf("could not close file: %s", err) 78 | 		} 79 | 		d.file = nil 80 | 	} 81 | 82 | 	return nil 83 | } 84 | 85 | func (d *dal) allocateEmptyPage() *page { 86 | 	return &page{ 87 | 		data: make([]byte, d.pageSize, d.pageSize), 88 | 	} 89 | } 90 | 91 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 92 | 	p := d.allocateEmptyPage() 93 | 94 | 	offset := int(pageNum) * d.pageSize 95 | 	_, err := d.file.ReadAt(p.data, int64(offset)) 96 | 	if err != nil { 97 | 		return nil, err 98 | 	} 99 | 	return p, err 100 | } 101 | 102 | func (d *dal) writePage(p *page) error { 103 | 	offset := int64(p.num) * int64(d.pageSize) 104 | 	_, err := d.file.WriteAt(p.data, offset) 105 | 	return err 106 | } 107 | 108 | func (d *dal) readFreelist() (*freelist, error) { 109 | 	p, err := d.readPage(d.freelistPage) 110 | 	if err != nil { 111 | 		return nil, err 112 | 	} 113 | 114 | 	freelist := newFreelist() 115 | 
freelist.deserialize(p.data) 116 | return freelist, nil 117 | } 118 | 119 | func (d *dal) writeFreelist() (*page, error) { 120 | p := d.allocateEmptyPage() 121 | p.num = d.freelistPage 122 | d.freelist.serialize(p.data) 123 | 124 | err := d.writePage(p) 125 | if err != nil { 126 | return nil, err 127 | } 128 | d.freelistPage = p.num 129 | return p, nil 130 | } 131 | 132 | func (d *dal) writeMeta(meta *meta) (*page, error) { 133 | p := d.allocateEmptyPage() 134 | p.num = metaPageNum 135 | meta.serialize(p.data) 136 | 137 | err := d.writePage(p) 138 | if err != nil { 139 | return nil, err 140 | } 141 | return p, nil 142 | } 143 | 144 | func (d *dal) readMeta() (*meta, error) { 145 | p, err := d.readPage(metaPageNum) 146 | if err != nil { 147 | return nil, err 148 | } 149 | 150 | meta := newEmptyMeta() 151 | meta.deserialize(p.data) 152 | return meta, nil 153 | } 154 | -------------------------------------------------------------------------------- /Part 4/collection.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "bytes" 4 | 5 | type Collection struct { 6 | name []byte 7 | root pgnum 8 | 9 | dal *dal 10 | } 11 | 12 | func newCollection(name []byte, root pgnum) *Collection { 13 | return &Collection{ 14 | name: name, 15 | root: root, 16 | } 17 | } 18 | 19 | // Put adds a key to the tree. It finds the correct node and the insertion index and adds the item. When performing the 20 | // search, the ancestors are returned as well. This way we can iterate over them to check which nodes were modified and 21 | // rebalance by splitting them accordingly. If the root has too many items, then a new root of a new layer is 22 | // created and the created nodes from the split are added as children. 
23 | func (c *Collection) Put(key []byte, value []byte) error { 24 | 	i := newItem(key, value) 25 | 26 | 	// On first insertion the root node does not exist, so it should be created 27 | 	var root *Node 28 | 	var err error 29 | 	if c.root == 0 { 30 | 		root, err = c.dal.writeNode(c.dal.newNode([]*Item{i}, []pgnum{})) 31 | 		if err != nil { 32 | 			return err 33 | 		} 34 | 		c.root = root.pageNum 35 | 		return nil 36 | 	} else { 37 | 		root, err = c.dal.getNode(c.root) 38 | 		if err != nil { 39 | 			return err 40 | 		} 41 | 	} 42 | 43 | 	// Find the path to the node where the insertion should happen 44 | 	insertionIndex, nodeToInsertIn, ancestorsIndexes, err := root.findKey(i.key, false) 45 | 	if err != nil { 46 | 		return err 47 | 	} 48 | 49 | 	// If key already exists 50 | 	if nodeToInsertIn.items != nil && insertionIndex < len(nodeToInsertIn.items) && bytes.Compare(nodeToInsertIn.items[insertionIndex].key, key) == 0 { 51 | 		nodeToInsertIn.items[insertionIndex] = i 52 | 	} else { 53 | 		// Add item to the leaf node 54 | 		nodeToInsertIn.addItem(i, insertionIndex) 55 | 	} 56 | 	nodeToInsertIn.writeNode(nodeToInsertIn) 57 | 58 | 	ancestors, err := c.getNodes(ancestorsIndexes) 59 | 	if err != nil { 60 | 		return err 61 | 	} 62 | 63 | 	// Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 
64 | for i := len(ancestors) - 2; i >= 0; i-- { 65 | pnode := ancestors[i] 66 | node := ancestors[i+1] 67 | nodeIndex := ancestorsIndexes[i+1] 68 | if node.isOverPopulated() { 69 | pnode.split(node, nodeIndex) 70 | } 71 | } 72 | 73 | // Handle root 74 | rootNode := ancestors[0] 75 | if rootNode.isOverPopulated() { 76 | newRoot := c.dal.newNode([]*Item{}, []pgnum{rootNode.pageNum}) 77 | newRoot.split(rootNode, 0) 78 | 79 | // commit newly created root 80 | newRoot, err = c.dal.writeNode(newRoot) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | c.root = newRoot.pageNum 86 | } 87 | 88 | return nil 89 | } 90 | 91 | // Find Returns an item according based on the given key by performing a binary search. 92 | func (c *Collection) Find(key []byte) (*Item, error) { 93 | n, err := c.dal.getNode(c.root) 94 | if err != nil { 95 | return nil, err 96 | } 97 | 98 | index, containingNode, _, err := n.findKey(key, true) 99 | if err != nil { 100 | return nil, err 101 | } 102 | if index == -1 { 103 | return nil, nil 104 | } 105 | return containingNode.items[index], nil 106 | } 107 | 108 | // getNodes returns a list of nodes based on their indexes (the breadcrumbs) from the root 109 | // p 110 | // / \ 111 | // a b 112 | // / \ / \ 113 | // c d e f 114 | // For [0,1,0] -> p,b,e 115 | func (c *Collection) getNodes(indexes []int) ([]*Node, error) { 116 | root, err := c.dal.getNode(c.root) 117 | if err != nil { 118 | return nil, err 119 | } 120 | 121 | nodes := []*Node{root} 122 | child := root 123 | for i := 1; i < len(indexes); i++ { 124 | child, err = c.dal.getNode(child.childNodes[indexes[i]]) 125 | if err != nil { 126 | return nil, err 127 | } 128 | nodes = append(nodes, child) 129 | } 130 | return nodes, nil 131 | } 132 | -------------------------------------------------------------------------------- /Part 7/tx.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type tx struct { 4 | dirtyNodes map[pgnum]*Node 5 | 
pagesToDelete []pgnum 6 | 7 | // new pages allocated during the transaction. They will be released if rollback is called. 8 | allocatedPageNums []pgnum 9 | 10 | write bool 11 | 12 | db *DB 13 | } 14 | 15 | func newTx(db *DB, write bool) *tx { 16 | return &tx{ 17 | map[pgnum]*Node{}, 18 | make([]pgnum, 0), 19 | make([]pgnum, 0), 20 | write, 21 | db, 22 | } 23 | } 24 | 25 | func (tx *tx) newNode(items []*Item, childNodes []pgnum) *Node { 26 | node := NewEmptyNode() 27 | node.items = items 28 | node.childNodes = childNodes 29 | node.pageNum = tx.db.getNextPage() 30 | node.tx = tx 31 | 32 | node.tx.allocatedPageNums = append(node.tx.allocatedPageNums, node.pageNum) 33 | return node 34 | } 35 | 36 | func (tx *tx) getNode(pageNum pgnum) (*Node, error) { 37 | if node, ok := tx.dirtyNodes[pageNum]; ok { 38 | return node, nil 39 | } 40 | 41 | node, err := tx.db.getNode(pageNum) 42 | if err != nil { 43 | return nil, err 44 | } 45 | node.tx = tx 46 | return node, nil 47 | } 48 | 49 | func (tx *tx) writeNode(node *Node) *Node { 50 | tx.dirtyNodes[node.pageNum] = node 51 | node.tx = tx 52 | return node 53 | } 54 | 55 | func (tx *tx) deleteNode(node *Node) { 56 | tx.pagesToDelete = append(tx.pagesToDelete, node.pageNum) 57 | } 58 | 59 | func (tx *tx) Rollback() { 60 | if !tx.write { 61 | tx.db.rwlock.RUnlock() 62 | return 63 | } 64 | 65 | tx.dirtyNodes = nil 66 | tx.pagesToDelete = nil 67 | for _, pageNum := range tx.allocatedPageNums { 68 | tx.db.freelist.releasePage(pageNum) 69 | } 70 | tx.allocatedPageNums = nil 71 | tx.db.rwlock.Unlock() 72 | } 73 | 74 | func (tx *tx) Commit() error { 75 | if !tx.write { 76 | tx.db.rwlock.RUnlock() 77 | return nil 78 | } 79 | 80 | for _, node := range tx.dirtyNodes { 81 | _, err := tx.db.writeNode(node) 82 | if err != nil { 83 | return err 84 | } 85 | } 86 | 87 | for _, pageNum := range tx.pagesToDelete { 88 | tx.db.deleteNode(pageNum) 89 | } 90 | _, err := tx.db.writeFreelist() 91 | if err != nil { 92 | return err 93 | } 94 | 95 | 
tx.dirtyNodes = nil 96 | tx.pagesToDelete = nil 97 | tx.allocatedPageNums = nil 98 | tx.db.rwlock.Unlock() 99 | return nil 100 | } 101 | 102 | func (tx *tx) getRootCollection() *Collection { 103 | rootCollection := newEmptyCollection() 104 | rootCollection.root = tx.db.root 105 | rootCollection.tx = tx 106 | return rootCollection 107 | } 108 | 109 | func (tx *tx) GetCollection(name []byte) (*Collection, error) { 110 | rootCollection := tx.getRootCollection() 111 | item, err := rootCollection.Find(name) 112 | if err != nil { 113 | return nil, err 114 | } 115 | 116 | if item == nil { 117 | return nil, nil 118 | } 119 | 120 | collection := newEmptyCollection() 121 | collection.deserialize(item) 122 | collection.tx = tx 123 | return collection, nil 124 | } 125 | 126 | func (tx *tx) CreateCollection(name []byte) (*Collection, error) { 127 | if !tx.write { 128 | return nil, writeInsideReadTxErr 129 | } 130 | 131 | newCollectionPage, err := tx.db.writeNode(NewEmptyNode()) 132 | if err != nil { 133 | return nil, err 134 | } 135 | 136 | newCollection := newEmptyCollection() 137 | newCollection.name = name 138 | newCollection.root = newCollectionPage.pageNum 139 | return tx.createCollection(newCollection) 140 | } 141 | 142 | func (tx *tx) DeleteCollection(name []byte) error { 143 | if !tx.write { 144 | return writeInsideReadTxErr 145 | } 146 | 147 | rootCollection := tx.getRootCollection() 148 | 149 | return rootCollection.Remove(name) 150 | 151 | } 152 | 153 | func (tx *tx) createCollection(collection *Collection) (*Collection, error) { 154 | collection.tx = tx 155 | collectionBytes := collection.serialize() 156 | 157 | rootCollection := tx.getRootCollection() 158 | err := rootCollection.Put(collection.name, collectionBytes.value) 159 | if err != nil { 160 | return nil, err 161 | } 162 | 163 | return collection, nil 164 | } -------------------------------------------------------------------------------- /Part 3/dal.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type pgnum uint64 10 | 11 | type page struct { 12 | num pgnum 13 | data []byte 14 | } 15 | 16 | type dal struct { 17 | file *os.File 18 | pageSize int 19 | 20 | *meta 21 | *freelist 22 | } 23 | 24 | func newDal(path string) (*dal, error) { 25 | dal := &dal{ 26 | meta: newEmptyMeta(), 27 | pageSize: os.Getpagesize(), 28 | } 29 | 30 | // exist 31 | if _, err := os.Stat(path); err == nil { 32 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 33 | if err != nil { 34 | _ = dal.close() 35 | return nil, err 36 | } 37 | 38 | meta, err := dal.readMeta() 39 | if err != nil { 40 | return nil, err 41 | } 42 | dal.meta = meta 43 | 44 | freelist, err := dal.readFreelist() 45 | if err != nil { 46 | return nil, err 47 | } 48 | dal.freelist = freelist 49 | // doesn't exist 50 | } else if errors.Is(err, os.ErrNotExist) { 51 | // init freelist 52 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 53 | if err != nil { 54 | _ = dal.close() 55 | return nil, err 56 | } 57 | 58 | dal.freelist = newFreelist() 59 | dal.freelistPage = dal.getNextPage() 60 | _, err := dal.writeFreelist() 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | // write meta page 66 | _, err = dal.writeMeta(dal.meta) // other error 67 | } else { 68 | return nil, err 69 | } 70 | return dal, nil 71 | } 72 | 73 | func (d *dal) close() error { 74 | if d.file != nil { 75 | err := d.file.Close() 76 | if err != nil { 77 | return fmt.Errorf("could not close file: %s", err) 78 | } 79 | d.file = nil 80 | } 81 | 82 | return nil 83 | } 84 | 85 | func (d *dal) allocateEmptyPage() *page { 86 | return &page{ 87 | data: make([]byte, d.pageSize, d.pageSize), 88 | } 89 | } 90 | 91 | func (d *dal) getNode(pageNum pgnum) (*Node, error) { 92 | p, err := d.readPage(pageNum) 93 | if err != nil { 94 | return nil, err 95 | } 96 | node := 
NewEmptyNode() 97 | node.deserialize(p.data) 98 | node.pageNum = pageNum 99 | return node, nil 100 | } 101 | 102 | func (d *dal) writeNode(n *Node) (*Node, error) { 103 | p := d.allocateEmptyPage() 104 | if n.pageNum == 0 { 105 | p.num = d.getNextPage() 106 | n.pageNum = p.num 107 | } else { 108 | p.num = n.pageNum 109 | } 110 | 111 | p.data = n.serialize(p.data) 112 | 113 | err := d.writePage(p) 114 | if err != nil { 115 | return nil, err 116 | } 117 | return n, nil 118 | } 119 | 120 | func (d *dal) deleteNode(pageNum pgnum) { 121 | d.releasePage(pageNum) 122 | } 123 | 124 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 125 | p := d.allocateEmptyPage() 126 | 127 | offset := int(pageNum) * d.pageSize 128 | _, err := d.file.ReadAt(p.data, int64(offset)) 129 | if err != nil { 130 | return nil, err 131 | } 132 | return p, err 133 | } 134 | 135 | func (d *dal) writePage(p *page) error { 136 | offset := int64(p.num) * int64(d.pageSize) 137 | _, err := d.file.WriteAt(p.data, offset) 138 | return err 139 | } 140 | 141 | func (d *dal) readFreelist() (*freelist, error) { 142 | p, err := d.readPage(d.freelistPage) 143 | if err != nil { 144 | return nil, err 145 | } 146 | 147 | freelist := newFreelist() 148 | freelist.deserialize(p.data) 149 | return freelist, nil 150 | } 151 | 152 | func (d *dal) writeFreelist() (*page, error) { 153 | p := d.allocateEmptyPage() 154 | p.num = d.freelistPage 155 | d.freelist.serialize(p.data) 156 | 157 | err := d.writePage(p) 158 | if err != nil { 159 | return nil, err 160 | } 161 | d.freelistPage = p.num 162 | return p, nil 163 | } 164 | 165 | func (d *dal) writeMeta(meta *meta) (*page, error) { 166 | p := d.allocateEmptyPage() 167 | p.num = metaPageNum 168 | meta.serialize(p.data) 169 | 170 | err := d.writePage(p) 171 | if err != nil { 172 | return nil, err 173 | } 174 | return p, nil 175 | } 176 | 177 | func (d *dal) readMeta() (*meta, error) { 178 | p, err := d.readPage(metaPageNum) 179 | if err != nil { 180 | return nil, err 181 
| } 182 | 183 | meta := newEmptyMeta() 184 | meta.deserialize(p.data) 185 | return meta, nil 186 | } 187 | -------------------------------------------------------------------------------- /Part 6/dal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type pgnum uint64 10 | 11 | type Options struct { 12 | pageSize int 13 | 14 | MinFillPercent float32 15 | MaxFillPercent float32 16 | } 17 | 18 | var DefaultOptions = &Options{ 19 | MinFillPercent: 0.5, 20 | MaxFillPercent: 0.95, 21 | } 22 | 23 | type page struct { 24 | num pgnum 25 | data []byte 26 | } 27 | 28 | type dal struct { 29 | pageSize int 30 | minFillPercent float32 31 | maxFillPercent float32 32 | file *os.File 33 | 34 | *meta 35 | *freelist 36 | } 37 | 38 | func newDal(path string, options *Options) (*dal, error) { 39 | dal := &dal{ 40 | meta: newEmptyMeta(), 41 | pageSize: options.pageSize, 42 | minFillPercent: options.MinFillPercent, 43 | maxFillPercent: options.MaxFillPercent, 44 | } 45 | 46 | // exist 47 | if _, err := os.Stat(path); err == nil { 48 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 49 | if err != nil { 50 | _ = dal.close() 51 | return nil, err 52 | } 53 | 54 | meta, err := dal.readMeta() 55 | if err != nil { 56 | return nil, err 57 | } 58 | dal.meta = meta 59 | 60 | freelist, err := dal.readFreelist() 61 | if err != nil { 62 | return nil, err 63 | } 64 | dal.freelist = freelist 65 | // doesn't exist 66 | } else if errors.Is(err, os.ErrNotExist) { 67 | // init freelist 68 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 69 | if err != nil { 70 | _ = dal.close() 71 | return nil, err 72 | } 73 | 74 | dal.freelist = newFreelist() 75 | dal.freelistPage = dal.getNextPage() 76 | _, err := dal.writeFreelist() 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | // init root 82 | collectionsNode, err := 
dal.writeNode(NewNodeForSerialization([]*Item{}, []pgnum{})) 83 | if err != nil { 84 | return nil, err 85 | } 86 | dal.root = collectionsNode.pageNum 87 | 88 | // write meta page 89 | _, err = dal.writeMeta(dal.meta) // other error 90 | } else { 91 | return nil, err 92 | } 93 | return dal, nil 94 | } 95 | 96 | // getSplitIndex should be called when performing rebalance after an item is removed. It checks if a node can spare an 97 | // element, and if it does then it returns the index when there the split should happen. Otherwise -1 is returned. 98 | func (d *dal) getSplitIndex(node *Node) int { 99 | size := 0 100 | size += nodeHeaderSize 101 | 102 | for i := range node.items { 103 | size += node.elementSize(i) 104 | 105 | // if we have a big enough page size (more than minimum), and didn't reach the last node, which means we can 106 | // spare an element 107 | if float32(size) > d.minThreshold() && i < len(node.items) - 1 { 108 | return i + 1 109 | } 110 | } 111 | 112 | return -1 113 | } 114 | 115 | 116 | func (d *dal) maxThreshold() float32 { 117 | return d.maxFillPercent * float32(d.pageSize) 118 | } 119 | 120 | func (d *dal) isOverPopulated(node *Node) bool { 121 | return float32(node.nodeSize()) > d.maxThreshold() 122 | } 123 | 124 | func (d *dal) minThreshold() float32 { 125 | return d.minFillPercent * float32(d.pageSize) 126 | } 127 | 128 | func (d *dal) isUnderPopulated(node *Node) bool { 129 | return float32(node.nodeSize()) < d.minThreshold() 130 | } 131 | 132 | func (d *dal) close() error { 133 | if d.file != nil { 134 | err := d.file.Close() 135 | if err != nil { 136 | return fmt.Errorf("could not close file: %s", err) 137 | } 138 | d.file = nil 139 | } 140 | 141 | return nil 142 | } 143 | 144 | func (d *dal) allocateEmptyPage() *page { 145 | return &page{ 146 | data: make([]byte, d.pageSize, d.pageSize), 147 | } 148 | } 149 | 150 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 151 | p := d.allocateEmptyPage() 152 | 153 | offset := int(pageNum) 
* d.pageSize 154 | _, err := d.file.ReadAt(p.data, int64(offset)) 155 | if err != nil { 156 | return nil, err 157 | } 158 | return p, err 159 | } 160 | 161 | func (d *dal) writePage(p *page) error { 162 | offset := int64(p.num) * int64(d.pageSize) 163 | _, err := d.file.WriteAt(p.data, offset) 164 | return err 165 | } 166 | 167 | 168 | func (d *dal) getNode(pageNum pgnum) (*Node, error) { 169 | p, err := d.readPage(pageNum) 170 | if err != nil { 171 | return nil, err 172 | } 173 | node := NewEmptyNode() 174 | node.deserialize(p.data) 175 | node.pageNum = pageNum 176 | return node, nil 177 | } 178 | 179 | func (d *dal) writeNode(n *Node) (*Node, error) { 180 | p := d.allocateEmptyPage() 181 | if n.pageNum == 0 { 182 | p.num = d.getNextPage() 183 | n.pageNum = p.num 184 | } else { 185 | p.num = n.pageNum 186 | } 187 | 188 | p.data = n.serialize(p.data) 189 | 190 | err := d.writePage(p) 191 | if err != nil { 192 | return nil, err 193 | } 194 | return n, nil 195 | } 196 | 197 | func (d *dal) deleteNode(pageNum pgnum) { 198 | d.releasePage(pageNum) 199 | } 200 | 201 | func (d *dal) readFreelist() (*freelist, error) { 202 | p, err := d.readPage(d.freelistPage) 203 | if err != nil { 204 | return nil, err 205 | } 206 | 207 | freelist := newFreelist() 208 | freelist.deserialize(p.data) 209 | return freelist, nil 210 | } 211 | 212 | func (d *dal) writeFreelist() (*page, error) { 213 | p := d.allocateEmptyPage() 214 | p.num = d.freelistPage 215 | d.freelist.serialize(p.data) 216 | 217 | err := d.writePage(p) 218 | if err != nil { 219 | return nil, err 220 | } 221 | d.freelistPage = p.num 222 | return p, nil 223 | } 224 | 225 | func (d *dal) writeMeta(meta *meta) (*page, error) { 226 | p := d.allocateEmptyPage() 227 | p.num = metaPageNum 228 | meta.serialize(p.data) 229 | 230 | err := d.writePage(p) 231 | if err != nil { 232 | return nil, err 233 | } 234 | return p, nil 235 | } 236 | 237 | func (d *dal) readMeta() (*meta, error) { 238 | p, err := d.readPage(metaPageNum) 239 | 
if err != nil { 240 | return nil, err 241 | } 242 | 243 | meta := newEmptyMeta() 244 | meta.deserialize(p.data) 245 | return meta, nil 246 | } 247 | -------------------------------------------------------------------------------- /Part 7/dal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type pgnum uint64 10 | 11 | type Options struct { 12 | pageSize int 13 | 14 | MinFillPercent float32 15 | MaxFillPercent float32 16 | } 17 | 18 | var DefaultOptions = &Options{ 19 | MinFillPercent: 0.5, 20 | MaxFillPercent: 0.95, 21 | } 22 | 23 | type page struct { 24 | num pgnum 25 | data []byte 26 | } 27 | 28 | type dal struct { 29 | pageSize int 30 | minFillPercent float32 31 | maxFillPercent float32 32 | file *os.File 33 | 34 | *meta 35 | *freelist 36 | } 37 | 38 | func newDal(path string, options *Options) (*dal, error) { 39 | dal := &dal{ 40 | meta: newEmptyMeta(), 41 | pageSize: options.pageSize, 42 | minFillPercent: options.MinFillPercent, 43 | maxFillPercent: options.MaxFillPercent, 44 | } 45 | 46 | // exist 47 | if _, err := os.Stat(path); err == nil { 48 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 49 | if err != nil { 50 | _ = dal.close() 51 | return nil, err 52 | } 53 | 54 | meta, err := dal.readMeta() 55 | if err != nil { 56 | return nil, err 57 | } 58 | dal.meta = meta 59 | 60 | freelist, err := dal.readFreelist() 61 | if err != nil { 62 | return nil, err 63 | } 64 | dal.freelist = freelist 65 | // doesn't exist 66 | } else if errors.Is(err, os.ErrNotExist) { 67 | // init freelist 68 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 69 | if err != nil { 70 | _ = dal.close() 71 | return nil, err 72 | } 73 | 74 | dal.freelist = newFreelist() 75 | dal.freelistPage = dal.getNextPage() 76 | _, err := dal.writeFreelist() 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | // init root 82 | collectionsNode, err := 
dal.writeNode(NewNodeForSerialization([]*Item{}, []pgnum{})) 83 | if err != nil { 84 | return nil, err 85 | } 86 | dal.root = collectionsNode.pageNum 87 | 88 | // write meta page 89 | _, err = dal.writeMeta(dal.meta) // other error 90 | } else { 91 | return nil, err 92 | } 93 | return dal, nil 94 | } 95 | 96 | // getSplitIndex should be called when performing rebalance after an item is removed. It checks if a node can spare an 97 | // element, and if it does then it returns the index when there the split should happen. Otherwise -1 is returned. 98 | func (d *dal) getSplitIndex(node *Node) int { 99 | size := 0 100 | size += nodeHeaderSize 101 | 102 | for i := range node.items { 103 | size += node.elementSize(i) 104 | 105 | // if we have a big enough page size (more than minimum), and didn't reach the last node, which means we can 106 | // spare an element 107 | if float32(size) > d.minThreshold() && i < len(node.items) - 1 { 108 | return i + 1 109 | } 110 | } 111 | 112 | return -1 113 | } 114 | 115 | 116 | func (d *dal) maxThreshold() float32 { 117 | return d.maxFillPercent * float32(d.pageSize) 118 | } 119 | 120 | func (d *dal) isOverPopulated(node *Node) bool { 121 | return float32(node.nodeSize()) > d.maxThreshold() 122 | } 123 | 124 | func (d *dal) minThreshold() float32 { 125 | return d.minFillPercent * float32(d.pageSize) 126 | } 127 | 128 | func (d *dal) isUnderPopulated(node *Node) bool { 129 | return float32(node.nodeSize()) < d.minThreshold() 130 | } 131 | 132 | func (d *dal) close() error { 133 | if d.file != nil { 134 | err := d.file.Close() 135 | if err != nil { 136 | return fmt.Errorf("could not close file: %s", err) 137 | } 138 | d.file = nil 139 | } 140 | 141 | return nil 142 | } 143 | 144 | func (d *dal) allocateEmptyPage() *page { 145 | return &page{ 146 | data: make([]byte, d.pageSize, d.pageSize), 147 | } 148 | } 149 | 150 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 151 | p := d.allocateEmptyPage() 152 | 153 | offset := int(pageNum) 
* d.pageSize 154 | _, err := d.file.ReadAt(p.data, int64(offset)) 155 | if err != nil { 156 | return nil, err 157 | } 158 | return p, err 159 | } 160 | 161 | func (d *dal) writePage(p *page) error { 162 | offset := int64(p.num) * int64(d.pageSize) 163 | _, err := d.file.WriteAt(p.data, offset) 164 | return err 165 | } 166 | 167 | 168 | func (d *dal) getNode(pageNum pgnum) (*Node, error) { 169 | p, err := d.readPage(pageNum) 170 | if err != nil { 171 | return nil, err 172 | } 173 | node := NewEmptyNode() 174 | node.deserialize(p.data) 175 | node.pageNum = pageNum 176 | return node, nil 177 | } 178 | 179 | func (d *dal) writeNode(n *Node) (*Node, error) { 180 | p := d.allocateEmptyPage() 181 | if n.pageNum == 0 { 182 | p.num = d.getNextPage() 183 | n.pageNum = p.num 184 | } else { 185 | p.num = n.pageNum 186 | } 187 | 188 | p.data = n.serialize(p.data) 189 | 190 | err := d.writePage(p) 191 | if err != nil { 192 | return nil, err 193 | } 194 | return n, nil 195 | } 196 | 197 | func (d *dal) deleteNode(pageNum pgnum) { 198 | d.releasePage(pageNum) 199 | } 200 | 201 | func (d *dal) readFreelist() (*freelist, error) { 202 | p, err := d.readPage(d.freelistPage) 203 | if err != nil { 204 | return nil, err 205 | } 206 | 207 | freelist := newFreelist() 208 | freelist.deserialize(p.data) 209 | return freelist, nil 210 | } 211 | 212 | func (d *dal) writeFreelist() (*page, error) { 213 | p := d.allocateEmptyPage() 214 | p.num = d.freelistPage 215 | d.freelist.serialize(p.data) 216 | 217 | err := d.writePage(p) 218 | if err != nil { 219 | return nil, err 220 | } 221 | d.freelistPage = p.num 222 | return p, nil 223 | } 224 | 225 | func (d *dal) writeMeta(meta *meta) (*page, error) { 226 | p := d.allocateEmptyPage() 227 | p.num = metaPageNum 228 | meta.serialize(p.data) 229 | 230 | err := d.writePage(p) 231 | if err != nil { 232 | return nil, err 233 | } 234 | return p, nil 235 | } 236 | 237 | func (d *dal) readMeta() (*meta, error) { 238 | p, err := d.readPage(metaPageNum) 239 | 
if err != nil { 240 | return nil, err 241 | } 242 | 243 | meta := newEmptyMeta() 244 | meta.deserialize(p.data) 245 | return meta, nil 246 | } 247 | -------------------------------------------------------------------------------- /Part 5/collection.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "bytes" 4 | 5 | type Collection struct { 6 | name []byte 7 | root pgnum 8 | 9 | dal *dal 10 | } 11 | 12 | func newCollection(name []byte, root pgnum) *Collection { 13 | return &Collection{ 14 | name: name, 15 | root: root, 16 | } 17 | } 18 | 19 | // Put adds a key to the tree. It finds the correct node and the insertion index and adds the item. When performing the 20 | // search, the ancestors are returned as well. This way we can iterate over them to check which nodes were modified and 21 | // rebalance by splitting them accordingly. If the root has too many items, then a new root of a new layer is 22 | // created and the created nodes from the split are added as children. 
23 | func (c *Collection) Put(key []byte, value []byte) error { 24 | i := newItem(key, value) 25 | 26 | // On first insertion the root node does not exist, so it should be created 27 | var root *Node 28 | var err error 29 | if c.root == 0 { 30 | root, err = c.dal.writeNode(c.dal.newNode([]*Item{i}, []pgnum{})) 31 | if err != nil { 32 | return nil 33 | } 34 | c.root = root.pageNum 35 | return nil 36 | } else { 37 | root, err = c.dal.getNode(c.root) 38 | if err != nil { 39 | return err 40 | } 41 | } 42 | 43 | // Find the path to the node where the insertion should happen 44 | insertionIndex, nodeToInsertIn, ancestorsIndexes, err := root.findKey(i.key, false) 45 | if err != nil { 46 | return err 47 | } 48 | 49 | // If key already exists 50 | if nodeToInsertIn.items != nil && insertionIndex < len(nodeToInsertIn.items) && bytes.Compare(nodeToInsertIn.items[insertionIndex].key, key) == 0 { 51 | nodeToInsertIn.items[insertionIndex] = i 52 | } else { 53 | // Add item to the leaf node 54 | nodeToInsertIn.addItem(i, insertionIndex) 55 | } 56 | nodeToInsertIn.writeNode(nodeToInsertIn) 57 | 58 | ancestors, err := c.getNodes(ancestorsIndexes) 59 | if err != nil { 60 | return err 61 | } 62 | 63 | // Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 
64 | for i := len(ancestors) - 2; i >= 0; i-- { 65 | pnode := ancestors[i] 66 | node := ancestors[i+1] 67 | nodeIndex := ancestorsIndexes[i+1] 68 | if node.isOverPopulated() { 69 | pnode.split(node, nodeIndex) 70 | } 71 | } 72 | 73 | // Handle root 74 | rootNode := ancestors[0] 75 | if rootNode.isOverPopulated() { 76 | newRoot := c.dal.newNode([]*Item{}, []pgnum{rootNode.pageNum}) 77 | newRoot.split(rootNode, 0) 78 | 79 | // commit newly created root 80 | newRoot, err = c.dal.writeNode(newRoot) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | c.root = newRoot.pageNum 86 | } 87 | 88 | return nil 89 | } 90 | 91 | // Find Returns an item according based on the given key by performing a binary search. 92 | func (c *Collection) Find(key []byte) (*Item, error) { 93 | n, err := c.dal.getNode(c.root) 94 | if err != nil { 95 | return nil, err 96 | } 97 | 98 | index, containingNode, _, err := n.findKey(key, true) 99 | if err != nil { 100 | return nil, err 101 | } 102 | if index == -1 { 103 | return nil, nil 104 | } 105 | return containingNode.items[index], nil 106 | } 107 | 108 | // Remove removes a key from the tree. It finds the correct node and the index to remove the item from and removes it. 109 | // When performing the search, the ancestors are returned as well. This way we can iterate over them to check which 110 | // nodes were modified and rebalance by rotating or merging the unbalanced nodes. Rotation is done first. If the 111 | // siblings don't have enough items, then merging occurs. If the root is without items after a split, then the root is 112 | // removed and the tree is one level shorter. 
113 | func (c *Collection) Remove(key []byte) error { 114 | // Find the path to the node where the deletion should happen 115 | rootNode, err := c.dal.getNode(c.root) 116 | if err != nil { 117 | return err 118 | } 119 | 120 | removeItemIndex, nodeToRemoveFrom, ancestorsIndexes, err := rootNode.findKey(key, true) 121 | if err != nil { 122 | return err 123 | } 124 | 125 | if removeItemIndex == -1 { 126 | return nil 127 | } 128 | 129 | if nodeToRemoveFrom.isLeaf() { 130 | nodeToRemoveFrom.removeItemFromLeaf(removeItemIndex) 131 | } else { 132 | affectedNodes, err := nodeToRemoveFrom.removeItemFromInternal(removeItemIndex) 133 | if err != nil { 134 | return err 135 | } 136 | ancestorsIndexes = append(ancestorsIndexes, affectedNodes...) 137 | } 138 | 139 | ancestors, err := c.getNodes(ancestorsIndexes) 140 | if err != nil { 141 | return err 142 | } 143 | 144 | // Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 145 | for i := len(ancestors) - 2; i >= 0; i-- { 146 | pnode := ancestors[i] 147 | node := ancestors[i+1] 148 | if node.isUnderPopulated() { 149 | err = pnode.rebalanceRemove(node, ancestorsIndexes[i+1]) 150 | if err != nil { 151 | return err 152 | } 153 | } 154 | } 155 | 156 | rootNode = ancestors[0] 157 | // If the root has no items after rebalancing, there's no need to save it because we ignore it. 
158 | if len(rootNode.items) == 0 && len(rootNode.childNodes) > 0 { 159 | c.root = ancestors[1].pageNum 160 | } 161 | 162 | return nil 163 | } 164 | 165 | // getNodes returns a list of nodes based on their indexes (the breadcrumbs) from the root 166 | // p 167 | // / \ 168 | // a b 169 | // / \ / \ 170 | // c d e f 171 | // For [0,1,0] -> p,b,e 172 | func (c *Collection) getNodes(indexes []int) ([]*Node, error) { 173 | root, err := c.dal.getNode(c.root) 174 | if err != nil { 175 | return nil, err 176 | } 177 | 178 | nodes := []*Node{root} 179 | child := root 180 | for i := 1; i < len(indexes); i++ { 181 | child, err = c.dal.getNode(child.childNodes[indexes[i]]) 182 | if err != nil { 183 | return nil, err 184 | } 185 | nodes = append(nodes, child) 186 | } 187 | return nodes, nil 188 | } 189 | -------------------------------------------------------------------------------- /Part 6/collection.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "bytes" 4 | 5 | type Collection struct { 6 | name []byte 7 | root pgnum 8 | 9 | // associated transaction 10 | tx *tx 11 | } 12 | 13 | func newCollection(name []byte, root pgnum) *Collection { 14 | return &Collection{ 15 | name: name, 16 | root: root, 17 | } 18 | } 19 | 20 | // Put adds a key to the tree. It finds the correct node and the insertion index and adds the item. When performing the 21 | // search, the ancestors are returned as well. This way we can iterate over them to check which nodes were modified and 22 | // rebalance by splitting them accordingly. If the root has too many items, then a new root of a new layer is 23 | // created and the created nodes from the split are added as children. 
24 | func (c *Collection) Put(key []byte, value []byte) error { 25 | if !c.tx.write { 26 | return writeInsideReadTxErr 27 | } 28 | 29 | i := newItem(key, value) 30 | 31 | // On first insertion the root node does not exist, so it should be created 32 | var root *Node 33 | var err error 34 | if c.root == 0 { 35 | root = c.tx.writeNode(c.tx.newNode([]*Item{i}, []pgnum{})) 36 | if err != nil { 37 | return nil 38 | } 39 | c.root = root.pageNum 40 | return nil 41 | } else { 42 | root, err = c.tx.getNode(c.root) 43 | if err != nil { 44 | return err 45 | } 46 | } 47 | 48 | // Find the path to the node where the insertion should happen 49 | insertionIndex, nodeToInsertIn, ancestorsIndexes, err := root.findKey(i.key, false) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | // If key already exists 55 | if nodeToInsertIn.items != nil && insertionIndex < len(nodeToInsertIn.items) && bytes.Compare(nodeToInsertIn.items[insertionIndex].key, key) == 0 { 56 | nodeToInsertIn.items[insertionIndex] = i 57 | } else { 58 | // Add item to the leaf node 59 | nodeToInsertIn.addItem(i, insertionIndex) 60 | } 61 | nodeToInsertIn.writeNode(nodeToInsertIn) 62 | 63 | ancestors, err := c.getNodes(ancestorsIndexes) 64 | if err != nil { 65 | return err 66 | } 67 | 68 | // Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 
69 | for i := len(ancestors) - 2; i >= 0; i-- { 70 | pnode := ancestors[i] 71 | node := ancestors[i+1] 72 | nodeIndex := ancestorsIndexes[i+1] 73 | if node.isOverPopulated() { 74 | pnode.split(node, nodeIndex) 75 | } 76 | } 77 | 78 | // Handle root 79 | rootNode := ancestors[0] 80 | if rootNode.isOverPopulated() { 81 | newRoot := c.tx.newNode([]*Item{}, []pgnum{rootNode.pageNum}) 82 | newRoot.split(rootNode, 0) 83 | 84 | // commit newly created root 85 | newRoot = c.tx.writeNode(newRoot) 86 | 87 | c.root = newRoot.pageNum 88 | } 89 | 90 | return nil 91 | } 92 | 93 | // Find Returns an item according based on the given key by performing a binary search. 94 | func (c *Collection) Find(key []byte) (*Item, error) { 95 | n, err := c.tx.getNode(c.root) 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | index, containingNode, _, err := n.findKey(key, true) 101 | if err != nil { 102 | return nil, err 103 | } 104 | if index == -1 { 105 | return nil, nil 106 | } 107 | return containingNode.items[index], nil 108 | } 109 | 110 | // Remove removes a key from the tree. It finds the correct node and the index to remove the item from and removes it. 111 | // When performing the search, the ancestors are returned as well. This way we can iterate over them to check which 112 | // nodes were modified and rebalance by rotating or merging the unbalanced nodes. Rotation is done first. If the 113 | // siblings don't have enough items, then merging occurs. If the root is without items after a split, then the root is 114 | // removed and the tree is one level shorter. 
115 | func (c *Collection) Remove(key []byte) error { 116 | if !c.tx.write { 117 | return writeInsideReadTxErr 118 | } 119 | 120 | // Find the path to the node where the deletion should happen 121 | rootNode, err := c.tx.getNode(c.root) 122 | if err != nil { 123 | return err 124 | } 125 | 126 | removeItemIndex, nodeToRemoveFrom, ancestorsIndexes, err := rootNode.findKey(key, true) 127 | if err != nil { 128 | return err 129 | } 130 | 131 | if removeItemIndex == -1 { 132 | return nil 133 | } 134 | 135 | if nodeToRemoveFrom.isLeaf() { 136 | nodeToRemoveFrom.removeItemFromLeaf(removeItemIndex) 137 | } else { 138 | affectedNodes, err := nodeToRemoveFrom.removeItemFromInternal(removeItemIndex) 139 | if err != nil { 140 | return err 141 | } 142 | ancestorsIndexes = append(ancestorsIndexes, affectedNodes...) 143 | } 144 | 145 | ancestors, err := c.getNodes(ancestorsIndexes) 146 | if err != nil { 147 | return err 148 | } 149 | 150 | // Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 151 | for i := len(ancestors) - 2; i >= 0; i-- { 152 | pnode := ancestors[i] 153 | node := ancestors[i+1] 154 | if node.isUnderPopulated() { 155 | err = pnode.rebalanceRemove(node, ancestorsIndexes[i+1]) 156 | if err != nil { 157 | return err 158 | } 159 | } 160 | } 161 | 162 | rootNode = ancestors[0] 163 | // If the root has no items after rebalancing, there's no need to save it because we ignore it. 
164 | if len(rootNode.items) == 0 && len(rootNode.childNodes) > 0 { 165 | c.root = ancestors[1].pageNum 166 | } 167 | 168 | return nil 169 | } 170 | 171 | // getNodes returns a list of nodes based on their indexes (the breadcrumbs) from the root 172 | // p 173 | // / \ 174 | // a b 175 | // / \ / \ 176 | // c d e f 177 | // For [0,1,0] -> p,b,e 178 | func (c *Collection) getNodes(indexes []int) ([]*Node, error) { 179 | root, err := c.tx.getNode(c.root) 180 | if err != nil { 181 | return nil, err 182 | } 183 | 184 | nodes := []*Node{root} 185 | child := root 186 | for i := 1; i < len(indexes); i++ { 187 | child, err = c.tx.getNode(child.childNodes[indexes[i]]) 188 | if err != nil { 189 | return nil, err 190 | } 191 | nodes = append(nodes, child) 192 | } 193 | return nodes, nil 194 | } 195 | -------------------------------------------------------------------------------- /Part 4/dal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type pgnum uint64 10 | 11 | type Options struct { 12 | pageSize int 13 | 14 | MinFillPercent float32 15 | MaxFillPercent float32 16 | } 17 | 18 | var DefaultOptions = &Options{ 19 | MinFillPercent: 0.5, 20 | MaxFillPercent: 0.95, 21 | } 22 | 23 | type page struct { 24 | num pgnum 25 | data []byte 26 | } 27 | 28 | type dal struct { 29 | pageSize int 30 | minFillPercent float32 31 | maxFillPercent float32 32 | file *os.File 33 | 34 | *meta 35 | *freelist 36 | } 37 | 38 | func newDal(path string, options *Options) (*dal, error) { 39 | dal := &dal{ 40 | meta: newEmptyMeta(), 41 | pageSize: options.pageSize, 42 | minFillPercent: options.MinFillPercent, 43 | maxFillPercent: options.MaxFillPercent, 44 | } 45 | 46 | // exist 47 | if _, err := os.Stat(path); err == nil { 48 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 49 | if err != nil { 50 | _ = dal.close() 51 | return nil, err 52 | } 53 | 54 | meta, err := 
dal.readMeta() 55 | if err != nil { 56 | return nil, err 57 | } 58 | dal.meta = meta 59 | 60 | freelist, err := dal.readFreelist() 61 | if err != nil { 62 | return nil, err 63 | } 64 | dal.freelist = freelist 65 | // doesn't exist 66 | } else if errors.Is(err, os.ErrNotExist) { 67 | // init freelist 68 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 69 | if err != nil { 70 | _ = dal.close() 71 | return nil, err 72 | } 73 | 74 | dal.freelist = newFreelist() 75 | dal.freelistPage = dal.getNextPage() 76 | _, err := dal.writeFreelist() 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | // init root 82 | collectionsNode, err := dal.writeNode(NewNodeForSerialization([]*Item{}, []pgnum{})) 83 | if err != nil { 84 | return nil, err 85 | } 86 | dal.root = collectionsNode.pageNum 87 | 88 | // write meta page 89 | _, err = dal.writeMeta(dal.meta) // other error 90 | } else { 91 | return nil, err 92 | } 93 | return dal, nil 94 | } 95 | 96 | // getSplitIndex should be called when performing rebalance after an item is removed. It checks if a node can spare an 97 | // element, and if it does then it returns the index when there the split should happen. Otherwise -1 is returned. 
98 | func (d *dal) getSplitIndex(node *Node) int { 99 | size := 0 100 | size += nodeHeaderSize 101 | 102 | for i := range node.items { 103 | size += node.elementSize(i) 104 | 105 | // if we have a big enough page size (more than minimum), and didn't reach the last node, which means we can 106 | // spare an element 107 | if float32(size) > d.minThreshold() && i < len(node.items) - 1 { 108 | return i + 1 109 | } 110 | } 111 | 112 | return -1 113 | } 114 | 115 | 116 | func (d *dal) maxThreshold() float32 { 117 | return d.maxFillPercent * float32(d.pageSize) 118 | } 119 | 120 | func (d *dal) isOverPopulated(node *Node) bool { 121 | return float32(node.nodeSize()) > d.maxThreshold() 122 | } 123 | 124 | func (d *dal) minThreshold() float32 { 125 | return d.minFillPercent * float32(d.pageSize) 126 | } 127 | 128 | func (d *dal) isUnderPopulated(node *Node) bool { 129 | return float32(node.nodeSize()) < d.minThreshold() 130 | } 131 | 132 | func (d *dal) close() error { 133 | if d.file != nil { 134 | err := d.file.Close() 135 | if err != nil { 136 | return fmt.Errorf("could not close file: %s", err) 137 | } 138 | d.file = nil 139 | } 140 | 141 | return nil 142 | } 143 | 144 | func (d *dal) allocateEmptyPage() *page { 145 | return &page{ 146 | data: make([]byte, d.pageSize, d.pageSize), 147 | } 148 | } 149 | 150 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 151 | p := d.allocateEmptyPage() 152 | 153 | offset := int(pageNum) * d.pageSize 154 | _, err := d.file.ReadAt(p.data, int64(offset)) 155 | if err != nil { 156 | return nil, err 157 | } 158 | return p, err 159 | } 160 | 161 | func (d *dal) writePage(p *page) error { 162 | offset := int64(p.num) * int64(d.pageSize) 163 | _, err := d.file.WriteAt(p.data, offset) 164 | return err 165 | } 166 | 167 | func (d *dal) newNode(items []*Item, childNodes []pgnum) *Node { 168 | node := NewEmptyNode() 169 | node.items = items 170 | node.childNodes = childNodes 171 | node.pageNum = d.getNextPage() 172 | node.dal = d 173 | 
return node 174 | } 175 | 176 | func (d *dal) getNode(pageNum pgnum) (*Node, error) { 177 | p, err := d.readPage(pageNum) 178 | if err != nil { 179 | return nil, err 180 | } 181 | node := NewEmptyNode() 182 | node.deserialize(p.data) 183 | node.pageNum = pageNum 184 | node.dal=d 185 | return node, nil 186 | } 187 | 188 | func (d *dal) writeNode(n *Node) (*Node, error) { 189 | p := d.allocateEmptyPage() 190 | if n.pageNum == 0 { 191 | p.num = d.getNextPage() 192 | n.pageNum = p.num 193 | } else { 194 | p.num = n.pageNum 195 | } 196 | 197 | p.data = n.serialize(p.data) 198 | 199 | err := d.writePage(p) 200 | if err != nil { 201 | return nil, err 202 | } 203 | return n, nil 204 | } 205 | 206 | func (d *dal) deleteNode(pageNum pgnum) { 207 | d.releasePage(pageNum) 208 | } 209 | 210 | func (d *dal) readFreelist() (*freelist, error) { 211 | p, err := d.readPage(d.freelistPage) 212 | if err != nil { 213 | return nil, err 214 | } 215 | 216 | freelist := newFreelist() 217 | freelist.deserialize(p.data) 218 | return freelist, nil 219 | } 220 | 221 | func (d *dal) writeFreelist() (*page, error) { 222 | p := d.allocateEmptyPage() 223 | p.num = d.freelistPage 224 | d.freelist.serialize(p.data) 225 | 226 | err := d.writePage(p) 227 | if err != nil { 228 | return nil, err 229 | } 230 | d.freelistPage = p.num 231 | return p, nil 232 | } 233 | 234 | func (d *dal) writeMeta(meta *meta) (*page, error) { 235 | p := d.allocateEmptyPage() 236 | p.num = metaPageNum 237 | meta.serialize(p.data) 238 | 239 | err := d.writePage(p) 240 | if err != nil { 241 | return nil, err 242 | } 243 | return p, nil 244 | } 245 | 246 | func (d *dal) readMeta() (*meta, error) { 247 | p, err := d.readPage(metaPageNum) 248 | if err != nil { 249 | return nil, err 250 | } 251 | 252 | meta := newEmptyMeta() 253 | meta.deserialize(p.data) 254 | return meta, nil 255 | } 256 | -------------------------------------------------------------------------------- /Part 5/dal.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type pgnum uint64 10 | 11 | type Options struct { 12 | pageSize int 13 | 14 | MinFillPercent float32 15 | MaxFillPercent float32 16 | } 17 | 18 | var DefaultOptions = &Options{ 19 | MinFillPercent: 0.5, 20 | MaxFillPercent: 0.95, 21 | } 22 | 23 | type page struct { 24 | num pgnum 25 | data []byte 26 | } 27 | 28 | type dal struct { 29 | pageSize int 30 | minFillPercent float32 31 | maxFillPercent float32 32 | file *os.File 33 | 34 | *meta 35 | *freelist 36 | } 37 | 38 | func newDal(path string, options *Options) (*dal, error) { 39 | dal := &dal{ 40 | meta: newEmptyMeta(), 41 | pageSize: options.pageSize, 42 | minFillPercent: options.MinFillPercent, 43 | maxFillPercent: options.MaxFillPercent, 44 | } 45 | 46 | // exist 47 | if _, err := os.Stat(path); err == nil { 48 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 49 | if err != nil { 50 | _ = dal.close() 51 | return nil, err 52 | } 53 | 54 | meta, err := dal.readMeta() 55 | if err != nil { 56 | return nil, err 57 | } 58 | dal.meta = meta 59 | 60 | freelist, err := dal.readFreelist() 61 | if err != nil { 62 | return nil, err 63 | } 64 | dal.freelist = freelist 65 | // doesn't exist 66 | } else if errors.Is(err, os.ErrNotExist) { 67 | // init freelist 68 | dal.file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 69 | if err != nil { 70 | _ = dal.close() 71 | return nil, err 72 | } 73 | 74 | dal.freelist = newFreelist() 75 | dal.freelistPage = dal.getNextPage() 76 | _, err := dal.writeFreelist() 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | // init root 82 | collectionsNode, err := dal.writeNode(NewNodeForSerialization([]*Item{}, []pgnum{})) 83 | if err != nil { 84 | return nil, err 85 | } 86 | dal.root = collectionsNode.pageNum 87 | 88 | // write meta page 89 | _, err = dal.writeMeta(dal.meta) // other error 90 | } else { 91 
| return nil, err 92 | } 93 | return dal, nil 94 | } 95 | 96 | // getSplitIndex should be called when performing rebalance after an item is removed. It checks if a node can spare an 97 | // element, and if it does then it returns the index when there the split should happen. Otherwise -1 is returned. 98 | func (d *dal) getSplitIndex(node *Node) int { 99 | size := 0 100 | size += nodeHeaderSize 101 | 102 | for i := range node.items { 103 | size += node.elementSize(i) 104 | 105 | // if we have a big enough page size (more than minimum), and didn't reach the last node, which means we can 106 | // spare an element 107 | if float32(size) > d.minThreshold() && i < len(node.items) - 1 { 108 | return i + 1 109 | } 110 | } 111 | 112 | return -1 113 | } 114 | 115 | 116 | func (d *dal) maxThreshold() float32 { 117 | return d.maxFillPercent * float32(d.pageSize) 118 | } 119 | 120 | func (d *dal) isOverPopulated(node *Node) bool { 121 | return float32(node.nodeSize()) > d.maxThreshold() 122 | } 123 | 124 | func (d *dal) minThreshold() float32 { 125 | return d.minFillPercent * float32(d.pageSize) 126 | } 127 | 128 | func (d *dal) isUnderPopulated(node *Node) bool { 129 | return float32(node.nodeSize()) < d.minThreshold() 130 | } 131 | 132 | func (d *dal) close() error { 133 | if d.file != nil { 134 | err := d.file.Close() 135 | if err != nil { 136 | return fmt.Errorf("could not close file: %s", err) 137 | } 138 | d.file = nil 139 | } 140 | 141 | return nil 142 | } 143 | 144 | func (d *dal) allocateEmptyPage() *page { 145 | return &page{ 146 | data: make([]byte, d.pageSize, d.pageSize), 147 | } 148 | } 149 | 150 | func (d *dal) readPage(pageNum pgnum) (*page, error) { 151 | p := d.allocateEmptyPage() 152 | 153 | offset := int(pageNum) * d.pageSize 154 | _, err := d.file.ReadAt(p.data, int64(offset)) 155 | if err != nil { 156 | return nil, err 157 | } 158 | return p, err 159 | } 160 | 161 | func (d *dal) writePage(p *page) error { 162 | offset := int64(p.num) * int64(d.pageSize) 
163 | _, err := d.file.WriteAt(p.data, offset) 164 | return err 165 | } 166 | 167 | func (d *dal) newNode(items []*Item, childNodes []pgnum) *Node { 168 | node := NewEmptyNode() 169 | node.items = items 170 | node.childNodes = childNodes 171 | node.pageNum = d.getNextPage() 172 | node.dal = d 173 | return node 174 | } 175 | 176 | func (d *dal) getNode(pageNum pgnum) (*Node, error) { 177 | p, err := d.readPage(pageNum) 178 | if err != nil { 179 | return nil, err 180 | } 181 | node := NewEmptyNode() 182 | node.deserialize(p.data) 183 | node.pageNum = pageNum 184 | node.dal=d 185 | return node, nil 186 | } 187 | 188 | func (d *dal) writeNode(n *Node) (*Node, error) { 189 | p := d.allocateEmptyPage() 190 | if n.pageNum == 0 { 191 | p.num = d.getNextPage() 192 | n.pageNum = p.num 193 | } else { 194 | p.num = n.pageNum 195 | } 196 | 197 | p.data = n.serialize(p.data) 198 | 199 | err := d.writePage(p) 200 | if err != nil { 201 | return nil, err 202 | } 203 | return n, nil 204 | } 205 | 206 | func (d *dal) deleteNode(pageNum pgnum) { 207 | d.releasePage(pageNum) 208 | } 209 | 210 | func (d *dal) readFreelist() (*freelist, error) { 211 | p, err := d.readPage(d.freelistPage) 212 | if err != nil { 213 | return nil, err 214 | } 215 | 216 | freelist := newFreelist() 217 | freelist.deserialize(p.data) 218 | return freelist, nil 219 | } 220 | 221 | func (d *dal) writeFreelist() (*page, error) { 222 | p := d.allocateEmptyPage() 223 | p.num = d.freelistPage 224 | d.freelist.serialize(p.data) 225 | 226 | err := d.writePage(p) 227 | if err != nil { 228 | return nil, err 229 | } 230 | d.freelistPage = p.num 231 | return p, nil 232 | } 233 | 234 | func (d *dal) writeMeta(meta *meta) (*page, error) { 235 | p := d.allocateEmptyPage() 236 | p.num = metaPageNum 237 | meta.serialize(p.data) 238 | 239 | err := d.writePage(p) 240 | if err != nil { 241 | return nil, err 242 | } 243 | return p, nil 244 | } 245 | 246 | func (d *dal) readMeta() (*meta, error) { 247 | p, err := 
d.readPage(metaPageNum) 248 | if err != nil { 249 | return nil, err 250 | } 251 | 252 | meta := newEmptyMeta() 253 | meta.deserialize(p.data) 254 | return meta, nil 255 | } 256 | -------------------------------------------------------------------------------- /Part 3/node.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type Item struct { 9 | key []byte 10 | value []byte 11 | } 12 | 13 | type Node struct { 14 | *dal 15 | 16 | pageNum pgnum 17 | items []*Item 18 | childNodes []pgnum 19 | } 20 | 21 | func NewEmptyNode() *Node { 22 | return &Node{} 23 | } 24 | 25 | func newItem(key []byte, value []byte) *Item { 26 | return &Item{ 27 | key: key, 28 | value: value, 29 | } 30 | } 31 | 32 | func (n *Node) isLeaf() bool { 33 | return len(n.childNodes) == 0 34 | } 35 | 36 | func (n *Node) writeNode(node *Node) *Node { 37 | node, _ = n.dal.writeNode(node) 38 | return node 39 | } 40 | 41 | func (n *Node) writeNodes(nodes ...*Node) { 42 | for _, node := range nodes { 43 | n.writeNode(node) 44 | } 45 | } 46 | 47 | func (n *Node) getNode(pageNum pgnum) (*Node, error) { 48 | return n.dal.getNode(pageNum) 49 | } 50 | 51 | func (n *Node) serialize(buf []byte) []byte { 52 | leftPos := 0 53 | rightPos := len(buf) - 1 54 | 55 | // Add page header: isLeaf, key-value pairs count, node num 56 | // isLeaf 57 | isLeaf := n.isLeaf() 58 | var bitSetVar uint64 59 | if isLeaf { 60 | bitSetVar = 1 61 | } 62 | buf[leftPos] = byte(bitSetVar) 63 | leftPos += 1 64 | 65 | // key-value pairs count 66 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(len(n.items))) 67 | leftPos += 2 68 | 69 | // We use slotted pages for storing data in the page. It means the actual keys and values (the cells) are appended 70 | // to right of the page whereas offsets have a fixed size and are appended from the left. 
71 | // It's easier to preserve the logical order (alphabetical in the case of b-tree) using the metadata and performing 72 | // pointer arithmetic. Using the data itself is harder as it varies by size. 73 | 74 | // Page structure is: 75 | // ---------------------------------------------------------------------------------- 76 | // | Page | key-value / child node key-value | key-value | 77 | // | Header | offset / pointer offset .... | data ..... | 78 | // ---------------------------------------------------------------------------------- 79 | 80 | for i := 0; i < len(n.items); i++ { 81 | item := n.items[i] 82 | if !isLeaf { 83 | childNode := n.childNodes[i] 84 | 85 | // Write the child page as a fixed size of 8 bytes 86 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(childNode)) 87 | leftPos += pageNumSize 88 | } 89 | 90 | klen := len(item.key) 91 | vlen := len(item.value) 92 | 93 | // write offset 94 | offset := rightPos - klen - vlen - 2 95 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(offset)) 96 | leftPos += 2 97 | 98 | rightPos -= vlen 99 | copy(buf[rightPos:], item.value) 100 | 101 | rightPos -= 1 102 | buf[rightPos] = byte(vlen) 103 | 104 | rightPos -= klen 105 | copy(buf[rightPos:], item.key) 106 | 107 | rightPos -= 1 108 | buf[rightPos] = byte(klen) 109 | } 110 | 111 | if !isLeaf { 112 | // Write the last child node 113 | lastChildNode := n.childNodes[len(n.childNodes)-1] 114 | // Write the child page as a fixed size of 8 bytes 115 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(lastChildNode)) 116 | } 117 | 118 | return buf 119 | } 120 | 121 | func (n *Node) deserialize(buf []byte) { 122 | leftPos := 0 123 | 124 | // Read header 125 | isLeaf := uint16(buf[0]) 126 | 127 | itemsCount := int(binary.LittleEndian.Uint16(buf[1:3])) 128 | leftPos += 3 129 | 130 | // Read body 131 | for i := 0; i < itemsCount; i++ { 132 | if isLeaf == 0 { // False 133 | pageNum := binary.LittleEndian.Uint64(buf[leftPos:]) 134 | leftPos += pageNumSize 135 | 136 
| n.childNodes = append(n.childNodes, pgnum(pageNum)) 137 | } 138 | 139 | // Read offset 140 | offset := binary.LittleEndian.Uint16(buf[leftPos:]) 141 | leftPos += 2 142 | 143 | klen := uint16(buf[int(offset)]) 144 | offset += 1 145 | 146 | key := buf[offset : offset+klen] 147 | offset += klen 148 | 149 | vlen := uint16(buf[int(offset)]) 150 | offset += 1 151 | 152 | value := buf[offset : offset+vlen] 153 | offset += vlen 154 | n.items = append(n.items, newItem(key, value)) 155 | } 156 | 157 | if isLeaf == 0 { // False 158 | // Read the last child node 159 | pageNum := pgnum(binary.LittleEndian.Uint64(buf[leftPos:])) 160 | n.childNodes = append(n.childNodes, pageNum) 161 | } 162 | } 163 | 164 | 165 | // findKey searches for a key inside the tree. Once the key is found, the parent node and the correct index are returned 166 | // so the key itself can be accessed in the following way parent[index]. 167 | // If the key isn't found, a falsey answer is returned. 168 | func (n *Node) findKey(key []byte) (int, *Node ,error) { 169 | index, node, err := findKeyHelper(n, key) 170 | if err != nil { 171 | return -1, nil, err 172 | } 173 | return index, node, nil 174 | } 175 | 176 | func findKeyHelper(node *Node, key []byte) (int, *Node ,error) { 177 | // Search for the key inside the node 178 | wasFound, index := node.findKeyInNode(key) 179 | if wasFound { 180 | return index, node, nil 181 | } 182 | 183 | // If we reached a leaf node and the key wasn't found, it means it doesn't exist. 184 | if node.isLeaf() { 185 | return -1, nil, nil 186 | } 187 | 188 | // Else keep searching the tree 189 | nextChild, err := node.getNode(node.childNodes[index]) 190 | if err != nil { 191 | return -1, nil, err 192 | } 193 | return findKeyHelper(nextChild, key) 194 | } 195 | 196 | // findKeyInNode iterates all the items and finds the key. If the key is found, then the item is returned. 
If the key 197 | // isn't found then return the index where it should have been (the first index that key is greater than it's previous) 198 | func (n *Node) findKeyInNode(key []byte) (bool, int) { 199 | for i, existingItem := range n.items { 200 | res := bytes.Compare(existingItem.key, key) 201 | if res == 0 { // Keys match 202 | return true, i 203 | } 204 | 205 | // The key is bigger than the previous item, so it doesn't exist in the node, but may exist in child nodes. 206 | if res == 1 { 207 | return false, i 208 | } 209 | } 210 | 211 | // The key isn't bigger than any of the items which means it's in the last index. 212 | return false, len(n.items) 213 | } -------------------------------------------------------------------------------- /Part 7/collection.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type Collection struct { 9 | name []byte 10 | root pgnum 11 | counter uint64 12 | 13 | // associated transaction 14 | tx *tx 15 | 16 | } 17 | 18 | func newCollection(name []byte, root pgnum) *Collection { 19 | return &Collection{ 20 | name: name, 21 | root: root, 22 | } 23 | } 24 | 25 | func newEmptyCollection() *Collection { 26 | return &Collection{} 27 | } 28 | 29 | func (c *Collection) ID() uint64 { 30 | if !c.tx.write { 31 | return 0 32 | } 33 | 34 | id := c.counter 35 | c.counter += 1 36 | return id 37 | } 38 | 39 | func (c *Collection) serialize() *Item { 40 | b := make([]byte, collectionSize) 41 | leftPos := 0 42 | binary.LittleEndian.PutUint64(b[leftPos:], uint64(c.root)) 43 | leftPos += pageNumSize 44 | binary.LittleEndian.PutUint64(b[leftPos:], c.counter) 45 | leftPos += counterSize 46 | return newItem(c.name, b) 47 | } 48 | 49 | func (c *Collection) deserialize(item *Item) { 50 | c.name = item.key 51 | 52 | if len(item.value) != 0 { 53 | leftPos := 0 54 | c.root = pgnum(binary.LittleEndian.Uint64(item.value[leftPos:])) 55 | leftPos += 
pageNumSize 56 | 57 | c.counter = binary.LittleEndian.Uint64(item.value[leftPos:]) 58 | leftPos += counterSize 59 | } 60 | } 61 | 62 | // Put adds a key to the tree. It finds the correct node and the insertion index and adds the item. When performing the 63 | // search, the ancestors are returned as well. This way we can iterate over them to check which nodes were modified and 64 | // rebalance by splitting them accordingly. If the root has too many items, then a new root of a new layer is 65 | // created and the created nodes from the split are added as children. 66 | func (c *Collection) Put(key []byte, value []byte) error { 67 | if !c.tx.write { 68 | return writeInsideReadTxErr 69 | } 70 | 71 | i := newItem(key, value) 72 | 73 | // On first insertion the root node does not exist, so it should be created 74 | var root *Node 75 | var err error 76 | if c.root == 0 { 77 | root = c.tx.writeNode(c.tx.newNode([]*Item{i}, []pgnum{})) 78 | c.root = root.pageNum 79 | return nil 80 | } else { 81 | root, err = c.tx.getNode(c.root) 82 | if err != nil { 83 | return err 84 | } 85 | } 86 | 87 | // Find the path to the node where the insertion should happen 88 | insertionIndex, nodeToInsertIn, ancestorsIndexes, err := root.findKey(i.key, false) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | // If key already exists 94 | if nodeToInsertIn.items != nil && insertionIndex < len(nodeToInsertIn.items) && bytes.Compare(nodeToInsertIn.items[insertionIndex].key, key) == 0 { 95 | nodeToInsertIn.items[insertionIndex] = i 96 | } else { 97 | // Add item to the leaf node 98 | nodeToInsertIn.addItem(i, insertionIndex) 99 | } 100 | nodeToInsertIn.writeNode(nodeToInsertIn) 101 | 102 | ancestors, err := c.getNodes(ancestorsIndexes) 103 | if err != nil { 104 | return err 105 | } 106 | 107 | // Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 
108 | for i := len(ancestors) - 2; i >= 0; i-- { 109 | pnode := ancestors[i] 110 | node := ancestors[i+1] 111 | nodeIndex := ancestorsIndexes[i+1] 112 | if node.isOverPopulated() { 113 | pnode.split(node, nodeIndex) 114 | } 115 | } 116 | 117 | // Handle root 118 | rootNode := ancestors[0] 119 | if rootNode.isOverPopulated() { 120 | newRoot := c.tx.newNode([]*Item{}, []pgnum{rootNode.pageNum}) 121 | newRoot.split(rootNode, 0) 122 | 123 | // commit newly created root 124 | newRoot = c.tx.writeNode(newRoot) 125 | 126 | c.root = newRoot.pageNum 127 | } 128 | 129 | return nil 130 | } 131 | 132 | // Find Returns an item according based on the given key by performing a binary search. 133 | func (c *Collection) Find(key []byte) (*Item, error) { 134 | n, err := c.tx.getNode(c.root) 135 | if err != nil { 136 | return nil, err 137 | } 138 | 139 | index, containingNode, _, err := n.findKey(key, true) 140 | if err != nil { 141 | return nil, err 142 | } 143 | if index == -1 { 144 | return nil, nil 145 | } 146 | return containingNode.items[index], nil 147 | } 148 | 149 | // Remove removes a key from the tree. It finds the correct node and the index to remove the item from and removes it. 150 | // When performing the search, the ancestors are returned as well. This way we can iterate over them to check which 151 | // nodes were modified and rebalance by rotating or merging the unbalanced nodes. Rotation is done first. If the 152 | // siblings don't have enough items, then merging occurs. If the root is without items after a split, then the root is 153 | // removed and the tree is one level shorter. 
154 | func (c *Collection) Remove(key []byte) error { 155 | if !c.tx.write { 156 | return writeInsideReadTxErr 157 | } 158 | 159 | // Find the path to the node where the deletion should happen 160 | rootNode, err := c.tx.getNode(c.root) 161 | if err != nil { 162 | return err 163 | } 164 | 165 | removeItemIndex, nodeToRemoveFrom, ancestorsIndexes, err := rootNode.findKey(key, true) 166 | if err != nil { 167 | return err 168 | } 169 | 170 | if removeItemIndex == -1 { 171 | return nil 172 | } 173 | 174 | if nodeToRemoveFrom.isLeaf() { 175 | nodeToRemoveFrom.removeItemFromLeaf(removeItemIndex) 176 | } else { 177 | affectedNodes, err := nodeToRemoveFrom.removeItemFromInternal(removeItemIndex) 178 | if err != nil { 179 | return err 180 | } 181 | ancestorsIndexes = append(ancestorsIndexes, affectedNodes...) 182 | } 183 | 184 | ancestors, err := c.getNodes(ancestorsIndexes) 185 | if err != nil { 186 | return err 187 | } 188 | 189 | // Rebalance the nodes all the way up. Start From one node before the last and go all the way up. Exclude root. 190 | for i := len(ancestors) - 2; i >= 0; i-- { 191 | pnode := ancestors[i] 192 | node := ancestors[i+1] 193 | if node.isUnderPopulated() { 194 | err = pnode.rebalanceRemove(node, ancestorsIndexes[i+1]) 195 | if err != nil { 196 | return err 197 | } 198 | } 199 | } 200 | 201 | rootNode = ancestors[0] 202 | // If the root has no items after rebalancing, there's no need to save it because we ignore it. 
203 | if len(rootNode.items) == 0 && len(rootNode.childNodes) > 0 { 204 | c.root = ancestors[1].pageNum 205 | } 206 | 207 | return nil 208 | } 209 | 210 | // getNodes returns a list of nodes based on their indexes (the breadcrumbs) from the root 211 | // p 212 | // / \ 213 | // a b 214 | // / \ / \ 215 | // c d e f 216 | // For [0,1,0] -> p,b,e 217 | func (c *Collection) getNodes(indexes []int) ([]*Node, error) { 218 | root, err := c.tx.getNode(c.root) 219 | if err != nil { 220 | return nil, err 221 | } 222 | 223 | nodes := []*Node{root} 224 | child := root 225 | for i := 1; i < len(indexes); i++ { 226 | child, _ = c.tx.getNode(child.childNodes[indexes[i]]) 227 | nodes = append(nodes, child) 228 | } 229 | return nodes, nil 230 | } 231 | -------------------------------------------------------------------------------- /Part 4/node.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type Item struct { 9 | key []byte 10 | value []byte 11 | } 12 | 13 | type Node struct { 14 | *dal 15 | 16 | pageNum pgnum 17 | items []*Item 18 | childNodes []pgnum 19 | } 20 | 21 | func NewEmptyNode() *Node { 22 | return &Node{} 23 | } 24 | 25 | // NewNodeForSerialization creates a new node only with the properties that are relevant when saving to the disk 26 | func NewNodeForSerialization(items []*Item, childNodes []pgnum) *Node { 27 | return &Node{ 28 | items: items, 29 | childNodes: childNodes, 30 | } 31 | } 32 | 33 | func newItem(key []byte, value []byte) *Item { 34 | return &Item{ 35 | key: key, 36 | value: value, 37 | } 38 | } 39 | 40 | func isLast(index int, parentNode *Node) bool { 41 | return index == len(parentNode.items) 42 | } 43 | 44 | func isFirst(index int) bool { 45 | return index == 0 46 | } 47 | 48 | func (n *Node) isLeaf() bool { 49 | return len(n.childNodes) == 0 50 | } 51 | 52 | func (n *Node) writeNode(node *Node) *Node { 53 | node, _ = 
n.dal.writeNode(node) 54 | return node 55 | } 56 | 57 | func (n *Node) writeNodes(nodes ...*Node) { 58 | for _, node := range nodes { 59 | n.writeNode(node) 60 | } 61 | } 62 | 63 | func (n *Node) getNode(pageNum pgnum) (*Node, error) { 64 | return n.dal.getNode(pageNum) 65 | } 66 | 67 | // isOverPopulated checks if the node size is bigger than the size of a page. 68 | func (n *Node) isOverPopulated() bool { 69 | return n.dal.isOverPopulated(n) 70 | } 71 | 72 | // canSpareAnElement checks if the node size is big enough to populate a page after giving away one item. 73 | func (n *Node) canSpareAnElement() bool { 74 | splitIndex := n.dal.getSplitIndex(n) 75 | if splitIndex == -1 { 76 | return false 77 | } 78 | return true 79 | } 80 | 81 | // isUnderPopulated checks if the node size is smaller than the size of a page. 82 | func (n *Node) isUnderPopulated() bool { 83 | return n.dal.isUnderPopulated(n) 84 | } 85 | 86 | func (n *Node) serialize(buf []byte) []byte { 87 | leftPos := 0 88 | rightPos := len(buf) - 1 89 | 90 | // Add page header: isLeaf, key-value pairs count, node num 91 | // isLeaf 92 | isLeaf := n.isLeaf() 93 | var bitSetVar uint64 94 | if isLeaf { 95 | bitSetVar = 1 96 | } 97 | buf[leftPos] = byte(bitSetVar) 98 | leftPos += 1 99 | 100 | // key-value pairs count 101 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(len(n.items))) 102 | leftPos += 2 103 | 104 | // We use slotted pages for storing data in the page. It means the actual keys and values (the cells) are appended 105 | // to right of the page whereas offsets have a fixed size and are appended from the left. 106 | // It's easier to preserve the logical order (alphabetical in the case of b-tree) using the metadata and performing 107 | // pointer arithmetic. Using the data itself is harder as it varies by size. 
108 | 109 | // Page structure is: 110 | // ---------------------------------------------------------------------------------- 111 | // | Page | key-value / child node key-value | key-value | 112 | // | Header | offset / pointer offset .... | data ..... | 113 | // ---------------------------------------------------------------------------------- 114 | 115 | for i := 0; i < len(n.items); i++ { 116 | item := n.items[i] 117 | if !isLeaf { 118 | childNode := n.childNodes[i] 119 | 120 | // Write the child page as a fixed size of 8 bytes 121 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(childNode)) 122 | leftPos += pageNumSize 123 | } 124 | 125 | klen := len(item.key) 126 | vlen := len(item.value) 127 | 128 | // write offset 129 | offset := rightPos - klen - vlen - 2 130 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(offset)) 131 | leftPos += 2 132 | 133 | rightPos -= vlen 134 | copy(buf[rightPos:], item.value) 135 | 136 | rightPos -= 1 137 | buf[rightPos] = byte(vlen) 138 | 139 | rightPos -= klen 140 | copy(buf[rightPos:], item.key) 141 | 142 | rightPos -= 1 143 | buf[rightPos] = byte(klen) 144 | } 145 | 146 | if !isLeaf { 147 | // Write the last child node 148 | lastChildNode := n.childNodes[len(n.childNodes)-1] 149 | // Write the child page as a fixed size of 8 bytes 150 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(lastChildNode)) 151 | } 152 | 153 | return buf 154 | } 155 | 156 | func (n *Node) deserialize(buf []byte) { 157 | leftPos := 0 158 | 159 | // Read header 160 | isLeaf := uint16(buf[0]) 161 | 162 | itemsCount := int(binary.LittleEndian.Uint16(buf[1:3])) 163 | leftPos += 3 164 | 165 | // Read body 166 | for i := 0; i < itemsCount; i++ { 167 | if isLeaf == 0 { // False 168 | pageNum := binary.LittleEndian.Uint64(buf[leftPos:]) 169 | leftPos += pageNumSize 170 | 171 | n.childNodes = append(n.childNodes, pgnum(pageNum)) 172 | } 173 | 174 | // Read offset 175 | offset := binary.LittleEndian.Uint16(buf[leftPos:]) 176 | leftPos += 2 177 | 178 
| klen := uint16(buf[int(offset)]) 179 | offset += 1 180 | 181 | key := buf[offset : offset+klen] 182 | offset += klen 183 | 184 | vlen := uint16(buf[int(offset)]) 185 | offset += 1 186 | 187 | value := buf[offset : offset+vlen] 188 | offset += vlen 189 | n.items = append(n.items, newItem(key, value)) 190 | } 191 | 192 | if isLeaf == 0 { // False 193 | // Read the last child node 194 | pageNum := pgnum(binary.LittleEndian.Uint64(buf[leftPos:])) 195 | n.childNodes = append(n.childNodes, pageNum) 196 | } 197 | } 198 | 199 | // elementSize returns the size of a key-value-childNode triplet at a given index. 200 | // If the node is a leaf, then the size of a key-value pair is returned. 201 | // It's assumed i <= len(n.items) 202 | func (n *Node) elementSize(i int) int { 203 | size := 0 204 | size += len(n.items[i].key) 205 | size += len(n.items[i].value) 206 | size += pageNumSize // 8 is the pgnum size 207 | return size 208 | } 209 | 210 | // nodeSize returns the node's size in bytes 211 | func (n *Node) nodeSize() int { 212 | size := 0 213 | size += nodeHeaderSize 214 | 215 | for i := range n.items { 216 | size += n.elementSize(i) 217 | } 218 | 219 | // Add last page 220 | size += pageNumSize // 8 is the pgnum size 221 | return size 222 | } 223 | 224 | // findKey searches for a key inside the tree. Once the key is found, the parent node and the correct index are returned 225 | // so the key itself can be accessed in the following way parent[index]. A list of the node ancestors (not including the 226 | // node itself) is also returned. 227 | // If the key isn't found, we have 2 options. If exact is true, it means we expect findKey 228 | // to find the key, so a falsey answer. If exact is false, then findKey is used to locate where a new key should be 229 | // inserted so the position is returned. 
230 | func (n *Node) findKey(key []byte, exact bool) (int, *Node, []int ,error) { 231 | ancestorsIndexes := []int{0} // index of root 232 | index, node, err := findKeyHelper(n, key, exact, &ancestorsIndexes) 233 | if err != nil { 234 | return -1, nil, nil, err 235 | } 236 | return index, node, ancestorsIndexes, nil 237 | } 238 | 239 | func findKeyHelper(node *Node, key []byte, exact bool, ancestorsIndexes *[]int) (int, *Node ,error) { 240 | wasFound, index := node.findKeyInNode(key) 241 | if wasFound { 242 | return index, node, nil 243 | } 244 | 245 | if node.isLeaf() { 246 | if exact { 247 | return -1, nil, nil 248 | } 249 | return index, node, nil 250 | } 251 | 252 | *ancestorsIndexes = append(*ancestorsIndexes, index) 253 | nextChild, err := node.getNode(node.childNodes[index]) 254 | if err != nil { 255 | return -1, nil, err 256 | } 257 | return findKeyHelper(nextChild, key, exact, ancestorsIndexes) 258 | } 259 | 260 | // findKeyInNode iterates all the items and finds the key. If the key is found, then the item is returned. If the key 261 | // isn't found then return the index where it should have been (the first index that key is greater than it's previous) 262 | func (n *Node) findKeyInNode(key []byte) (bool, int) { 263 | for i, existingItem := range n.items { 264 | res := bytes.Compare(existingItem.key, key) 265 | if res == 0 { // Keys match 266 | return true, i 267 | } 268 | 269 | // The key is bigger than the previous item, so it doesn't exist in the node, but may exist in child nodes. 270 | if res == 1 { 271 | return false, i 272 | } 273 | } 274 | 275 | // The key isn't bigger than any of the items which means it's in the last index. 
276 | return false, len(n.items) 277 | } 278 | 279 | func (n *Node) addItem(item *Item, insertionIndex int) int { 280 | if len(n.items) == insertionIndex { // nil or empty slice or after last element 281 | n.items = append(n.items, item) 282 | return insertionIndex 283 | } 284 | 285 | n.items = append(n.items[:insertionIndex+1], n.items[insertionIndex:]...) 286 | n.items[insertionIndex] = item 287 | return insertionIndex 288 | } 289 | 290 | // split rebalances the tree after adding. After insertion the modified node has to be checked to make sure it 291 | // didn't exceed the maximum number of elements. If it did, then it has to be split and rebalanced. The transformation 292 | // is depicted in the graph below. If it's not a leaf node, then the children has to be moved as well as shown. 293 | // This may leave the parent unbalanced by having too many items so rebalancing has to be checked for all the ancestors. 294 | // The split is performed in a for loop to support splitting a node more than once. (Though in practice used only once). 295 | // n n 296 | // 3 3,6 297 | // / \ ------> / | \ 298 | // a modifiedNode a modifiedNode newNode 299 | // 1,2 4,5,6,7,8 1,2 4,5 7,8 300 | func (n *Node) split(nodeToSplit *Node, nodeToSplitIndex int) { 301 | // The first index where min amount of bytes to populate a page is achieved. Then add 1 so it will be split one 302 | // index after. 
303 | splitIndex := nodeToSplit.dal.getSplitIndex(nodeToSplit) 304 | 305 | middleItem := nodeToSplit.items[splitIndex] 306 | var newNode *Node 307 | 308 | if nodeToSplit.isLeaf() { 309 | newNode = n.writeNode(n.dal.newNode(nodeToSplit.items[splitIndex+1:], []pgnum{})) 310 | nodeToSplit.items = nodeToSplit.items[:splitIndex] 311 | } else { 312 | newNode = n.writeNode(n.dal.newNode(nodeToSplit.items[splitIndex+1:], nodeToSplit.childNodes[splitIndex+1:])) 313 | nodeToSplit.items = nodeToSplit.items[:splitIndex] 314 | nodeToSplit.childNodes = nodeToSplit.childNodes[:splitIndex+1] 315 | } 316 | n.addItem(middleItem, nodeToSplitIndex) 317 | if len(n.childNodes) == nodeToSplitIndex+1 { // If middle of list, then move items forward 318 | n.childNodes = append(n.childNodes, newNode.pageNum) 319 | } else { 320 | n.childNodes = append(n.childNodes[:nodeToSplitIndex+1], n.childNodes[nodeToSplitIndex:]...) 321 | n.childNodes[nodeToSplitIndex+1] = newNode.pageNum 322 | } 323 | 324 | n.writeNodes(n, nodeToSplit) 325 | } -------------------------------------------------------------------------------- /Part 5/node.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type Item struct { 9 | key []byte 10 | value []byte 11 | } 12 | 13 | type Node struct { 14 | *dal 15 | 16 | pageNum pgnum 17 | items []*Item 18 | childNodes []pgnum 19 | } 20 | 21 | func NewEmptyNode() *Node { 22 | return &Node{} 23 | } 24 | 25 | // NewNodeForSerialization creates a new node only with the properties that are relevant when saving to the disk 26 | func NewNodeForSerialization(items []*Item, childNodes []pgnum) *Node { 27 | return &Node{ 28 | items: items, 29 | childNodes: childNodes, 30 | } 31 | } 32 | 33 | func newItem(key []byte, value []byte) *Item { 34 | return &Item{ 35 | key: key, 36 | value: value, 37 | } 38 | } 39 | 40 | func isLast(index int, parentNode *Node) bool { 41 | return index 
== len(parentNode.items) 42 | } 43 | 44 | func isFirst(index int) bool { 45 | return index == 0 46 | } 47 | 48 | func (n *Node) isLeaf() bool { 49 | return len(n.childNodes) == 0 50 | } 51 | 52 | func (n *Node) writeNode(node *Node) *Node { 53 | node, _ = n.dal.writeNode(node) 54 | return node 55 | } 56 | 57 | func (n *Node) writeNodes(nodes ...*Node) { 58 | for _, node := range nodes { 59 | n.writeNode(node) 60 | } 61 | } 62 | 63 | func (n *Node) getNode(pageNum pgnum) (*Node, error) { 64 | return n.dal.getNode(pageNum) 65 | } 66 | 67 | // isOverPopulated checks if the node size is bigger than the size of a page. 68 | func (n *Node) isOverPopulated() bool { 69 | return n.dal.isOverPopulated(n) 70 | } 71 | 72 | // canSpareAnElement checks if the node size is big enough to populate a page after giving away one item. 73 | func (n *Node) canSpareAnElement() bool { 74 | splitIndex := n.dal.getSplitIndex(n) 75 | if splitIndex == -1 { 76 | return false 77 | } 78 | return true 79 | } 80 | 81 | // isUnderPopulated checks if the node size is smaller than the size of a page. 82 | func (n *Node) isUnderPopulated() bool { 83 | return n.dal.isUnderPopulated(n) 84 | } 85 | 86 | func (n *Node) serialize(buf []byte) []byte { 87 | leftPos := 0 88 | rightPos := len(buf) - 1 89 | 90 | // Add page header: isLeaf, key-value pairs count, node num 91 | // isLeaf 92 | isLeaf := n.isLeaf() 93 | var bitSetVar uint64 94 | if isLeaf { 95 | bitSetVar = 1 96 | } 97 | buf[leftPos] = byte(bitSetVar) 98 | leftPos += 1 99 | 100 | // key-value pairs count 101 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(len(n.items))) 102 | leftPos += 2 103 | 104 | // We use slotted pages for storing data in the page. It means the actual keys and values (the cells) are appended 105 | // to right of the page whereas offsets have a fixed size and are appended from the left. 
106 | // It's easier to preserve the logical order (alphabetical in the case of b-tree) using the metadata and performing 107 | // pointer arithmetic. Using the data itself is harder as it varies by size. 108 | 109 | // Page structure is: 110 | // ---------------------------------------------------------------------------------- 111 | // | Page | key-value / child node key-value | key-value | 112 | // | Header | offset / pointer offset .... | data ..... | 113 | // ---------------------------------------------------------------------------------- 114 | 115 | for i := 0; i < len(n.items); i++ { 116 | item := n.items[i] 117 | if !isLeaf { 118 | childNode := n.childNodes[i] 119 | 120 | // Write the child page as a fixed size of 8 bytes 121 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(childNode)) 122 | leftPos += pageNumSize 123 | } 124 | 125 | klen := len(item.key) 126 | vlen := len(item.value) 127 | 128 | // write offset 129 | offset := rightPos - klen - vlen - 2 130 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(offset)) 131 | leftPos += 2 132 | 133 | rightPos -= vlen 134 | copy(buf[rightPos:], item.value) 135 | 136 | rightPos -= 1 137 | buf[rightPos] = byte(vlen) 138 | 139 | rightPos -= klen 140 | copy(buf[rightPos:], item.key) 141 | 142 | rightPos -= 1 143 | buf[rightPos] = byte(klen) 144 | } 145 | 146 | if !isLeaf { 147 | // Write the last child node 148 | lastChildNode := n.childNodes[len(n.childNodes)-1] 149 | // Write the child page as a fixed size of 8 bytes 150 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(lastChildNode)) 151 | } 152 | 153 | return buf 154 | } 155 | 156 | func (n *Node) deserialize(buf []byte) { 157 | leftPos := 0 158 | 159 | // Read header 160 | isLeaf := uint16(buf[0]) 161 | 162 | itemsCount := int(binary.LittleEndian.Uint16(buf[1:3])) 163 | leftPos += 3 164 | 165 | // Read body 166 | for i := 0; i < itemsCount; i++ { 167 | if isLeaf == 0 { // False 168 | pageNum := binary.LittleEndian.Uint64(buf[leftPos:]) 169 | 
leftPos += pageNumSize 170 | 171 | n.childNodes = append(n.childNodes, pgnum(pageNum)) 172 | } 173 | 174 | // Read offset 175 | offset := binary.LittleEndian.Uint16(buf[leftPos:]) 176 | leftPos += 2 177 | 178 | klen := uint16(buf[int(offset)]) 179 | offset += 1 180 | 181 | key := buf[offset : offset+klen] 182 | offset += klen 183 | 184 | vlen := uint16(buf[int(offset)]) 185 | offset += 1 186 | 187 | value := buf[offset : offset+vlen] 188 | offset += vlen 189 | n.items = append(n.items, newItem(key, value)) 190 | } 191 | 192 | if isLeaf == 0 { // False 193 | // Read the last child node 194 | pageNum := pgnum(binary.LittleEndian.Uint64(buf[leftPos:])) 195 | n.childNodes = append(n.childNodes, pageNum) 196 | } 197 | } 198 | 199 | // elementSize returns the size of a key-value-childNode triplet at a given index. 200 | // If the node is a leaf, then the size of a key-value pair is returned. 201 | // It's assumed i <= len(n.items) 202 | func (n *Node) elementSize(i int) int { 203 | size := 0 204 | size += len(n.items[i].key) 205 | size += len(n.items[i].value) 206 | size += pageNumSize // 8 is the pgnum size 207 | return size 208 | } 209 | 210 | // nodeSize returns the node's size in bytes 211 | func (n *Node) nodeSize() int { 212 | size := 0 213 | size += nodeHeaderSize 214 | 215 | for i := range n.items { 216 | size += n.elementSize(i) 217 | } 218 | 219 | // Add last page 220 | size += pageNumSize // 8 is the pgnum size 221 | return size 222 | } 223 | 224 | // findKey searches for a key inside the tree. Once the key is found, the parent node and the correct index are returned 225 | // so the key itself can be accessed in the following way parent[index]. A list of the node ancestors (not including the 226 | // node itself) is also returned. 227 | // If the key isn't found, we have 2 options. If exact is true, it means we expect findKey 228 | // to find the key, so a falsey answer. 
If exact is false, then findKey is used to locate where a new key should be 229 | // inserted so the position is returned. 230 | func (n *Node) findKey(key []byte, exact bool) (int, *Node, []int ,error) { 231 | ancestorsIndexes := []int{0} // index of root 232 | index, node, err := findKeyHelper(n, key, exact, &ancestorsIndexes) 233 | if err != nil { 234 | return -1, nil, nil, err 235 | } 236 | return index, node, ancestorsIndexes, nil 237 | } 238 | 239 | func findKeyHelper(node *Node, key []byte, exact bool, ancestorsIndexes *[]int) (int, *Node ,error) { 240 | wasFound, index := node.findKeyInNode(key) 241 | if wasFound { 242 | return index, node, nil 243 | } 244 | 245 | if node.isLeaf() { 246 | if exact { 247 | return -1, nil, nil 248 | } 249 | return index, node, nil 250 | } 251 | 252 | *ancestorsIndexes = append(*ancestorsIndexes, index) 253 | nextChild, err := node.getNode(node.childNodes[index]) 254 | if err != nil { 255 | return -1, nil, err 256 | } 257 | return findKeyHelper(nextChild, key, exact, ancestorsIndexes) 258 | } 259 | 260 | // findKeyInNode iterates all the items and finds the key. If the key is found, then the item is returned. If the key 261 | // isn't found then return the index where it should have been (the first index that key is greater than it's previous) 262 | func (n *Node) findKeyInNode(key []byte) (bool, int) { 263 | for i, existingItem := range n.items { 264 | res := bytes.Compare(existingItem.key, key) 265 | if res == 0 { // Keys match 266 | return true, i 267 | } 268 | 269 | // The key is bigger than the previous item, so it doesn't exist in the node, but may exist in child nodes. 270 | if res == 1 { 271 | return false, i 272 | } 273 | } 274 | 275 | // The key isn't bigger than any of the items which means it's in the last index. 
276 | return false, len(n.items) 277 | } 278 | 279 | func (n *Node) addItem(item *Item, insertionIndex int) int { 280 | if len(n.items) == insertionIndex { // nil or empty slice or after last element 281 | n.items = append(n.items, item) 282 | return insertionIndex 283 | } 284 | 285 | n.items = append(n.items[:insertionIndex+1], n.items[insertionIndex:]...) 286 | n.items[insertionIndex] = item 287 | return insertionIndex 288 | } 289 | 290 | // split rebalances the tree after adding. After insertion the modified node has to be checked to make sure it 291 | // didn't exceed the maximum number of elements. If it did, then it has to be split and rebalanced. The transformation 292 | // is depicted in the graph below. If it's not a leaf node, then the children has to be moved as well as shown. 293 | // This may leave the parent unbalanced by having too many items so rebalancing has to be checked for all the ancestors. 294 | // The split is performed in a for loop to support splitting a node more than once. (Though in practice used only once). 295 | // n n 296 | // 3 3,6 297 | // / \ ------> / | \ 298 | // a modifiedNode a modifiedNode newNode 299 | // 1,2 4,5,6,7,8 1,2 4,5 7,8 300 | func (n *Node) split(nodeToSplit *Node, nodeToSplitIndex int) { 301 | // The first index where min amount of bytes to populate a page is achieved. Then add 1 so it will be split one 302 | // index after. 
303 | splitIndex := nodeToSplit.dal.getSplitIndex(nodeToSplit) 304 | 305 | middleItem := nodeToSplit.items[splitIndex] 306 | var newNode *Node 307 | 308 | if nodeToSplit.isLeaf() { 309 | newNode = n.writeNode(n.dal.newNode(nodeToSplit.items[splitIndex+1:], []pgnum{})) 310 | nodeToSplit.items = nodeToSplit.items[:splitIndex] 311 | } else { 312 | newNode = n.writeNode(n.dal.newNode(nodeToSplit.items[splitIndex+1:], nodeToSplit.childNodes[splitIndex+1:])) 313 | nodeToSplit.items = nodeToSplit.items[:splitIndex] 314 | nodeToSplit.childNodes = nodeToSplit.childNodes[:splitIndex+1] 315 | } 316 | n.addItem(middleItem, nodeToSplitIndex) 317 | if len(n.childNodes) == nodeToSplitIndex+1 { // If middle of list, then move items forward 318 | n.childNodes = append(n.childNodes, newNode.pageNum) 319 | } else { 320 | n.childNodes = append(n.childNodes[:nodeToSplitIndex+1], n.childNodes[nodeToSplitIndex:]...) 321 | n.childNodes[nodeToSplitIndex+1] = newNode.pageNum 322 | } 323 | 324 | n.writeNodes(n, nodeToSplit) 325 | } 326 | 327 | // rebalanceRemove rebalances the tree after a remove operation. This can be either by rotating to the right, to the 328 | // left or by merging. First, the sibling nodes are checked to see if they have enough items for rebalancing 329 | // (>= minItems+1). If they don't have enough items, then merging with one of the sibling nodes occurs. This may leave 330 | // the parent unbalanced by having too little items so rebalancing has to be checked for all the ancestors. 
331 | func (n *Node) rebalanceRemove(unbalancedNode *Node, unbalancedNodeIndex int) error { 332 | pNode := n 333 | 334 | // Right rotate 335 | if unbalancedNodeIndex != 0 { 336 | leftNode, err := n.getNode(pNode.childNodes[unbalancedNodeIndex-1]) 337 | if err != nil { 338 | return err 339 | } 340 | if leftNode.canSpareAnElement() { 341 | rotateRight(leftNode, pNode, unbalancedNode, unbalancedNodeIndex) 342 | n.writeNodes(leftNode, pNode, unbalancedNode) 343 | return nil 344 | } 345 | } 346 | 347 | // Left Balance 348 | if unbalancedNodeIndex != len(pNode.childNodes)-1 { 349 | rightNode, err := n.getNode(pNode.childNodes[unbalancedNodeIndex+1]) 350 | if err != nil { 351 | return err 352 | } 353 | if rightNode.canSpareAnElement() { 354 | rotateLeft(unbalancedNode, pNode, rightNode, unbalancedNodeIndex) 355 | n.writeNodes(unbalancedNode, pNode, rightNode) 356 | return nil 357 | } 358 | } 359 | 360 | // The merge function merges a given node with its node to the right. So by default, we merge an unbalanced node 361 | // with its right sibling. In the case where the unbalanced node is the leftmost, we have to replace the merge 362 | // parameters, so the unbalanced node right sibling, will be merged into the unbalanced node. 363 | if unbalancedNodeIndex == 0 { 364 | rightNode, err := n.getNode(n.childNodes[unbalancedNodeIndex+1]) 365 | if err != nil { 366 | return err 367 | } 368 | 369 | return pNode.merge(rightNode, unbalancedNodeIndex+1) 370 | } 371 | 372 | return pNode.merge(unbalancedNode, unbalancedNodeIndex) 373 | } 374 | 375 | // removeItemFromLeaf removes an item from a leaf node. It means there is no handling of child nodes. 376 | func (n *Node) removeItemFromLeaf(index int) { 377 | n.items = append(n.items[:index], n.items[index+1:]...) 
378 | n.writeNode(n)
379 | }
380 | 
// removeItemFromInternal removes the item at the given index from an internal node by replacing it with its
// inorder predecessor (the rightmost item in its left subtree). It returns, for later rebalancing, the child
// indexes of the path traversed to reach the predecessor's leaf.
381 | func (n *Node) removeItemFromInternal(index int) ([]int, error) {
382 | // Take element before inorder (The biggest element from the left branch), put it in the removed index and remove
383 | // it from the original node. Track in affectedNodes any nodes in the path leading to that node. It will be used
384 | // in case the tree needs to be rebalanced.
385 | // p
386 | // /
387 | // ..
388 | // / \
389 | // .. a
390 | 
391 | affectedNodes := make([]int, 0)
392 | affectedNodes = append(affectedNodes, index)
393 | 
394 | // Starting from its left child, descend to the rightmost descendant.
395 | aNode, err := n.getNode(n.childNodes[index])
396 | if err != nil {
397 | return nil, err
398 | }
399 | 
400 | for !aNode.isLeaf() {
// FIX: descend through aNode's own children. The previous code indexed n.childNodes here, which re-fetched
// the same child of n on every iteration (an infinite loop whenever that child is an internal node) instead
// of walking down to the rightmost descendant of the left subtree.
401 | traversingIndex := len(aNode.childNodes) - 1
402 | aNode, err = aNode.getNode(aNode.childNodes[traversingIndex])
403 | if err != nil {
404 | return nil, err
405 | }
406 | affectedNodes = append(affectedNodes, traversingIndex)
407 | }
408 | 
409 | // Replace the item that should be removed with the item before inorder which we just found.
410 | n.items[index] = aNode.items[len(aNode.items)-1]
411 | aNode.items = aNode.items[:len(aNode.items)-1]
412 | n.writeNodes(n, aNode)
413 | 
414 | return affectedNodes, nil
415 | }
416 | 
417 | func rotateRight(aNode, pNode, bNode *Node, bNodeIndex int) {
418 | // p p
419 | // 4 3
420 | // / \ ------> / \
421 | // a b (unbalanced) a b (unbalanced)
422 | // 1,2,3 5 1,2 4,5
423 | 
424 | // Get last item and remove it
425 | aNodeItem := aNode.items[len(aNode.items)-1]
426 | aNode.items = aNode.items[:len(aNode.items)-1]
427 | 
428 | // Get item from parent node and assign the aNodeItem item instead
429 | pNodeItemIndex := bNodeIndex - 1
430 | if isFirst(bNodeIndex) {
431 | pNodeItemIndex = 0
432 | }
433 | pNodeItem := pNode.items[pNodeItemIndex]
434 | pNode.items[pNodeItemIndex] = aNodeItem
435 | 
436 | // Assign parent item to b and make it first
437 | bNode.items = append([]*Item{pNodeItem}, bNode.items...)
438 | 
439 | // If it's an inner leaf then move children as well.
440 | if !aNode.isLeaf() {
441 | childNodeToShift := aNode.childNodes[len(aNode.childNodes)-1]
442 | aNode.childNodes = aNode.childNodes[:len(aNode.childNodes)-1]
443 | bNode.childNodes = append([]pgnum{childNodeToShift}, bNode.childNodes...)
444 | } 445 | } 446 | 447 | func rotateLeft(aNode, pNode, bNode *Node, bNodeIndex int) { 448 | // p p 449 | // 2 3 450 | // / \ ------> / \ 451 | // a(unbalanced) b a(unbalanced) b 452 | // 1 3,4,5 1,2 4,5 453 | 454 | // Get first item and remove it 455 | bNodeItem := bNode.items[0] 456 | bNode.items = bNode.items[1:] 457 | 458 | // Get item from parent node and assign the bNodeItem item instead 459 | pNodeItemIndex := bNodeIndex 460 | if isLast(bNodeIndex, pNode) { 461 | pNodeItemIndex = len(pNode.items) - 1 462 | } 463 | pNodeItem := pNode.items[pNodeItemIndex] 464 | pNode.items[pNodeItemIndex] = bNodeItem 465 | 466 | // Assign parent item to a and make it last 467 | aNode.items = append(aNode.items, pNodeItem) 468 | 469 | // If it's an inner leaf then move children as well. 470 | if !bNode.isLeaf() { 471 | childNodeToShift := bNode.childNodes[0] 472 | bNode.childNodes = bNode.childNodes[1:] 473 | aNode.childNodes = append(aNode.childNodes, childNodeToShift) 474 | } 475 | } 476 | 477 | func (n *Node) merge(bNode *Node, bNodeIndex int) error { 478 | // p p 479 | // 3,5 5 480 | // / | \ ------> / \ 481 | // a b c a c 482 | // 1,2 4 6,7 1,2,3,4 6,7 483 | aNode, err := n.getNode(n.childNodes[bNodeIndex-1]) 484 | if err != nil { 485 | return err 486 | } 487 | 488 | // Take the item from the parent, remove it and add it to the unbalanced node 489 | pNodeItem := n.items[bNodeIndex-1] 490 | n.items = append(n.items[:bNodeIndex-1], n.items[bNodeIndex:]...) 491 | aNode.items = append(aNode.items, pNodeItem) 492 | 493 | aNode.items = append(aNode.items, bNode.items...) 494 | n.childNodes = append(n.childNodes[:bNodeIndex], n.childNodes[bNodeIndex+1:]...) 495 | if !aNode.isLeaf() { 496 | aNode.childNodes = append(aNode.childNodes, bNode.childNodes...) 
497 | } 498 | n.writeNodes(aNode, n) 499 | n.dal.deleteNode(bNode.pageNum) 500 | return nil 501 | } -------------------------------------------------------------------------------- /Part 6/node.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type Item struct { 9 | key []byte 10 | value []byte 11 | } 12 | 13 | type Node struct { 14 | // associated transaction 15 | tx *tx 16 | 17 | pageNum pgnum 18 | items []*Item 19 | childNodes []pgnum 20 | } 21 | 22 | func NewEmptyNode() *Node { 23 | return &Node{} 24 | } 25 | 26 | // NewNodeForSerialization creates a new node only with the properties that are relevant when saving to the disk 27 | func NewNodeForSerialization(items []*Item, childNodes []pgnum) *Node { 28 | return &Node{ 29 | items: items, 30 | childNodes: childNodes, 31 | } 32 | } 33 | 34 | func newItem(key []byte, value []byte) *Item { 35 | return &Item{ 36 | key: key, 37 | value: value, 38 | } 39 | } 40 | 41 | func isLast(index int, parentNode *Node) bool { 42 | return index == len(parentNode.items) 43 | } 44 | 45 | func isFirst(index int) bool { 46 | return index == 0 47 | } 48 | 49 | func (n *Node) isLeaf() bool { 50 | return len(n.childNodes) == 0 51 | } 52 | 53 | func (n *Node) writeNode(node *Node) *Node { 54 | return n.tx.writeNode(node) 55 | } 56 | 57 | func (n *Node) writeNodes(nodes ...*Node) { 58 | for _, node := range nodes { 59 | n.writeNode(node) 60 | } 61 | } 62 | 63 | func (n *Node) getNode(pageNum pgnum) (*Node, error) { 64 | return n.tx.getNode(pageNum) 65 | } 66 | 67 | // isOverPopulated checks if the node size is bigger than the size of a page. 68 | func (n *Node) isOverPopulated() bool { 69 | return n.tx.db.isOverPopulated(n) 70 | } 71 | 72 | // canSpareAnElement checks if the node size is big enough to populate a page after giving away one item. 
73 | func (n *Node) canSpareAnElement() bool { 74 | splitIndex := n.tx.db.getSplitIndex(n) 75 | if splitIndex == -1 { 76 | return false 77 | } 78 | return true 79 | } 80 | 81 | // isUnderPopulated checks if the node size is smaller than the size of a page. 82 | func (n *Node) isUnderPopulated() bool { 83 | return n.tx.db.isUnderPopulated(n) 84 | } 85 | 86 | func (n *Node) serialize(buf []byte) []byte { 87 | leftPos := 0 88 | rightPos := len(buf) - 1 89 | 90 | // Add page header: isLeaf, key-value pairs count, node num 91 | // isLeaf 92 | isLeaf := n.isLeaf() 93 | var bitSetVar uint64 94 | if isLeaf { 95 | bitSetVar = 1 96 | } 97 | buf[leftPos] = byte(bitSetVar) 98 | leftPos += 1 99 | 100 | // key-value pairs count 101 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(len(n.items))) 102 | leftPos += 2 103 | 104 | // We use slotted pages for storing data in the page. It means the actual keys and values (the cells) are appended 105 | // to right of the page whereas offsets have a fixed size and are appended from the left. 106 | // It's easier to preserve the logical order (alphabetical in the case of b-tree) using the metadata and performing 107 | // pointer arithmetic. Using the data itself is harder as it varies by size. 108 | 109 | // Page structure is: 110 | // ---------------------------------------------------------------------------------- 111 | // | Page | key-value / child node key-value | key-value | 112 | // | Header | offset / pointer offset .... | data ..... 
| 113 | // ---------------------------------------------------------------------------------- 114 | 115 | for i := 0; i < len(n.items); i++ { 116 | item := n.items[i] 117 | if !isLeaf { 118 | childNode := n.childNodes[i] 119 | 120 | // Write the child page as a fixed size of 8 bytes 121 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(childNode)) 122 | leftPos += pageNumSize 123 | } 124 | 125 | klen := len(item.key) 126 | vlen := len(item.value) 127 | 128 | // write offset 129 | offset := rightPos - klen - vlen - 2 130 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(offset)) 131 | leftPos += 2 132 | 133 | rightPos -= vlen 134 | copy(buf[rightPos:], item.value) 135 | 136 | rightPos -= 1 137 | buf[rightPos] = byte(vlen) 138 | 139 | rightPos -= klen 140 | copy(buf[rightPos:], item.key) 141 | 142 | rightPos -= 1 143 | buf[rightPos] = byte(klen) 144 | } 145 | 146 | if !isLeaf { 147 | // Write the last child node 148 | lastChildNode := n.childNodes[len(n.childNodes)-1] 149 | // Write the child page as a fixed size of 8 bytes 150 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(lastChildNode)) 151 | } 152 | 153 | return buf 154 | } 155 | 156 | func (n *Node) deserialize(buf []byte) { 157 | leftPos := 0 158 | 159 | // Read header 160 | isLeaf := uint16(buf[0]) 161 | 162 | itemsCount := int(binary.LittleEndian.Uint16(buf[1:3])) 163 | leftPos += 3 164 | 165 | // Read body 166 | for i := 0; i < itemsCount; i++ { 167 | if isLeaf == 0 { // False 168 | pageNum := binary.LittleEndian.Uint64(buf[leftPos:]) 169 | leftPos += pageNumSize 170 | 171 | n.childNodes = append(n.childNodes, pgnum(pageNum)) 172 | } 173 | 174 | // Read offset 175 | offset := binary.LittleEndian.Uint16(buf[leftPos:]) 176 | leftPos += 2 177 | 178 | klen := uint16(buf[int(offset)]) 179 | offset += 1 180 | 181 | key := buf[offset : offset+klen] 182 | offset += klen 183 | 184 | vlen := uint16(buf[int(offset)]) 185 | offset += 1 186 | 187 | value := buf[offset : offset+vlen] 188 | offset += vlen 
189 | n.items = append(n.items, newItem(key, value)) 190 | } 191 | 192 | if isLeaf == 0 { // False 193 | // Read the last child node 194 | pageNum := pgnum(binary.LittleEndian.Uint64(buf[leftPos:])) 195 | n.childNodes = append(n.childNodes, pageNum) 196 | } 197 | } 198 | 199 | // elementSize returns the size of a key-value-childNode triplet at a given index. 200 | // If the node is a leaf, then the size of a key-value pair is returned. 201 | // It's assumed i <= len(n.items) 202 | func (n *Node) elementSize(i int) int { 203 | size := 0 204 | size += len(n.items[i].key) 205 | size += len(n.items[i].value) 206 | size += pageNumSize // 8 is the pgnum size 207 | return size 208 | } 209 | 210 | // nodeSize returns the node's size in bytes 211 | func (n *Node) nodeSize() int { 212 | size := 0 213 | size += nodeHeaderSize 214 | 215 | for i := range n.items { 216 | size += n.elementSize(i) 217 | } 218 | 219 | // Add last page 220 | size += pageNumSize // 8 is the pgnum size 221 | return size 222 | } 223 | 224 | // findKey searches for a key inside the tree. Once the key is found, the parent node and the correct index are returned 225 | // so the key itself can be accessed in the following way parent[index]. A list of the node ancestors (not including the 226 | // node itself) is also returned. 227 | // If the key isn't found, we have 2 options. If exact is true, it means we expect findKey 228 | // to find the key, so a falsey answer. If exact is false, then findKey is used to locate where a new key should be 229 | // inserted so the position is returned. 
230 | func (n *Node) findKey(key []byte, exact bool) (int, *Node, []int ,error) { 231 | ancestorsIndexes := []int{0} // index of root 232 | index, node, err := findKeyHelper(n, key, exact, &ancestorsIndexes) 233 | if err != nil { 234 | return -1, nil, nil, err 235 | } 236 | return index, node, ancestorsIndexes, nil 237 | } 238 | 239 | func findKeyHelper(node *Node, key []byte, exact bool, ancestorsIndexes *[]int) (int, *Node ,error) { 240 | wasFound, index := node.findKeyInNode(key) 241 | if wasFound { 242 | return index, node, nil 243 | } 244 | 245 | if node.isLeaf() { 246 | if exact { 247 | return -1, nil, nil 248 | } 249 | return index, node, nil 250 | } 251 | 252 | *ancestorsIndexes = append(*ancestorsIndexes, index) 253 | nextChild, err := node.getNode(node.childNodes[index]) 254 | if err != nil { 255 | return -1, nil, err 256 | } 257 | return findKeyHelper(nextChild, key, exact, ancestorsIndexes) 258 | } 259 | 260 | // findKeyInNode iterates all the items and finds the key. If the key is found, then the item is returned. If the key 261 | // isn't found then return the index where it should have been (the first index that key is greater than it's previous) 262 | func (n *Node) findKeyInNode(key []byte) (bool, int) { 263 | for i, existingItem := range n.items { 264 | res := bytes.Compare(existingItem.key, key) 265 | if res == 0 { // Keys match 266 | return true, i 267 | } 268 | 269 | // The key is bigger than the previous item, so it doesn't exist in the node, but may exist in child nodes. 270 | if res == 1 { 271 | return false, i 272 | } 273 | } 274 | 275 | // The key isn't bigger than any of the items which means it's in the last index. 
276 | return false, len(n.items)
277 | }
278 | 
// addItem inserts item into the node at insertionIndex, shifting any later items one position to the right,
// and returns the (unchanged) insertion index. Child pointers are not touched; callers that split internal
// nodes adjust childNodes themselves.
279 | func (n *Node) addItem(item *Item, insertionIndex int) int {
280 | if len(n.items) == insertionIndex { // nil or empty slice or after last element
281 | n.items = append(n.items, item)
282 | return insertionIndex
283 | }
284 | 
// Grow the slice by one (duplicating the element at insertionIndex), then overwrite that slot with the new item.
285 | n.items = append(n.items[:insertionIndex+1], n.items[insertionIndex:]...)
286 | n.items[insertionIndex] = item
287 | return insertionIndex
288 | }
289 | 
290 | // split rebalances the tree after adding. After insertion the modified node has to be checked to make sure it
291 | // didn't exceed the maximum number of elements. If it did, then it has to be split and rebalanced. The transformation
292 | // is depicted in the graph below. If it's not a leaf node, then the children has to be moved as well as shown.
293 | // This may leave the parent unbalanced by having too many items so rebalancing has to be checked for all the ancestors.
294 | // The split is performed in a for loop to support splitting a node more than once. (Though in practice used only once).
295 | // n n
296 | // 3 3,6
297 | // / \ ------> / | \
298 | // a modifiedNode a modifiedNode newNode
299 | // 1,2 4,5,6,7,8 1,2 4,5 7,8
300 | func (n *Node) split(nodeToSplit *Node, nodeToSplitIndex int) {
301 | // The first index where min amount of bytes to populate a page is achieved. Then add 1 so it will be split one
302 | // index after.
303 | splitIndex := nodeToSplit.tx.db.getSplitIndex(nodeToSplit)
304 | 
// The item at splitIndex moves up into the parent (n); everything after it goes into the new right sibling.
305 | middleItem := nodeToSplit.items[splitIndex]
306 | var newNode *Node
307 | 
308 | if nodeToSplit.isLeaf() {
309 | newNode = n.writeNode(n.tx.newNode(nodeToSplit.items[splitIndex+1:], []pgnum{}))
310 | nodeToSplit.items = nodeToSplit.items[:splitIndex]
311 | } else {
// For an internal node, the children to the right of the split point follow their items into the new node.
312 | newNode = n.writeNode(n.tx.newNode(nodeToSplit.items[splitIndex+1:], nodeToSplit.childNodes[splitIndex+1:]))
313 | nodeToSplit.items = nodeToSplit.items[:splitIndex]
314 | nodeToSplit.childNodes = nodeToSplit.childNodes[:splitIndex+1]
315 | }
316 | n.addItem(middleItem, nodeToSplitIndex)
317 | if len(n.childNodes) == nodeToSplitIndex+1 { // nodeToSplit is the last child, so append the new sibling at the end
318 | n.childNodes = append(n.childNodes, newNode.pageNum)
319 | } else {
// nodeToSplit is in the middle of the list: shift the later children forward and slot the new sibling in right after it.
320 | n.childNodes = append(n.childNodes[:nodeToSplitIndex+1], n.childNodes[nodeToSplitIndex:]...)
321 | n.childNodes[nodeToSplitIndex+1] = newNode.pageNum
322 | }
323 | 
324 | n.writeNodes(n, nodeToSplit)
325 | }
326 | 
327 | // rebalanceRemove rebalances the tree after a remove operation. This can be either by rotating to the right, to the
328 | // left or by merging. First, the sibling nodes are checked to see if they have enough items for rebalancing
329 | // (>= minItems+1). If they don't have enough items, then merging with one of the sibling nodes occurs. This may leave
330 | // the parent unbalanced by having too little items so rebalancing has to be checked for all the ancestors.
// The receiver n is the PARENT of the unbalanced node; unbalancedNodeIndex is the child slot of
// unbalancedNode within n. Strategy order: borrow from the left sibling, borrow from the right sibling,
// and only if neither can spare an item, merge.
331 | func (n *Node) rebalanceRemove(unbalancedNode *Node, unbalancedNodeIndex int) error {
332 | pNode := n
333 | 
334 | // Right rotate
// Only possible when a left sibling exists (i.e. the unbalanced node is not the leftmost child).
335 | if unbalancedNodeIndex != 0 {
336 | leftNode, err := n.getNode(pNode.childNodes[unbalancedNodeIndex-1])
337 | if err != nil {
338 | return err
339 | }
340 | if leftNode.canSpareAnElement() {
341 | rotateRight(leftNode, pNode, unbalancedNode, unbalancedNodeIndex)
342 | n.writeNodes(leftNode, pNode, unbalancedNode)
343 | return nil
344 | }
345 | }
346 | 
347 | // Left Balance
// Only possible when a right sibling exists (i.e. the unbalanced node is not the rightmost child).
348 | if unbalancedNodeIndex != len(pNode.childNodes)-1 {
349 | rightNode, err := n.getNode(pNode.childNodes[unbalancedNodeIndex+1])
350 | if err != nil {
351 | return err
352 | }
353 | if rightNode.canSpareAnElement() {
354 | rotateLeft(unbalancedNode, pNode, rightNode, unbalancedNodeIndex)
355 | n.writeNodes(unbalancedNode, pNode, rightNode)
356 | return nil
357 | }
358 | }
359 | 
360 | // The merge function merges a given node with its node to the right. So by default, we merge an unbalanced node
361 | // with its right sibling. In the case where the unbalanced node is the leftmost, we have to replace the merge
362 | // parameters, so the unbalanced node right sibling, will be merged into the unbalanced node.
363 | if unbalancedNodeIndex == 0 {
364 | rightNode, err := n.getNode(n.childNodes[unbalancedNodeIndex+1])
365 | if err != nil {
366 | return err
367 | }
368 | 
// merge folds its argument (the node at the given child index) into the sibling to its LEFT and frees its page.
369 | return pNode.merge(rightNode, unbalancedNodeIndex+1)
370 | }
371 | 
372 | return pNode.merge(unbalancedNode, unbalancedNodeIndex)
373 | }
374 | 
375 | // removeItemFromLeaf removes an item from a leaf node. It means there is no handling of child nodes.
376 | func (n *Node) removeItemFromLeaf(index int) {
377 | n.items = append(n.items[:index], n.items[index+1:]...)
378 | n.writeNode(n)
379 | }
380 | 
// removeItemFromInternal removes the item at the given index from an internal node by replacing it with its
// inorder predecessor (the rightmost item in its left subtree). It returns, for later rebalancing, the child
// indexes of the path traversed to reach the predecessor's leaf.
381 | func (n *Node) removeItemFromInternal(index int) ([]int, error) {
382 | // Take element before inorder (The biggest element from the left branch), put it in the removed index and remove
383 | // it from the original node. Track in affectedNodes any nodes in the path leading to that node. It will be used
384 | // in case the tree needs to be rebalanced.
385 | // p
386 | // /
387 | // ..
388 | // / \
389 | // .. a
390 | 
391 | affectedNodes := make([]int, 0)
392 | affectedNodes = append(affectedNodes, index)
393 | 
394 | // Starting from its left child, descend to the rightmost descendant.
395 | aNode, err := n.getNode(n.childNodes[index])
396 | if err != nil {
397 | return nil, err
398 | }
399 | 
400 | for !aNode.isLeaf() {
// FIX: descend through aNode's own children. The previous code indexed n.childNodes here, which re-fetched
// the same child of n on every iteration (an infinite loop whenever that child is an internal node) instead
// of walking down to the rightmost descendant of the left subtree.
401 | traversingIndex := len(aNode.childNodes) - 1
402 | aNode, err = aNode.getNode(aNode.childNodes[traversingIndex])
403 | if err != nil {
404 | return nil, err
405 | }
406 | affectedNodes = append(affectedNodes, traversingIndex)
407 | }
408 | 
409 | // Replace the item that should be removed with the item before inorder which we just found.
410 | n.items[index] = aNode.items[len(aNode.items)-1]
411 | aNode.items = aNode.items[:len(aNode.items)-1]
412 | n.writeNodes(n, aNode)
413 | 
414 | return affectedNodes, nil
415 | }
416 | 
417 | func rotateRight(aNode, pNode, bNode *Node, bNodeIndex int) {
418 | // p p
419 | // 4 3
420 | // / \ ------> / \
421 | // a b (unbalanced) a b (unbalanced)
422 | // 1,2,3 5 1,2 4,5
423 | 
424 | // Get last item and remove it
425 | aNodeItem := aNode.items[len(aNode.items)-1]
426 | aNode.items = aNode.items[:len(aNode.items)-1]
427 | 
428 | // Get item from parent node and assign the aNodeItem item instead
429 | pNodeItemIndex := bNodeIndex - 1
430 | if isFirst(bNodeIndex) {
431 | pNodeItemIndex = 0
432 | }
433 | pNodeItem := pNode.items[pNodeItemIndex]
434 | pNode.items[pNodeItemIndex] = aNodeItem
435 | 
436 | // Assign parent item to b and make it first
437 | bNode.items = append([]*Item{pNodeItem}, bNode.items...)
438 | 
439 | // If it's an inner leaf then move children as well.
440 | if !aNode.isLeaf() {
441 | childNodeToShift := aNode.childNodes[len(aNode.childNodes)-1]
442 | aNode.childNodes = aNode.childNodes[:len(aNode.childNodes)-1]
443 | bNode.childNodes = append([]pgnum{childNodeToShift}, bNode.childNodes...)
444 | } 445 | } 446 | 447 | func rotateLeft(aNode, pNode, bNode *Node, bNodeIndex int) { 448 | // p p 449 | // 2 3 450 | // / \ ------> / \ 451 | // a(unbalanced) b a(unbalanced) b 452 | // 1 3,4,5 1,2 4,5 453 | 454 | // Get first item and remove it 455 | bNodeItem := bNode.items[0] 456 | bNode.items = bNode.items[1:] 457 | 458 | // Get item from parent node and assign the bNodeItem item instead 459 | pNodeItemIndex := bNodeIndex 460 | if isLast(bNodeIndex, pNode) { 461 | pNodeItemIndex = len(pNode.items) - 1 462 | } 463 | pNodeItem := pNode.items[pNodeItemIndex] 464 | pNode.items[pNodeItemIndex] = bNodeItem 465 | 466 | // Assign parent item to a and make it last 467 | aNode.items = append(aNode.items, pNodeItem) 468 | 469 | // If it's an inner leaf then move children as well. 470 | if !bNode.isLeaf() { 471 | childNodeToShift := bNode.childNodes[0] 472 | bNode.childNodes = bNode.childNodes[1:] 473 | aNode.childNodes = append(aNode.childNodes, childNodeToShift) 474 | } 475 | } 476 | 477 | func (n *Node) merge(bNode *Node, bNodeIndex int) error { 478 | // p p 479 | // 3,5 5 480 | // / | \ ------> / \ 481 | // a b c a c 482 | // 1,2 4 6,7 1,2,3,4 6,7 483 | aNode, err := n.getNode(n.childNodes[bNodeIndex-1]) 484 | if err != nil { 485 | return err 486 | } 487 | 488 | // Take the item from the parent, remove it and add it to the unbalanced node 489 | pNodeItem := n.items[bNodeIndex-1] 490 | n.items = append(n.items[:bNodeIndex-1], n.items[bNodeIndex:]...) 491 | aNode.items = append(aNode.items, pNodeItem) 492 | 493 | aNode.items = append(aNode.items, bNode.items...) 494 | n.childNodes = append(n.childNodes[:bNodeIndex], n.childNodes[bNodeIndex+1:]...) 495 | if !aNode.isLeaf() { 496 | aNode.childNodes = append(aNode.childNodes, bNode.childNodes...) 
497 | } 498 | n.writeNodes(aNode, n) 499 | n.tx.db.deleteNode(bNode.pageNum) 500 | return nil 501 | } -------------------------------------------------------------------------------- /Part 7/node.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type Item struct { 9 | key []byte 10 | value []byte 11 | } 12 | 13 | type Node struct { 14 | // associated transaction 15 | tx *tx 16 | 17 | pageNum pgnum 18 | items []*Item 19 | childNodes []pgnum 20 | } 21 | 22 | func NewEmptyNode() *Node { 23 | return &Node{} 24 | } 25 | 26 | // NewNodeForSerialization creates a new node only with the properties that are relevant when saving to the disk 27 | func NewNodeForSerialization(items []*Item, childNodes []pgnum) *Node { 28 | return &Node{ 29 | items: items, 30 | childNodes: childNodes, 31 | } 32 | } 33 | 34 | func newItem(key []byte, value []byte) *Item { 35 | return &Item{ 36 | key: key, 37 | value: value, 38 | } 39 | } 40 | 41 | func isLast(index int, parentNode *Node) bool { 42 | return index == len(parentNode.items) 43 | } 44 | 45 | func isFirst(index int) bool { 46 | return index == 0 47 | } 48 | 49 | func (n *Node) isLeaf() bool { 50 | return len(n.childNodes) == 0 51 | } 52 | 53 | func (n *Node) writeNode(node *Node) *Node { 54 | return n.tx.writeNode(node) 55 | } 56 | 57 | func (n *Node) writeNodes(nodes ...*Node) { 58 | for _, node := range nodes { 59 | n.writeNode(node) 60 | } 61 | } 62 | 63 | func (n *Node) getNode(pageNum pgnum) (*Node, error) { 64 | return n.tx.getNode(pageNum) 65 | } 66 | 67 | // isOverPopulated checks if the node size is bigger than the size of a page. 68 | func (n *Node) isOverPopulated() bool { 69 | return n.tx.db.isOverPopulated(n) 70 | } 71 | 72 | // canSpareAnElement checks if the node size is big enough to populate a page after giving away one item. 
73 | func (n *Node) canSpareAnElement() bool { 74 | splitIndex := n.tx.db.getSplitIndex(n) 75 | if splitIndex == -1 { 76 | return false 77 | } 78 | return true 79 | } 80 | 81 | // isUnderPopulated checks if the node size is smaller than the size of a page. 82 | func (n *Node) isUnderPopulated() bool { 83 | return n.tx.db.isUnderPopulated(n) 84 | } 85 | 86 | func (n *Node) serialize(buf []byte) []byte { 87 | leftPos := 0 88 | rightPos := len(buf) - 1 89 | 90 | // Add page header: isLeaf, key-value pairs count, node num 91 | // isLeaf 92 | isLeaf := n.isLeaf() 93 | var bitSetVar uint64 94 | if isLeaf { 95 | bitSetVar = 1 96 | } 97 | buf[leftPos] = byte(bitSetVar) 98 | leftPos += 1 99 | 100 | // key-value pairs count 101 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(len(n.items))) 102 | leftPos += 2 103 | 104 | // We use slotted pages for storing data in the page. It means the actual keys and values (the cells) are appended 105 | // to right of the page whereas offsets have a fixed size and are appended from the left. 106 | // It's easier to preserve the logical order (alphabetical in the case of b-tree) using the metadata and performing 107 | // pointer arithmetic. Using the data itself is harder as it varies by size. 108 | 109 | // Page structure is: 110 | // ---------------------------------------------------------------------------------- 111 | // | Page | key-value / child node key-value | key-value | 112 | // | Header | offset / pointer offset .... | data ..... 
| 113 | // ---------------------------------------------------------------------------------- 114 | 115 | for i := 0; i < len(n.items); i++ { 116 | item := n.items[i] 117 | if !isLeaf { 118 | childNode := n.childNodes[i] 119 | 120 | // Write the child page as a fixed size of 8 bytes 121 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(childNode)) 122 | leftPos += pageNumSize 123 | } 124 | 125 | klen := len(item.key) 126 | vlen := len(item.value) 127 | 128 | // write offset 129 | offset := rightPos - klen - vlen - 2 130 | binary.LittleEndian.PutUint16(buf[leftPos:], uint16(offset)) 131 | leftPos += 2 132 | 133 | rightPos -= vlen 134 | copy(buf[rightPos:], item.value) 135 | 136 | rightPos -= 1 137 | buf[rightPos] = byte(vlen) 138 | 139 | rightPos -= klen 140 | copy(buf[rightPos:], item.key) 141 | 142 | rightPos -= 1 143 | buf[rightPos] = byte(klen) 144 | } 145 | 146 | if !isLeaf { 147 | // Write the last child node 148 | lastChildNode := n.childNodes[len(n.childNodes)-1] 149 | // Write the child page as a fixed size of 8 bytes 150 | binary.LittleEndian.PutUint64(buf[leftPos:], uint64(lastChildNode)) 151 | } 152 | 153 | return buf 154 | } 155 | 156 | func (n *Node) deserialize(buf []byte) { 157 | leftPos := 0 158 | 159 | // Read header 160 | isLeaf := uint16(buf[0]) 161 | 162 | itemsCount := int(binary.LittleEndian.Uint16(buf[1:3])) 163 | leftPos += 3 164 | 165 | // Read body 166 | for i := 0; i < itemsCount; i++ { 167 | if isLeaf == 0 { // False 168 | pageNum := binary.LittleEndian.Uint64(buf[leftPos:]) 169 | leftPos += pageNumSize 170 | 171 | n.childNodes = append(n.childNodes, pgnum(pageNum)) 172 | } 173 | 174 | // Read offset 175 | offset := binary.LittleEndian.Uint16(buf[leftPos:]) 176 | leftPos += 2 177 | 178 | klen := uint16(buf[int(offset)]) 179 | offset += 1 180 | 181 | key := buf[offset : offset+klen] 182 | offset += klen 183 | 184 | vlen := uint16(buf[int(offset)]) 185 | offset += 1 186 | 187 | value := buf[offset : offset+vlen] 188 | offset += vlen 
189 | n.items = append(n.items, newItem(key, value)) 190 | } 191 | 192 | if isLeaf == 0 { // False 193 | // Read the last child node 194 | pageNum := pgnum(binary.LittleEndian.Uint64(buf[leftPos:])) 195 | n.childNodes = append(n.childNodes, pageNum) 196 | } 197 | } 198 | 199 | // elementSize returns the size of a key-value-childNode triplet at a given index. 200 | // If the node is a leaf, then the size of a key-value pair is returned. 201 | // It's assumed i <= len(n.items) 202 | func (n *Node) elementSize(i int) int { 203 | size := 0 204 | size += len(n.items[i].key) 205 | size += len(n.items[i].value) 206 | size += pageNumSize // 8 is the pgnum size 207 | return size 208 | } 209 | 210 | // nodeSize returns the node's size in bytes 211 | func (n *Node) nodeSize() int { 212 | size := 0 213 | size += nodeHeaderSize 214 | 215 | for i := range n.items { 216 | size += n.elementSize(i) 217 | } 218 | 219 | // Add last page 220 | size += pageNumSize // 8 is the pgnum size 221 | return size 222 | } 223 | 224 | // findKey searches for a key inside the tree. Once the key is found, the parent node and the correct index are returned 225 | // so the key itself can be accessed in the following way parent[index]. A list of the node ancestors (not including the 226 | // node itself) is also returned. 227 | // If the key isn't found, we have 2 options. If exact is true, it means we expect findKey 228 | // to find the key, so a falsey answer. If exact is false, then findKey is used to locate where a new key should be 229 | // inserted so the position is returned. 
230 | func (n *Node) findKey(key []byte, exact bool) (int, *Node, []int ,error) { 231 | ancestorsIndexes := []int{0} // index of root 232 | index, node, err := findKeyHelper(n, key, exact, &ancestorsIndexes) 233 | if err != nil { 234 | return -1, nil, nil, err 235 | } 236 | return index, node, ancestorsIndexes, nil 237 | } 238 | 239 | func findKeyHelper(node *Node, key []byte, exact bool, ancestorsIndexes *[]int) (int, *Node ,error) { 240 | wasFound, index := node.findKeyInNode(key) 241 | if wasFound { 242 | return index, node, nil 243 | } 244 | 245 | if node.isLeaf() { 246 | if exact { 247 | return -1, nil, nil 248 | } 249 | return index, node, nil 250 | } 251 | 252 | *ancestorsIndexes = append(*ancestorsIndexes, index) 253 | nextChild, err := node.getNode(node.childNodes[index]) 254 | if err != nil { 255 | return -1, nil, err 256 | } 257 | return findKeyHelper(nextChild, key, exact, ancestorsIndexes) 258 | } 259 | 260 | // findKeyInNode iterates all the items and finds the key. If the key is found, then the item is returned. If the key 261 | // isn't found then return the index where it should have been (the first index that key is greater than it's previous) 262 | func (n *Node) findKeyInNode(key []byte) (bool, int) { 263 | for i, existingItem := range n.items { 264 | res := bytes.Compare(existingItem.key, key) 265 | if res == 0 { // Keys match 266 | return true, i 267 | } 268 | 269 | // The key is bigger than the previous item, so it doesn't exist in the node, but may exist in child nodes. 270 | if res == 1 { 271 | return false, i 272 | } 273 | } 274 | 275 | // The key isn't bigger than any of the items which means it's in the last index. 
276 | return false, len(n.items) 277 | } 278 | 279 | func (n *Node) addItem(item *Item, insertionIndex int) int { 280 | if len(n.items) == insertionIndex { // nil or empty slice or after last element 281 | n.items = append(n.items, item) 282 | return insertionIndex 283 | } 284 | 285 | n.items = append(n.items[:insertionIndex+1], n.items[insertionIndex:]...) 286 | n.items[insertionIndex] = item 287 | return insertionIndex 288 | } 289 | 290 | // split rebalances the tree after adding. After insertion the modified node has to be checked to make sure it 291 | // didn't exceed the maximum number of elements. If it did, then it has to be split and rebalanced. The transformation 292 | // is depicted in the graph below. If it's not a leaf node, then the children has to be moved as well as shown. 293 | // This may leave the parent unbalanced by having too many items so rebalancing has to be checked for all the ancestors. 294 | // The split is performed in a for loop to support splitting a node more than once. (Though in practice used only once). 295 | // n n 296 | // 3 3,6 297 | // / \ ------> / | \ 298 | // a modifiedNode a modifiedNode newNode 299 | // 1,2 4,5,6,7,8 1,2 4,5 7,8 300 | func (n *Node) split(nodeToSplit *Node, nodeToSplitIndex int) { 301 | // The first index where min amount of bytes to populate a page is achieved. Then add 1 so it will be split one 302 | // index after. 
303 | splitIndex := nodeToSplit.tx.db.getSplitIndex(nodeToSplit) 304 | 305 | middleItem := nodeToSplit.items[splitIndex] 306 | var newNode *Node 307 | 308 | if nodeToSplit.isLeaf() { 309 | newNode = n.writeNode(n.tx.newNode(nodeToSplit.items[splitIndex+1:], []pgnum{})) 310 | nodeToSplit.items = nodeToSplit.items[:splitIndex] 311 | } else { 312 | newNode = n.writeNode(n.tx.newNode(nodeToSplit.items[splitIndex+1:], nodeToSplit.childNodes[splitIndex+1:])) 313 | nodeToSplit.items = nodeToSplit.items[:splitIndex] 314 | nodeToSplit.childNodes = nodeToSplit.childNodes[:splitIndex+1] 315 | } 316 | n.addItem(middleItem, nodeToSplitIndex) 317 | if len(n.childNodes) == nodeToSplitIndex+1 { // If middle of list, then move items forward 318 | n.childNodes = append(n.childNodes, newNode.pageNum) 319 | } else { 320 | n.childNodes = append(n.childNodes[:nodeToSplitIndex+1], n.childNodes[nodeToSplitIndex:]...) 321 | n.childNodes[nodeToSplitIndex+1] = newNode.pageNum 322 | } 323 | 324 | n.writeNodes(n, nodeToSplit) 325 | } 326 | 327 | // rebalanceRemove rebalances the tree after a remove operation. This can be either by rotating to the right, to the 328 | // left or by merging. First, the sibling nodes are checked to see if they have enough items for rebalancing 329 | // (>= minItems+1). If they don't have enough items, then merging with one of the sibling nodes occurs. This may leave 330 | // the parent unbalanced by having too little items so rebalancing has to be checked for all the ancestors. 
331 | func (n *Node) rebalanceRemove(unbalancedNode *Node, unbalancedNodeIndex int) error { 332 | pNode := n 333 | 334 | // Right rotate 335 | if unbalancedNodeIndex != 0 { 336 | leftNode, err := n.getNode(pNode.childNodes[unbalancedNodeIndex-1]) 337 | if err != nil { 338 | return err 339 | } 340 | if leftNode.canSpareAnElement() { 341 | rotateRight(leftNode, pNode, unbalancedNode, unbalancedNodeIndex) 342 | n.writeNodes(leftNode, pNode, unbalancedNode) 343 | return nil 344 | } 345 | } 346 | 347 | // Left Balance 348 | if unbalancedNodeIndex != len(pNode.childNodes)-1 { 349 | rightNode, err := n.getNode(pNode.childNodes[unbalancedNodeIndex+1]) 350 | if err != nil { 351 | return err 352 | } 353 | if rightNode.canSpareAnElement() { 354 | rotateLeft(unbalancedNode, pNode, rightNode, unbalancedNodeIndex) 355 | n.writeNodes(unbalancedNode, pNode, rightNode) 356 | return nil 357 | } 358 | } 359 | 360 | // The merge function merges a given node with its node to the right. So by default, we merge an unbalanced node 361 | // with its right sibling. In the case where the unbalanced node is the leftmost, we have to replace the merge 362 | // parameters, so the unbalanced node right sibling, will be merged into the unbalanced node. 363 | if unbalancedNodeIndex == 0 { 364 | rightNode, err := n.getNode(n.childNodes[unbalancedNodeIndex+1]) 365 | if err != nil { 366 | return err 367 | } 368 | 369 | return pNode.merge(rightNode, unbalancedNodeIndex+1) 370 | } 371 | 372 | return pNode.merge(unbalancedNode, unbalancedNodeIndex) 373 | } 374 | 375 | // removeItemFromLeaf removes an item from a leaf node. It means there is no handling of child nodes. 376 | func (n *Node) removeItemFromLeaf(index int) { 377 | n.items = append(n.items[:index], n.items[index+1:]...) 
378 | n.writeNode(n) 379 | } 380 | 381 | func (n *Node) removeItemFromInternal(index int) ([]int, error) { 382 | // Take element before inorder (The biggest element from the left branch), put it in the removed index and remove 383 | // it from the original node. Track in affectedNodes any nodes in the path leading to that node. It will be used 384 | // in case the tree needs to be rebalanced. 385 | // p 386 | // / 387 | // .. 388 | // / \ 389 | // .. a 390 | 391 | affectedNodes := make([]int, 0) 392 | affectedNodes = append(affectedNodes, index) 393 | 394 | // Starting from its left child, descend to the rightmost descendant. 395 | aNode, err := n.getNode(n.childNodes[index]) 396 | if err != nil { 397 | return nil, err 398 | } 399 | 400 | for !aNode.isLeaf() { 401 | traversingIndex := len(n.childNodes) - 1 402 | aNode, err = n.getNode(n.childNodes[traversingIndex]) 403 | if err != nil { 404 | return nil, err 405 | } 406 | affectedNodes = append(affectedNodes, traversingIndex) 407 | } 408 | 409 | // Replace the item that should be removed with the item before inorder which we just found. 
410 | n.items[index] = aNode.items[len(aNode.items)-1] 411 | aNode.items = aNode.items[:len(aNode.items)-1] 412 | n.writeNodes(n, aNode) 413 | 414 | return affectedNodes, nil 415 | } 416 | 417 | func rotateRight(aNode, pNode, bNode *Node, bNodeIndex int) { 418 | // p p 419 | // 4 3 420 | // / \ ------> / \ 421 | // a b (unbalanced) a b (unbalanced) 422 | // 1,2,3 5 1,2 4,5 423 | 424 | // Get last item and remove it 425 | aNodeItem := aNode.items[len(aNode.items)-1] 426 | aNode.items = aNode.items[:len(aNode.items)-1] 427 | 428 | // Get item from parent node and assign the aNodeItem item instead 429 | pNodeItemIndex := bNodeIndex - 1 430 | if isFirst(bNodeIndex) { 431 | pNodeItemIndex = 0 432 | } 433 | pNodeItem := pNode.items[pNodeItemIndex] 434 | pNode.items[pNodeItemIndex] = aNodeItem 435 | 436 | // Assign parent item to b and make it first 437 | bNode.items = append([]*Item{pNodeItem}, bNode.items...) 438 | 439 | // If it's an inner leaf then move children as well. 440 | if !aNode.isLeaf() { 441 | childNodeToShift := aNode.childNodes[len(aNode.childNodes)-1] 442 | aNode.childNodes = aNode.childNodes[:len(aNode.childNodes)-1] 443 | bNode.childNodes = append([]pgnum{childNodeToShift}, bNode.childNodes...) 
444 | } 445 | } 446 | 447 | func rotateLeft(aNode, pNode, bNode *Node, bNodeIndex int) { 448 | // p p 449 | // 2 3 450 | // / \ ------> / \ 451 | // a(unbalanced) b a(unbalanced) b 452 | // 1 3,4,5 1,2 4,5 453 | 454 | // Get first item and remove it 455 | bNodeItem := bNode.items[0] 456 | bNode.items = bNode.items[1:] 457 | 458 | // Get item from parent node and assign the bNodeItem item instead 459 | pNodeItemIndex := bNodeIndex 460 | if isLast(bNodeIndex, pNode) { 461 | pNodeItemIndex = len(pNode.items) - 1 462 | } 463 | pNodeItem := pNode.items[pNodeItemIndex] 464 | pNode.items[pNodeItemIndex] = bNodeItem 465 | 466 | // Assign parent item to a and make it last 467 | aNode.items = append(aNode.items, pNodeItem) 468 | 469 | // If it's an inner leaf then move children as well. 470 | if !bNode.isLeaf() { 471 | childNodeToShift := bNode.childNodes[0] 472 | bNode.childNodes = bNode.childNodes[1:] 473 | aNode.childNodes = append(aNode.childNodes, childNodeToShift) 474 | } 475 | } 476 | 477 | func (n *Node) merge(bNode *Node, bNodeIndex int) error { 478 | // p p 479 | // 3,5 5 480 | // / | \ ------> / \ 481 | // a b c a c 482 | // 1,2 4 6,7 1,2,3,4 6,7 483 | aNode, err := n.getNode(n.childNodes[bNodeIndex-1]) 484 | if err != nil { 485 | return err 486 | } 487 | 488 | // Take the item from the parent, remove it and add it to the unbalanced node 489 | pNodeItem := n.items[bNodeIndex-1] 490 | n.items = append(n.items[:bNodeIndex-1], n.items[bNodeIndex:]...) 491 | aNode.items = append(aNode.items, pNodeItem) 492 | 493 | aNode.items = append(aNode.items, bNode.items...) 494 | n.childNodes = append(n.childNodes[:bNodeIndex], n.childNodes[bNodeIndex+1:]...) 495 | if !aNode.isLeaf() { 496 | aNode.childNodes = append(aNode.childNodes, bNode.childNodes...) 497 | } 498 | n.writeNodes(aNode, n) 499 | n.tx.db.deleteNode(bNode.pageNum) 500 | return nil 501 | } --------------------------------------------------------------------------------