├── README.md ├── core └── varint.go ├── delta ├── .gitignore ├── README.md └── main_test.go ├── go.mod ├── http-protocol ├── README.md └── main.go ├── seq-random ├── .gitignore ├── README.md ├── main.go └── plot-1.svg ├── utils └── fileio.go └── varint ├── README.md └── main_test.go /README.md: -------------------------------------------------------------------------------- 1 | Database Fundamentals 2 | === 3 | 4 | The repository contains prototype implementations of fundamental concepts and algorithms covering Database Internals. 5 | 6 | - [x] Variable length integer encoding • [Source](https://github.com/arpitbbhayani/database-fundamentals/tree/master/varint) • [YouTube Video](https://youtu.be/9b2e_iRVJ0k) 7 | - [x] Delta compression • [Source](https://github.com/arpitbbhayani/database-fundamentals/tree/master/delta) • [YouTube Video](https://youtu.be/J7VJtuRCkuI) 8 | - [x] Random vs Sequential IO • [Source](https://github.com/arpitbbhayani/database-fundamentals/tree/master/seq-random) 9 | - [x] Importance of protocols like HTTP and how to implement it from scratch • [Source](https://github.com/arpitbbhayani/database-fundamentals/blob/master/http-protocol/main.go) • [YouTube Video](https://youtu.be/SzwjnoPI--M) 10 | 11 | Do star the repository, if you find these prototypes helpful. 12 | -------------------------------------------------------------------------------- /core/varint.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | var BITMASK = []byte{ 4 | 0b00000001, 5 | 0b00000011, 6 | 0b00000111, 7 | 0b00001111, 8 | 0b00011111, 9 | 0b00111111, 10 | 0b01111111, 11 | 0b11111111, 12 | } 13 | 14 | // getLSB returns the least significant `n` bits from 15 | // the byte value `x`. 
// maxVarintLen64 is the largest number of bytes a varint-encoded uint64 can
// occupy: nine 7-bit groups followed by one final 1-bit group (9*7 + 1 = 64).
const maxVarintLen64 = 10

// getLSB returns the least significant n bits of the byte x.
//
// n must be in the range [0, 8]; asking for 0 bits yields 0. It panics when n
// is greater than 8 because a byte holds only 8 bits.
func getLSB(x byte, n uint8) byte {
	if n > 8 {
		panic("can extract at max 8 bits from the number")
	}
	// Shifting the all-ones byte right by (8 - n) leaves exactly n low bits
	// set. For n == 0 the shift count is 8, which Go defines to produce 0, so
	// no special case (and no out-of-range table lookup) is needed.
	return x & (byte(0xFF) >> (8 - n))
}

// bitShifts lists how many payload bits each successive varint byte carries:
// nine groups of 7 bits followed by the single remaining bit of a 64-bit value.
var bitShifts = []uint8{7, 7, 7, 7, 7, 7, 7, 7, 7, 1}

// EncodeUInt64 encodes the unsigned 64 bit integer x into a varint and
// returns the encoded bytes, least significant group first (little endian).
//
// Every byte stores its payload in the low bits. The high bit is the
// continuation bit: it is set on every byte except the last one. The returned
// slice is freshly allocated and the function keeps no shared state, so it is
// safe to call from multiple goroutines.
func EncodeUInt64(x uint64) []byte {
	var scratch [maxVarintLen64]byte
	n := 0
	for _, shift := range bitShifts {
		scratch[n] = getLSB(byte(x), shift) | 0b10000000 // mark the continuation bit
		n++
		x >>= shift
		if x == 0 {
			break
		}
	}
	scratch[n-1] &= 0b01111111 // clear the continuation bit on the final byte
	return append(make([]byte, 0, n), scratch[:n]...)
}

// DecodeUInt64 decodes the varint stored at the start of vint and returns the
// unsigned 64 bit integer it represents.
//
// Decoding stops at the first byte whose continuation bit is clear, so bytes
// that follow the encoded value (for example the next varint in a stream) do
// not affect the result. At most maxVarintLen64 bytes are consumed. An empty
// slice decodes to 0.
func DecodeUInt64(vint []byte) uint64 {
	var v uint64
	for i, b := range vint {
		if i >= maxVarintLen64 {
			break // a uint64 never needs more groups than this
		}
		v |= uint64(getLSB(b, 7)) << uint(7*i)
		if b&0b10000000 == 0 {
			break // termination bit reached
		}
	}
	return v
}
"github.com/arpitbbhayani/database-fundamentals/utils" 8 | ) 9 | 10 | const TOTAL_NUMBERS int = 1000000 11 | 12 | func TestMain(t *testing.T) { 13 | var numbers []uint64 = make([]uint64, TOTAL_NUMBERS) 14 | numbers[0] = 10000 15 | for i := 1; i < TOTAL_NUMBERS; i++ { 16 | numbers[i] = numbers[i-1] + rand.Uint64()%5 17 | } 18 | utils.PersistSliceUint64(numbers, "numbers_raw.dat", false) 19 | utils.PersistSliceUint64(numbers, "numbers_varint.dat", true) 20 | 21 | var numbersDelta []uint64 = make([]uint64, len(numbers)) 22 | numbersDelta[0] = numbers[0] 23 | for i := 1; i < TOTAL_NUMBERS; i++ { 24 | numbersDelta[i] = numbers[i] - numbers[i-1] 25 | } 26 | utils.PersistSliceUint64(numbersDelta, "numbers_delta.dat", true) 27 | } 28 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/arpitbbhayani/database-fundamentals 2 | 3 | go 1.18 4 | -------------------------------------------------------------------------------- /http-protocol/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | $ go run http-protocol/main.go 3 | $ curl http://localhost:1729/foo 4 | $ netcat localhost 1729 5 | GET /foo HTTP/1.1 6 | Host: localhost 7 | $ netcat localhost 1729 8 | POST /login HTTP/1.1 9 | Host: localhost 10 | Content-Length: 28 11 | 12 | username=arpit&password=pass 13 | ``` 14 | -------------------------------------------------------------------------------- /http-protocol/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "net/http" 7 | ) 8 | 9 | func getFoo(w http.ResponseWriter, r *http.Request) { 10 | for k, v := range r.Header { 11 | fmt.Println("header", k, "value", v) 12 | } 13 | w.Write([]byte("bar")) 14 | } 15 | 16 | func loginHandler(w http.ResponseWriter, r *http.Request) { 17 | 
const (
	// FILENAME is the data file the benchmark reads from.
	FILENAME = "data.txt"
	// FILE_SIZE is the size of the data file in bytes (1 GiB).
	FILE_SIZE = 1 * 1024 * 1024 * 1024
	// MB1 is one mebibyte, the chunk size used while generating the data file.
	MB1 = 1 * 1024 * 1024
)

// setupFile makes sure FILENAME exists and holds at least FILE_SIZE bytes of
// random data, generating it when necessary. It panics on any I/O error.
func setupFile() {
	// Reuse the file only when it is complete. A shorter file is the leftover
	// of an interrupted run and would make later reads hit EOF.
	if info, err := os.Stat(FILENAME); err == nil && info.Size() >= FILE_SIZE {
		return
	}

	file, err := os.OpenFile(FILENAME, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		panic(err)
	}
	// Covers the panic paths below; after the explicit Close at the end this
	// second Close only returns an error that is safe to ignore.
	defer file.Close()

	buffer := make([]byte, MB1) // one chunk, refilled on every iteration
	for written := 0; written < FILE_SIZE; written += MB1 {
		if _, err := rand.Read(buffer); err != nil {
			panic(err)
		}
		if _, err := file.Write(buffer); err != nil {
			panic(err)
		}
	}
	// Close explicitly so an error while finishing the file is not lost.
	if err := file.Close(); err != nil {
		panic(err)
	}
}

// stat holds the time, in milliseconds, taken to read FILE_SIZE bytes with
// one buffer size using each access pattern.
type stat struct {
	random     int64
	sequential int64
}

// parameters describes one benchmark configuration and its measured results.
type parameters struct {
	name     string // label printed in the report, e.g. "4KB"
	pageSize int    // read buffer size in bytes
	stats    stat   // filled in by benchmark
}

// params lists the buffer sizes to benchmark, from 1 KiB up to 1 MiB.
var params = []parameters{
	{name: "1KB", pageSize: 1 * 1024},
	{name: "2KB", pageSize: 2 * 1024},
	{name: "4KB", pageSize: 4 * 1024},
	{name: "8KB", pageSize: 8 * 1024},
	{name: "16KB", pageSize: 16 * 1024},
	{name: "32KB", pageSize: 32 * 1024},
	{name: "64KB", pageSize: 64 * 1024},
	{name: "128KB", pageSize: 128 * 1024},
	{name: "256KB", pageSize: 256 * 1024},
	{name: "512KB", pageSize: 512 * 1024},
	{name: "1024KB", pageSize: 1024 * 1024},
}

// readPage fills buf from the current position of fp. It panics on failure so
// that a failed read is never silently counted as benchmark work.
func readPage(fp *os.File, buf []byte) {
	if _, err := fp.Read(buf); err != nil {
		panic(err)
	}
}

// timeRandomReads reads FILE_SIZE/pageSize pages from page-aligned random
// offsets of fp and returns the elapsed time in milliseconds.
func timeRandomReads(fp *os.File, pageSize int) int64 {
	buf := make([]byte, pageSize)
	pages := FILE_SIZE / pageSize

	start := time.Now()
	for i := 0; i < pages; i++ {
		offset := int64(rand.Intn(pages)) * int64(pageSize)
		if _, err := fp.Seek(offset, 0); err != nil { // whence 0: from file start
			panic(err)
		}
		readPage(fp, buf)
	}
	return time.Since(start).Milliseconds()
}

// timeSequentialReads reads FILE_SIZE/pageSize consecutive pages from the
// start of fp and returns the elapsed time in milliseconds.
func timeSequentialReads(fp *os.File, pageSize int) int64 {
	if _, err := fp.Seek(0, 0); err != nil { // rewind before timing starts
		panic(err)
	}
	buf := make([]byte, pageSize)
	pages := FILE_SIZE / pageSize

	start := time.Now()
	for i := 0; i < pages; i++ {
		readPage(fp, buf)
	}
	return time.Since(start).Milliseconds()
}

// benchmark prepares the data file and records, for every entry in params,
// how long random and sequential reads of the whole file take. All random
// runs are done first, then all sequential runs.
func benchmark() {
	setupFile()

	fp, err := os.Open(FILENAME)
	if err != nil {
		panic(err)
	}
	defer fp.Close()

	for i := range params {
		params[i].stats.random = timeRandomReads(fp, params[i].pageSize)
	}
	for i := range params {
		params[i].stats.sequential = timeSequentialReads(fp, params[i].pageSize)
	}
}

// main runs the benchmark and prints the results as CSV on stdout.
func main() {
	benchmark()
	fmt.Printf("buf_size,random (ms), sequential (ms)\n")
	for _, p := range params {
		fmt.Printf("%s,%d,%d\n", p.name, p.stats.random, p.stats.sequential)
	}
}
-------------------------------------------------------------------------------- /utils/fileio.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/binary" 5 | "log" 6 | "os" 7 | 8 | "github.com/arpitbbhayani/database-fundamentals/core" 9 | ) 10 | 11 | func PersistSliceUint64(arr []uint64, filename string, shouldVarint bool) { 12 | file, err := os.Create(filename) 13 | if err != nil { 14 | log.Fatalln("error creating file:", err) 15 | } 16 | defer file.Close() 17 | 18 | for i := range arr { 19 | var buf []byte 20 | if shouldVarint { 21 | buf = core.EncodeUInt64(arr[i]) 22 | } else { 23 | buf = make([]byte, 8) 24 | binary.LittleEndian.PutUint64(buf, arr[i]) 25 | } 26 | file.Write(buf) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /varint/README.md: -------------------------------------------------------------------------------- 1 | varint 2 | === 3 | 4 | ``` 5 | $ go test -v main_test.go 6 | ``` 7 | -------------------------------------------------------------------------------- /varint/main_test.go: -------------------------------------------------------------------------------- 1 | package varint 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/arpitbbhayani/database-fundamentals/core" 7 | ) 8 | 9 | func TestMain(t *testing.T) { 10 | t.Log(core.EncodeUInt64(123)) 11 | t.Log(core.EncodeUInt64(292)) 12 | } 13 | --------------------------------------------------------------------------------