├── .gitignore
├── go.mod
├── examples
│   └── main.go
├── go.sum
├── .github
│   └── workflows
│       └── go.yml
├── benchmark
│   └── bench_test.go
├── options.go
├── README.md
├── CHANGELOG.md
├── wal_test.go
├── segment_test.go
├── LICENSE
├── wal.go
└── segment.go

/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .vscode/
3 | .DS_Store
4 | 
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/rosedblabs/wal
2 | 
3 | go 1.21
4 | 
5 | require (
6 | 	github.com/stretchr/testify v1.9.0
7 | 	github.com/valyala/bytebufferpool v1.0.0
8 | )
9 | 
10 | require (
11 | 	github.com/davecgh/go-spew v1.1.1 // indirect
12 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
13 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
14 | )
15 | 
--------------------------------------------------------------------------------
/examples/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"fmt"
5 | 	"io"
6 | 	"log"
7 | 
8 | 	"github.com/rosedblabs/wal"
9 | )
10 | 
11 | func main() {
12 | 	walFile, _ := wal.Open(wal.DefaultOptions)
13 | 	// write some data
14 | 	chunkPosition, _ := walFile.Write([]byte("some data 1"))
15 | 	// read by the position
16 | 	val, _ := walFile.Read(chunkPosition)
17 | 	fmt.Println(string(val))
18 | 
19 | 	_, err := walFile.Write([]byte("some data 2"))
20 | 	if err != nil {
21 | 		log.Println(err)
22 | 	}
23 | 	_, err = walFile.Write([]byte("some data 3"))
24 | 	if err != nil {
25 | 		log.Println(err)
26 | 	}
27 | 
28 | 	// iterate all data in the wal
29 | 	reader := walFile.NewReader()
30 | 	for {
31 | 		val, pos, err := reader.Next()
32 | 		if err != nil {
33 | 			if err == io.EOF {
34 | 				break
35 | 			}
36 | 			log.Fatal(err) // stop instead of looping forever on a non-EOF error
37 | 		}
38 | 		fmt.Println(string(val))
39 | 		fmt.Println(pos) // get position of the data for the next read
40 | 	}
41 | }
42 | 
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
6 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
7 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
8 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
13 | 
--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 | 
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |   pull_request:
7 |     branches: [ main ]
8 | 
9 | jobs:
10 | 
11 |   ubuntu-test:
12 |     runs-on: ubuntu-latest
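    # This job runs vet, a gofmt check, a build, and the unit tests on Ubuntu;
    # the windows-test job below repeats the same steps minus the gofmt check.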
13 |     steps:
14 |     - uses: actions/checkout@v2
15 | 
16 |     - name: Set up Go
17 |       uses: actions/setup-go@v2
18 |       with:
19 |         go-version: 1.21
20 | 
21 |     - name: Run Go Vet
22 |       run: |
23 |         go vet ./...
24 | 
25 |     - name: Run Go Fmt
26 |       run: |
27 |         files=$(go fmt ./...)
28 |         if [ -n "$files" ]; then
29 |           echo "Please run gofmt on these files ..."
30 |           echo "$files"
31 |           exit 1
32 |         fi
33 | 
34 |     - name: Build
35 |       run: go build -v
36 | 
37 |     - name: Run Unit Test
38 |       run: go test -count 1 -v ./...
39 | 
40 |   windows-test:
41 |     runs-on: windows-latest
42 |     steps:
43 |     - uses: actions/checkout@v2
44 | 
45 |     - name: Set up Go
46 |       uses: actions/setup-go@v2
47 |       with:
48 |         go-version: 1.21
49 | 
50 |     - name: Run Go Vet
51 |       run: |
52 |         go vet ./...
53 | 
54 |     - name: Build
55 |       run: go build -v
56 | 
57 |     - name: Run Unit Test
58 |       run: go test -count 1 -v ./...
59 | 
--------------------------------------------------------------------------------
/benchmark/bench_test.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 | 
3 | import (
4 | 	"math/rand"
5 | 	"os"
6 | 	"strings"
7 | 	"testing"
8 | 
9 | 	"github.com/rosedblabs/wal"
10 | 	"github.com/stretchr/testify/assert"
11 | )
12 | 
13 | var walFile *wal.WAL
14 | 
15 | func init() {
16 | 	dir, _ := os.MkdirTemp("", "wal-benchmark-test")
17 | 	opts := wal.Options{
18 | 		DirPath:        dir,
19 | 		SegmentFileExt: ".SEG",
20 | 		SegmentSize:    wal.GB,
21 | 	}
22 | 	var err error
23 | 	walFile, err = wal.Open(opts)
24 | 	if err != nil {
25 | 		panic(err)
26 | 	}
27 | }
28 | 
29 | func BenchmarkWAL_WriteLargeSize(b *testing.B) {
30 | 	b.ResetTimer()
31 | 	b.ReportAllocs()
32 | 	content := []byte(strings.Repeat("X", 256*wal.KB+500))
33 | 	for i := 0; i < b.N; i++ {
34 | 		_, err := walFile.Write(content)
35 | 		assert.Nil(b, err)
36 | 	}
37 | }
38 | 
39 | func BenchmarkWAL_Write(b *testing.B) {
40 | 	b.ResetTimer()
41 | 	b.ReportAllocs()
42 | 
43 | 	for i := 0; i < b.N; i++ {
44 | 		_, err := walFile.Write([]byte("Hello World"))
45 | 		assert.Nil(b, err)
46 | 	}
47 | }
48 | 
49 | func BenchmarkWAL_WriteBatch(b *testing.B) {
50 | 	b.ResetTimer()
51 | 	b.ReportAllocs()
52 | 	for i := 0; i < b.N; i++ {
53 | 		for j := 0; j < 31; j++ {
54 | 			walFile.PendingWrites([]byte(strings.Repeat("X", wal.MB)))
55 | 		}
56 | 		walFile.PendingWrites([]byte(strings.Repeat("X", wal.MB)))
57 | 		pos, err := walFile.WriteAll()
58 | 		assert.Nil(b, err)
59 | 		assert.Equal(b, 32, len(pos))
60 | 	}
61 | }
62 | 
63 | func BenchmarkWAL_Read(b *testing.B) {
64 | 	var positions []*wal.ChunkPosition
65 | 	for i := 0; i < 1000000; i++ {
66 | 		pos, err := walFile.Write([]byte("Hello World"))
67 | 		assert.Nil(b, err)
68 | 		positions = append(positions, pos)
69 | 	}
70 | 
71 | 	b.ResetTimer()
72 | 	b.ReportAllocs()
73 | 
74 | 	for i := 0; i < b.N; i++ {
75 | 		_, err := walFile.Read(positions[rand.Intn(len(positions))])
76 | 		assert.Nil(b, err)
77 | 	}
78 | }
79 | 
--------------------------------------------------------------------------------
/options.go:
--------------------------------------------------------------------------------
1 | package wal
2 | 
3 | import (
4 | 	"os"
5 | 	"time"
6 | )
7 | 
8 | // Options represents the configuration options for a Write-Ahead Log (WAL).
9 | type Options struct {
10 | 	// DirPath specifies the directory path where the WAL segment files will be stored.
11 | 	DirPath string
12 | 
13 | 	// SegmentSize specifies the maximum size of each segment file in bytes.
14 | 	SegmentSize int64
15 | 
16 | 	// SegmentFileExt specifies the file extension of the segment files.
17 | 	// The file extension must start with a dot ".", and the default value is ".SEG".
18 | 	// It is used to identify the different types of files in the directory.
19 | 	// Now it is used by rosedb to identify the segment files and hint files.
20 | 	// Not a common usage for most users.
21 | 	SegmentFileExt string
22 | 
23 | 	// Sync determines whether to synchronize writes through the OS buffer cache and down onto the actual disk.
24 | 	// Enabling Sync is required for the durability of a single write operation, but it also results in slower writes.
25 | 	//
26 | 	// If false, and the machine crashes, then some recent writes may be lost.
27 | 	// Note that if it is just the process that crashes (the machine does not), then no writes will be lost.
28 | 	//
29 | 	// In other words, Sync being false has the same semantics as a write
30 | 	// system call. Sync being true means write followed by fsync.
31 | 	Sync bool
32 | 
33 | 	// BytesPerSync specifies the number of bytes to write before calling fsync.
34 | 	BytesPerSync uint32
35 | 
36 | 	// SyncInterval is the time duration in which explicit synchronization is performed.
37 | 	// If SyncInterval is zero, no periodic synchronization is performed.
38 | 	SyncInterval time.Duration
39 | }
40 | 
41 | const (
42 | 	B  = 1
43 | 	KB = 1024 * B
44 | 	MB = 1024 * KB
45 | 	GB = 1024 * MB
46 | )
47 | 
48 | var DefaultOptions = Options{
49 | 	DirPath:        os.TempDir(),
50 | 	SegmentSize:    GB,
51 | 	SegmentFileExt: ".SEG",
52 | 	Sync:           false,
53 | 	BytesPerSync:   0,
54 | 	SyncInterval:   0,
55 | }
56 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # wal
2 | Write Ahead Log for LSM or bitcask storage.
3 | 
4 | ## Key Features
5 | * Disk based, supports large data volumes
6 | * Append-only writes, high performance
7 | * Fast reads: a single disk seek retrieves any value
8 | * Supports batch writes: all data in a batch is written in a single disk seek
9 | * Iterate over all data in the WAL with the `NewReader` function
10 | * Supports concurrent writes and reads: all functions are thread safe
11 | 
12 | ## Design Overview
13 | 
14 | ![wal-logo.png](https://s2.loli.net/2025/01/12/SF9vThRkAObm4WD.png)
15 | 
16 | ## Format
17 | 
18 | **Format of a single segment file:**
19 | 
20 | ```
21 |        +-----+-------------+--+----+----------+------+-- ... ----+
22 | File   | r0  |      r1     |P | r2 |    r3    |  r4  |           |
23 |        +-----+-------------+--+----+----------+------+-- ... ----+
24 |        |<---- BlockSize ----->|<---- BlockSize ----->|
25 | 
26 |  rn = variable size records
27 |  P = Padding
28 |  BlockSize = 32KB
29 | ```
30 | 
31 | **Format of a single record:**
32 | 
33 | ```
34 | +----------+-------------+-----------+--- ... ---+
35 | | CRC (4B) | Length (2B) | Type (1B) |  Payload  |
36 | +----------+-------------+-----------+--- ... ---+
37 | 
38 | CRC = 32-bit hash computed over the payload using CRC-32
39 | Length = Length of the payload data
40 | Type = Type of record
41 |        (FullType, FirstType, MiddleType, LastType)
42 |        The type is used to group a bunch of records together to represent
43 |        data that is larger than BlockSize
44 | Payload = Byte stream as long as specified by the payload size
45 | ```
46 | 
47 | The record header (CRC + Length + Type) is therefore 7 bytes in total, and the payload follows immediately after it.
48 | 
49 | ## Getting Started
50 | 
51 | ```go
52 | package main
53 | 
54 | import (
55 | 	"fmt"
56 | 	"io"
57 | 	"log"
58 | 
59 | 	"github.com/rosedblabs/wal"
60 | )
61 | 
62 | func main() {
63 | 	walFile, _ := wal.Open(wal.DefaultOptions)
64 | 	// write some data
65 | 	chunkPosition, _ := walFile.Write([]byte("some data 1"))
66 | 	// read by the position
67 | 	val, _ := walFile.Read(chunkPosition)
68 | 	fmt.Println(string(val))
69 | 
70 | 	walFile.Write([]byte("some data 2"))
71 | 	walFile.Write([]byte("some data 3"))
72 | 
73 | 	// iterate all data in the wal
74 | 	reader := walFile.NewReader()
75 | 	for {
76 | 		val, pos, err := reader.Next()
77 | 		if err != nil {
78 | 			if err == io.EOF {
79 | 				break
80 | 			}
81 | 			log.Fatal(err)
82 | 		}
83 | 		fmt.Println(string(val))
84 | 		fmt.Println(pos) // get position of the data for the next read
85 | 	}
86 | }
87 | ```
88 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Release 1.3.8 (2024-07-06)
2 | ## 🎄 Enhancements
3 | * fix concurrent read (https://github.com/rosedblabs/wal/commit/4206c62ab8b46223b9fbd030c90219222cf1707f)
4 | ## 🎠 Community
5 | * Thanks to @ghosx
6 |   * Fix seg.Remove Swallowing Error (https://github.com/rosedblabs/wal/pull/38)
7 |   * fix panic (https://github.com/rosedblabs/wal/pull/39)
8 | 
9 | # Release 1.3.7 (2024-06-13)
10 | 
11 | ## 🎄 Enhancements
12 | * remove block cache (https://github.com/rosedblabs/wal/commit/e77db044463f4ba32455b7a0b42db3161248b5a4)
13 | * improve iterate performance (https://github.com/rosedblabs/wal/commit/213913a6a9145740cff75340de899b0ff03894e4)
14 | 
15 | # Release 1.3.6 (2023-09-25)
16 | 
17 | ## 🎄 Enhancements
18 | * avoid resetting the pool to optimize memory usage
19 | * no need to return err in pendingWrites
20 | * fix benchmark error
21 | ## 🎠 Community
22 | * Thanks to @akiozihao
23 |   * check ErrPendingSizeTooLarge first (https://github.com/rosedblabs/wal/pull/32)
24 | 
25 | # Release 1.3.5 (2023-09-19)
26 | 
27 | ## 🎄 Enhancements
28 | * Rotate the file when pending writes exceed the remaining space of the segment file.
29 | 
30 | # Release 1.3.4 (2023-09-18)
31 | 
32 | ## 🚀 New Features
33 | * add RenameFileExt function
34 | 
35 | ## 🎠 Community
36 | * Thanks to @akiozihao
37 |   * add EncodeFixedSize (https://github.com/rosedblabs/wal/pull/28)
38 |   * add WriteBatch (https://github.com/rosedblabs/wal/pull/26)
39 | 
40 | # Release 1.3.3 (2023-08-19)
41 | 
42 | ## 🎠 Community
43 | * Thanks to @LEAVING-7
44 |   * Keep function name consistent in wal_test.go (https://github.com/rosedblabs/wal/pull/24)
45 | * Thanks to @amityahav
46 |   * Improved performance for writing large records (> blockSize) (https://github.com/rosedblabs/wal/pull/21)
47 | ## 🐞 Bug Fixes
48 | * fix a bug if the segment size exceeds 4GB
49 | * Enhancement: use bufferpool to avoid writing twice https://github.com/rosedblabs/wal/commit/1345f5013113781c59ddaca36ddb13bdcc58ce27
50 | 
51 | # Release 1.3.2 (2023-08-07)
52 | 
53 | ## 🎄 Enhancements
54 | * Enhancement: use bufferpool to avoid writing twice https://github.com/rosedblabs/wal/commit/1345f5013113781c59ddaca36ddb13bdcc58ce27
55 | 
56 | # Release 1.3.1 (2023-08-04)
57 | 
58 | ## 🐞 Bug Fixes
59 | * Add a condition to avoid caching repeatedly https://github.com/rosedblabs/wal/commit/cb708139c877b1ef102c0be057ba33cb4af6abb2
60 | 
61 | # Release 1.3.0 (2023-08-02)
62 | 
63 | ## 🚀 New Features
64 | * Add ChunkPosition Encode and Decode
65 | 
66 | ## 🎄 Enhancements
67 | * Avoid making new bytes while writing
68 | * Use sync.Pool to optimize read performance
69 | * Add more code comments
70 | 
71 | ## 🎠 Community
72 | * Thanks to @chinazmc
73 |   * update SementFileExt to SegmentFileExt (https://github.com/rosedblabs/wal/pull/11)
74 | * Thanks to @xzhseh
75 |   * feat(docs): improve README.md format & fix several typos (https://github.com/rosedblabs/wal/pull/12)
76 | * Thanks to @yanxiaoqi932
77 |   * BlockCache must be smaller than SegmentSize (https://github.com/rosedblabs/wal/pull/14)
78 | * Thanks to @mitingjin
79 |   * Fix typo in wal.go (https://github.com/rosedblabs/wal/pull/15)
80 | 
81 | # Release 1.2.0 (2023-07-01)
82 | 
83 | ## 🚀 New Features
84 | * Add `NewReaderWithStart` function to support reading the log from a specified position.
85 | 
86 | ## 🎠 Community
87 | * Thanks to @yanxiaoqi932
88 |   * enhancement: add wal delete function ([#9](https://github.com/rosedblabs/wal/pull/9))
89 | 
90 | # Release 1.1.0 (2023-06-21)
91 | 
92 | ## 🚀 New Features
93 | * Add tests on Windows, with a workflow.
94 | * Add some functions to support the rosedb Merge operation.
95 | 
96 | ## 🎠 Community
97 | * Thanks to @SPCDTS
98 |   * fix: calculate seg file size by seg.size ([#7](https://github.com/rosedblabs/wal/pull/7))
99 |   * fix: limit data size ([#6](https://github.com/rosedblabs/wal/pull/6))
100 |   * fix: spelling error ([#5](https://github.com/rosedblabs/wal/pull/5))
101 | 
102 | # Release 1.0.0 (2023-06-13)
103 | 
104 | ## 🚀 New Features
105 | * First release: basic operations, read, write, and iterate the log files.
106 | * Add block cache for log files.
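
Two features called out above are easiest to see side by side: `ChunkPosition` Encode/Decode (1.3.0) and `NewReaderWithStart` (1.2.0). The sketch below is an untested illustration based only on the API that appears elsewhere in this repository (`wal.Open`, `Write`, `Encode`, `DecodeChunkPosition`, `NewReaderWithStart`); the directory name is hypothetical.

```go
package main

import (
	"fmt"
	"io"
	"os"
	"path/filepath"

	"github.com/rosedblabs/wal"
)

func main() {
	dir := filepath.Join(os.TempDir(), "wal-changelog-demo") // hypothetical directory
	w, err := wal.Open(wal.Options{
		DirPath:        dir,
		SegmentFileExt: ".SEG",
		SegmentSize:    wal.GB,
		Sync:           false, // see options.go: trades single-write durability for speed
	})
	if err != nil {
		panic(err)
	}
	defer w.Close()

	// Write a few records and keep the position of the second one.
	_, _ = w.Write([]byte("record 1"))
	pos, _ := w.Write([]byte("record 2"))
	_, _ = w.Write([]byte("record 3"))

	// ChunkPosition Encode/Decode (1.3.0): a position round-trips through bytes,
	// so it can be stored in an index and restored later.
	restored := wal.DecodeChunkPosition(pos.Encode())

	// NewReaderWithStart (1.2.0): resume iteration from the restored position.
	reader, err := w.NewReaderWithStart(restored)
	if err != nil {
		panic(err)
	}
	for {
		data, _, err := reader.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			panic(err)
		}
		fmt.Println(string(data)) // expected: "record 2", then "record 3"
	}
}
```

Encoding positions this way is what lets callers persist pointers into the WAL and resume iteration from them after a restart.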
107 | -------------------------------------------------------------------------------- /wal_test.go: -------------------------------------------------------------------------------- 1 | package wal 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func destroyWAL(wal *WAL) { 13 | if wal != nil { 14 | _ = wal.Close() 15 | _ = os.RemoveAll(wal.options.DirPath) 16 | } 17 | } 18 | 19 | func TestWAL_WriteALL(t *testing.T) { 20 | dir, _ := os.MkdirTemp("", "wal-test-write-batch-1") 21 | opts := Options{ 22 | DirPath: dir, 23 | SegmentFileExt: ".SEG", 24 | SegmentSize: 32 * 1024 * 1024, 25 | } 26 | wal, err := Open(opts) 27 | assert.Nil(t, err) 28 | defer destroyWAL(wal) 29 | 30 | testWriteAllIterate(t, wal, 0, 10) 31 | assert.True(t, wal.IsEmpty()) 32 | 33 | testWriteAllIterate(t, wal, 10000, 512) 34 | assert.False(t, wal.IsEmpty()) 35 | } 36 | 37 | func TestWAL_Write(t *testing.T) { 38 | dir, _ := os.MkdirTemp("", "wal-test-write1") 39 | opts := Options{ 40 | DirPath: dir, 41 | SegmentFileExt: ".SEG", 42 | SegmentSize: 32 * 1024 * 1024, 43 | } 44 | wal, err := Open(opts) 45 | assert.Nil(t, err) 46 | defer destroyWAL(wal) 47 | 48 | // write 1 49 | pos1, err := wal.Write([]byte("hello1")) 50 | assert.Nil(t, err) 51 | assert.NotNil(t, pos1) 52 | pos2, err := wal.Write([]byte("hello2")) 53 | assert.Nil(t, err) 54 | assert.NotNil(t, pos2) 55 | pos3, err := wal.Write([]byte("hello3")) 56 | assert.Nil(t, err) 57 | assert.NotNil(t, pos3) 58 | 59 | val, err := wal.Read(pos1) 60 | assert.Nil(t, err) 61 | assert.Equal(t, "hello1", string(val)) 62 | val, err = wal.Read(pos2) 63 | assert.Nil(t, err) 64 | assert.Equal(t, "hello2", string(val)) 65 | val, err = wal.Read(pos3) 66 | assert.Nil(t, err) 67 | assert.Equal(t, "hello3", string(val)) 68 | } 69 | 70 | func TestWAL_Write_large(t *testing.T) { 71 | dir, _ := os.MkdirTemp("", "wal-test-write2") 72 | opts := Options{ 73 | DirPath: dir, 74 | SegmentFileExt: ".SEG", 75 | SegmentSize: 32 * 1024 * 1024, 76 | } 77 | wal, err := Open(opts) 78 | assert.Nil(t, err) 79 | defer destroyWAL(wal) 80 | 81 | testWriteAndIterate(t, wal, 100000, 512) 82 | } 83 | 84 | func TestWAL_Write_large2(t *testing.T) { 85 | dir, _ := os.MkdirTemp("", "wal-test-write3") 86 | opts := Options{ 87 | DirPath: dir, 88 | SegmentFileExt: ".SEG", 89 | SegmentSize: 32 * 1024 * 1024, 90 | } 91 | wal, err := Open(opts) 92 | assert.Nil(t, err) 93 | defer destroyWAL(wal) 94 | 95 | testWriteAndIterate(t, wal, 2000, 32*1024*3+10) 96 | } 97 | 98 | func TestWAL_OpenNewActiveSegment(t *testing.T) { 99 | dir, _ := os.MkdirTemp("", "wal-test-new-active-segment") 100 | opts := Options{ 101 | DirPath: dir, 102 | SegmentFileExt: ".SEG", 103 | SegmentSize: 32 * 1024 * 1024, 104 | } 105 | wal, err := Open(opts) 106 | assert.Nil(t, err) 107 | defer destroyWAL(wal) 108 | 109 | testWriteAndIterate(t, wal, 2000, 512) 110 | err = wal.OpenNewActiveSegment() 111 | assert.Nil(t, err) 112 | 113 | val := strings.Repeat("wal", 100) 114 | for i := 0; i < 100; i++ { 115 | pos, err := wal.Write([]byte(val)) 116 | assert.Nil(t, err) 117 | assert.NotNil(t, pos) 118 | } 119 | } 120 | 121 | func TestWAL_IsEmpty(t *testing.T) { 122 | dir, _ := os.MkdirTemp("", "wal-test-is-empty") 123 | opts := Options{ 124 | DirPath: dir, 125 | SegmentFileExt: ".SEG", 126 | SegmentSize: 32 * 1024 * 1024, 127 | } 128 | wal, err := Open(opts) 129 | assert.Nil(t, err) 130 | defer destroyWAL(wal) 131 | 132 | assert.True(t, wal.IsEmpty()) 133 | testWriteAndIterate(t, wal, 
2000, 512) 134 | assert.False(t, wal.IsEmpty()) 135 | } 136 | 137 | func TestWAL_Reader(t *testing.T) { 138 | dir, _ := os.MkdirTemp("", "wal-test-wal-reader") 139 | opts := Options{ 140 | DirPath: dir, 141 | SegmentFileExt: ".SEG", 142 | SegmentSize: 32 * 1024 * 1024, 143 | } 144 | wal, err := Open(opts) 145 | assert.Nil(t, err) 146 | defer destroyWAL(wal) 147 | 148 | var size = 100000 149 | val := strings.Repeat("wal", 512) 150 | for i := 0; i < size; i++ { 151 | _, err := wal.Write([]byte(val)) 152 | assert.Nil(t, err) 153 | } 154 | 155 | validate := func(walInner *WAL, size int) { 156 | var i = 0 157 | reader := walInner.NewReader() 158 | for { 159 | chunk, position, err := reader.Next() 160 | if err != nil { 161 | if err == io.EOF { 162 | break 163 | } 164 | panic(err) 165 | } 166 | assert.NotNil(t, chunk) 167 | assert.NotNil(t, position) 168 | assert.Equal(t, position.SegmentId, reader.CurrentSegmentId()) 169 | i++ 170 | } 171 | assert.Equal(t, i, size) 172 | } 173 | 174 | validate(wal, size) 175 | err = wal.Close() 176 | assert.Nil(t, err) 177 | 178 | wal2, err := Open(opts) 179 | assert.Nil(t, err) 180 | defer func() { 181 | _ = wal2.Close() 182 | }() 183 | validate(wal2, size) 184 | } 185 | 186 | func testWriteAllIterate(t *testing.T, wal *WAL, size, valueSize int) { 187 | for i := 0; i < size; i++ { 188 | val := strings.Repeat("wal", valueSize) 189 | wal.PendingWrites([]byte(val)) 190 | } 191 | positions, err := wal.WriteAll() 192 | assert.Nil(t, err) 193 | assert.Equal(t, len(positions), size) 194 | 195 | count := 0 196 | reader := wal.NewReader() 197 | for { 198 | data, pos, err := reader.Next() 199 | if err != nil { 200 | break 201 | } 202 | assert.Equal(t, strings.Repeat("wal", valueSize), string(data)) 203 | 204 | assert.Equal(t, positions[count].SegmentId, pos.SegmentId) 205 | assert.Equal(t, positions[count].BlockNumber, pos.BlockNumber) 206 | assert.Equal(t, positions[count].ChunkOffset, pos.ChunkOffset) 207 | 208 | count++ 209 | } 210 | assert.Equal(t, len(wal.pendingWrites), 0) 211 | } 212 | 213 | func testWriteAndIterate(t *testing.T, wal *WAL, size int, valueSize int) { 214 | val := strings.Repeat("wal", valueSize) 215 | positions := make([]*ChunkPosition, size) 216 | for i := 0; i < size; i++ { 217 | pos, err := wal.Write([]byte(val)) 218 | assert.Nil(t, err) 219 | positions[i] = pos 220 | } 221 | 222 | var count int 223 | // iterates all the data 224 | reader := wal.NewReader() 225 | for { 226 | data, pos, err := reader.Next() 227 | if err != nil { 228 | break 229 | } 230 | assert.Equal(t, val, string(data)) 231 | 232 | assert.Equal(t, positions[count].SegmentId, pos.SegmentId) 233 | assert.Equal(t, positions[count].BlockNumber, pos.BlockNumber) 234 | assert.Equal(t, positions[count].ChunkOffset, pos.ChunkOffset) 235 | 236 | count++ 237 | } 238 | assert.Equal(t, size, count) 239 | } 240 | 241 | func TestWAL_Delete(t *testing.T) { 242 | dir, _ := os.MkdirTemp("", "wal-test-delete") 243 | opts := Options{ 244 | DirPath: dir, 245 | SegmentFileExt: ".SEG", 246 | SegmentSize: 32 * 1024 * 1024, 247 | } 248 | wal, err := Open(opts) 249 | assert.Nil(t, err) 250 | testWriteAndIterate(t, wal, 2000, 512) 251 | assert.False(t, wal.IsEmpty()) 252 | defer destroyWAL(wal) 253 | 254 | err = wal.Delete() 255 | assert.Nil(t, err) 256 | 257 | wal, err = Open(opts) 258 | assert.Nil(t, err) 259 | assert.True(t, wal.IsEmpty()) 260 | } 261 | 262 | func TestWAL_ReaderWithStart(t *testing.T) { 263 | dir, _ := os.MkdirTemp("", "wal-test-wal-reader-with-start") 264 | opts := Options{ 265 | 
DirPath: dir, 266 | SegmentFileExt: ".SEG", 267 | SegmentSize: 8 * 1024 * 1024, 268 | } 269 | wal, err := Open(opts) 270 | assert.Nil(t, err) 271 | defer destroyWAL(wal) 272 | 273 | _, err = wal.NewReaderWithStart(nil) 274 | assert.NotNil(t, err) 275 | 276 | reader1, err := wal.NewReaderWithStart(&ChunkPosition{SegmentId: 0, BlockNumber: 0, ChunkOffset: 100}) 277 | assert.Nil(t, err) 278 | _, _, err = reader1.Next() 279 | assert.Equal(t, err, io.EOF) 280 | 281 | testWriteAndIterate(t, wal, 20000, 512) 282 | reader2, err := wal.NewReaderWithStart(&ChunkPosition{SegmentId: 0, BlockNumber: 0, ChunkOffset: 0}) 283 | assert.Nil(t, err) 284 | _, pos2, err := reader2.Next() 285 | assert.Nil(t, err) 286 | assert.Equal(t, pos2.BlockNumber, uint32(0)) 287 | assert.Equal(t, pos2.ChunkOffset, int64(0)) 288 | 289 | reader3, err := wal.NewReaderWithStart(&ChunkPosition{SegmentId: 3, BlockNumber: 5, ChunkOffset: 0}) 290 | assert.Nil(t, err) 291 | _, pos3, err := reader3.Next() 292 | assert.Nil(t, err) 293 | assert.Equal(t, pos3.SegmentId, uint32(3)) 294 | assert.Equal(t, pos3.BlockNumber, uint32(5)) 295 | } 296 | 297 | func TestWAL_RenameFileExt(t *testing.T) { 298 | dir, _ := os.MkdirTemp("", "wal-test-rename-ext") 299 | opts := Options{ 300 | DirPath: dir, 301 | SegmentFileExt: ".VLOG.1.temp", 302 | SegmentSize: 8 * 1024 * 1024, 303 | } 304 | wal, err := Open(opts) 305 | assert.Nil(t, err) 306 | defer destroyWAL(wal) 307 | testWriteAndIterate(t, wal, 20000, 512) 308 | 309 | err = wal.Close() 310 | assert.Nil(t, err) 311 | 312 | err = wal.RenameFileExt(".VLOG.1") 313 | assert.Nil(t, err) 314 | 315 | opts.SegmentFileExt = ".VLOG.1" 316 | wal2, err := Open(opts) 317 | assert.Nil(t, err) 318 | defer func() { 319 | _ = wal2.Close() 320 | }() 321 | for i := 0; i < 20000; i++ { 322 | _, err = wal2.Write([]byte(strings.Repeat("W", 512))) 323 | assert.Nil(t, err) 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /segment_test.go: -------------------------------------------------------------------------------- 1 | package wal 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "math" 7 | "os" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestSegment_Write_FULL1(t *testing.T) { 15 | dir, _ := os.MkdirTemp("", "seg-test-full1") 16 | seg, err := openSegmentFile(dir, ".SEG", 1) 17 | assert.Nil(t, err) 18 | defer func() { 19 | _ = seg.Remove() 20 | }() 21 | 22 | // 1. FULL chunks 23 | val := []byte(strings.Repeat("X", 100)) 24 | 25 | pos1, err := seg.Write(val) 26 | assert.Nil(t, err) 27 | pos2, err := seg.Write(val) 28 | assert.Nil(t, err) 29 | 30 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 31 | assert.Nil(t, err) 32 | assert.Equal(t, val, val1) 33 | 34 | val2, err := seg.Read(pos2.BlockNumber, pos2.ChunkOffset) 35 | assert.Nil(t, err) 36 | assert.Equal(t, val, val2) 37 | 38 | // 2. Write until a new block 39 | for i := 0; i < 100000; i++ { 40 | pos, err := seg.Write(val) 41 | assert.Nil(t, err) 42 | res, err := seg.Read(pos.BlockNumber, pos.ChunkOffset) 43 | assert.Nil(t, err) 44 | assert.Equal(t, val, res) 45 | } 46 | } 47 | 48 | func TestSegment_Write_FULL2(t *testing.T) { 49 | dir, _ := os.MkdirTemp("", "seg-test-full2") 50 | seg, err := openSegmentFile(dir, ".SEG", 1) 51 | assert.Nil(t, err) 52 | defer func() { 53 | _ = seg.Remove() 54 | }() 55 | 56 | // 3. 
chunk full with a block 57 | val := []byte(strings.Repeat("X", blockSize-chunkHeaderSize)) 58 | 59 | pos1, err := seg.Write(val) 60 | assert.Nil(t, err) 61 | assert.Equal(t, pos1.BlockNumber, uint32(0)) 62 | assert.Equal(t, pos1.ChunkOffset, int64(0)) 63 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 64 | assert.Nil(t, err) 65 | assert.Equal(t, val, val1) 66 | 67 | pos2, err := seg.Write(val) 68 | assert.Nil(t, err) 69 | assert.Equal(t, pos2.BlockNumber, uint32(1)) 70 | assert.Equal(t, pos2.ChunkOffset, int64(0)) 71 | val2, err := seg.Read(pos2.BlockNumber, pos2.ChunkOffset) 72 | assert.Nil(t, err) 73 | assert.Equal(t, val, val2) 74 | } 75 | 76 | func TestSegment_Write_Padding(t *testing.T) { 77 | dir, _ := os.MkdirTemp("", "seg-test-padding") 78 | seg, err := openSegmentFile(dir, ".SEG", 1) 79 | assert.Nil(t, err) 80 | defer func() { 81 | _ = seg.Remove() 82 | }() 83 | 84 | // 4. padding 85 | val := []byte(strings.Repeat("X", blockSize-chunkHeaderSize-3)) 86 | 87 | _, err = seg.Write(val) 88 | assert.Nil(t, err) 89 | 90 | pos1, err := seg.Write(val) 91 | assert.Nil(t, err) 92 | assert.Equal(t, pos1.BlockNumber, uint32(1)) 93 | assert.Equal(t, pos1.ChunkOffset, int64(0)) 94 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 95 | assert.Nil(t, err) 96 | assert.Equal(t, val, val1) 97 | } 98 | 99 | func TestSegment_Write_NOT_FULL(t *testing.T) { 100 | dir, _ := os.MkdirTemp("", "seg-test-not-full") 101 | seg, err := openSegmentFile(dir, ".SEG", 1) 102 | assert.Nil(t, err) 103 | defer func() { 104 | _ = seg.Remove() 105 | }() 106 | 107 | // 5. FIRST-LAST 108 | bytes1 := []byte(strings.Repeat("X", blockSize+100)) 109 | 110 | pos1, err := seg.Write(bytes1) 111 | assert.Nil(t, err) 112 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 113 | assert.Nil(t, err) 114 | assert.Equal(t, bytes1, val1) 115 | 116 | pos2, err := seg.Write(bytes1) 117 | assert.Nil(t, err) 118 | val2, err := seg.Read(pos2.BlockNumber, pos2.ChunkOffset) 119 | assert.Nil(t, err) 120 | assert.Equal(t, bytes1, val2) 121 | 122 | pos3, err := seg.Write(bytes1) 123 | assert.Nil(t, err) 124 | val3, err := seg.Read(pos3.BlockNumber, pos3.ChunkOffset) 125 | assert.Nil(t, err) 126 | assert.Equal(t, bytes1, val3) 127 | 128 | // 6. 
FIRST-MIDDLE-LAST 129 | bytes2 := []byte(strings.Repeat("X", blockSize*3+100)) 130 | pos4, err := seg.Write(bytes2) 131 | assert.Nil(t, err) 132 | val4, err := seg.Read(pos4.BlockNumber, pos4.ChunkOffset) 133 | assert.Nil(t, err) 134 | assert.Equal(t, bytes2, val4) 135 | } 136 | 137 | func TestSegment_Reader_FULL(t *testing.T) { 138 | dir, _ := os.MkdirTemp("", "seg-test-reader-full") 139 | seg, err := openSegmentFile(dir, ".SEG", 1) 140 | assert.Nil(t, err) 141 | defer func() { 142 | _ = seg.Remove() 143 | }() 144 | 145 | // FULL chunks 146 | bytes1 := []byte(strings.Repeat("X", blockSize+100)) 147 | pos1, err := seg.Write(bytes1) 148 | assert.Nil(t, err) 149 | pos2, err := seg.Write(bytes1) 150 | assert.Nil(t, err) 151 | 152 | reader := seg.NewReader() 153 | val, rpos1, err := reader.Next() 154 | assert.Nil(t, err) 155 | assert.Equal(t, bytes1, val) 156 | assert.Equal(t, pos1, rpos1) 157 | 158 | val, rpos2, err := reader.Next() 159 | assert.Nil(t, err) 160 | assert.Equal(t, bytes1, val) 161 | assert.Equal(t, pos2, rpos2) 162 | 163 | val, rpos3, err := reader.Next() 164 | assert.Nil(t, val) 165 | assert.Equal(t, err, io.EOF) 166 | assert.Nil(t, rpos3) 167 | } 168 | 169 | func TestSegment_Reader_Padding(t *testing.T) { 170 | dir, _ := os.MkdirTemp("", "seg-test-reader-padding") 171 | seg, err := openSegmentFile(dir, ".SEG", 1) 172 | assert.Nil(t, err) 173 | defer func() { 174 | _ = seg.Remove() 175 | }() 176 | 177 | bytes1 := []byte(strings.Repeat("X", blockSize-chunkHeaderSize-7)) 178 | 179 | pos1, err := seg.Write(bytes1) 180 | assert.Nil(t, err) 181 | pos2, err := seg.Write(bytes1) 182 | assert.Nil(t, err) 183 | 184 | reader := seg.NewReader() 185 | val, rpos1, err := reader.Next() 186 | assert.Nil(t, err) 187 | assert.Equal(t, bytes1, val) 188 | assert.Equal(t, pos1.SegmentId, rpos1.SegmentId) 189 | assert.Equal(t, pos1.BlockNumber, rpos1.BlockNumber) 190 | assert.Equal(t, pos1.ChunkOffset, rpos1.ChunkOffset) 191 | 192 | val, rpos2, err := reader.Next() 193 | assert.Nil(t, err) 194 | assert.Equal(t, bytes1, val) 195 | assert.Equal(t, pos2.SegmentId, rpos2.SegmentId) 196 | assert.Equal(t, pos2.BlockNumber, rpos2.BlockNumber) 197 | assert.Equal(t, pos2.ChunkOffset, rpos2.ChunkOffset) 198 | 199 | _, _, err = reader.Next() 200 | assert.Equal(t, err, io.EOF) 201 | } 202 | 203 | func TestSegment_Reader_NOT_FULL(t *testing.T) { 204 | dir, _ := os.MkdirTemp("", "seg-test-reader-not-full") 205 | seg, err := openSegmentFile(dir, ".SEG", 1) 206 | assert.Nil(t, err) 207 | defer func() { 208 | _ = seg.Remove() 209 | }() 210 | 211 | bytes1 := []byte(strings.Repeat("X", blockSize+100)) 212 | pos1, err := seg.Write(bytes1) 213 | assert.Nil(t, err) 214 | pos2, err := seg.Write(bytes1) 215 | assert.Nil(t, err) 216 | 217 | bytes2 := []byte(strings.Repeat("X", blockSize*3+10)) 218 | pos3, err := seg.Write(bytes2) 219 | assert.Nil(t, err) 220 | pos4, err := seg.Write(bytes2) 221 | assert.Nil(t, err) 222 | 223 | reader := seg.NewReader() 224 | val, rpos1, err := reader.Next() 225 | assert.Nil(t, err) 226 | assert.Equal(t, bytes1, val) 227 | 228 | val, rpos2, err := reader.Next() 229 | assert.Nil(t, err) 230 | assert.Equal(t, bytes1, val) 231 | 232 | val, rpos3, err := reader.Next() 233 | assert.Nil(t, err) 234 | assert.Equal(t, bytes2, val) 235 | 236 | val, rpos4, err := reader.Next() 237 | assert.Nil(t, err) 238 | assert.Equal(t, bytes2, val) 239 | 240 | _, _, err = reader.Next() 241 | assert.Equal(t, err, io.EOF) 242 | 243 | assert.Equal(t, pos1, rpos1) 244 | assert.Equal(t, pos2, rpos2) 245 | 
assert.Equal(t, pos3, rpos3) 246 | assert.Equal(t, pos4, rpos4) 247 | } 248 | 249 | func TestSegment_Reader_ManyChunks_FULL(t *testing.T) { 250 | dir, _ := os.MkdirTemp("", "seg-test-reader-ManyChunks_FULL") 251 | seg, err := openSegmentFile(dir, ".SEG", 1) 252 | assert.Nil(t, err) 253 | defer func() { 254 | _ = seg.Remove() 255 | }() 256 | 257 | positions := make([]*ChunkPosition, 0) 258 | bytes1 := []byte(strings.Repeat("X", 128)) 259 | for i := 1; i <= 1000000; i++ { 260 | pos, err := seg.Write(bytes1) 261 | assert.Nil(t, err) 262 | positions = append(positions, pos) 263 | } 264 | 265 | reader := seg.NewReader() 266 | var values [][]byte 267 | var i = 0 268 | for { 269 | val, pos, err := reader.Next() 270 | if err == io.EOF { 271 | break 272 | } 273 | assert.Nil(t, err) 274 | assert.Equal(t, bytes1, val) 275 | values = append(values, val) 276 | 277 | assert.Equal(t, positions[i].SegmentId, pos.SegmentId) 278 | assert.Equal(t, positions[i].BlockNumber, pos.BlockNumber) 279 | assert.Equal(t, positions[i].ChunkOffset, pos.ChunkOffset) 280 | 281 | i++ 282 | } 283 | assert.Equal(t, 1000000, len(values)) 284 | } 285 | 286 | func TestSegment_Reader_ManyChunks_NOT_FULL(t *testing.T) { 287 | dir, _ := os.MkdirTemp("", "seg-test-reader-ManyChunks_NOT_FULL") 288 | seg, err := openSegmentFile(dir, ".SEG", 1) 289 | assert.Nil(t, err) 290 | defer func() { 291 | _ = seg.Remove() 292 | }() 293 | 294 | positions := make([]*ChunkPosition, 0) 295 | bytes1 := []byte(strings.Repeat("X", blockSize*3+10)) 296 | for i := 1; i <= 10000; i++ { 297 | pos, err := seg.Write(bytes1) 298 | assert.Nil(t, err) 299 | positions = append(positions, pos) 300 | } 301 | 302 | reader := seg.NewReader() 303 | var values [][]byte 304 | var i = 0 305 | for { 306 | val, pos, err := reader.Next() 307 | if err == io.EOF { 308 | break 309 | } 310 | assert.Nil(t, err) 311 | assert.Equal(t, bytes1, val) 312 | values = append(values, val) 313 | 314 | assert.Equal(t, positions[i].SegmentId, pos.SegmentId) 315 | assert.Equal(t, positions[i].BlockNumber, pos.BlockNumber) 316 | assert.Equal(t, positions[i].ChunkOffset, pos.ChunkOffset) 317 | 318 | i++ 319 | } 320 | assert.Equal(t, 10000, len(values)) 321 | } 322 | 323 | func TestSegment_Write_LargeSize(t *testing.T) { 324 | t.Run("Block-10000", func(t *testing.T) { 325 | testSegmentReaderLargeSize(t, blockSize-chunkHeaderSize, 10000) 326 | }) 327 | t.Run("32*Block-1000", func(t *testing.T) { 328 | testSegmentReaderLargeSize(t, 32*blockSize, 1000) 329 | }) 330 | t.Run("64*Block-100", func(t *testing.T) { 331 | testSegmentReaderLargeSize(t, 64*blockSize, 100) 332 | }) 333 | } 334 | 335 | func testSegmentReaderLargeSize(t *testing.T, size int, count int) { 336 | dir, _ := os.MkdirTemp("", "seg-test-reader-ManyChunks_large_size") 337 | seg, err := openSegmentFile(dir, ".SEG", 1) 338 | assert.Nil(t, err) 339 | defer func() { 340 | _ = seg.Remove() 341 | }() 342 | 343 | positions := make([]*ChunkPosition, 0) 344 | bytes1 := []byte(strings.Repeat("W", size)) 345 | for i := 1; i <= count; i++ { 346 | pos, err := seg.Write(bytes1) 347 | assert.Nil(t, err) 348 | positions = append(positions, pos) 349 | } 350 | 351 | reader := seg.NewReader() 352 | var values [][]byte 353 | var i = 0 354 | for { 355 | val, pos, err := reader.Next() 356 | if err == io.EOF { 357 | break 358 | } 359 | assert.Nil(t, err) 360 | assert.Equal(t, bytes1, val) 361 | values = append(values, val) 362 | 363 | assert.Equal(t, positions[i].SegmentId, pos.SegmentId) 364 | assert.Equal(t, positions[i].BlockNumber, pos.BlockNumber) 
365 | assert.Equal(t, positions[i].ChunkOffset, pos.ChunkOffset) 366 | 367 | i++ 368 | } 369 | assert.Equal(t, count, len(values)) 370 | } 371 | 372 | func TestChunkPosition_Encode(t *testing.T) { 373 | validate := func(pos *ChunkPosition) { 374 | res := pos.Encode() 375 | assert.NotNil(t, res) 376 | decRes := DecodeChunkPosition(res) 377 | assert.Equal(t, pos, decRes) 378 | } 379 | 380 | validate(&ChunkPosition{1, 2, 3, 100}) 381 | validate(&ChunkPosition{0, 0, 0, 0}) 382 | validate(&ChunkPosition{math.MaxUint32, math.MaxUint32, math.MaxInt64, math.MaxUint32}) 383 | } 384 | 385 | func TestChunkPosition_EncodeFixedSize(t *testing.T) { 386 | validate := func(pos *ChunkPosition) { 387 | res := pos.EncodeFixedSize() 388 | assert.NotNil(t, res) 389 | assert.Equal(t, binary.MaxVarintLen32*3+binary.MaxVarintLen64, len(res)) 390 | decRes := DecodeChunkPosition(res) 391 | assert.Equal(t, pos, decRes) 392 | } 393 | 394 | validate(&ChunkPosition{1, 2, 3, 100}) 395 | validate(&ChunkPosition{0, 0, 0, 0}) 396 | validate(&ChunkPosition{math.MaxUint32, math.MaxUint32, math.MaxInt64, math.MaxUint32}) 397 | } 398 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/wal.go:
--------------------------------------------------------------------------------
1 | package wal
2 | 
3 | import (
4 | 	"errors"
5 | 	"fmt"
6 | 	"io"
7 | 	"os"
8 | 	"path/filepath"
9 | 	"sort"
10 | 	"strings"
11 | 	"sync"
12 | 	"time"
13 | )
14 | 
15 | const (
16 | 	initialSegmentFileID = 1
17 | )
18 | 
19 | var (
20 | 	ErrValueTooLarge       = errors.New("the data size can't be larger than segment size")
21 | 	ErrPendingSizeTooLarge = errors.New("the upper bound of pendingWrites can't be larger than segment size")
22 | )
23 | 
24 | // WAL represents a Write-Ahead Log structure that provides durability
25 | // and fault-tolerance for incoming writes.
26 | // It consists of an activeSegment, which is the current segment file
27 | // used for new incoming writes, and olderSegments,
28 | // which is a map of segment files used for read operations.
29 | //
30 | // The options field stores various configuration options for the WAL.
31 | //
32 | // The mu sync.RWMutex is used for concurrent access to the WAL data structure,
33 | // ensuring safe access and modification.
34 | type WAL struct {
35 | 	activeSegment     *segment               // active segment file, used for new incoming writes.
36 | 	olderSegments     map[SegmentID]*segment // older segment files, only used for read.
37 | 	options           Options
38 | 	mu                sync.RWMutex
39 | 	bytesWrite        uint32
40 | 	renameIds         []SegmentID
41 | 	pendingWrites     [][]byte
42 | 	pendingSize       int64
43 | 	pendingWritesLock sync.Mutex
44 | 	closeC            chan struct{}
45 | 	syncTicker        *time.Ticker
46 | }
47 | 
48 | // Reader represents a reader for the WAL.
49 | // It consists of segmentReaders, which is a slice of segmentReader
50 | // structures sorted by segment id,
51 | // and currentReader, which is the index of the current segmentReader in the slice.
52 | //
53 | // The currentReader field is used to iterate over the segmentReaders slice.
54 | type Reader struct {
55 | 	segmentReaders []*segmentReader
56 | 	currentReader  int
57 | }
58 | 
59 | // Open opens a WAL with the given options.
60 | // It will create the directory if it does not exist, and open all segment files in the directory.
61 | // If there is no segment file in the directory, it will create a new one.
62 | func Open(options Options) (*WAL, error) {
63 | 	if !strings.HasPrefix(options.SegmentFileExt, ".") {
64 | 		return nil, fmt.Errorf("segment file extension must start with '.'")
65 | 	}
66 | 	wal := &WAL{
67 | 		options:       options,
68 | 		olderSegments: make(map[SegmentID]*segment),
69 | 		pendingWrites: make([][]byte, 0),
70 | 		closeC:        make(chan struct{}),
71 | 	}
72 | 
73 | 	// create the directory if it does not exist.
74 | 	if err := os.MkdirAll(options.DirPath, os.ModePerm); err != nil {
75 | 		return nil, err
76 | 	}
77 | 
78 | 	// iterate the dir and open all segment files.
79 | 	entries, err := os.ReadDir(options.DirPath)
80 | 	if err != nil {
81 | 		return nil, err
82 | 	}
83 | 
84 | 	// get all segment file ids.
85 | 	var segmentIDs []int
86 | 	for _, entry := range entries {
87 | 		if entry.IsDir() {
88 | 			continue
89 | 		}
90 | 		var id int
91 | 		_, err := fmt.Sscanf(entry.Name(), "%d"+options.SegmentFileExt, &id)
92 | 		if err != nil {
93 | 			continue
94 | 		}
95 | 		segmentIDs = append(segmentIDs, id)
96 | 	}
97 | 
98 | 	// empty directory, just initialize a new segment file.
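	// Segment file names are a zero-padded 9-digit id plus the configured
	// extension, e.g. 000000001.SEG (see SegmentFileName below), which is
	// what the Sscanf pattern above parses back into a numeric id.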
99 | 	if len(segmentIDs) == 0 {
100 | 		segment, err := openSegmentFile(options.DirPath, options.SegmentFileExt,
101 | 			initialSegmentFileID)
102 | 		if err != nil {
103 | 			return nil, err
104 | 		}
105 | 		wal.activeSegment = segment
106 | 	} else {
107 | 		// open the segment files in order, get the max one as the active segment file.
108 | 		sort.Ints(segmentIDs)
109 | 
110 | 		for i, segId := range segmentIDs {
111 | 			segment, err := openSegmentFile(options.DirPath, options.SegmentFileExt,
112 | 				uint32(segId))
113 | 			if err != nil {
114 | 				return nil, err
115 | 			}
116 | 			if i == len(segmentIDs)-1 {
117 | 				wal.activeSegment = segment
118 | 			} else {
119 | 				wal.olderSegments[segment.id] = segment
120 | 			}
121 | 		}
122 | 	}
123 | 
124 | 	// only start the sync operation if the SyncInterval is greater than 0.
125 | 	if wal.options.SyncInterval > 0 {
126 | 		wal.syncTicker = time.NewTicker(wal.options.SyncInterval)
127 | 		go func() {
128 | 			for {
129 | 				select {
130 | 				case <-wal.syncTicker.C:
131 | 					_ = wal.Sync()
132 | 				case <-wal.closeC:
133 | 					wal.syncTicker.Stop()
134 | 					return
135 | 				}
136 | 			}
137 | 		}()
138 | 	}
139 | 
140 | 	return wal, nil
141 | }
142 | 
143 | // SegmentFileName returns the file name of a segment file.
144 | func SegmentFileName(dirPath string, extName string, id SegmentID) string {
145 | 	return filepath.Join(dirPath, fmt.Sprintf("%09d"+extName, id))
146 | }
147 | 
148 | // OpenNewActiveSegment opens a new segment file
149 | // and sets it as the active segment file.
150 | // It is used when the active segment file is not yet full,
151 | // but the user wants to create a new segment file.
152 | //
153 | // It is now used by the Merge operation of rosedb, not a common usage for most users.
154 | func (wal *WAL) OpenNewActiveSegment() error {
155 | 	wal.mu.Lock()
156 | 	defer wal.mu.Unlock()
157 | 	// sync the active segment file.
158 | 	if err := wal.activeSegment.Sync(); err != nil {
159 | 		return err
160 | 	}
161 | 	// create a new segment file and set it as the active one.
162 | 	segment, err := openSegmentFile(wal.options.DirPath, wal.options.SegmentFileExt,
163 | 		wal.activeSegment.id+1)
164 | 	if err != nil {
165 | 		return err
166 | 	}
167 | 	wal.olderSegments[wal.activeSegment.id] = wal.activeSegment
168 | 	wal.activeSegment = segment
169 | 	return nil
170 | }
171 | 
172 | // ActiveSegmentID returns the id of the active segment file.
173 | func (wal *WAL) ActiveSegmentID() SegmentID {
174 | 	wal.mu.RLock()
175 | 	defer wal.mu.RUnlock()
176 | 
177 | 	return wal.activeSegment.id
178 | }
179 | 
180 | // IsEmpty returns whether the WAL is empty.
181 | // The WAL is empty only when there is a single active segment file and it holds no data.
182 | func (wal *WAL) IsEmpty() bool {
183 | 	wal.mu.RLock()
184 | 	defer wal.mu.RUnlock()
185 | 
186 | 	return len(wal.olderSegments) == 0 && wal.activeSegment.Size() == 0
187 | }
188 | 
189 | // SetIsStartupTraversal should only be used while the WAL is being traversed at startup,
190 | // such as during rosedb/lotusdb startup, so it's not a common usage for most users.
191 | // And notice that if you set it to true, only one reader can read the data from the WAL
192 | // (Single Thread).
193 | func (wal *WAL) SetIsStartupTraversal(v bool) {
194 | 	for _, seg := range wal.olderSegments {
195 | 		seg.isStartupTraversal = v
196 | 	}
197 | 	wal.activeSegment.isStartupTraversal = v
198 | }
199 | 
200 | // NewReaderWithMax returns a new reader for the WAL,
201 | // and the reader will only read the data from the segment file
202 | // whose id is less than or equal to the given segId.
203 | // 204 | // It is now used by the Merge operation of rosedb, not a common usage for most users. 205 | func (wal *WAL) NewReaderWithMax(segId SegmentID) *Reader { 206 | wal.mu.RLock() 207 | defer wal.mu.RUnlock() 208 | 209 | // get all segment readers. 210 | var segmentReaders []*segmentReader 211 | for _, segment := range wal.olderSegments { 212 | if segId == 0 || segment.id <= segId { 213 | reader := segment.NewReader() 214 | segmentReaders = append(segmentReaders, reader) 215 | } 216 | } 217 | if segId == 0 || wal.activeSegment.id <= segId { 218 | reader := wal.activeSegment.NewReader() 219 | segmentReaders = append(segmentReaders, reader) 220 | } 221 | 222 | // sort the segment readers by segment id. 223 | sort.Slice(segmentReaders, func(i, j int) bool { 224 | return segmentReaders[i].segment.id < segmentReaders[j].segment.id 225 | }) 226 | 227 | return &Reader{ 228 | segmentReaders: segmentReaders, 229 | currentReader: 0, 230 | } 231 | } 232 | 233 | // NewReaderWithStart returns a new reader for the WAL, 234 | // and the reader will only read the data from the segment file 235 | // whose position is greater than or equal to the given position. 236 | func (wal *WAL) NewReaderWithStart(startPos *ChunkPosition) (*Reader, error) { 237 | if startPos == nil { 238 | return nil, errors.New("start position is nil") 239 | } 240 | wal.mu.RLock() 241 | defer wal.mu.RUnlock() 242 | 243 | reader := wal.NewReader() 244 | for { 245 | // skip the segment readers whose id is less than the given position's segment id. 246 | if reader.CurrentSegmentId() < startPos.SegmentId { 247 | reader.SkipCurrentSegment() 248 | continue 249 | } 250 | // skip the chunk whose position is less than the given position. 251 | currentPos := reader.CurrentChunkPosition() 252 | if currentPos.BlockNumber >= startPos.BlockNumber && 253 | currentPos.ChunkOffset >= startPos.ChunkOffset { 254 | break 255 | } 256 | // call Next to find again. 257 | if _, _, err := reader.Next(); err != nil { 258 | if err == io.EOF { 259 | break 260 | } 261 | return nil, err 262 | } 263 | } 264 | return reader, nil 265 | } 266 | 267 | // NewReader returns a new reader for the WAL. 268 | // It will iterate all segment files and read all data from them. 269 | func (wal *WAL) NewReader() *Reader { 270 | return wal.NewReaderWithMax(0) 271 | } 272 | 273 | // Next returns the next chunk data and its position in the WAL. 274 | // If there is no data, io.EOF will be returned. 275 | // 276 | // The position can be used to read the data from the segment file. 277 | func (r *Reader) Next() ([]byte, *ChunkPosition, error) { 278 | if r.currentReader >= len(r.segmentReaders) { 279 | return nil, nil, io.EOF 280 | } 281 | 282 | data, position, err := r.segmentReaders[r.currentReader].Next() 283 | if err == io.EOF { 284 | r.currentReader++ 285 | return r.Next() 286 | } 287 | return data, position, err 288 | } 289 | 290 | // SkipCurrentSegment skips the current segment file 291 | // when reading the WAL. 292 | // 293 | // It is now used by the Merge operation of rosedb, not a common usage for most users. 294 | func (r *Reader) SkipCurrentSegment() { 295 | r.currentReader++ 296 | } 297 | 298 | // CurrentSegmentId returns the id of the current segment file 299 | // when reading the WAL. 
func (r *Reader) CurrentSegmentId() SegmentID {
	return r.segmentReaders[r.currentReader].segment.id
}

// CurrentChunkPosition returns the position of the current chunk data.
func (r *Reader) CurrentChunkPosition() *ChunkPosition {
	reader := r.segmentReaders[r.currentReader]
	return &ChunkPosition{
		SegmentId:   reader.segment.id,
		BlockNumber: reader.blockNumber,
		ChunkOffset: reader.chunkOffset,
	}
}

// ClearPendingWrites clears the pending writes and resets the pending size.
func (wal *WAL) ClearPendingWrites() {
	wal.pendingWritesLock.Lock()
	defer wal.pendingWritesLock.Unlock()

	wal.pendingSize = 0
	wal.pendingWrites = wal.pendingWrites[:0]
}

// PendingWrites adds data to wal.pendingWrites to wait for a batch write.
// If the accumulated pending data exceeds the size of one segment,
// the subsequent WriteAll call will return ErrPendingSizeTooLarge
// and clear the pending writes.
func (wal *WAL) PendingWrites(data []byte) {
	wal.pendingWritesLock.Lock()
	defer wal.pendingWritesLock.Unlock()

	size := wal.maxDataWriteSize(int64(len(data)))
	wal.pendingSize += size
	wal.pendingWrites = append(wal.pendingWrites, data)
}

// rotateActiveSegment creates a new segment file and replaces the activeSegment.
func (wal *WAL) rotateActiveSegment() error {
	if err := wal.activeSegment.Sync(); err != nil {
		return err
	}
	wal.bytesWrite = 0
	segment, err := openSegmentFile(wal.options.DirPath, wal.options.SegmentFileExt,
		wal.activeSegment.id+1)
	if err != nil {
		return err
	}
	wal.olderSegments[wal.activeSegment.id] = wal.activeSegment
	wal.activeSegment = segment
	return nil
}

// WriteAll writes wal.pendingWrites to the WAL and then clears the pending writes.
// It will not sync the segment file based on wal.options; you should call Sync() manually.
func (wal *WAL) WriteAll() ([]*ChunkPosition, error) {
	if len(wal.pendingWrites) == 0 {
		return make([]*ChunkPosition, 0), nil
	}

	wal.mu.Lock()
	defer func() {
		wal.ClearPendingWrites()
		wal.mu.Unlock()
	}()

	// if the pending size is still larger than segment size, return error
	if wal.pendingSize > wal.options.SegmentSize {
		return nil, ErrPendingSizeTooLarge
	}

	// if the active segment file is full, sync it and create a new one.
	if wal.activeSegment.Size()+wal.pendingSize > wal.options.SegmentSize {
		if err := wal.rotateActiveSegment(); err != nil {
			return nil, err
		}
	}

	// write all data to the active segment file.
	positions, err := wal.activeSegment.writeAll(wal.pendingWrites)
	if err != nil {
		return nil, err
	}

	return positions, nil
}

// Write writes the data to the WAL.
// Actually, it writes the data to the active segment file.
// It returns the position of the data in the WAL, and an error if any.
func (wal *WAL) Write(data []byte) (*ChunkPosition, error) {
	wal.mu.Lock()
	defer wal.mu.Unlock()
	if int64(len(data))+chunkHeaderSize > wal.options.SegmentSize {
		return nil, ErrValueTooLarge
	}
	// if the active segment file is full, sync it and create a new one.
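	// "Full" is judged pessimistically here: isFull asks whether the
	// worst-case on-disk footprint of this write, as estimated by
	// maxDataWriteSize (extra chunk headers plus possible block padding),
	// would push the segment past SegmentSize, so rotation can happen
	// slightly before the file is byte-exact full.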
	if wal.isFull(int64(len(data))) {
		if err := wal.rotateActiveSegment(); err != nil {
			return nil, err
		}
	}

	// write the data to the active segment file.
	position, err := wal.activeSegment.Write(data)
	if err != nil {
		return nil, err
	}

	// update the bytesWrite field.
	wal.bytesWrite += position.ChunkSize

	// sync the active segment file if needed.
	var needSync = wal.options.Sync
	if !needSync && wal.options.BytesPerSync > 0 {
		needSync = wal.bytesWrite >= wal.options.BytesPerSync
	}
	if needSync {
		if err := wal.activeSegment.Sync(); err != nil {
			return nil, err
		}
		wal.bytesWrite = 0
	}

	return position, nil
}

// Read reads the data from the WAL according to the given position.
func (wal *WAL) Read(pos *ChunkPosition) ([]byte, error) {
	wal.mu.RLock()
	defer wal.mu.RUnlock()

	// find the segment file according to the position.
	var segment *segment
	if pos.SegmentId == wal.activeSegment.id {
		segment = wal.activeSegment
	} else {
		segment = wal.olderSegments[pos.SegmentId]
	}

	if segment == nil {
		return nil, fmt.Errorf("segment file %d%s not found", pos.SegmentId, wal.options.SegmentFileExt)
	}

	// read the data from the segment file.
	return segment.Read(pos.BlockNumber, pos.ChunkOffset)
}

// Close closes the WAL.
func (wal *WAL) Close() error {
	wal.mu.Lock()
	defer wal.mu.Unlock()

	select {
	case <-wal.closeC:
		// channel is already closed
	default:
		close(wal.closeC)
	}

	// close all segment files.
	for _, segment := range wal.olderSegments {
		if err := segment.Close(); err != nil {
			return err
		}
		wal.renameIds = append(wal.renameIds, segment.id)
	}
	wal.olderSegments = nil

	wal.renameIds = append(wal.renameIds, wal.activeSegment.id)
	// close the active segment file.
	return wal.activeSegment.Close()
}

// Delete deletes all segment files of the WAL.
func (wal *WAL) Delete() error {
	wal.mu.Lock()
	defer wal.mu.Unlock()

	// delete all segment files.
	for _, segment := range wal.olderSegments {
		if err := segment.Remove(); err != nil {
			return err
		}
	}
	wal.olderSegments = nil

	// delete the active segment file.
	return wal.activeSegment.Remove()
}

// Sync syncs the active segment file to stable storage like disk.
func (wal *WAL) Sync() error {
	wal.mu.Lock()
	defer wal.mu.Unlock()

	return wal.activeSegment.Sync()
}

// RenameFileExt renames the extension of all segment files.
// It is now used by the Merge operation of lotusdb, not a common usage for most users.
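//
// A sketch (the extension value is assumed; note that renameIds is only
// populated by Close, so the WAL should be closed before renaming):
//
//	_ = walFile.Close()
//	err := walFile.RenameFileExt(".SEG.NEW")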
func (wal *WAL) RenameFileExt(ext string) error {
	if !strings.HasPrefix(ext, ".") {
		return fmt.Errorf("segment file extension must start with '.'")
	}
	wal.mu.Lock()
	defer wal.mu.Unlock()

	renameFile := func(id SegmentID) error {
		oldName := SegmentFileName(wal.options.DirPath, wal.options.SegmentFileExt, id)
		newName := SegmentFileName(wal.options.DirPath, ext, id)
		return os.Rename(oldName, newName)
	}

	for _, id := range wal.renameIds {
		if err := renameFile(id); err != nil {
			return err
		}
	}

	wal.options.SegmentFileExt = ext
	return nil
}

func (wal *WAL) isFull(delta int64) bool {
	return wal.activeSegment.Size()+wal.maxDataWriteSize(delta) > wal.options.SegmentSize
}

// maxDataWriteSize calculates the maximum possible size on disk:
// maximum size = max padding + (num_blocks + 1) * headerSize + dataSize
func (wal *WAL) maxDataWriteSize(size int64) int64 {
	return chunkHeaderSize + size + (size/blockSize+1)*chunkHeaderSize
}
--------------------------------------------------------------------------------
/segment.go:
--------------------------------------------------------------------------------
package wal

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash/crc32"
	"io"
	"os"
	"sync"

	"github.com/valyala/bytebufferpool"
)

type ChunkType = byte
type SegmentID = uint32

const (
	ChunkTypeFull ChunkType = iota
	ChunkTypeFirst
	ChunkTypeMiddle
	ChunkTypeLast
)

var (
	ErrClosed     = errors.New("the segment file is closed")
	ErrInvalidCRC = errors.New("invalid crc, the data may be corrupted")
)

const (
	// 7 Bytes
	// Checksum Length Type
	//    4       2     1
	chunkHeaderSize = 7

	// 32 KB
	blockSize = 32 * KB

	fileModePerm = 0644

	// uint32 + uint32 + int64 + uint32
	// segmentId + BlockNumber + ChunkOffset + ChunkSize
	maxLen = binary.MaxVarintLen32*3 + binary.MaxVarintLen64
)

// segment represents a single segment file in the WAL.
// The segment file is append-only, and the data is written in blocks.
// Each block is 32KB, and the data is written in chunks.
type segment struct {
	id                 SegmentID
	fd                 *os.File
	currentBlockNumber uint32
	currentBlockSize   uint32
	closed             bool
	header             []byte
	startupBlock       *startupBlock
	isStartupTraversal bool
}

// segmentReader is used to iterate all the data from the segment file.
// You can call Next to get the next chunk data,
// and io.EOF will be returned when there is no more data.
type segmentReader struct {
	segment     *segment
	blockNumber uint32
	chunkOffset int64
}

// There is only one reader (a single goroutine) for startup traversal,
// so we can reuse one block for the whole traversal
// to avoid memory allocation.
type startupBlock struct {
	block       []byte
	blockNumber int64
}

// ChunkPosition represents the position of a chunk in a segment file.
// Used to read the data from the segment file.
type ChunkPosition struct {
	SegmentId SegmentID
	// BlockNumber The block number of the chunk in the segment file.
	BlockNumber uint32
	// ChunkOffset The start offset of the chunk in the block.
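	// It is relative to the start of the block, not of the file: the
	// absolute file offset of a chunk is BlockNumber*blockSize + ChunkOffset.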
	ChunkOffset int64
	// ChunkSize How many bytes the chunk data takes up in the segment file.
	ChunkSize uint32
}

var blockPool = sync.Pool{
	New: func() interface{} {
		return make([]byte, blockSize)
	},
}

func getBuffer() []byte {
	return blockPool.Get().([]byte)
}

func putBuffer(buf []byte) {
	blockPool.Put(buf)
}

// openSegmentFile opens an existing segment file or creates a new one.
func openSegmentFile(dirPath, extName string, id uint32) (*segment, error) {
	fd, err := os.OpenFile(
		SegmentFileName(dirPath, extName, id),
		os.O_CREATE|os.O_RDWR|os.O_APPEND,
		fileModePerm,
	)

	if err != nil {
		return nil, err
	}

	// set the current block number and block size.
	offset, err := fd.Seek(0, io.SeekEnd)
	if err != nil {
		return nil, fmt.Errorf("seek to the end of segment file %d%s failed: %v", id, extName, err)
	}

	return &segment{
		id:                 id,
		fd:                 fd,
		header:             make([]byte, chunkHeaderSize),
		currentBlockNumber: uint32(offset / blockSize),
		currentBlockSize:   uint32(offset % blockSize),
		startupBlock: &startupBlock{
			block:       make([]byte, blockSize),
			blockNumber: -1,
		},
		isStartupTraversal: false,
	}, nil
}

// NewReader creates a new segment reader.
// You can call Next to get the next chunk data,
// and io.EOF will be returned when there is no more data.
func (seg *segment) NewReader() *segmentReader {
	return &segmentReader{
		segment:     seg,
		blockNumber: 0,
		chunkOffset: 0,
	}
}

// Sync flushes the segment file to disk.
func (seg *segment) Sync() error {
	if seg.closed {
		return nil
	}
	return seg.fd.Sync()
}

// Remove removes the segment file.
func (seg *segment) Remove() error {
	if !seg.closed {
		seg.closed = true
		if err := seg.fd.Close(); err != nil {
			return err
		}
	}

	return os.Remove(seg.fd.Name())
}

// Close closes the segment file.
func (seg *segment) Close() error {
	if seg.closed {
		return nil
	}

	seg.closed = true
	return seg.fd.Close()
}

// Size returns the size of the segment file.
func (seg *segment) Size() int64 {
	size := int64(seg.currentBlockNumber) * int64(blockSize)
	return size + int64(seg.currentBlockSize)
}

// writeToBuffer calculates the ChunkPosition for the data, writes the data
// to the chunk buffer, and updates the segment status.
// The data will be written in chunks, and a chunk has four types:
// ChunkTypeFull, ChunkTypeFirst, ChunkTypeMiddle, ChunkTypeLast.
//
// Each chunk has a header, and the header contains the checksum, length and type.
// The payload of the chunk is the actual data you want to write.
func (seg *segment) writeToBuffer(data []byte, chunkBuffer *bytebufferpool.ByteBuffer) (*ChunkPosition, error) {
	startBufferLen := chunkBuffer.Len()
	padding := uint32(0)

	if seg.closed {
		return nil, ErrClosed
	}

	// if the remaining block space cannot hold the chunk header, pad the block.
	if seg.currentBlockSize+chunkHeaderSize >= blockSize {
		// padding if necessary
		if seg.currentBlockSize < blockSize {
			p := make([]byte, blockSize-seg.currentBlockSize)
			chunkBuffer.B = append(chunkBuffer.B, p...)
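			// For example, if currentBlockSize is 32KB-3, fewer than the
			// 7 header bytes remain, so 3 zero bytes are appended and the
			// next chunk starts at offset 0 of a fresh block.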
			padding += blockSize - seg.currentBlockSize

			// a new block
			seg.currentBlockNumber += 1
			seg.currentBlockSize = 0
		}
	}

	// record the start position of the chunk, so the user can use it to read the data later.
	position := &ChunkPosition{
		SegmentId:   seg.id,
		BlockNumber: seg.currentBlockNumber,
		ChunkOffset: int64(seg.currentBlockSize),
	}

	dataSize := uint32(len(data))
	// The entire chunk can fit into the block.
	if seg.currentBlockSize+dataSize+chunkHeaderSize <= blockSize {
		seg.appendChunkBuffer(chunkBuffer, data, ChunkTypeFull)
		position.ChunkSize = dataSize + chunkHeaderSize
	} else {
		// If the data cannot fit into the current block,
		// it is split into multiple chunks written across blocks.
		var (
			leftSize             = dataSize
			blockCount    uint32 = 0
			currBlockSize        = seg.currentBlockSize
		)

		for leftSize > 0 {
			chunkSize := blockSize - currBlockSize - chunkHeaderSize
			if chunkSize > leftSize {
				chunkSize = leftSize
			}

			var end = dataSize - leftSize + chunkSize
			if end > dataSize {
				end = dataSize
			}

			// append the chunks to the buffer
			var chunkType ChunkType
			switch leftSize {
			case dataSize: // First chunk
				chunkType = ChunkTypeFirst
			case chunkSize: // Last chunk
				chunkType = ChunkTypeLast
			default: // Middle chunk
				chunkType = ChunkTypeMiddle
			}
			seg.appendChunkBuffer(chunkBuffer, data[dataSize-leftSize:end], chunkType)

			leftSize -= chunkSize
			blockCount += 1
			currBlockSize = (currBlockSize + chunkSize + chunkHeaderSize) % blockSize
		}
		position.ChunkSize = blockCount*chunkHeaderSize + dataSize
	}

	// the buffer length must be equal to the chunk size plus the padding length.
	endBufferLen := chunkBuffer.Len()
	if position.ChunkSize+padding != uint32(endBufferLen-startBufferLen) {
		return nil, fmt.Errorf("the chunk size %d is not equal to the buffer length %d",
			position.ChunkSize+padding, endBufferLen-startBufferLen)
	}

	// update segment status
	seg.currentBlockSize += position.ChunkSize
	if seg.currentBlockSize >= blockSize {
		seg.currentBlockNumber += seg.currentBlockSize / blockSize
		seg.currentBlockSize = seg.currentBlockSize % blockSize
	}

	return position, nil
}

// writeAll writes a batch of data to the segment file.
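// All chunks are first staged in a single pooled buffer and flushed with
// one fd.Write call; on error the in-memory block counters are simply
// rolled back, and a partially persisted batch, if any, is caught by the
// CRC check on a later read.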
func (seg *segment) writeAll(data [][]byte) (positions []*ChunkPosition, err error) {
	if seg.closed {
		return nil, ErrClosed
	}

	// if any error occurs, restore the segment status.
	originBlockNumber := seg.currentBlockNumber
	originBlockSize := seg.currentBlockSize

	// init chunk buffer
	chunkBuffer := bytebufferpool.Get()
	chunkBuffer.Reset()
	defer func() {
		if err != nil {
			seg.currentBlockNumber = originBlockNumber
			seg.currentBlockSize = originBlockSize
		}
		bytebufferpool.Put(chunkBuffer)
	}()

	// write all data to the chunk buffer
	var pos *ChunkPosition
	positions = make([]*ChunkPosition, len(data))
	for i := 0; i < len(positions); i++ {
		pos, err = seg.writeToBuffer(data[i], chunkBuffer)
		if err != nil {
			return
		}
		positions[i] = pos
	}
	// write the chunk buffer to the segment file
	if err = seg.writeChunkBuffer(chunkBuffer); err != nil {
		return
	}
	return
}

// Write writes the data to the segment file.
func (seg *segment) Write(data []byte) (pos *ChunkPosition, err error) {
	if seg.closed {
		return nil, ErrClosed
	}

	originBlockNumber := seg.currentBlockNumber
	originBlockSize := seg.currentBlockSize

	// init chunk buffer
	chunkBuffer := bytebufferpool.Get()
	chunkBuffer.Reset()
	defer func() {
		if err != nil {
			seg.currentBlockNumber = originBlockNumber
			seg.currentBlockSize = originBlockSize
		}
		bytebufferpool.Put(chunkBuffer)
	}()

	// write all data to the chunk buffer
	pos, err = seg.writeToBuffer(data, chunkBuffer)
	if err != nil {
		return
	}
	// write the chunk buffer to the segment file
	if err = seg.writeChunkBuffer(chunkBuffer); err != nil {
		return
	}

	return
}

func (seg *segment) appendChunkBuffer(buf *bytebufferpool.ByteBuffer, data []byte, chunkType ChunkType) {
	// Length: 2 Bytes, index 4-5
	binary.LittleEndian.PutUint16(seg.header[4:6], uint16(len(data)))
	// Type: 1 Byte, index 6
	seg.header[6] = chunkType
	// Checksum: 4 Bytes, index 0-3
	sum := crc32.ChecksumIEEE(seg.header[4:])
	sum = crc32.Update(sum, crc32.IEEETable, data)
	binary.LittleEndian.PutUint32(seg.header[:4], sum)

	// append the header and data to the segment chunk buffer
	buf.B = append(buf.B, seg.header...)
	buf.B = append(buf.B, data...)
}

// write the pending chunk buffer to the segment file
func (seg *segment) writeChunkBuffer(buf *bytebufferpool.ByteBuffer) error {
	if seg.currentBlockSize > blockSize {
		return errors.New("the current block size exceeds the maximum block size")
	}

	// write the data into the underlying file
	if _, err := seg.fd.Write(buf.Bytes()); err != nil {
		return err
	}

	// the cached startup block cannot be reused after new writes.
	seg.startupBlock.blockNumber = -1
	return nil
}

// Read reads the data from the segment file by the block number and chunk offset.
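// The checksum stored in each chunk header covers the length and type bytes
// plus the payload, so torn or corrupted chunks surface as ErrInvalidCRC
// rather than as silently bad data.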
func (seg *segment) Read(blockNumber uint32, chunkOffset int64) ([]byte, error) {
	value, _, err := seg.readInternal(blockNumber, chunkOffset)
	return value, err
}

func (seg *segment) readInternal(blockNumber uint32, chunkOffset int64) ([]byte, *ChunkPosition, error) {
	if seg.closed {
		return nil, nil, ErrClosed
	}

	var (
		result    []byte
		block     []byte
		segSize   = seg.Size()
		nextChunk = &ChunkPosition{SegmentId: seg.id}
	)

	if seg.isStartupTraversal {
		block = seg.startupBlock.block
	} else {
		block = getBuffer()
		if len(block) != blockSize {
			block = make([]byte, blockSize)
		}
		defer putBuffer(block)
	}

	for {
		size := int64(blockSize)
		offset := int64(blockNumber) * blockSize
		if size+offset > segSize {
			size = segSize - offset
		}

		if chunkOffset >= size {
			return nil, nil, io.EOF
		}

		if seg.isStartupTraversal {
			// There are two cases in which we must read the block from the file:
			// 1. the requested block is not the cached one;
			// 2. new writes were appended to the block, and the block is
			//    still smaller than 32KB, so we must read it again to pick
			//    up the new writes.
			if seg.startupBlock.blockNumber != int64(blockNumber) || size != blockSize {
				// read the block from the segment file at the specified offset.
				_, err := seg.fd.ReadAt(block[0:size], offset)
				if err != nil {
					return nil, nil, err
				}
				// remember the block
				seg.startupBlock.blockNumber = int64(blockNumber)
			}
		} else {
			if _, err := seg.fd.ReadAt(block[0:size], offset); err != nil {
				return nil, nil, err
			}
		}

		// header
		header := block[chunkOffset : chunkOffset+chunkHeaderSize]

		// length
		length := binary.LittleEndian.Uint16(header[4:6])

		// copy data
		start := chunkOffset + chunkHeaderSize
		result = append(result, block[start:start+int64(length)]...)

		// check sum
		checksumEnd := chunkOffset + chunkHeaderSize + int64(length)
		checksum := crc32.ChecksumIEEE(block[chunkOffset+4 : checksumEnd])
		savedSum := binary.LittleEndian.Uint32(header[:4])
		if savedSum != checksum {
			return nil, nil, ErrInvalidCRC
		}

		// type
		chunkType := header[6]

		if chunkType == ChunkTypeFull || chunkType == ChunkTypeLast {
			nextChunk.BlockNumber = blockNumber
			nextChunk.ChunkOffset = checksumEnd
			// If this is the last chunk in the block, and the remaining
			// block space is padding, the next chunk should be in the next block.
			if checksumEnd+chunkHeaderSize >= blockSize {
				nextChunk.BlockNumber += 1
				nextChunk.ChunkOffset = 0
			}
			break
		}
		blockNumber += 1
		chunkOffset = 0
	}
	return result, nextChunk, nil
}

// Next returns the next chunk data.
// You can call it repeatedly until io.EOF is returned.
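//
// A minimal iteration sketch (seg is an assumed open *segment):
//
//	r := seg.NewReader()
//	for {
//		data, pos, err := r.Next()
//		if err == io.EOF {
//			break
//		}
//		_, _ = data, pos
//	}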
func (segReader *segmentReader) Next() ([]byte, *ChunkPosition, error) {
	// The segment file is closed
	if segReader.segment.closed {
		return nil, nil, ErrClosed
	}

	// this position describes the current chunk info
	chunkPosition := &ChunkPosition{
		SegmentId:   segReader.segment.id,
		BlockNumber: segReader.blockNumber,
		ChunkOffset: segReader.chunkOffset,
	}

	value, nextChunk, err := segReader.segment.readInternal(
		segReader.blockNumber,
		segReader.chunkOffset,
	)
	if err != nil {
		return nil, nil, err
	}

	// Calculate the chunk size.
	// Remember that the chunk size is just an estimated value,
	// not accurate, so don't use it for any important logic.
	chunkPosition.ChunkSize =
		nextChunk.BlockNumber*blockSize + uint32(nextChunk.ChunkOffset) -
			(segReader.blockNumber*blockSize + uint32(segReader.chunkOffset))

	// update the position
	segReader.blockNumber = nextChunk.BlockNumber
	segReader.chunkOffset = nextChunk.ChunkOffset

	return value, chunkPosition, nil
}

// Encode encodes the chunk position to a byte slice.
// It returns the slice trimmed to the actually occupied bytes.
// You can decode it by calling wal.DecodeChunkPosition().
func (cp *ChunkPosition) Encode() []byte {
	return cp.encode(true)
}

// EncodeFixedSize encodes the chunk position to a byte slice.
// It returns a slice of the fixed size "maxLen".
// You can decode it by calling wal.DecodeChunkPosition().
func (cp *ChunkPosition) EncodeFixedSize() []byte {
	return cp.encode(false)
}

// encode the chunk position to a byte slice.
func (cp *ChunkPosition) encode(shrink bool) []byte {
	buf := make([]byte, maxLen)

	var index = 0
	// SegmentId
	index += binary.PutUvarint(buf[index:], uint64(cp.SegmentId))
	// BlockNumber
	index += binary.PutUvarint(buf[index:], uint64(cp.BlockNumber))
	// ChunkOffset
	index += binary.PutUvarint(buf[index:], uint64(cp.ChunkOffset))
	// ChunkSize
	index += binary.PutUvarint(buf[index:], uint64(cp.ChunkSize))

	if shrink {
		return buf[:index]
	}
	return buf
}

// DecodeChunkPosition decodes the chunk position from a byte slice.
// You can encode it by calling wal.ChunkPosition.Encode().
func DecodeChunkPosition(buf []byte) *ChunkPosition {
	if len(buf) == 0 {
		return nil
	}

	var index = 0
	// SegmentId
	segmentId, n := binary.Uvarint(buf[index:])
	index += n
	// BlockNumber
	blockNumber, n := binary.Uvarint(buf[index:])
	index += n
	// ChunkOffset
	chunkOffset, n := binary.Uvarint(buf[index:])
	index += n
	// ChunkSize
	chunkSize, n := binary.Uvarint(buf[index:])
	index += n

	return &ChunkPosition{
		SegmentId:   uint32(segmentId),
		BlockNumber: uint32(blockNumber),
		ChunkOffset: int64(chunkOffset),
		ChunkSize:   uint32(chunkSize),
	}
}
--------------------------------------------------------------------------------
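/examples/chunk_position_roundtrip.go (an illustrative sketch; this file name and program are assumed, not part of the repository):
--------------------------------------------------------------------------------
// This sketch shows the ChunkPosition encode/decode round-trip described
// above, e.g. for persisting positions in an index and reading them back.
package main

import (
	"fmt"

	"github.com/rosedblabs/wal"
)

func main() {
	pos := &wal.ChunkPosition{
		SegmentId:   1,
		BlockNumber: 2,
		ChunkOffset: 100,
		ChunkSize:   512,
	}

	// Encode returns only the occupied bytes; EncodeFixedSize would
	// return a fixed-size slice instead.
	decoded := wal.DecodeChunkPosition(pos.Encode())
	fmt.Println(decoded.SegmentId, decoded.BlockNumber, decoded.ChunkOffset, decoded.ChunkSize)
	// Output: 1 2 100 512
}
--------------------------------------------------------------------------------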