├── .gitignore
├── go.mod
├── examples
│   └── main.go
├── go.sum
├── .github
│   └── workflows
│       └── go.yml
├── benchmark
│   └── bench_test.go
├── options.go
├── README.md
├── CHANGELOG.md
├── wal_test.go
├── segment_test.go
├── LICENSE
├── wal.go
└── segment.go

/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .vscode/
3 | .DS_Store
4 | 
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/rosedblabs/wal
2 | 
3 | go 1.21
4 | 
5 | require (
6 | 	github.com/stretchr/testify v1.9.0
7 | 	github.com/valyala/bytebufferpool v1.0.0
8 | )
9 | 
10 | require (
11 | 	github.com/davecgh/go-spew v1.1.1 // indirect
12 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
13 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
14 | )
15 | 
--------------------------------------------------------------------------------
/examples/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"fmt"
5 | 	"io"
6 | 	"log"
7 | 
8 | 	"github.com/rosedblabs/wal"
9 | )
10 | 
11 | func main() {
12 | 	walFile, _ := wal.Open(wal.DefaultOptions)
13 | 	// write some data
14 | 	chunkPosition, _ := walFile.Write([]byte("some data 1"))
15 | 	// read by the position
16 | 	val, _ := walFile.Read(chunkPosition)
17 | 	fmt.Println(string(val))
18 | 
19 | 	_, err := walFile.Write([]byte("some data 2"))
20 | 	if err != nil {
21 | 		log.Println(err)
22 | 	}
23 | 	_, err = walFile.Write([]byte("some data 3"))
24 | 	if err != nil {
25 | 		log.Println(err)
26 | 	}
27 | 
28 | 	// iterate all data in the wal
29 | 	reader := walFile.NewReader()
30 | 	for {
31 | 		val, pos, err := reader.Next()
32 | 		if err != nil {
33 | 			if err == io.EOF {
34 | 				break
35 | 			}
36 | 			log.Fatal(err) // stop instead of looping forever on a non-EOF error
37 | 		}
38 | 		fmt.Println(string(val))
39 | 		fmt.Println(pos) // get position of the data for the next read
40 | 	}
41 | }
42 | 
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
6 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
7 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
8 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
13 | 
--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 | 
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |   pull_request:
7 |     branches: [ main ]
8 | 
9 | jobs:
10 | 
11 |   ubuntu-test:
12 |     runs-on: ubuntu-latest
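    # This job runs vet, a gofmt check, a build, and the unit tests on Ubuntu;
    # the windows-test job below repeats the same steps minus the gofmt check.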
13 |     steps:
14 |     - uses: actions/checkout@v2
15 | 
16 |     - name: Set up Go
17 |       uses: actions/setup-go@v2
18 |       with:
19 |         go-version: 1.21
20 | 
21 |     - name: Run Go Vet
22 |       run: |
23 |         go vet ./...
24 | 
25 |     - name: Run Go Fmt
26 |       run: |
27 |         files=$(go fmt ./...)
28 |         if [ -n "$files" ]; then
29 |           echo "Please run gofmt on these files ..."
30 |           echo "$files"
31 |           exit 1
32 |         fi
33 | 
34 |     - name: Build
35 |       run: go build -v
36 | 
37 |     - name: Run Unit Test
38 |       run: go test -count 1 -v ./...
39 | 
40 |   windows-test:
41 |     runs-on: windows-latest
42 |     steps:
43 |     - uses: actions/checkout@v2
44 | 
45 |     - name: Set up Go
46 |       uses: actions/setup-go@v2
47 |       with:
48 |         go-version: 1.21
49 | 
50 |     - name: Run Go Vet
51 |       run: |
52 |         go vet ./...
53 | 
54 |     - name: Build
55 |       run: go build -v
56 | 
57 |     - name: Run Unit Test
58 |       run: go test -count 1 -v ./...
59 | 
--------------------------------------------------------------------------------
/benchmark/bench_test.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 | 
3 | import (
4 | 	"math/rand"
5 | 	"os"
6 | 	"strings"
7 | 	"testing"
8 | 
9 | 	"github.com/rosedblabs/wal"
10 | 	"github.com/stretchr/testify/assert"
11 | )
12 | 
13 | var walFile *wal.WAL
14 | 
15 | func init() {
16 | 	dir, _ := os.MkdirTemp("", "wal-benchmark-test")
17 | 	opts := wal.Options{
18 | 		DirPath:        dir,
19 | 		SegmentFileExt: ".SEG",
20 | 		SegmentSize:    wal.GB,
21 | 	}
22 | 	var err error
23 | 	walFile, err = wal.Open(opts)
24 | 	if err != nil {
25 | 		panic(err)
26 | 	}
27 | }
28 | 
29 | func BenchmarkWAL_WriteLargeSize(b *testing.B) {
30 | 	b.ResetTimer()
31 | 	b.ReportAllocs()
32 | 	content := []byte(strings.Repeat("X", 256*wal.KB+500))
33 | 	for i := 0; i < b.N; i++ {
34 | 		_, err := walFile.Write(content)
35 | 		assert.Nil(b, err)
36 | 	}
37 | }
38 | 
39 | func BenchmarkWAL_Write(b *testing.B) {
40 | 	b.ResetTimer()
41 | 	b.ReportAllocs()
42 | 
43 | 	for i := 0; i < b.N; i++ {
44 | 		_, err := walFile.Write([]byte("Hello World"))
45 | 		assert.Nil(b, err)
46 | 	}
47 | }
48 | 
49 | func BenchmarkWAL_WriteBatch(b *testing.B) {
50 | 	b.ResetTimer()
51 | 	b.ReportAllocs()
52 | 	for i := 0; i < b.N; i++ {
53 | 		for j := 0; j < 31; j++ {
54 | 			walFile.PendingWrites([]byte(strings.Repeat("X", wal.MB)))
55 | 		}
56 | 		walFile.PendingWrites([]byte(strings.Repeat("X", wal.MB)))
57 | 		pos, err := walFile.WriteAll()
58 | 		assert.Nil(b, err)
59 | 		assert.Equal(b, 32, len(pos))
60 | 	}
61 | }
62 | 
63 | func BenchmarkWAL_Read(b *testing.B) {
64 | 	var positions []*wal.ChunkPosition
65 | 	for i := 0; i < 1000000; i++ {
66 | 		pos, err := walFile.Write([]byte("Hello World"))
67 | 		assert.Nil(b, err)
68 | 		positions = append(positions, pos)
69 | 	}
70 | 
71 | 	b.ResetTimer()
72 | 	b.ReportAllocs()
73 | 
74 | 	for i := 0; i < b.N; i++ {
75 | 		_, err := walFile.Read(positions[rand.Intn(len(positions))])
76 | 		assert.Nil(b, err)
77 | 	}
78 | }
79 | 
--------------------------------------------------------------------------------
/options.go:
--------------------------------------------------------------------------------
1 | package wal
2 | 
3 | import (
4 | 	"os"
5 | 	"time"
6 | )
7 | 
8 | // Options represents the configuration options for a Write-Ahead Log (WAL).
9 | type Options struct {
10 | 	// DirPath specifies the directory path where the WAL segment files will be stored.
11 | 	DirPath string
12 | 
13 | 	// SegmentSize specifies the maximum size of each segment file in bytes.
14 | 	SegmentSize int64
15 | 
16 | 	// SegmentFileExt specifies the file extension of the segment files.
17 | 	// The file extension must start with a dot ".", and the default value is ".SEG".
18 | 	// It is used to identify the different types of files in the directory.
19 | 	// Now it is used by rosedb to identify the segment files and hint files.
20 | 	// Not a common usage for most users.
21 | 	SegmentFileExt string
22 | 
23 | 	// Sync determines whether to synchronize writes through the OS buffer cache and down onto the actual disk.
24 | 	// Enabling Sync is required for the durability of a single write operation, but it also results in slower writes.
25 | 	//
26 | 	// If false, and the machine crashes, then some recent writes may be lost.
27 | 	// Note that if it is just the process that crashes (the machine does not), then no writes will be lost.
28 | 	//
29 | 	// In other words, Sync being false has the same semantics as a write
30 | 	// system call. Sync being true means write followed by fsync.
31 | 	Sync bool
32 | 
33 | 	// BytesPerSync specifies the number of bytes to write before calling fsync.
34 | 	BytesPerSync uint32
35 | 
36 | 	// SyncInterval is the time duration in which explicit synchronization is performed.
37 | 	// If SyncInterval is zero, no periodic synchronization is performed.
38 | 	SyncInterval time.Duration
39 | }
40 | 
41 | const (
42 | 	B  = 1
43 | 	KB = 1024 * B
44 | 	MB = 1024 * KB
45 | 	GB = 1024 * MB
46 | )
47 | 
48 | var DefaultOptions = Options{
49 | 	DirPath:        os.TempDir(),
50 | 	SegmentSize:    GB,
51 | 	SegmentFileExt: ".SEG",
52 | 	Sync:           false,
53 | 	BytesPerSync:   0,
54 | 	SyncInterval:   0,
55 | }
56 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # wal
2 | Write Ahead Log for LSM or bitcask storage.
3 | 
4 | ## Key Features
5 | * Disk based, supports large data volumes
6 | * Append-only writes, high performance
7 | * Fast reads: a single disk seek retrieves any value
8 | * Supports batch writes: all data in a batch is written in a single disk seek
9 | * Iterate over all data in the WAL with the `NewReader` function
10 | * Supports concurrent writes and reads: all functions are thread safe
11 | 
12 | ## Design Overview
13 | 
14 | ![wal-logo.png](https://s2.loli.net/2025/01/12/SF9vThRkAObm4WD.png)
15 | 
16 | ## Format
17 | 
18 | **Format of a single segment file:**
19 | 
20 | ```
21 |        +-----+-------------+--+----+----------+------+-- ... ----+
22 | File   | r0  |      r1     |P | r2 |    r3    |  r4  |           |
23 |        +-----+-------------+--+----+----------+------+-- ... ----+
24 |        |<---- BlockSize ----->|<---- BlockSize ----->|
25 | 
26 |  rn = variable size records
27 |  P = Padding
28 |  BlockSize = 32KB
29 | ```
30 | 
31 | **Format of a single record:**
32 | 
33 | ```
34 | +----------+-------------+-----------+--- ... ---+
35 | | CRC (4B) | Length (2B) | Type (1B) |  Payload  |
36 | +----------+-------------+-----------+--- ... ---+
37 | 
38 | CRC = 32-bit hash computed over the payload using CRC-32
39 | Length = Length of the payload data
40 | Type = Type of record
41 |        (FullType, FirstType, MiddleType, LastType)
42 |        The type is used to group a bunch of records together to represent
43 |        data that is larger than BlockSize
44 | Payload = Byte stream as long as specified by the payload size
45 | ```
46 | 
47 | The record header (CRC + Length + Type) is therefore 7 bytes in total, and the payload follows immediately after it.
48 | 
49 | ## Getting Started
50 | 
51 | ```go
52 | package main
53 | 
54 | import (
55 | 	"fmt"
56 | 	"io"
57 | 	"log"
58 | 
59 | 	"github.com/rosedblabs/wal"
60 | )
61 | 
62 | func main() {
63 | 	walFile, _ := wal.Open(wal.DefaultOptions)
64 | 	// write some data
65 | 	chunkPosition, _ := walFile.Write([]byte("some data 1"))
66 | 	// read by the position
67 | 	val, _ := walFile.Read(chunkPosition)
68 | 	fmt.Println(string(val))
69 | 
70 | 	walFile.Write([]byte("some data 2"))
71 | 	walFile.Write([]byte("some data 3"))
72 | 
73 | 	// iterate all data in the wal
74 | 	reader := walFile.NewReader()
75 | 	for {
76 | 		val, pos, err := reader.Next()
77 | 		if err != nil {
78 | 			if err == io.EOF {
79 | 				break
80 | 			}
81 | 			log.Fatal(err)
82 | 		}
83 | 		fmt.Println(string(val))
84 | 		fmt.Println(pos) // get position of the data for the next read
85 | 	}
86 | }
87 | ```
88 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Release 1.3.8 (2024-07-06)
2 | ## 🎄 Enhancements
3 | * fix concurrent read (https://github.com/rosedblabs/wal/commit/4206c62ab8b46223b9fbd030c90219222cf1707f)
4 | ## 🎠 Community
5 | * Thanks to @ghosx
6 |   * Fix seg.Remove Swallowing Error (https://github.com/rosedblabs/wal/pull/38)
7 |   * fix panic (https://github.com/rosedblabs/wal/pull/39)
8 | 
9 | # Release 1.3.7 (2024-06-13)
10 | 
11 | ## 🎄 Enhancements
12 | * remove block cache (https://github.com/rosedblabs/wal/commit/e77db044463f4ba32455b7a0b42db3161248b5a4)
13 | * improve iterate performance (https://github.com/rosedblabs/wal/commit/213913a6a9145740cff75340de899b0ff03894e4)
14 | 
15 | # Release 1.3.6 (2023-09-25)
16 | 
17 | ## 🎄 Enhancements
18 | * avoid resetting the pool to optimize memory usage
19 | * no need to return err in pendingWrites
20 | * fix benchmark error
21 | ## 🎠 Community
22 | * Thanks to @akiozihao
23 |   * check ErrPendingSizeTooLarge first (https://github.com/rosedblabs/wal/pull/32)
24 | 
25 | # Release 1.3.5 (2023-09-19)
26 | 
27 | ## 🎄 Enhancements
28 | * Rotate the file when pending writes exceed the remaining space of the segment file.
29 | 
30 | # Release 1.3.4 (2023-09-18)
31 | 
32 | ## 🚀 New Features
33 | * add RenameFileExt function
34 | 
35 | ## 🎠 Community
36 | * Thanks to @akiozihao
37 |   * add EncodeFixedSize (https://github.com/rosedblabs/wal/pull/28)
38 |   * add WriteBatch (https://github.com/rosedblabs/wal/pull/26)
39 | 
40 | # Release 1.3.3 (2023-08-19)
41 | 
42 | ## 🎠 Community
43 | * Thanks to @LEAVING-7
44 |   * Keep function name consistent in wal_test.go (https://github.com/rosedblabs/wal/pull/24)
45 | * Thanks to @amityahav
46 |   * Improved performance for writing large records (> blockSize) (https://github.com/rosedblabs/wal/pull/21)
47 | ## 🐞 Bug Fixes
48 | * fix a bug if the segment size exceeds 4GB
49 | * Enhancement: use bufferpool to avoid writing twice https://github.com/rosedblabs/wal/commit/1345f5013113781c59ddaca36ddb13bdcc58ce27
50 | 
51 | # Release 1.3.2 (2023-08-07)
52 | 
53 | ## 🎄 Enhancements
54 | * Enhancement: use bufferpool to avoid writing twice https://github.com/rosedblabs/wal/commit/1345f5013113781c59ddaca36ddb13bdcc58ce27
55 | 
56 | # Release 1.3.1 (2023-08-04)
57 | 
58 | ## 🐞 Bug Fixes
59 | * Add a condition to avoid caching repeatedly https://github.com/rosedblabs/wal/commit/cb708139c877b1ef102c0be057ba33cb4af6abb2
60 | 
61 | # Release 1.3.0 (2023-08-02)
62 | 
63 | ## 🚀 New Features
64 | * Add ChunkPosition Encode and Decode
65 | 
66 | ## 🎄 Enhancements
67 | * Avoid making new bytes while writing
68 | * Use sync.Pool to optimize read performance
69 | * Add more code comments
70 | 
71 | ## 🎠 Community
72 | * Thanks to @chinazmc
73 |   * update SementFileExt to SegmentFileExt (https://github.com/rosedblabs/wal/pull/11)
74 | * Thanks to @xzhseh
75 |   * feat(docs): improve README.md format & fix several typos (https://github.com/rosedblabs/wal/pull/12)
76 | * Thanks to @yanxiaoqi932
77 |   * BlockCache must be smaller than SegmentSize (https://github.com/rosedblabs/wal/pull/14)
78 | * Thanks to @mitingjin
79 |   * Fix typo in wal.go (https://github.com/rosedblabs/wal/pull/15)
80 | 
81 | # Release 1.2.0 (2023-07-01)
82 | 
83 | ## 🚀 New Features
84 | * Add `NewReaderWithStart` function to support reading the log from a specified position.
85 | 
86 | ## 🎠 Community
87 | * Thanks to @yanxiaoqi932
88 |   * enhancement: add wal delete function ([#9](https://github.com/rosedblabs/wal/pull/9))
89 | 
90 | # Release 1.1.0 (2023-06-21)
91 | 
92 | ## 🚀 New Features
93 | * Add tests on Windows, with a workflow.
94 | * Add some functions to support the rosedb Merge operation.
95 | 
96 | ## 🎠 Community
97 | * Thanks to @SPCDTS
98 |   * fix: calculate seg file size by seg.size ([#7](https://github.com/rosedblabs/wal/pull/7))
99 |   * fix: limit data size ([#6](https://github.com/rosedblabs/wal/pull/6))
100 |   * fix: spelling error ([#5](https://github.com/rosedblabs/wal/pull/5))
101 | 
102 | # Release 1.0.0 (2023-06-13)
103 | 
104 | ## 🚀 New Features
105 | * First release: basic operations, read, write, and iterate the log files.
106 | * Add block cache for log files.
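
Two features called out above are easiest to see side by side: `ChunkPosition` Encode/Decode (1.3.0) and `NewReaderWithStart` (1.2.0). The sketch below is an untested illustration based only on the API that appears elsewhere in this repository (`wal.Open`, `Write`, `Encode`, `DecodeChunkPosition`, `NewReaderWithStart`); the directory name is hypothetical.

```go
package main

import (
	"fmt"
	"io"
	"os"
	"path/filepath"

	"github.com/rosedblabs/wal"
)

func main() {
	dir := filepath.Join(os.TempDir(), "wal-changelog-demo") // hypothetical directory
	w, err := wal.Open(wal.Options{
		DirPath:        dir,
		SegmentFileExt: ".SEG",
		SegmentSize:    wal.GB,
		Sync:           false, // see options.go: trades single-write durability for speed
	})
	if err != nil {
		panic(err)
	}
	defer w.Close()

	// Write a few records and keep the position of the second one.
	_, _ = w.Write([]byte("record 1"))
	pos, _ := w.Write([]byte("record 2"))
	_, _ = w.Write([]byte("record 3"))

	// ChunkPosition Encode/Decode (1.3.0): a position round-trips through bytes,
	// so it can be stored in an index and restored later.
	restored := wal.DecodeChunkPosition(pos.Encode())

	// NewReaderWithStart (1.2.0): resume iteration from the restored position.
	reader, err := w.NewReaderWithStart(restored)
	if err != nil {
		panic(err)
	}
	for {
		data, _, err := reader.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			panic(err)
		}
		fmt.Println(string(data)) // expected: "record 2", then "record 3"
	}
}
```

Encoding positions this way is what lets callers persist pointers into the WAL and resume iteration from them after a restart.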
107 | -------------------------------------------------------------------------------- /wal_test.go: -------------------------------------------------------------------------------- 1 | package wal 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func destroyWAL(wal *WAL) { 13 | if wal != nil { 14 | _ = wal.Close() 15 | _ = os.RemoveAll(wal.options.DirPath) 16 | } 17 | } 18 | 19 | func TestWAL_WriteALL(t *testing.T) { 20 | dir, _ := os.MkdirTemp("", "wal-test-write-batch-1") 21 | opts := Options{ 22 | DirPath: dir, 23 | SegmentFileExt: ".SEG", 24 | SegmentSize: 32 * 1024 * 1024, 25 | } 26 | wal, err := Open(opts) 27 | assert.Nil(t, err) 28 | defer destroyWAL(wal) 29 | 30 | testWriteAllIterate(t, wal, 0, 10) 31 | assert.True(t, wal.IsEmpty()) 32 | 33 | testWriteAllIterate(t, wal, 10000, 512) 34 | assert.False(t, wal.IsEmpty()) 35 | } 36 | 37 | func TestWAL_Write(t *testing.T) { 38 | dir, _ := os.MkdirTemp("", "wal-test-write1") 39 | opts := Options{ 40 | DirPath: dir, 41 | SegmentFileExt: ".SEG", 42 | SegmentSize: 32 * 1024 * 1024, 43 | } 44 | wal, err := Open(opts) 45 | assert.Nil(t, err) 46 | defer destroyWAL(wal) 47 | 48 | // write 1 49 | pos1, err := wal.Write([]byte("hello1")) 50 | assert.Nil(t, err) 51 | assert.NotNil(t, pos1) 52 | pos2, err := wal.Write([]byte("hello2")) 53 | assert.Nil(t, err) 54 | assert.NotNil(t, pos2) 55 | pos3, err := wal.Write([]byte("hello3")) 56 | assert.Nil(t, err) 57 | assert.NotNil(t, pos3) 58 | 59 | val, err := wal.Read(pos1) 60 | assert.Nil(t, err) 61 | assert.Equal(t, "hello1", string(val)) 62 | val, err = wal.Read(pos2) 63 | assert.Nil(t, err) 64 | assert.Equal(t, "hello2", string(val)) 65 | val, err = wal.Read(pos3) 66 | assert.Nil(t, err) 67 | assert.Equal(t, "hello3", string(val)) 68 | } 69 | 70 | func TestWAL_Write_large(t *testing.T) { 71 | dir, _ := os.MkdirTemp("", "wal-test-write2") 72 | opts := Options{ 73 | DirPath: dir, 74 | SegmentFileExt: ".SEG", 75 | SegmentSize: 32 * 1024 * 1024, 76 | } 77 | wal, err := Open(opts) 78 | assert.Nil(t, err) 79 | defer destroyWAL(wal) 80 | 81 | testWriteAndIterate(t, wal, 100000, 512) 82 | } 83 | 84 | func TestWAL_Write_large2(t *testing.T) { 85 | dir, _ := os.MkdirTemp("", "wal-test-write3") 86 | opts := Options{ 87 | DirPath: dir, 88 | SegmentFileExt: ".SEG", 89 | SegmentSize: 32 * 1024 * 1024, 90 | } 91 | wal, err := Open(opts) 92 | assert.Nil(t, err) 93 | defer destroyWAL(wal) 94 | 95 | testWriteAndIterate(t, wal, 2000, 32*1024*3+10) 96 | } 97 | 98 | func TestWAL_OpenNewActiveSegment(t *testing.T) { 99 | dir, _ := os.MkdirTemp("", "wal-test-new-active-segment") 100 | opts := Options{ 101 | DirPath: dir, 102 | SegmentFileExt: ".SEG", 103 | SegmentSize: 32 * 1024 * 1024, 104 | } 105 | wal, err := Open(opts) 106 | assert.Nil(t, err) 107 | defer destroyWAL(wal) 108 | 109 | testWriteAndIterate(t, wal, 2000, 512) 110 | err = wal.OpenNewActiveSegment() 111 | assert.Nil(t, err) 112 | 113 | val := strings.Repeat("wal", 100) 114 | for i := 0; i < 100; i++ { 115 | pos, err := wal.Write([]byte(val)) 116 | assert.Nil(t, err) 117 | assert.NotNil(t, pos) 118 | } 119 | } 120 | 121 | func TestWAL_IsEmpty(t *testing.T) { 122 | dir, _ := os.MkdirTemp("", "wal-test-is-empty") 123 | opts := Options{ 124 | DirPath: dir, 125 | SegmentFileExt: ".SEG", 126 | SegmentSize: 32 * 1024 * 1024, 127 | } 128 | wal, err := Open(opts) 129 | assert.Nil(t, err) 130 | defer destroyWAL(wal) 131 | 132 | assert.True(t, wal.IsEmpty()) 133 | testWriteAndIterate(t, wal, 
2000, 512) 134 | assert.False(t, wal.IsEmpty()) 135 | } 136 | 137 | func TestWAL_Reader(t *testing.T) { 138 | dir, _ := os.MkdirTemp("", "wal-test-wal-reader") 139 | opts := Options{ 140 | DirPath: dir, 141 | SegmentFileExt: ".SEG", 142 | SegmentSize: 32 * 1024 * 1024, 143 | } 144 | wal, err := Open(opts) 145 | assert.Nil(t, err) 146 | defer destroyWAL(wal) 147 | 148 | var size = 100000 149 | val := strings.Repeat("wal", 512) 150 | for i := 0; i < size; i++ { 151 | _, err := wal.Write([]byte(val)) 152 | assert.Nil(t, err) 153 | } 154 | 155 | validate := func(walInner *WAL, size int) { 156 | var i = 0 157 | reader := walInner.NewReader() 158 | for { 159 | chunk, position, err := reader.Next() 160 | if err != nil { 161 | if err == io.EOF { 162 | break 163 | } 164 | panic(err) 165 | } 166 | assert.NotNil(t, chunk) 167 | assert.NotNil(t, position) 168 | assert.Equal(t, position.SegmentId, reader.CurrentSegmentId()) 169 | i++ 170 | } 171 | assert.Equal(t, i, size) 172 | } 173 | 174 | validate(wal, size) 175 | err = wal.Close() 176 | assert.Nil(t, err) 177 | 178 | wal2, err := Open(opts) 179 | assert.Nil(t, err) 180 | defer func() { 181 | _ = wal2.Close() 182 | }() 183 | validate(wal2, size) 184 | } 185 | 186 | func testWriteAllIterate(t *testing.T, wal *WAL, size, valueSize int) { 187 | for i := 0; i < size; i++ { 188 | val := strings.Repeat("wal", valueSize) 189 | wal.PendingWrites([]byte(val)) 190 | } 191 | positions, err := wal.WriteAll() 192 | assert.Nil(t, err) 193 | assert.Equal(t, len(positions), size) 194 | 195 | count := 0 196 | reader := wal.NewReader() 197 | for { 198 | data, pos, err := reader.Next() 199 | if err != nil { 200 | break 201 | } 202 | assert.Equal(t, strings.Repeat("wal", valueSize), string(data)) 203 | 204 | assert.Equal(t, positions[count].SegmentId, pos.SegmentId) 205 | assert.Equal(t, positions[count].BlockNumber, pos.BlockNumber) 206 | assert.Equal(t, positions[count].ChunkOffset, pos.ChunkOffset) 207 | 208 | count++ 209 | } 210 | assert.Equal(t, len(wal.pendingWrites), 0) 211 | } 212 | 213 | func testWriteAndIterate(t *testing.T, wal *WAL, size int, valueSize int) { 214 | val := strings.Repeat("wal", valueSize) 215 | positions := make([]*ChunkPosition, size) 216 | for i := 0; i < size; i++ { 217 | pos, err := wal.Write([]byte(val)) 218 | assert.Nil(t, err) 219 | positions[i] = pos 220 | } 221 | 222 | var count int 223 | // iterates all the data 224 | reader := wal.NewReader() 225 | for { 226 | data, pos, err := reader.Next() 227 | if err != nil { 228 | break 229 | } 230 | assert.Equal(t, val, string(data)) 231 | 232 | assert.Equal(t, positions[count].SegmentId, pos.SegmentId) 233 | assert.Equal(t, positions[count].BlockNumber, pos.BlockNumber) 234 | assert.Equal(t, positions[count].ChunkOffset, pos.ChunkOffset) 235 | 236 | count++ 237 | } 238 | assert.Equal(t, size, count) 239 | } 240 | 241 | func TestWAL_Delete(t *testing.T) { 242 | dir, _ := os.MkdirTemp("", "wal-test-delete") 243 | opts := Options{ 244 | DirPath: dir, 245 | SegmentFileExt: ".SEG", 246 | SegmentSize: 32 * 1024 * 1024, 247 | } 248 | wal, err := Open(opts) 249 | assert.Nil(t, err) 250 | testWriteAndIterate(t, wal, 2000, 512) 251 | assert.False(t, wal.IsEmpty()) 252 | defer destroyWAL(wal) 253 | 254 | err = wal.Delete() 255 | assert.Nil(t, err) 256 | 257 | wal, err = Open(opts) 258 | assert.Nil(t, err) 259 | assert.True(t, wal.IsEmpty()) 260 | } 261 | 262 | func TestWAL_ReaderWithStart(t *testing.T) { 263 | dir, _ := os.MkdirTemp("", "wal-test-wal-reader-with-start") 264 | opts := Options{ 265 | 
DirPath: dir, 266 | SegmentFileExt: ".SEG", 267 | SegmentSize: 8 * 1024 * 1024, 268 | } 269 | wal, err := Open(opts) 270 | assert.Nil(t, err) 271 | defer destroyWAL(wal) 272 | 273 | _, err = wal.NewReaderWithStart(nil) 274 | assert.NotNil(t, err) 275 | 276 | reader1, err := wal.NewReaderWithStart(&ChunkPosition{SegmentId: 0, BlockNumber: 0, ChunkOffset: 100}) 277 | assert.Nil(t, err) 278 | _, _, err = reader1.Next() 279 | assert.Equal(t, err, io.EOF) 280 | 281 | testWriteAndIterate(t, wal, 20000, 512) 282 | reader2, err := wal.NewReaderWithStart(&ChunkPosition{SegmentId: 0, BlockNumber: 0, ChunkOffset: 0}) 283 | assert.Nil(t, err) 284 | _, pos2, err := reader2.Next() 285 | assert.Nil(t, err) 286 | assert.Equal(t, pos2.BlockNumber, uint32(0)) 287 | assert.Equal(t, pos2.ChunkOffset, int64(0)) 288 | 289 | reader3, err := wal.NewReaderWithStart(&ChunkPosition{SegmentId: 3, BlockNumber: 5, ChunkOffset: 0}) 290 | assert.Nil(t, err) 291 | _, pos3, err := reader3.Next() 292 | assert.Nil(t, err) 293 | assert.Equal(t, pos3.SegmentId, uint32(3)) 294 | assert.Equal(t, pos3.BlockNumber, uint32(5)) 295 | } 296 | 297 | func TestWAL_RenameFileExt(t *testing.T) { 298 | dir, _ := os.MkdirTemp("", "wal-test-rename-ext") 299 | opts := Options{ 300 | DirPath: dir, 301 | SegmentFileExt: ".VLOG.1.temp", 302 | SegmentSize: 8 * 1024 * 1024, 303 | } 304 | wal, err := Open(opts) 305 | assert.Nil(t, err) 306 | defer destroyWAL(wal) 307 | testWriteAndIterate(t, wal, 20000, 512) 308 | 309 | err = wal.Close() 310 | assert.Nil(t, err) 311 | 312 | err = wal.RenameFileExt(".VLOG.1") 313 | assert.Nil(t, err) 314 | 315 | opts.SegmentFileExt = ".VLOG.1" 316 | wal2, err := Open(opts) 317 | assert.Nil(t, err) 318 | defer func() { 319 | _ = wal2.Close() 320 | }() 321 | for i := 0; i < 20000; i++ { 322 | _, err = wal2.Write([]byte(strings.Repeat("W", 512))) 323 | assert.Nil(t, err) 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /segment_test.go: -------------------------------------------------------------------------------- 1 | package wal 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "math" 7 | "os" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestSegment_Write_FULL1(t *testing.T) { 15 | dir, _ := os.MkdirTemp("", "seg-test-full1") 16 | seg, err := openSegmentFile(dir, ".SEG", 1) 17 | assert.Nil(t, err) 18 | defer func() { 19 | _ = seg.Remove() 20 | }() 21 | 22 | // 1. FULL chunks 23 | val := []byte(strings.Repeat("X", 100)) 24 | 25 | pos1, err := seg.Write(val) 26 | assert.Nil(t, err) 27 | pos2, err := seg.Write(val) 28 | assert.Nil(t, err) 29 | 30 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 31 | assert.Nil(t, err) 32 | assert.Equal(t, val, val1) 33 | 34 | val2, err := seg.Read(pos2.BlockNumber, pos2.ChunkOffset) 35 | assert.Nil(t, err) 36 | assert.Equal(t, val, val2) 37 | 38 | // 2. Write until a new block 39 | for i := 0; i < 100000; i++ { 40 | pos, err := seg.Write(val) 41 | assert.Nil(t, err) 42 | res, err := seg.Read(pos.BlockNumber, pos.ChunkOffset) 43 | assert.Nil(t, err) 44 | assert.Equal(t, val, res) 45 | } 46 | } 47 | 48 | func TestSegment_Write_FULL2(t *testing.T) { 49 | dir, _ := os.MkdirTemp("", "seg-test-full2") 50 | seg, err := openSegmentFile(dir, ".SEG", 1) 51 | assert.Nil(t, err) 52 | defer func() { 53 | _ = seg.Remove() 54 | }() 55 | 56 | // 3. 
chunk full with a block 57 | val := []byte(strings.Repeat("X", blockSize-chunkHeaderSize)) 58 | 59 | pos1, err := seg.Write(val) 60 | assert.Nil(t, err) 61 | assert.Equal(t, pos1.BlockNumber, uint32(0)) 62 | assert.Equal(t, pos1.ChunkOffset, int64(0)) 63 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 64 | assert.Nil(t, err) 65 | assert.Equal(t, val, val1) 66 | 67 | pos2, err := seg.Write(val) 68 | assert.Nil(t, err) 69 | assert.Equal(t, pos2.BlockNumber, uint32(1)) 70 | assert.Equal(t, pos2.ChunkOffset, int64(0)) 71 | val2, err := seg.Read(pos2.BlockNumber, pos2.ChunkOffset) 72 | assert.Nil(t, err) 73 | assert.Equal(t, val, val2) 74 | } 75 | 76 | func TestSegment_Write_Padding(t *testing.T) { 77 | dir, _ := os.MkdirTemp("", "seg-test-padding") 78 | seg, err := openSegmentFile(dir, ".SEG", 1) 79 | assert.Nil(t, err) 80 | defer func() { 81 | _ = seg.Remove() 82 | }() 83 | 84 | // 4. padding 85 | val := []byte(strings.Repeat("X", blockSize-chunkHeaderSize-3)) 86 | 87 | _, err = seg.Write(val) 88 | assert.Nil(t, err) 89 | 90 | pos1, err := seg.Write(val) 91 | assert.Nil(t, err) 92 | assert.Equal(t, pos1.BlockNumber, uint32(1)) 93 | assert.Equal(t, pos1.ChunkOffset, int64(0)) 94 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 95 | assert.Nil(t, err) 96 | assert.Equal(t, val, val1) 97 | } 98 | 99 | func TestSegment_Write_NOT_FULL(t *testing.T) { 100 | dir, _ := os.MkdirTemp("", "seg-test-not-full") 101 | seg, err := openSegmentFile(dir, ".SEG", 1) 102 | assert.Nil(t, err) 103 | defer func() { 104 | _ = seg.Remove() 105 | }() 106 | 107 | // 5. FIRST-LAST 108 | bytes1 := []byte(strings.Repeat("X", blockSize+100)) 109 | 110 | pos1, err := seg.Write(bytes1) 111 | assert.Nil(t, err) 112 | val1, err := seg.Read(pos1.BlockNumber, pos1.ChunkOffset) 113 | assert.Nil(t, err) 114 | assert.Equal(t, bytes1, val1) 115 | 116 | pos2, err := seg.Write(bytes1) 117 | assert.Nil(t, err) 118 | val2, err := seg.Read(pos2.BlockNumber, pos2.ChunkOffset) 119 | assert.Nil(t, err) 120 | assert.Equal(t, bytes1, val2) 121 | 122 | pos3, err := seg.Write(bytes1) 123 | assert.Nil(t, err) 124 | val3, err := seg.Read(pos3.BlockNumber, pos3.ChunkOffset) 125 | assert.Nil(t, err) 126 | assert.Equal(t, bytes1, val3) 127 | 128 | // 6. 
FIRST-MIDDLE-LAST 129 | bytes2 := []byte(strings.Repeat("X", blockSize*3+100)) 130 | pos4, err := seg.Write(bytes2) 131 | assert.Nil(t, err) 132 | val4, err := seg.Read(pos4.BlockNumber, pos4.ChunkOffset) 133 | assert.Nil(t, err) 134 | assert.Equal(t, bytes2, val4) 135 | } 136 | 137 | func TestSegment_Reader_FULL(t *testing.T) { 138 | dir, _ := os.MkdirTemp("", "seg-test-reader-full") 139 | seg, err := openSegmentFile(dir, ".SEG", 1) 140 | assert.Nil(t, err) 141 | defer func() { 142 | _ = seg.Remove() 143 | }() 144 | 145 | // FULL chunks 146 | bytes1 := []byte(strings.Repeat("X", blockSize+100)) 147 | pos1, err := seg.Write(bytes1) 148 | assert.Nil(t, err) 149 | pos2, err := seg.Write(bytes1) 150 | assert.Nil(t, err) 151 | 152 | reader := seg.NewReader() 153 | val, rpos1, err := reader.Next() 154 | assert.Nil(t, err) 155 | assert.Equal(t, bytes1, val) 156 | assert.Equal(t, pos1, rpos1) 157 | 158 | val, rpos2, err := reader.Next() 159 | assert.Nil(t, err) 160 | assert.Equal(t, bytes1, val) 161 | assert.Equal(t, pos2, rpos2) 162 | 163 | val, rpos3, err := reader.Next() 164 | assert.Nil(t, val) 165 | assert.Equal(t, err, io.EOF) 166 | assert.Nil(t, rpos3) 167 | } 168 | 169 | func TestSegment_Reader_Padding(t *testing.T) { 170 | dir, _ := os.MkdirTemp("", "seg-test-reader-padding") 171 | seg, err := openSegmentFile(dir, ".SEG", 1) 172 | assert.Nil(t, err) 173 | defer func() { 174 | _ = seg.Remove() 175 | }() 176 | 177 | bytes1 := []byte(strings.Repeat("X", blockSize-chunkHeaderSize-7)) 178 | 179 | pos1, err := seg.Write(bytes1) 180 | assert.Nil(t, err) 181 | pos2, err := seg.Write(bytes1) 182 | assert.Nil(t, err) 183 | 184 | reader := seg.NewReader() 185 | val, rpos1, err := reader.Next() 186 | assert.Nil(t, err) 187 | assert.Equal(t, bytes1, val) 188 | assert.Equal(t, pos1.SegmentId, rpos1.SegmentId) 189 | assert.Equal(t, pos1.BlockNumber, rpos1.BlockNumber) 190 | assert.Equal(t, pos1.ChunkOffset, rpos1.ChunkOffset) 191 | 192 | val, rpos2, err := reader.Next() 193 | assert.Nil(t, err) 194 | assert.Equal(t, bytes1, val) 195 | assert.Equal(t, pos2.SegmentId, rpos2.SegmentId) 196 | assert.Equal(t, pos2.BlockNumber, rpos2.BlockNumber) 197 | assert.Equal(t, pos2.ChunkOffset, rpos2.ChunkOffset) 198 | 199 | _, _, err = reader.Next() 200 | assert.Equal(t, err, io.EOF) 201 | } 202 | 203 | func TestSegment_Reader_NOT_FULL(t *testing.T) { 204 | dir, _ := os.MkdirTemp("", "seg-test-reader-not-full") 205 | seg, err := openSegmentFile(dir, ".SEG", 1) 206 | assert.Nil(t, err) 207 | defer func() { 208 | _ = seg.Remove() 209 | }() 210 | 211 | bytes1 := []byte(strings.Repeat("X", blockSize+100)) 212 | pos1, err := seg.Write(bytes1) 213 | assert.Nil(t, err) 214 | pos2, err := seg.Write(bytes1) 215 | assert.Nil(t, err) 216 | 217 | bytes2 := []byte(strings.Repeat("X", blockSize*3+10)) 218 | pos3, err := seg.Write(bytes2) 219 | assert.Nil(t, err) 220 | pos4, err := seg.Write(bytes2) 221 | assert.Nil(t, err) 222 | 223 | reader := seg.NewReader() 224 | val, rpos1, err := reader.Next() 225 | assert.Nil(t, err) 226 | assert.Equal(t, bytes1, val) 227 | 228 | val, rpos2, err := reader.Next() 229 | assert.Nil(t, err) 230 | assert.Equal(t, bytes1, val) 231 | 232 | val, rpos3, err := reader.Next() 233 | assert.Nil(t, err) 234 | assert.Equal(t, bytes2, val) 235 | 236 | val, rpos4, err := reader.Next() 237 | assert.Nil(t, err) 238 | assert.Equal(t, bytes2, val) 239 | 240 | _, _, err = reader.Next() 241 | assert.Equal(t, err, io.EOF) 242 | 243 | assert.Equal(t, pos1, rpos1) 244 | assert.Equal(t, pos2, rpos2) 245 | 
assert.Equal(t, pos3, rpos3) 246 | assert.Equal(t, pos4, rpos4) 247 | } 248 | 249 | func TestSegment_Reader_ManyChunks_FULL(t *testing.T) { 250 | dir, _ := os.MkdirTemp("", "seg-test-reader-ManyChunks_FULL") 251 | seg, err := openSegmentFile(dir, ".SEG", 1) 252 | assert.Nil(t, err) 253 | defer func() { 254 | _ = seg.Remove() 255 | }() 256 | 257 | positions := make([]*ChunkPosition, 0) 258 | bytes1 := []byte(strings.Repeat("X", 128)) 259 | for i := 1; i <= 1000000; i++ { 260 | pos, err := seg.Write(bytes1) 261 | assert.Nil(t, err) 262 | positions = append(positions, pos) 263 | } 264 | 265 | reader := seg.NewReader() 266 | var values [][]byte 267 | var i = 0 268 | for { 269 | val, pos, err := reader.Next() 270 | if err == io.EOF { 271 | break 272 | } 273 | assert.Nil(t, err) 274 | assert.Equal(t, bytes1, val) 275 | values = append(values, val) 276 | 277 | assert.Equal(t, positions[i].SegmentId, pos.SegmentId) 278 | assert.Equal(t, positions[i].BlockNumber, pos.BlockNumber) 279 | assert.Equal(t, positions[i].ChunkOffset, pos.ChunkOffset) 280 | 281 | i++ 282 | } 283 | assert.Equal(t, 1000000, len(values)) 284 | } 285 | 286 | func TestSegment_Reader_ManyChunks_NOT_FULL(t *testing.T) { 287 | dir, _ := os.MkdirTemp("", "seg-test-reader-ManyChunks_NOT_FULL") 288 | seg, err := openSegmentFile(dir, ".SEG", 1) 289 | assert.Nil(t, err) 290 | defer func() { 291 | _ = seg.Remove() 292 | }() 293 | 294 | positions := make([]*ChunkPosition, 0) 295 | bytes1 := []byte(strings.Repeat("X", blockSize*3+10)) 296 | for i := 1; i <= 10000; i++ { 297 | pos, err := seg.Write(bytes1) 298 | assert.Nil(t, err) 299 | positions = append(positions, pos) 300 | } 301 | 302 | reader := seg.NewReader() 303 | var values [][]byte 304 | var i = 0 305 | for { 306 | val, pos, err := reader.Next() 307 | if err == io.EOF { 308 | break 309 | } 310 | assert.Nil(t, err) 311 | assert.Equal(t, bytes1, val) 312 | values = append(values, val) 313 | 314 | assert.Equal(t, positions[i].SegmentId, pos.SegmentId) 315 | assert.Equal(t, positions[i].BlockNumber, pos.BlockNumber) 316 | assert.Equal(t, positions[i].ChunkOffset, pos.ChunkOffset) 317 | 318 | i++ 319 | } 320 | assert.Equal(t, 10000, len(values)) 321 | } 322 | 323 | func TestSegment_Write_LargeSize(t *testing.T) { 324 | t.Run("Block-10000", func(t *testing.T) { 325 | testSegmentReaderLargeSize(t, blockSize-chunkHeaderSize, 10000) 326 | }) 327 | t.Run("32*Block-1000", func(t *testing.T) { 328 | testSegmentReaderLargeSize(t, 32*blockSize, 1000) 329 | }) 330 | t.Run("64*Block-100", func(t *testing.T) { 331 | testSegmentReaderLargeSize(t, 64*blockSize, 100) 332 | }) 333 | } 334 | 335 | func testSegmentReaderLargeSize(t *testing.T, size int, count int) { 336 | dir, _ := os.MkdirTemp("", "seg-test-reader-ManyChunks_large_size") 337 | seg, err := openSegmentFile(dir, ".SEG", 1) 338 | assert.Nil(t, err) 339 | defer func() { 340 | _ = seg.Remove() 341 | }() 342 | 343 | positions := make([]*ChunkPosition, 0) 344 | bytes1 := []byte(strings.Repeat("W", size)) 345 | for i := 1; i <= count; i++ { 346 | pos, err := seg.Write(bytes1) 347 | assert.Nil(t, err) 348 | positions = append(positions, pos) 349 | } 350 | 351 | reader := seg.NewReader() 352 | var values [][]byte 353 | var i = 0 354 | for { 355 | val, pos, err := reader.Next() 356 | if err == io.EOF { 357 | break 358 | } 359 | assert.Nil(t, err) 360 | assert.Equal(t, bytes1, val) 361 | values = append(values, val) 362 | 363 | assert.Equal(t, positions[i].SegmentId, pos.SegmentId) 364 | assert.Equal(t, positions[i].BlockNumber, pos.BlockNumber) 
365 | assert.Equal(t, positions[i].ChunkOffset, pos.ChunkOffset) 366 | 367 | i++ 368 | } 369 | assert.Equal(t, count, len(values)) 370 | } 371 | 372 | func TestChunkPosition_Encode(t *testing.T) { 373 | validate := func(pos *ChunkPosition) { 374 | res := pos.Encode() 375 | assert.NotNil(t, res) 376 | decRes := DecodeChunkPosition(res) 377 | assert.Equal(t, pos, decRes) 378 | } 379 | 380 | validate(&ChunkPosition{1, 2, 3, 100}) 381 | validate(&ChunkPosition{0, 0, 0, 0}) 382 | validate(&ChunkPosition{math.MaxUint32, math.MaxUint32, math.MaxInt64, math.MaxUint32}) 383 | } 384 | 385 | func TestChunkPosition_EncodeFixedSize(t *testing.T) { 386 | validate := func(pos *ChunkPosition) { 387 | res := pos.EncodeFixedSize() 388 | assert.NotNil(t, res) 389 | assert.Equal(t, binary.MaxVarintLen32*3+binary.MaxVarintLen64, len(res)) 390 | decRes := DecodeChunkPosition(res) 391 | assert.Equal(t, pos, decRes) 392 | } 393 | 394 | validate(&ChunkPosition{1, 2, 3, 100}) 395 | validate(&ChunkPosition{0, 0, 0, 0}) 396 | validate(&ChunkPosition{math.MaxUint32, math.MaxUint32, math.MaxInt64, math.MaxUint32}) 397 | } 398 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/wal.go:
--------------------------------------------------------------------------------
1 | package wal
2 | 
3 | import (
4 | 	"errors"
5 | 	"fmt"
6 | 	"io"
7 | 	"os"
8 | 	"path/filepath"
9 | 	"sort"
10 | 	"strings"
11 | 	"sync"
12 | 	"time"
13 | )
14 | 
15 | const (
16 | 	initialSegmentFileID = 1
17 | )
18 | 
19 | var (
20 | 	ErrValueTooLarge       = errors.New("the data size can't be larger than segment size")
21 | 	ErrPendingSizeTooLarge = errors.New("the upper bound of pendingWrites can't be larger than segment size")
22 | )
23 | 
24 | // WAL represents a Write-Ahead Log structure that provides durability
25 | // and fault-tolerance for incoming writes.
26 | // It consists of an activeSegment, which is the current segment file
27 | // used for new incoming writes, and olderSegments,
28 | // which is a map of segment files used for read operations.
29 | //
30 | // The options field stores various configuration options for the WAL.
31 | //
32 | // The mu sync.RWMutex is used for concurrent access to the WAL data structure,
33 | // ensuring safe access and modification.
34 | type WAL struct {
35 | 	activeSegment     *segment               // active segment file, used for new incoming writes.
36 | 	olderSegments     map[SegmentID]*segment // older segment files, only used for read.
37 | 	options           Options
38 | 	mu                sync.RWMutex
39 | 	bytesWrite        uint32
40 | 	renameIds         []SegmentID
41 | 	pendingWrites     [][]byte
42 | 	pendingSize       int64
43 | 	pendingWritesLock sync.Mutex
44 | 	closeC            chan struct{}
45 | 	syncTicker        *time.Ticker
46 | }
47 | 
48 | // Reader represents a reader for the WAL.
49 | // It consists of segmentReaders, which is a slice of segmentReader
50 | // structures sorted by segment id,
51 | // and currentReader, which is the index of the current segmentReader in the slice.
52 | //
53 | // The currentReader field is used to iterate over the segmentReaders slice.
54 | type Reader struct {
55 | 	segmentReaders []*segmentReader
56 | 	currentReader  int
57 | }
58 | 
59 | // Open opens a WAL with the given options.
60 | // It will create the directory if it does not exist, and open all segment files in the directory.
61 | // If there is no segment file in the directory, it will create a new one.
62 | func Open(options Options) (*WAL, error) {
63 | 	if !strings.HasPrefix(options.SegmentFileExt, ".") {
64 | 		return nil, fmt.Errorf("segment file extension must start with '.'")
65 | 	}
66 | 	wal := &WAL{
67 | 		options:       options,
68 | 		olderSegments: make(map[SegmentID]*segment),
69 | 		pendingWrites: make([][]byte, 0),
70 | 		closeC:        make(chan struct{}),
71 | 	}
72 | 
73 | 	// create the directory if it does not exist.
74 | 	if err := os.MkdirAll(options.DirPath, os.ModePerm); err != nil {
75 | 		return nil, err
76 | 	}
77 | 
78 | 	// iterate the dir and open all segment files.
79 | 	entries, err := os.ReadDir(options.DirPath)
80 | 	if err != nil {
81 | 		return nil, err
82 | 	}
83 | 
84 | 	// get all segment file ids.
85 | 	var segmentIDs []int
86 | 	for _, entry := range entries {
87 | 		if entry.IsDir() {
88 | 			continue
89 | 		}
90 | 		var id int
91 | 		_, err := fmt.Sscanf(entry.Name(), "%d"+options.SegmentFileExt, &id)
92 | 		if err != nil {
93 | 			continue
94 | 		}
95 | 		segmentIDs = append(segmentIDs, id)
96 | 	}
97 | 
98 | 	// empty directory, just initialize a new segment file.
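	// Segment file names are a zero-padded 9-digit id plus the configured
	// extension, e.g. 000000001.SEG (see SegmentFileName below), which is
	// what the Sscanf pattern above parses back into a numeric id.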
99 | 	if len(segmentIDs) == 0 {
100 | 		segment, err := openSegmentFile(options.DirPath, options.SegmentFileExt,
101 | 			initialSegmentFileID)
102 | 		if err != nil {
103 | 			return nil, err
104 | 		}
105 | 		wal.activeSegment = segment
106 | 	} else {
107 | 		// open the segment files in order, get the max one as the active segment file.
108 | 		sort.Ints(segmentIDs)
109 | 
110 | 		for i, segId := range segmentIDs {
111 | 			segment, err := openSegmentFile(options.DirPath, options.SegmentFileExt,
112 | 				uint32(segId))
113 | 			if err != nil {
114 | 				return nil, err
115 | 			}
116 | 			if i == len(segmentIDs)-1 {
117 | 				wal.activeSegment = segment
118 | 			} else {
119 | 				wal.olderSegments[segment.id] = segment
120 | 			}
121 | 		}
122 | 	}
123 | 
124 | 	// only start the sync operation if the SyncInterval is greater than 0.
125 | 	if wal.options.SyncInterval > 0 {
126 | 		wal.syncTicker = time.NewTicker(wal.options.SyncInterval)
127 | 		go func() {
128 | 			for {
129 | 				select {
130 | 				case <-wal.syncTicker.C:
131 | 					_ = wal.Sync()
132 | 				case <-wal.closeC:
133 | 					wal.syncTicker.Stop()
134 | 					return
135 | 				}
136 | 			}
137 | 		}()
138 | 	}
139 | 
140 | 	return wal, nil
141 | }
142 | 
143 | // SegmentFileName returns the file name of a segment file.
144 | func SegmentFileName(dirPath string, extName string, id SegmentID) string {
145 | 	return filepath.Join(dirPath, fmt.Sprintf("%09d"+extName, id))
146 | }
147 | 
148 | // OpenNewActiveSegment opens a new segment file
149 | // and sets it as the active segment file.
150 | // It is used when the active segment file is not yet full,
151 | // but the user wants to create a new segment file.
152 | //
153 | // It is now used by the Merge operation of rosedb, not a common usage for most users.
154 | func (wal *WAL) OpenNewActiveSegment() error {
155 | 	wal.mu.Lock()
156 | 	defer wal.mu.Unlock()
157 | 	// sync the active segment file.
158 | 	if err := wal.activeSegment.Sync(); err != nil {
159 | 		return err
160 | 	}
161 | 	// create a new segment file and set it as the active one.
162 | 	segment, err := openSegmentFile(wal.options.DirPath, wal.options.SegmentFileExt,
163 | 		wal.activeSegment.id+1)
164 | 	if err != nil {
165 | 		return err
166 | 	}
167 | 	wal.olderSegments[wal.activeSegment.id] = wal.activeSegment
168 | 	wal.activeSegment = segment
169 | 	return nil
170 | }
171 | 
172 | // ActiveSegmentID returns the id of the active segment file.
173 | func (wal *WAL) ActiveSegmentID() SegmentID {
174 | 	wal.mu.RLock()
175 | 	defer wal.mu.RUnlock()
176 | 
177 | 	return wal.activeSegment.id
178 | }
179 | 
180 | // IsEmpty returns whether the WAL is empty.
181 | // The WAL is empty only when there is a single active segment file and it holds no data.
182 | func (wal *WAL) IsEmpty() bool {
183 | 	wal.mu.RLock()
184 | 	defer wal.mu.RUnlock()
185 | 
186 | 	return len(wal.olderSegments) == 0 && wal.activeSegment.Size() == 0
187 | }
188 | 
189 | // SetIsStartupTraversal should only be used while the WAL is being traversed at startup,
190 | // such as during rosedb/lotusdb startup, so it's not a common usage for most users.
191 | // And notice that if you set it to true, only one reader can read the data from the WAL
192 | // (Single Thread).
193 | func (wal *WAL) SetIsStartupTraversal(v bool) {
194 | 	for _, seg := range wal.olderSegments {
195 | 		seg.isStartupTraversal = v
196 | 	}
197 | 	wal.activeSegment.isStartupTraversal = v
198 | }
199 | 
200 | // NewReaderWithMax returns a new reader for the WAL,
201 | // and the reader will only read the data from the segment file
202 | // whose id is less than or equal to the given segId.
203 | // 204 | // It is now used by the Merge operation of rosedb, not a common usage for most users. 205 | func (wal *WAL) NewReaderWithMax(segId SegmentID) *Reader { 206 | wal.mu.RLock() 207 | defer wal.mu.RUnlock() 208 | 209 | // get all segment readers. 210 | var segmentReaders []*segmentReader 211 | for _, segment := range wal.olderSegments { 212 | if segId == 0 || segment.id <= segId { 213 | reader := segment.NewReader() 214 | segmentReaders = append(segmentReaders, reader) 215 | } 216 | } 217 | if segId == 0 || wal.activeSegment.id <= segId { 218 | reader := wal.activeSegment.NewReader() 219 | segmentReaders = append(segmentReaders, reader) 220 | } 221 | 222 | // sort the segment readers by segment id. 223 | sort.Slice(segmentReaders, func(i, j int) bool { 224 | return segmentReaders[i].segment.id < segmentReaders[j].segment.id 225 | }) 226 | 227 | return &Reader{ 228 | segmentReaders: segmentReaders, 229 | currentReader: 0, 230 | } 231 | } 232 | 233 | // NewReaderWithStart returns a new reader for the WAL, 234 | // and the reader will only read the data from the segment file 235 | // whose position is greater than or equal to the given position. 236 | func (wal *WAL) NewReaderWithStart(startPos *ChunkPosition) (*Reader, error) { 237 | if startPos == nil { 238 | return nil, errors.New("start position is nil") 239 | } 240 | wal.mu.RLock() 241 | defer wal.mu.RUnlock() 242 | 243 | reader := wal.NewReader() 244 | for { 245 | // skip the segment readers whose id is less than the given position's segment id. 246 | if reader.CurrentSegmentId() < startPos.SegmentId { 247 | reader.SkipCurrentSegment() 248 | continue 249 | } 250 | // skip the chunk whose position is less than the given position. 251 | currentPos := reader.CurrentChunkPosition() 252 | if currentPos.BlockNumber >= startPos.BlockNumber && 253 | currentPos.ChunkOffset >= startPos.ChunkOffset { 254 | break 255 | } 256 | // call Next to find again. 257 | if _, _, err := reader.Next(); err != nil { 258 | if err == io.EOF { 259 | break 260 | } 261 | return nil, err 262 | } 263 | } 264 | return reader, nil 265 | } 266 | 267 | // NewReader returns a new reader for the WAL. 268 | // It will iterate all segment files and read all data from them. 269 | func (wal *WAL) NewReader() *Reader { 270 | return wal.NewReaderWithMax(0) 271 | } 272 | 273 | // Next returns the next chunk data and its position in the WAL. 274 | // If there is no data, io.EOF will be returned. 275 | // 276 | // The position can be used to read the data from the segment file. 277 | func (r *Reader) Next() ([]byte, *ChunkPosition, error) { 278 | if r.currentReader >= len(r.segmentReaders) { 279 | return nil, nil, io.EOF 280 | } 281 | 282 | data, position, err := r.segmentReaders[r.currentReader].Next() 283 | if err == io.EOF { 284 | r.currentReader++ 285 | return r.Next() 286 | } 287 | return data, position, err 288 | } 289 | 290 | // SkipCurrentSegment skips the current segment file 291 | // when reading the WAL. 292 | // 293 | // It is now used by the Merge operation of rosedb, not a common usage for most users. 294 | func (r *Reader) SkipCurrentSegment() { 295 | r.currentReader++ 296 | } 297 | 298 | // CurrentSegmentId returns the id of the current segment file 299 | // when reading the WAL. 
func (r *Reader) CurrentSegmentId() SegmentID {
	return r.segmentReaders[r.currentReader].segment.id
}

// CurrentChunkPosition returns the position of the current chunk data.
func (r *Reader) CurrentChunkPosition() *ChunkPosition {
	reader := r.segmentReaders[r.currentReader]
	return &ChunkPosition{
		SegmentId:   reader.segment.id,
		BlockNumber: reader.blockNumber,
		ChunkOffset: reader.chunkOffset,
	}
}

// ClearPendingWrites clears the pending writes and resets the pending size.
func (wal *WAL) ClearPendingWrites() {
	wal.pendingWritesLock.Lock()
	defer wal.pendingWritesLock.Unlock()

	wal.pendingSize = 0
	wal.pendingWrites = wal.pendingWrites[:0]
}

// PendingWrites adds data to wal.pendingWrites to wait for a batch write.
// If the accumulated pending data exceeds the size of one segment,
// the subsequent WriteAll call will return ErrPendingSizeTooLarge
// and clear the pending writes.
func (wal *WAL) PendingWrites(data []byte) {
	wal.pendingWritesLock.Lock()
	defer wal.pendingWritesLock.Unlock()

	size := wal.maxDataWriteSize(int64(len(data)))
	wal.pendingSize += size
	wal.pendingWrites = append(wal.pendingWrites, data)
}

// rotateActiveSegment creates a new segment file and replaces the activeSegment.
func (wal *WAL) rotateActiveSegment() error {
	if err := wal.activeSegment.Sync(); err != nil {
		return err
	}
	wal.bytesWrite = 0
	segment, err := openSegmentFile(wal.options.DirPath, wal.options.SegmentFileExt,
		wal.activeSegment.id+1)
	if err != nil {
		return err
	}
	wal.olderSegments[wal.activeSegment.id] = wal.activeSegment
	wal.activeSegment = segment
	return nil
}

// WriteAll writes wal.pendingWrites to the WAL and then clears the pending writes.
// It will not sync the segment file based on wal.options; you should call Sync() manually.
func (wal *WAL) WriteAll() ([]*ChunkPosition, error) {
	if len(wal.pendingWrites) == 0 {
		return make([]*ChunkPosition, 0), nil
	}

	wal.mu.Lock()
	defer func() {
		wal.ClearPendingWrites()
		wal.mu.Unlock()
	}()

	// if the pending size is still larger than segment size, return error
	if wal.pendingSize > wal.options.SegmentSize {
		return nil, ErrPendingSizeTooLarge
	}

	// if the active segment file is full, sync it and create a new one.
	if wal.activeSegment.Size()+wal.pendingSize > wal.options.SegmentSize {
		if err := wal.rotateActiveSegment(); err != nil {
			return nil, err
		}
	}

	// write all data to the active segment file.
	positions, err := wal.activeSegment.writeAll(wal.pendingWrites)
	if err != nil {
		return nil, err
	}

	return positions, nil
}

// Write writes the data to the WAL.
// Actually, it writes the data to the active segment file.
// It returns the position of the data in the WAL, and an error if any.
func (wal *WAL) Write(data []byte) (*ChunkPosition, error) {
	wal.mu.Lock()
	defer wal.mu.Unlock()
	if int64(len(data))+chunkHeaderSize > wal.options.SegmentSize {
		return nil, ErrValueTooLarge
	}
	// if the active segment file is full, sync it and create a new one.
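	// "Full" is judged pessimistically here: isFull asks whether the
	// worst-case on-disk footprint of this write, as estimated by
	// maxDataWriteSize (extra chunk headers plus possible block padding),
	// would push the segment past SegmentSize, so rotation can happen
	// slightly before the file is byte-exact full.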
	if wal.isFull(int64(len(data))) {
		if err := wal.rotateActiveSegment(); err != nil {
			return nil, err
		}
	}

	// write the data to the active segment file.
	position, err := wal.activeSegment.Write(data)
	if err != nil {
		return nil, err
	}

	// update the bytesWrite field.
	wal.bytesWrite += position.ChunkSize

	// sync the active segment file if needed.
	var needSync = wal.options.Sync
	if !needSync && wal.options.BytesPerSync > 0 {
		needSync = wal.bytesWrite >= wal.options.BytesPerSync
	}
	if needSync {
		if err := wal.activeSegment.Sync(); err != nil {
			return nil, err
		}
		wal.bytesWrite = 0
	}

	return position, nil
}

// Read reads the data from the WAL according to the given position.
func (wal *WAL) Read(pos *ChunkPosition) ([]byte, error) {
	wal.mu.RLock()
	defer wal.mu.RUnlock()

	// find the segment file according to the position.
	var segment *segment
	if pos.SegmentId == wal.activeSegment.id {
		segment = wal.activeSegment
	} else {
		segment = wal.olderSegments[pos.SegmentId]
	}

	if segment == nil {
		return nil, fmt.Errorf("segment file %d%s not found", pos.SegmentId, wal.options.SegmentFileExt)
	}

	// read the data from the segment file.
	return segment.Read(pos.BlockNumber, pos.ChunkOffset)
}

// Close closes the WAL.
func (wal *WAL) Close() error {
	wal.mu.Lock()
	defer wal.mu.Unlock()

	select {
	case <-wal.closeC:
		// channel is already closed
	default:
		close(wal.closeC)
	}

	// close all segment files.
	for _, segment := range wal.olderSegments {
		if err := segment.Close(); err != nil {
			return err
		}
		wal.renameIds = append(wal.renameIds, segment.id)
	}
	wal.olderSegments = nil

	wal.renameIds = append(wal.renameIds, wal.activeSegment.id)
	// close the active segment file.
	return wal.activeSegment.Close()
}

// Delete deletes all segment files of the WAL.
func (wal *WAL) Delete() error {
	wal.mu.Lock()
	defer wal.mu.Unlock()

	// delete all segment files.
	for _, segment := range wal.olderSegments {
		if err := segment.Remove(); err != nil {
			return err
		}
	}
	wal.olderSegments = nil

	// delete the active segment file.
	return wal.activeSegment.Remove()
}

// Sync syncs the active segment file to stable storage like disk.
func (wal *WAL) Sync() error {
	wal.mu.Lock()
	defer wal.mu.Unlock()

	return wal.activeSegment.Sync()
}

// RenameFileExt renames the extension of all segment files.
// It is now used by the Merge operation of lotusdb, not a common usage for most users.
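//
// A sketch (the extension value is assumed; note that renameIds is only
// populated by Close, so the WAL should be closed before renaming):
//
//	_ = walFile.Close()
//	err := walFile.RenameFileExt(".SEG.NEW")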
func (wal *WAL) RenameFileExt(ext string) error {
	if !strings.HasPrefix(ext, ".") {
		return fmt.Errorf("segment file extension must start with '.'")
	}
	wal.mu.Lock()
	defer wal.mu.Unlock()

	renameFile := func(id SegmentID) error {
		oldName := SegmentFileName(wal.options.DirPath, wal.options.SegmentFileExt, id)
		newName := SegmentFileName(wal.options.DirPath, ext, id)
		return os.Rename(oldName, newName)
	}

	for _, id := range wal.renameIds {
		if err := renameFile(id); err != nil {
			return err
		}
	}

	wal.options.SegmentFileExt = ext
	return nil
}

func (wal *WAL) isFull(delta int64) bool {
	return wal.activeSegment.Size()+wal.maxDataWriteSize(delta) > wal.options.SegmentSize
}

// maxDataWriteSize calculates the maximum possible size on disk:
// maximum size = max padding + (num_blocks + 1) * headerSize + dataSize
func (wal *WAL) maxDataWriteSize(size int64) int64 {
	return chunkHeaderSize + size + (size/blockSize+1)*chunkHeaderSize
}
--------------------------------------------------------------------------------
/segment.go:
--------------------------------------------------------------------------------
package wal

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash/crc32"
	"io"
	"os"
	"sync"

	"github.com/valyala/bytebufferpool"
)

type ChunkType = byte
type SegmentID = uint32

const (
	ChunkTypeFull ChunkType = iota
	ChunkTypeFirst
	ChunkTypeMiddle
	ChunkTypeLast
)

var (
	ErrClosed     = errors.New("the segment file is closed")
	ErrInvalidCRC = errors.New("invalid crc, the data may be corrupted")
)

const (
	// 7 Bytes
	// Checksum Length Type
	//    4       2     1
	chunkHeaderSize = 7

	// 32 KB
	blockSize = 32 * KB

	fileModePerm = 0644

	// uint32 + uint32 + int64 + uint32
	// segmentId + BlockNumber + ChunkOffset + ChunkSize
	maxLen = binary.MaxVarintLen32*3 + binary.MaxVarintLen64
)

// segment represents a single segment file in the WAL.
// The segment file is append-only, and the data is written in blocks.
// Each block is 32KB, and the data is written in chunks.
type segment struct {
	id                 SegmentID
	fd                 *os.File
	currentBlockNumber uint32
	currentBlockSize   uint32
	closed             bool
	header             []byte
	startupBlock       *startupBlock
	isStartupTraversal bool
}

// segmentReader is used to iterate all the data from the segment file.
// You can call Next to get the next chunk data,
// and io.EOF will be returned when there is no more data.
type segmentReader struct {
	segment     *segment
	blockNumber uint32
	chunkOffset int64
}

// There is only one reader (a single goroutine) for startup traversal,
// so we can reuse one block for the whole traversal
// to avoid memory allocation.
type startupBlock struct {
	block       []byte
	blockNumber int64
}

// ChunkPosition represents the position of a chunk in a segment file.
// Used to read the data from the segment file.
type ChunkPosition struct {
	SegmentId SegmentID
	// BlockNumber The block number of the chunk in the segment file.
	BlockNumber uint32
	// ChunkOffset The start offset of the chunk in the block.
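	// It is relative to the start of the block, not of the file: the
	// absolute file offset of a chunk is BlockNumber*blockSize + ChunkOffset.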
	ChunkOffset int64
	// ChunkSize How many bytes the chunk data takes up in the segment file.
	ChunkSize uint32
}

var blockPool = sync.Pool{
	New: func() interface{} {
		return make([]byte, blockSize)
	},
}

func getBuffer() []byte {
	return blockPool.Get().([]byte)
}

func putBuffer(buf []byte) {
	blockPool.Put(buf)
}

// openSegmentFile opens an existing segment file or creates a new one.
func openSegmentFile(dirPath, extName string, id uint32) (*segment, error) {
	fd, err := os.OpenFile(
		SegmentFileName(dirPath, extName, id),
		os.O_CREATE|os.O_RDWR|os.O_APPEND,
		fileModePerm,
	)

	if err != nil {
		return nil, err
	}

	// set the current block number and block size.
	offset, err := fd.Seek(0, io.SeekEnd)
	if err != nil {
		return nil, fmt.Errorf("seek to the end of segment file %d%s failed: %v", id, extName, err)
	}

	return &segment{
		id:                 id,
		fd:                 fd,
		header:             make([]byte, chunkHeaderSize),
		currentBlockNumber: uint32(offset / blockSize),
		currentBlockSize:   uint32(offset % blockSize),
		startupBlock: &startupBlock{
			block:       make([]byte, blockSize),
			blockNumber: -1,
		},
		isStartupTraversal: false,
	}, nil
}

// NewReader creates a new segment reader.
// You can call Next to get the next chunk data,
// and io.EOF will be returned when there is no more data.
func (seg *segment) NewReader() *segmentReader {
	return &segmentReader{
		segment:     seg,
		blockNumber: 0,
		chunkOffset: 0,
	}
}

// Sync flushes the segment file to disk.
func (seg *segment) Sync() error {
	if seg.closed {
		return nil
	}
	return seg.fd.Sync()
}

// Remove removes the segment file.
func (seg *segment) Remove() error {
	if !seg.closed {
		seg.closed = true
		if err := seg.fd.Close(); err != nil {
			return err
		}
	}

	return os.Remove(seg.fd.Name())
}

// Close closes the segment file.
func (seg *segment) Close() error {
	if seg.closed {
		return nil
	}

	seg.closed = true
	return seg.fd.Close()
}

// Size returns the size of the segment file.
func (seg *segment) Size() int64 {
	size := int64(seg.currentBlockNumber) * int64(blockSize)
	return size + int64(seg.currentBlockSize)
}

// writeToBuffer calculates the ChunkPosition for the data, writes the data
// to the chunk buffer, and updates the segment status.
// The data will be written in chunks, and a chunk has four types:
// ChunkTypeFull, ChunkTypeFirst, ChunkTypeMiddle, ChunkTypeLast.
//
// Each chunk has a header, and the header contains the checksum, length and type.
// The payload of the chunk is the actual data you want to write.
func (seg *segment) writeToBuffer(data []byte, chunkBuffer *bytebufferpool.ByteBuffer) (*ChunkPosition, error) {
	startBufferLen := chunkBuffer.Len()
	padding := uint32(0)

	if seg.closed {
		return nil, ErrClosed
	}

	// if the remaining block space cannot hold the chunk header, pad the block.
	if seg.currentBlockSize+chunkHeaderSize >= blockSize {
		// padding if necessary
		if seg.currentBlockSize < blockSize {
			p := make([]byte, blockSize-seg.currentBlockSize)
			chunkBuffer.B = append(chunkBuffer.B, p...)
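			// For example, if currentBlockSize is 32KB-3, fewer than the
			// 7 header bytes remain, so 3 zero bytes are appended and the
			// next chunk starts at offset 0 of a fresh block.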
			padding += blockSize - seg.currentBlockSize

			// a new block
			seg.currentBlockNumber += 1
			seg.currentBlockSize = 0
		}
	}

	// record the start position of the chunk, so the user can use it to read the data later.
	position := &ChunkPosition{
		SegmentId:   seg.id,
		BlockNumber: seg.currentBlockNumber,
		ChunkOffset: int64(seg.currentBlockSize),
	}

	dataSize := uint32(len(data))
	// The entire chunk can fit into the block.
	if seg.currentBlockSize+dataSize+chunkHeaderSize <= blockSize {
		seg.appendChunkBuffer(chunkBuffer, data, ChunkTypeFull)
		position.ChunkSize = dataSize + chunkHeaderSize
	} else {
		// If the data cannot fit into the current block,
		// it is split into multiple chunks written across blocks.
		var (
			leftSize             = dataSize
			blockCount    uint32 = 0
			currBlockSize        = seg.currentBlockSize
		)

		for leftSize > 0 {
			chunkSize := blockSize - currBlockSize - chunkHeaderSize
			if chunkSize > leftSize {
				chunkSize = leftSize
			}

			var end = dataSize - leftSize + chunkSize
			if end > dataSize {
				end = dataSize
			}

			// append the chunks to the buffer
			var chunkType ChunkType
			switch leftSize {
			case dataSize: // First chunk
				chunkType = ChunkTypeFirst
			case chunkSize: // Last chunk
				chunkType = ChunkTypeLast
			default: // Middle chunk
				chunkType = ChunkTypeMiddle
			}
			seg.appendChunkBuffer(chunkBuffer, data[dataSize-leftSize:end], chunkType)

			leftSize -= chunkSize
			blockCount += 1
			currBlockSize = (currBlockSize + chunkSize + chunkHeaderSize) % blockSize
		}
		position.ChunkSize = blockCount*chunkHeaderSize + dataSize
	}

	// the buffer length must be equal to the chunk size plus the padding length.
	endBufferLen := chunkBuffer.Len()
	if position.ChunkSize+padding != uint32(endBufferLen-startBufferLen) {
		return nil, fmt.Errorf("the chunk size %d is not equal to the buffer length %d",
			position.ChunkSize+padding, endBufferLen-startBufferLen)
	}

	// update segment status
	seg.currentBlockSize += position.ChunkSize
	if seg.currentBlockSize >= blockSize {
		seg.currentBlockNumber += seg.currentBlockSize / blockSize
		seg.currentBlockSize = seg.currentBlockSize % blockSize
	}

	return position, nil
}

// writeAll writes a batch of data to the segment file.
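// All chunks are first staged in a single pooled buffer and flushed with
// one fd.Write call; on error the in-memory block counters are simply
// rolled back, and a partially persisted batch, if any, is caught by the
// CRC check on a later read.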
func (seg *segment) writeAll(data [][]byte) (positions []*ChunkPosition, err error) {
	if seg.closed {
		return nil, ErrClosed
	}

	// if any error occurs, restore the segment status.
	originBlockNumber := seg.currentBlockNumber
	originBlockSize := seg.currentBlockSize

	// init chunk buffer
	chunkBuffer := bytebufferpool.Get()
	chunkBuffer.Reset()
	defer func() {
		if err != nil {
			seg.currentBlockNumber = originBlockNumber
			seg.currentBlockSize = originBlockSize
		}
		bytebufferpool.Put(chunkBuffer)
	}()

	// write all data to the chunk buffer
	var pos *ChunkPosition
	positions = make([]*ChunkPosition, len(data))
	for i := 0; i < len(positions); i++ {
		pos, err = seg.writeToBuffer(data[i], chunkBuffer)
		if err != nil {
			return
		}
		positions[i] = pos
	}
	// write the chunk buffer to the segment file
	if err = seg.writeChunkBuffer(chunkBuffer); err != nil {
		return
	}
	return
}

// Write writes the data to the segment file.
func (seg *segment) Write(data []byte) (pos *ChunkPosition, err error) {
	if seg.closed {
		return nil, ErrClosed
	}

	originBlockNumber := seg.currentBlockNumber
	originBlockSize := seg.currentBlockSize

	// init chunk buffer
	chunkBuffer := bytebufferpool.Get()
	chunkBuffer.Reset()
	defer func() {
		if err != nil {
			seg.currentBlockNumber = originBlockNumber
			seg.currentBlockSize = originBlockSize
		}
		bytebufferpool.Put(chunkBuffer)
	}()

	// write all data to the chunk buffer
	pos, err = seg.writeToBuffer(data, chunkBuffer)
	if err != nil {
		return
	}
	// write the chunk buffer to the segment file
	if err = seg.writeChunkBuffer(chunkBuffer); err != nil {
		return
	}

	return
}

func (seg *segment) appendChunkBuffer(buf *bytebufferpool.ByteBuffer, data []byte, chunkType ChunkType) {
	// Length: 2 Bytes, index 4-5
	binary.LittleEndian.PutUint16(seg.header[4:6], uint16(len(data)))
	// Type: 1 Byte, index 6
	seg.header[6] = chunkType
	// Checksum: 4 Bytes, index 0-3
	sum := crc32.ChecksumIEEE(seg.header[4:])
	sum = crc32.Update(sum, crc32.IEEETable, data)
	binary.LittleEndian.PutUint32(seg.header[:4], sum)

	// append the header and data to the segment chunk buffer
	buf.B = append(buf.B, seg.header...)
	buf.B = append(buf.B, data...)
}

// write the pending chunk buffer to the segment file
func (seg *segment) writeChunkBuffer(buf *bytebufferpool.ByteBuffer) error {
	if seg.currentBlockSize > blockSize {
		return errors.New("the current block size exceeds the maximum block size")
	}

	// write the data into the underlying file
	if _, err := seg.fd.Write(buf.Bytes()); err != nil {
		return err
	}

	// the cached startup block cannot be reused after new writes.
	seg.startupBlock.blockNumber = -1
	return nil
}

// Read reads the data from the segment file by the block number and chunk offset.
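// The checksum stored in each chunk header covers the length and type bytes
// plus the payload, so torn or corrupted chunks surface as ErrInvalidCRC
// rather than as silently bad data.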
func (seg *segment) Read(blockNumber uint32, chunkOffset int64) ([]byte, error) {
	value, _, err := seg.readInternal(blockNumber, chunkOffset)
	return value, err
}

func (seg *segment) readInternal(blockNumber uint32, chunkOffset int64) ([]byte, *ChunkPosition, error) {
	if seg.closed {
		return nil, nil, ErrClosed
	}

	var (
		result    []byte
		block     []byte
		segSize   = seg.Size()
		nextChunk = &ChunkPosition{SegmentId: seg.id}
	)

	if seg.isStartupTraversal {
		block = seg.startupBlock.block
	} else {
		block = getBuffer()
		if len(block) != blockSize {
			block = make([]byte, blockSize)
		}
		defer putBuffer(block)
	}

	for {
		size := int64(blockSize)
		offset := int64(blockNumber) * blockSize
		if size+offset > segSize {
			size = segSize - offset
		}

		if chunkOffset >= size {
			return nil, nil, io.EOF
		}

		if seg.isStartupTraversal {
			// There are two cases in which we must read the block from the file:
			// 1. the requested block is not the cached one;
			// 2. new writes were appended to the block, and the block is
			//    still smaller than 32KB, so we must read it again to pick
			//    up the new writes.
			if seg.startupBlock.blockNumber != int64(blockNumber) || size != blockSize {
				// read the block from the segment file at the specified offset.
				_, err := seg.fd.ReadAt(block[0:size], offset)
				if err != nil {
					return nil, nil, err
				}
				// remember the block
				seg.startupBlock.blockNumber = int64(blockNumber)
			}
		} else {
			if _, err := seg.fd.ReadAt(block[0:size], offset); err != nil {
				return nil, nil, err
			}
		}

		// header
		header := block[chunkOffset : chunkOffset+chunkHeaderSize]

		// length
		length := binary.LittleEndian.Uint16(header[4:6])

		// copy data
		start := chunkOffset + chunkHeaderSize
		result = append(result, block[start:start+int64(length)]...)

		// check sum
		checksumEnd := chunkOffset + chunkHeaderSize + int64(length)
		checksum := crc32.ChecksumIEEE(block[chunkOffset+4 : checksumEnd])
		savedSum := binary.LittleEndian.Uint32(header[:4])
		if savedSum != checksum {
			return nil, nil, ErrInvalidCRC
		}

		// type
		chunkType := header[6]

		if chunkType == ChunkTypeFull || chunkType == ChunkTypeLast {
			nextChunk.BlockNumber = blockNumber
			nextChunk.ChunkOffset = checksumEnd
			// If this is the last chunk in the block, and the remaining
			// block space is padding, the next chunk should be in the next block.
			if checksumEnd+chunkHeaderSize >= blockSize {
				nextChunk.BlockNumber += 1
				nextChunk.ChunkOffset = 0
			}
			break
		}
		blockNumber += 1
		chunkOffset = 0
	}
	return result, nextChunk, nil
}

// Next returns the next chunk data.
// You can call it repeatedly until io.EOF is returned.
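//
// A minimal iteration sketch (seg is an assumed open *segment):
//
//	r := seg.NewReader()
//	for {
//		data, pos, err := r.Next()
//		if err == io.EOF {
//			break
//		}
//		_, _ = data, pos
//	}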
func (segReader *segmentReader) Next() ([]byte, *ChunkPosition, error) {
	// The segment file is closed
	if segReader.segment.closed {
		return nil, nil, ErrClosed
	}

	// this position describes the current chunk info
	chunkPosition := &ChunkPosition{
		SegmentId:   segReader.segment.id,
		BlockNumber: segReader.blockNumber,
		ChunkOffset: segReader.chunkOffset,
	}

	value, nextChunk, err := segReader.segment.readInternal(
		segReader.blockNumber,
		segReader.chunkOffset,
	)
	if err != nil {
		return nil, nil, err
	}

	// Calculate the chunk size.
	// Remember that the chunk size is just an estimated value,
	// not accurate, so don't use it for any important logic.
	chunkPosition.ChunkSize =
		nextChunk.BlockNumber*blockSize + uint32(nextChunk.ChunkOffset) -
			(segReader.blockNumber*blockSize + uint32(segReader.chunkOffset))

	// update the position
	segReader.blockNumber = nextChunk.BlockNumber
	segReader.chunkOffset = nextChunk.ChunkOffset

	return value, chunkPosition, nil
}

// Encode encodes the chunk position to a byte slice.
// It returns the slice trimmed to the actually occupied bytes.
// You can decode it by calling wal.DecodeChunkPosition().
func (cp *ChunkPosition) Encode() []byte {
	return cp.encode(true)
}

// EncodeFixedSize encodes the chunk position to a byte slice.
// It returns a slice of the fixed size "maxLen".
// You can decode it by calling wal.DecodeChunkPosition().
func (cp *ChunkPosition) EncodeFixedSize() []byte {
	return cp.encode(false)
}

// encode the chunk position to a byte slice.
func (cp *ChunkPosition) encode(shrink bool) []byte {
	buf := make([]byte, maxLen)

	var index = 0
	// SegmentId
	index += binary.PutUvarint(buf[index:], uint64(cp.SegmentId))
	// BlockNumber
	index += binary.PutUvarint(buf[index:], uint64(cp.BlockNumber))
	// ChunkOffset
	index += binary.PutUvarint(buf[index:], uint64(cp.ChunkOffset))
	// ChunkSize
	index += binary.PutUvarint(buf[index:], uint64(cp.ChunkSize))

	if shrink {
		return buf[:index]
	}
	return buf
}

// DecodeChunkPosition decodes the chunk position from a byte slice.
// You can encode it by calling wal.ChunkPosition.Encode().
func DecodeChunkPosition(buf []byte) *ChunkPosition {
	if len(buf) == 0 {
		return nil
	}

	var index = 0
	// SegmentId
	segmentId, n := binary.Uvarint(buf[index:])
	index += n
	// BlockNumber
	blockNumber, n := binary.Uvarint(buf[index:])
	index += n
	// ChunkOffset
	chunkOffset, n := binary.Uvarint(buf[index:])
	index += n
	// ChunkSize
	chunkSize, n := binary.Uvarint(buf[index:])
	index += n

	return &ChunkPosition{
		SegmentId:   uint32(segmentId),
		BlockNumber: uint32(blockNumber),
		ChunkOffset: int64(chunkOffset),
		ChunkSize:   uint32(chunkSize),
	}
}
--------------------------------------------------------------------------------
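/examples/chunk_position_roundtrip.go (an illustrative sketch; this file name and program are assumed, not part of the repository):
--------------------------------------------------------------------------------
// This sketch shows the ChunkPosition encode/decode round-trip described
// above, e.g. for persisting positions in an index and reading them back.
package main

import (
	"fmt"

	"github.com/rosedblabs/wal"
)

func main() {
	pos := &wal.ChunkPosition{
		SegmentId:   1,
		BlockNumber: 2,
		ChunkOffset: 100,
		ChunkSize:   512,
	}

	// Encode returns only the occupied bytes; EncodeFixedSize would
	// return a fixed-size slice instead.
	decoded := wal.DecodeChunkPosition(pos.Encode())
	fmt.Println(decoded.SegmentId, decoded.BlockNumber, decoded.ChunkOffset, decoded.ChunkSize)
	// Output: 1 2 100 512
}
--------------------------------------------------------------------------------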