├── pkg ├── engine │ ├── storage │ │ ├── manager_wal.go │ │ ├── retry.go │ │ └── sequence_test.go │ ├── errors.go │ ├── interfaces │ │ ├── errors.go │ │ ├── compaction.go │ │ ├── storage.go │ │ ├── transaction.go │ │ └── engine.go │ ├── replication.go │ ├── compat.go │ └── iterator │ │ └── factory.go ├── transport │ ├── network.go │ ├── common_test.go │ ├── metrics_extended.go │ ├── common.go │ ├── registry.go │ ├── metrics_test.go │ ├── registry_test.go │ ├── metrics.go │ └── interface.go ├── common │ ├── iterator │ │ ├── composite │ │ │ └── composite.go │ │ ├── iterator.go │ │ ├── adapter_pattern.go │ │ ├── filtered │ │ │ └── filtered.go │ │ └── bounded │ │ │ └── bounded.go │ └── log │ │ └── logger_test.go ├── transaction │ ├── errors.go │ ├── storage.go │ ├── interface.go │ ├── mock_stats_test.go │ └── manager.go ├── sstable │ ├── block │ │ ├── types.go │ │ ├── block_reader.go │ │ └── block_iterator_security_test.go │ ├── sstable.go │ ├── iterator_adapter.go │ ├── integration_test.go │ ├── bloom_test.go │ ├── bench_bloom_test.go │ ├── error_handling_test.go │ ├── footer │ │ ├── footer_test.go │ │ └── footer.go │ ├── reader_test.go │ └── sstable_test.go ├── client │ ├── simple_test.go │ ├── options_test.go │ ├── utils.go │ └── replication_test.go ├── replication │ ├── primary_info.go │ ├── info_provider.go │ ├── engine_applier.go │ ├── interfaces.go │ └── primary_test.go ├── wal │ ├── observer.go │ └── overflow_test.go ├── version │ ├── README.md │ └── version.go ├── stats │ └── interface.go ├── compaction │ ├── compat.go │ ├── compaction.go │ ├── file_tracker.go │ ├── interfaces.go │ └── base_strategy.go ├── memtable │ ├── iterator_adapter.go │ ├── recovery.go │ └── memtable.go └── config │ ├── config_test.go │ └── manifest_test.go ├── .gitignore ├── go.mod ├── Makefile ├── .gitea └── workflows │ └── ci.yml ├── CONTRIBUTING.md ├── cmd ├── storage-bench │ ├── README.md │ └── report.go └── kevo │ └── server_test.go ├── go.sum └── proto └── kevo └── replication └── replication.proto /pkg/engine/storage/manager_wal.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "github.com/KevoDB/kevo/pkg/wal" 5 | ) 6 | 7 | // GetWAL returns the storage manager's WAL instance 8 | // This is used by the replication manager to access the WAL 9 | func (m *Manager) GetWAL() *wal.WAL { 10 | m.mu.RLock() 11 | defer m.mu.RUnlock() 12 | 13 | return m.wal 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Output of the coverage, benchmarking, etc. 9 | *.out 10 | *.prof 11 | benchmark-data 12 | 13 | # Executables 14 | ./gs 15 | ./storage-bench 16 | 17 | # Dependency directories 18 | vendor/ 19 | 20 | # IDE files 21 | .idea/ 22 | .vscode/ 23 | *.swp 24 | *.swo 25 | 26 | # macOS files 27 | .DS_Store 28 | -------------------------------------------------------------------------------- /pkg/engine/errors.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import "errors" 4 | 5 | var ( 6 | // ErrEngineClosed is returned when operations are performed on a closed engine 7 | ErrEngineClosed = errors.New("engine is closed") 8 | // ErrKeyNotFound is returned when a key is not found 9 | ErrKeyNotFound = errors.New("key not found") 10 | // ErrReadOnlyMode is returned when write operations are attempted while the engine is in read-only mode 11 | ErrReadOnlyMode = errors.New("engine is in read-only mode (replica)") 12 | ) 13 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/KevoDB/kevo 2 | 3 | go 1.24.2 4 | 5 | require ( 6 | github.com/cespare/xxhash/v2 v2.3.0 7 | github.com/chzyer/readline v1.5.1 8 | github.com/klauspost/compress v1.18.0 9 | google.golang.org/grpc v1.72.0 10 | google.golang.org/protobuf v1.36.6 11 | ) 12 | 13 | require ( 14 | golang.org/x/net v0.38.0 // indirect 15 | golang.org/x/sys v0.31.0 // indirect 16 | golang.org/x/text v0.23.0 // indirect 17 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a // indirect 18 | ) 19 | -------------------------------------------------------------------------------- /pkg/engine/interfaces/errors.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "errors" 4 | 5 | // Common error types used throughout the engine 6 | // Note: Some errors are defined as constants in engine.go 7 | var ( 8 | // ErrReadOnlyTransaction is returned when attempting to write in a read-only transaction 9 | ErrReadOnlyTransaction = errors.New("transaction is read-only") 10 | 11 | // ErrTransactionClosed is returned when operations are performed on a completed transaction 12 | ErrTransactionClosed = errors.New("transaction is already committed or rolled back") 13 | ) 14 | -------------------------------------------------------------------------------- /pkg/transport/network.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "crypto/tls" 5 | "net" 6 | ) 7 | 8 | // CreateListener creates a network listener with optional TLS 9 | func CreateListener(network, address string, tlsConfig *tls.Config) (net.Listener, error) { 10 | // Create the listener 11 | listener, err := net.Listen(network, address) 12 | if err != nil { 13 | return nil, err 14 | } 15 | 16 | // If TLS is configured, wrap the listener 17 | if tlsConfig != nil { 18 | listener = tls.NewListener(listener, tlsConfig) 19 | } 20 | 21 | return listener, nil 22 | } 23 | -------------------------------------------------------------------------------- /pkg/common/iterator/composite/composite.go: -------------------------------------------------------------------------------- 1 | package composite 2 | 3 | import ( 4 | "github.com/KevoDB/kevo/pkg/common/iterator" 5 | ) 6 | 7 | // CompositeIterator is an interface for iterators that combine multiple source iterators 8 | // into a single logical view. 9 | type CompositeIterator interface { 10 | // Embeds the basic Iterator interface 11 | iterator.Iterator 12 | 13 | // NumSources returns the number of source iterators 14 | NumSources() int 15 | 16 | // GetSourceIterators returns the underlying source iterators 17 | GetSourceIterators() []iterator.Iterator 18 | } 19 | -------------------------------------------------------------------------------- /pkg/transaction/errors.go: -------------------------------------------------------------------------------- 1 | package transaction 2 | 3 | import "errors" 4 | 5 | // Common errors for transaction operations 6 | var ( 7 | // ErrReadOnlyTransaction is returned when a write operation is attempted on a read-only transaction 8 | ErrReadOnlyTransaction = errors.New("cannot write to a read-only transaction") 9 | 10 | // ErrTransactionClosed is returned when an operation is attempted on a closed transaction 11 | ErrTransactionClosed = errors.New("transaction already committed or rolled back") 12 | 13 | // ErrKeyNotFound is returned when a key doesn't exist 14 | ErrKeyNotFound = errors.New("key not found") 15 | 16 | // ErrInvalidEngine is returned when an incompatible engine type is provided 17 | ErrInvalidEngine = errors.New("invalid engine type") 18 | ) 19 | -------------------------------------------------------------------------------- /pkg/transaction/storage.go: -------------------------------------------------------------------------------- 1 | package transaction 2 | 3 | import ( 4 | "github.com/KevoDB/kevo/pkg/common/iterator" 5 | "github.com/KevoDB/kevo/pkg/wal" 6 | ) 7 | 8 | // StorageBackend defines the minimal interface that a storage backend must implement 9 | // to be used with transactions 10 | type StorageBackend interface { 11 | // Get retrieves a value for the given key 12 | Get(key []byte) ([]byte, error) 13 | 14 | // ApplyBatch applies a batch of operations atomically 15 | ApplyBatch(entries []*wal.Entry) error 16 | 17 | // GetIterator returns an iterator over all keys 18 | GetIterator() (iterator.Iterator, error) 19 | 20 | // GetRangeIterator returns an iterator limited to a specific key range 21 | GetRangeIterator(startKey, endKey []byte) (iterator.Iterator, error) 22 | } 23 | -------------------------------------------------------------------------------- /pkg/engine/interfaces/compaction.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | // CompactionManager handles the compaction of SSTables 4 | type CompactionManager interface { 5 | // Core operations 6 | TriggerCompaction() error 7 | CompactRange(startKey, endKey []byte) error 8 | 9 | // Tombstone management 10 | TrackTombstone(key []byte) 11 | ForcePreserveTombstone(key []byte) 12 | 13 | // Lifecycle management 14 | Start() error 15 | Stop() error 16 | 17 | // Statistics 18 | GetCompactionStats() map[string]interface{} 19 | } 20 | 21 | // CompactionCoordinator handles scheduling and coordination of compaction 22 | type CompactionCoordinator interface { 23 | CompactionManager 24 | 25 | // Coordination methods 26 | ScheduleCompaction() error 27 | IsCompactionRunning() bool 28 | WaitForCompaction() error 29 | } 30 | -------------------------------------------------------------------------------- /pkg/sstable/block/types.go: -------------------------------------------------------------------------------- 1 | package block 2 | 3 | // Entry represents a key-value pair within the block 4 | type Entry struct { 5 | Key []byte 6 | Value []byte 7 | SequenceNum uint64 // Sequence number for versioning 8 | } 9 | 10 | const ( 11 | // BlockSize is the target size for each block 12 | BlockSize = 16 * 1024 // 16KB 13 | // RestartInterval defines how often we store a full key 14 | RestartInterval = 16 15 | // MaxBlockEntries is the maximum number of entries per block 16 | MaxBlockEntries = 1024 17 | // BlockFooterSize is the size of the footer (checksum + restart point count) 18 | BlockFooterSize = 8 + 4 // 8 bytes for checksum, 4 for restart count 19 | // TombstoneValueLengthMarker is used to mark tombstones in serialized blocks 20 | TombstoneValueLengthMarker = uint32(0xFFFFFFFF) 21 | ) 22 | -------------------------------------------------------------------------------- /pkg/client/simple_test.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/KevoDB/kevo/pkg/transport" 7 | ) 8 | 9 | // mockTransport is a simple mock for testing 10 | type mockTransport struct{} 11 | 12 | // Create a simple mock client factory for testing 13 | func mockClientFactory(endpoint string, options transport.TransportOptions) (transport.Client, error) { 14 | return &mockClient{}, nil 15 | } 16 | 17 | func TestClientCreation(t *testing.T) { 18 | // First, register our mock transport 19 | transport.RegisterClientTransport("mock_test", mockClientFactory) 20 | 21 | // Create client options using our mock transport 22 | options := DefaultClientOptions() 23 | options.TransportType = "mock_test" 24 | 25 | // Create a client 26 | client, err := NewClient(options) 27 | if err != nil { 28 | t.Fatalf("Failed to create client: %v", err) 29 | } 30 | 31 | // Verify the client was created 32 | if client == nil { 33 | t.Fatal("Client is nil") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Version information 2 | VERSION ?= $(shell grep -E '^\s*Version\s*=' pkg/version/version.go | cut -d'"' -f2) 3 | GIT_COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") 4 | BUILD_TIME ?= $(shell date -u '+%Y-%m-%d_%H:%M:%S') 5 | GO_VERSION ?= $(shell go version | cut -d' ' -f3) 6 | 7 | # Build flags 8 | LDFLAGS := -ldflags "\ 9 | -X github.com/KevoDB/kevo/pkg/version.Version=$(VERSION) \ 10 | -X github.com/KevoDB/kevo/pkg/version.GitCommit=$(GIT_COMMIT) \ 11 | -X github.com/KevoDB/kevo/pkg/version.BuildTime=$(BUILD_TIME) \ 12 | -X github.com/KevoDB/kevo/pkg/version.GoVersion=$(GO_VERSION)" 13 | 14 | .PHONY: all build clean test version 15 | 16 | all: build 17 | 18 | build: 19 | go build $(LDFLAGS) -o kevo ./cmd/kevo 20 | 21 | clean: 22 | rm -f kevo 23 | 24 | test: 25 | go test ./... 26 | 27 | # Show version information 28 | version: 29 | @echo "Version: $(VERSION)" 30 | @echo "Git Commit: $(GIT_COMMIT)" 31 | @echo "Build Time: $(BUILD_TIME)" 32 | @echo "Go Version: $(GO_VERSION)" 33 | -------------------------------------------------------------------------------- /pkg/replication/primary_info.go: -------------------------------------------------------------------------------- 1 | package replication 2 | 3 | // GetReplicaInfo returns information about all connected replicas 4 | func (p *Primary) GetReplicaInfo() []ReplicationNodeInfo { 5 | p.mu.RLock() 6 | defer p.mu.RUnlock() 7 | 8 | var replicas []ReplicationNodeInfo 9 | 10 | // Convert replica sessions to ReplicationNodeInfo 11 | for _, session := range p.sessions { 12 | if !session.Connected { 13 | continue 14 | } 15 | 16 | replica := ReplicationNodeInfo{ 17 | Address: session.ListenerAddress, // Use actual listener address 18 | LastSequence: session.LastAckSequence, 19 | Available: session.Active, 20 | Region: "", 21 | Meta: map[string]string{}, 22 | } 23 | 24 | replicas = append(replicas, replica) 25 | } 26 | 27 | return replicas 28 | } 29 | 30 | // GetLastSequence returns the highest sequence number that has been synced to disk 31 | func (p *Primary) GetLastSequence() uint64 { 32 | p.mu.RLock() 33 | defer p.mu.RUnlock() 34 | return p.lastSyncedSeq 35 | } 36 | -------------------------------------------------------------------------------- /pkg/common/iterator/iterator.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | // Iterator defines the interface for iterating over key-value pairs 4 | // This is used across the storage engine components to provide a consistent 5 | // way to traverse data regardless of where it's stored. 6 | type Iterator interface { 7 | // SeekToFirst positions the iterator at the first key 8 | SeekToFirst() 9 | 10 | // SeekToLast positions the iterator at the last key 11 | SeekToLast() 12 | 13 | // Seek positions the iterator at the first key >= target 14 | Seek(target []byte) bool 15 | 16 | // Next advances the iterator to the next key 17 | Next() bool 18 | 19 | // Key returns the current key 20 | Key() []byte 21 | 22 | // Value returns the current value 23 | Value() []byte 24 | 25 | // Valid returns true if the iterator is positioned at a valid entry 26 | Valid() bool 27 | 28 | // IsTombstone returns true if the current entry is a deletion marker 29 | // This is used during compaction to distinguish between a regular nil value and a tombstone 30 | IsTombstone() bool 31 | } 32 | -------------------------------------------------------------------------------- /pkg/sstable/sstable.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/KevoDB/kevo/pkg/sstable/block" 7 | ) 8 | 9 | const ( 10 | // IndexBlockEntrySize is the approximate size of an index entry 11 | IndexBlockEntrySize = 20 12 | // DefaultBlockSize is the target size for data blocks 13 | DefaultBlockSize = block.BlockSize 14 | // IndexKeyInterval controls how frequently we add keys to the index 15 | IndexKeyInterval = 64 * 1024 // Add index entry every ~64KB 16 | ) 17 | 18 | var ( 19 | // ErrNotFound indicates a key was not found in the SSTable 20 | ErrNotFound = errors.New("key not found in sstable") 21 | // ErrCorruption indicates data corruption was detected 22 | ErrCorruption = errors.New("sstable corruption detected") 23 | ) 24 | 25 | // IndexEntry represents a block index entry 26 | type IndexEntry struct { 27 | // BlockOffset is the offset of the block in the file 28 | BlockOffset uint64 29 | // BlockSize is the size of the block in bytes 30 | BlockSize uint32 31 | // FirstKey is the first key in the block 32 | FirstKey []byte 33 | } 34 | -------------------------------------------------------------------------------- /pkg/wal/observer.go: -------------------------------------------------------------------------------- 1 | package wal 2 | 3 | // WALEntryObserver defines the interface for observing WAL operations. 4 | // Components that need to be notified of WAL events (such as replication systems) 5 | // can implement this interface and register with the WAL. 6 | type WALEntryObserver interface { 7 | // OnWALEntryWritten is called when a single entry is written to the WAL. 8 | // This method is called after the entry has been written to the WAL buffer 9 | // but before it may have been synced to disk. 10 | OnWALEntryWritten(entry *Entry) 11 | 12 | // OnWALBatchWritten is called when a batch of entries is written to the WAL. 13 | // The startSeq parameter is the sequence number of the first entry in the batch. 14 | // This method is called after all entries in the batch have been written to 15 | // the WAL buffer but before they may have been synced to disk. 16 | OnWALBatchWritten(startSeq uint64, entries []*Entry) 17 | 18 | // OnWALSync is called when the WAL is synced to disk. 19 | // The upToSeq parameter is the highest sequence number that has been synced. 20 | // This method is called after the fsync operation has completed successfully. 21 | OnWALSync(upToSeq uint64) 22 | } 23 | -------------------------------------------------------------------------------- /pkg/client/options_test.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestDefaultClientOptions(t *testing.T) { 9 | options := DefaultClientOptions() 10 | 11 | // Verify the default options have sensible values 12 | if options.Endpoint != "localhost:50051" { 13 | t.Errorf("Expected default endpoint to be localhost:50051, got %s", options.Endpoint) 14 | } 15 | 16 | if options.ConnectTimeout != 5*time.Second { 17 | t.Errorf("Expected default connect timeout to be 5s, got %s", options.ConnectTimeout) 18 | } 19 | 20 | if options.RequestTimeout != 10*time.Second { 21 | t.Errorf("Expected default request timeout to be 10s, got %s", options.RequestTimeout) 22 | } 23 | 24 | if options.TransportType != "grpc" { 25 | t.Errorf("Expected default transport type to be grpc, got %s", options.TransportType) 26 | } 27 | 28 | if options.PoolSize != 5 { 29 | t.Errorf("Expected default pool size to be 5, got %d", options.PoolSize) 30 | } 31 | 32 | if options.TLSEnabled != false { 33 | t.Errorf("Expected default TLS enabled to be false") 34 | } 35 | 36 | if options.MaxRetries != 3 { 37 | t.Errorf("Expected default max retries to be 3, got %d", options.MaxRetries) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pkg/engine/replication.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "github.com/KevoDB/kevo/pkg/common/log" 5 | "github.com/KevoDB/kevo/pkg/wal" 6 | ) 7 | 8 | // GetWAL exposes the WAL for replication purposes 9 | func (e *EngineFacade) GetWAL() *wal.WAL { 10 | // This is an enhancement to the EngineFacade to support replication 11 | // It's used by the replication manager to access the WAL 12 | if e.storage == nil { 13 | return nil 14 | } 15 | 16 | // Get WAL from storage manager 17 | // For now, we'll use type assertion since the interface doesn't 18 | // have a GetWAL method 19 | type walProvider interface { 20 | GetWAL() *wal.WAL 21 | } 22 | 23 | if provider, ok := e.storage.(walProvider); ok { 24 | return provider.GetWAL() 25 | } 26 | 27 | return nil 28 | } 29 | 30 | // SetReadOnly sets the engine to read-only mode for replicas 31 | func (e *EngineFacade) SetReadOnly(readOnly bool) { 32 | // This is an enhancement to the EngineFacade to support replication 33 | // Setting this will force the engine to reject write operations 34 | // Used by replicas to ensure they don't accept direct writes 35 | e.readOnly.Store(readOnly) 36 | log.Info("Engine read-only mode set to: %v", readOnly) 37 | } 38 | 39 | // IsReadOnly moved to facade.go 40 | -------------------------------------------------------------------------------- /pkg/engine/interfaces/storage.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/KevoDB/kevo/pkg/common/iterator" 5 | "github.com/KevoDB/kevo/pkg/wal" 6 | ) 7 | 8 | // Storage defines the core storage operations interface 9 | // This abstracts the actual storage implementation from the engine 10 | type Storage interface { 11 | // Core operations 12 | Put(key, value []byte) error 13 | Get(key []byte) ([]byte, error) 14 | Delete(key []byte) error 15 | IsDeleted(key []byte) (bool, error) 16 | 17 | // Iterator access 18 | GetIterator() (iterator.Iterator, error) 19 | GetRangeIterator(startKey, endKey []byte) (iterator.Iterator, error) 20 | 21 | // Batch operations 22 | ApplyBatch(entries []*wal.Entry) error 23 | 24 | // Flushing operations 25 | FlushMemTables() error 26 | 27 | // Lifecycle management 28 | Close() error 29 | } 30 | 31 | // StorageManager extends Storage with management operations 32 | type StorageManager interface { 33 | Storage 34 | 35 | // Memtable management 36 | GetMemTableSize() uint64 37 | IsFlushNeeded() bool 38 | 39 | // SSTable management 40 | GetSSTables() []string 41 | ReloadSSTables() error 42 | 43 | // WAL management 44 | RotateWAL() error 45 | 46 | // Statistics 47 | GetStorageStats() map[string]interface{} 48 | } 49 | -------------------------------------------------------------------------------- /pkg/version/README.md: -------------------------------------------------------------------------------- 1 | # Version Package 2 | 3 | This package provides centralized version management for Kevo. 4 | 5 | ## Usage 6 | 7 | ### Getting the version in code: 8 | 9 | ```go 10 | import "github.com/KevoDB/kevo/pkg/version" 11 | 12 | // Get simple version string 13 | v := version.GetVersion() // "1.4.0" 14 | 15 | // Get full version with build info 16 | full := version.GetFullVersion() // "1.4.0 (commit: abc123, built: 2024-01-01_12:00:00)" 17 | 18 | // Get all version info as struct 19 | info := version.GetInfo() 20 | fmt.Printf("Version: %s\n", info.Version) 21 | fmt.Printf("Git Commit: %s\n", info.GitCommit) 22 | fmt.Printf("Build Time: %s\n", info.BuildTime) 23 | fmt.Printf("Go Version: %s\n", info.GoVersion) 24 | ``` 25 | 26 | ### Building with version injection: 27 | 28 | ```bash 29 | # Using make (recommended) 30 | make build 31 | 32 | # Using go build directly 33 | go build -ldflags "\ 34 | -X github.com/KevoDB/kevo/pkg/version.Version=1.4.1 \ 35 | -X github.com/KevoDB/kevo/pkg/version.GitCommit=$(git rev-parse --short HEAD) \ 36 | -X github.com/KevoDB/kevo/pkg/version.BuildTime=$(date -u '+%Y-%m-%d_%H:%M:%S') \ 37 | -X github.com/KevoDB/kevo/pkg/version.GoVersion=$(go version | cut -d' ' -f3)" \ 38 | ./cmd/kevo 39 | ``` 40 | 41 | ### Updating the version: 42 | 43 | To update the version, edit the `Version` variable in `version.go`: 44 | 45 | ```go 46 | var ( 47 | Version = "1.4.1" // Update this line 48 | ... 49 | ) 50 | ``` 51 | 52 | The build system will automatically pick up the new version. -------------------------------------------------------------------------------- /.gitea/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Go Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | ci-test: 13 | name: Run Tests 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | go-version: [ '1.24.2' ] 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | 22 | - name: Set up Go ${{ matrix.go-version }} 23 | uses: actions/setup-go@v5 24 | with: 25 | go-version: ${{ matrix.go-version }} 26 | check-latest: true 27 | 28 | - name: Verify dependencies 29 | run: go mod verify 30 | 31 | - name: Run go vet 32 | run: go vet ./... 33 | 34 | - name: Run tests 35 | run: go test -v ./... 36 | 37 | - name: Send success notification 38 | if: success() 39 | run: | 40 | curl -X POST \ 41 | -H "Content-Type: text/plain" \ 42 | -d "✅ kevo success! View run at: https://git.canoozie.net/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}" \ 43 | https://chat.canoozie.net/rooms/5/2-q6gKxqrTAfhd/messages 44 | 45 | - name: Send failure notification 46 | if: failure() 47 | run: | 48 | curl -X POST \ 49 | -H "Content-Type: text/plain" \ 50 | -d "❌ kevo failure! View run at: https://git.canoozie.net/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}" \ 51 | https://chat.canoozie.net/rooms/5/2-q6gKxqrTAfhd/messages 52 | -------------------------------------------------------------------------------- /pkg/stats/interface.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import "time" 4 | 5 | // Provider defines the interface for components that provide statistics 6 | type Provider interface { 7 | // GetStats returns all statistics 8 | GetStats() map[string]interface{} 9 | 10 | // GetStatsFiltered returns statistics filtered by prefix 11 | GetStatsFiltered(prefix string) map[string]interface{} 12 | } 13 | 14 | // Collector interface defines methods for collecting statistics 15 | type Collector interface { 16 | Provider 17 | 18 | // TrackOperation records a single operation 19 | TrackOperation(op OperationType) 20 | 21 | // TrackOperationWithLatency records an operation with its latency 22 | TrackOperationWithLatency(op OperationType, latencyNs uint64) 23 | 24 | // TrackError increments the counter for the specified error type 25 | TrackError(errorType string) 26 | 27 | // TrackBytes adds the specified number of bytes to the read or write counter 28 | TrackBytes(isWrite bool, bytes uint64) 29 | 30 | // TrackMemTableSize records the current memtable size 31 | TrackMemTableSize(size uint64) 32 | 33 | // TrackFlush increments the flush counter 34 | TrackFlush() 35 | 36 | // TrackCompaction increments the compaction counter 37 | TrackCompaction() 38 | 39 | // StartRecovery initializes recovery statistics 40 | StartRecovery() time.Time 41 | 42 | // FinishRecovery completes recovery statistics 43 | FinishRecovery(startTime time.Time, filesRecovered, entriesRecovered, corruptedEntries uint64) 44 | } 45 | 46 | // Ensure AtomicCollector implements the Collector interface 47 | var _ Collector = (*AtomicCollector)(nil) 48 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | // ABOUTME: Provides centralized version management for the Kevo storage engine 2 | // ABOUTME: Supports both compile-time constant and build-time injection via ldflags 3 | 4 | package version 5 | 6 | // Version variables that can be overridden at build time using ldflags 7 | // Example: go build -ldflags "-X github.com/KevoDB/kevo/pkg/version.Version=1.2.3" 8 | var ( 9 | // Version is the semantic version of Kevo 10 | Version = "1.4.2" 11 | 12 | // GitCommit is the git commit hash (set via ldflags) 13 | GitCommit = "unknown" 14 | 15 | // BuildTime is the build timestamp (set via ldflags) 16 | BuildTime = "unknown" 17 | 18 | // GoVersion is the Go version used to build (set via ldflags) 19 | GoVersion = "unknown" 20 | ) 21 | 22 | // GetVersion returns the current version string 23 | func GetVersion() string { 24 | return Version 25 | } 26 | 27 | // GetFullVersion returns a detailed version string including build information 28 | func GetFullVersion() string { 29 | if GitCommit == "unknown" && BuildTime == "unknown" { 30 | // Simple version when build info not available 31 | return Version 32 | } 33 | return Version + " (commit: " + GitCommit + ", built: " + BuildTime + ")" 34 | } 35 | 36 | // Info contains all version information 37 | type Info struct { 38 | Version string 39 | GitCommit string 40 | BuildTime string 41 | GoVersion string 42 | } 43 | 44 | // GetInfo returns a struct with all version information 45 | func GetInfo() Info { 46 | return Info{ 47 | Version: Version, 48 | GitCommit: GitCommit, 49 | BuildTime: BuildTime, 50 | GoVersion: GoVersion, 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pkg/engine/interfaces/transaction.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | 7 | "github.com/KevoDB/kevo/pkg/common/iterator" 8 | ) 9 | 10 | // Transaction defines the interface for a database transaction 11 | type Transaction interface { 12 | // Core operations 13 | Get(key []byte) ([]byte, error) 14 | Put(key, value []byte) error 15 | Delete(key []byte) error 16 | 17 | // Iterator access 18 | NewIterator() iterator.Iterator 19 | NewRangeIterator(startKey, endKey []byte) iterator.Iterator 20 | 21 | // Transaction management 22 | Commit() error 23 | Rollback() error 24 | IsReadOnly() bool 25 | } 26 | 27 | // TransactionManager handles transaction lifecycle 28 | type TransactionManager interface { 29 | // Create a new transaction 30 | BeginTransaction(readOnly bool) (Transaction, error) 31 | 32 | // Get the lock used for transaction isolation 33 | GetRWLock() *sync.RWMutex 34 | 35 | // Transaction statistics 36 | IncrementTxCompleted() 37 | IncrementTxAborted() 38 | GetTransactionStats() map[string]interface{} 39 | } 40 | 41 | // TxRegistry defines the interface for a transaction registry 42 | type TxRegistry interface { 43 | // Begin starts a new transaction 44 | Begin(ctx context.Context, eng Engine, readOnly bool) (string, error) 45 | 46 | // Get retrieves a transaction by ID 47 | Get(txID string) (Transaction, bool) 48 | 49 | // Remove removes a transaction from the registry 50 | Remove(txID string) 51 | 52 | // CleanupConnection cleans up all transactions for a given connection 53 | CleanupConnection(connectionID string) 54 | 55 | // GracefulShutdown performs cleanup on shutdown 56 | GracefulShutdown(ctx context.Context) error 57 | } 58 | -------------------------------------------------------------------------------- /pkg/engine/interfaces/engine.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/KevoDB/kevo/pkg/common/iterator" 7 | "github.com/KevoDB/kevo/pkg/stats" 8 | "github.com/KevoDB/kevo/pkg/wal" 9 | ) 10 | 11 | // Engine defines the core interface for the storage engine 12 | // This is the primary interface clients will interact with 13 | type Engine interface { 14 | // Core operations 15 | Put(key, value []byte) error 16 | Get(key []byte) ([]byte, error) 17 | Delete(key []byte) error 18 | IsDeleted(key []byte) (bool, error) 19 | 20 | // Iterator access 21 | GetIterator() (iterator.Iterator, error) 22 | GetRangeIterator(startKey, endKey []byte) (iterator.Iterator, error) 23 | 24 | // Batch operations 25 | ApplyBatch(entries []*wal.Entry) error 26 | 27 | // Transaction management 28 | BeginTransaction(readOnly bool) (Transaction, error) 29 | 30 | // Maintenance operations 31 | FlushImMemTables() error 32 | TriggerCompaction() error 33 | CompactRange(startKey, endKey []byte) error 34 | 35 | // Statistics 36 | GetStats() map[string]interface{} 37 | GetCompactionStats() (map[string]interface{}, error) 38 | 39 | // Lifecycle management 40 | Close() error 41 | 42 | // Read-only mode? 43 | IsReadOnly() bool 44 | } 45 | 46 | // Components is a struct containing all the components needed by the engine 47 | // This allows for dependency injection and easier testing 48 | type Components struct { 49 | Storage StorageManager 50 | TransactionMgr TransactionManager 51 | CompactionMgr CompactionManager 52 | StatsCollector stats.Collector 53 | } 54 | 55 | // Engine related errors 56 | var ( 57 | // ErrEngineClosed is returned when operations are performed on a closed engine 58 | ErrEngineClosed = errors.New("engine is closed") 59 | 60 | // ErrKeyNotFound is returned when a key is not found 61 | ErrKeyNotFound = errors.New("key not found") 62 | ) 63 | -------------------------------------------------------------------------------- /pkg/sstable/iterator_adapter.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | // No imports needed 4 | 5 | // IteratorAdapter adapts an sstable.Iterator to the common Iterator interface 6 | type IteratorAdapter struct { 7 | iter *Iterator 8 | } 9 | 10 | // NewIteratorAdapter creates a new adapter for an sstable iterator 11 | func NewIteratorAdapter(iter *Iterator) *IteratorAdapter { 12 | return &IteratorAdapter{iter: iter} 13 | } 14 | 15 | // SeekToFirst positions the iterator at the first key 16 | func (a *IteratorAdapter) SeekToFirst() { 17 | a.iter.SeekToFirst() 18 | } 19 | 20 | // SeekToLast positions the iterator at the last key 21 | func (a *IteratorAdapter) SeekToLast() { 22 | a.iter.SeekToLast() 23 | } 24 | 25 | // Seek positions the iterator at the first key >= target 26 | func (a *IteratorAdapter) Seek(target []byte) bool { 27 | return a.iter.Seek(target) 28 | } 29 | 30 | // Next advances the iterator to the next key 31 | func (a *IteratorAdapter) Next() bool { 32 | return a.iter.Next() 33 | } 34 | 35 | // Key returns the current key 36 | func (a *IteratorAdapter) Key() []byte { 37 | if !a.Valid() { 38 | return nil 39 | } 40 | return a.iter.Key() 41 | } 42 | 43 | // Value returns the current value 44 | func (a *IteratorAdapter) Value() []byte { 45 | if !a.Valid() { 46 | return nil 47 | } 48 | return a.iter.Value() 49 | } 50 | 51 | // Valid returns true if the iterator is positioned at a valid entry 52 | func (a *IteratorAdapter) Valid() bool { 53 | return a.iter != nil && a.iter.Valid() 54 | } 55 | 56 | // IsTombstone returns true if the current entry is a deletion marker 57 | func (a *IteratorAdapter) IsTombstone() bool { 58 | return a.Valid() && a.iter.IsTombstone() 59 | } 60 | 61 | // SequenceNumber returns the sequence number of the current entry 62 | func (a *IteratorAdapter) SequenceNumber() uint64 { 63 | if !a.Valid() { 64 | return 0 65 | } 66 | return a.iter.SequenceNumber() 67 | } 68 | -------------------------------------------------------------------------------- /pkg/compaction/compat.go: -------------------------------------------------------------------------------- 1 | package compaction 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/KevoDB/kevo/pkg/config" 7 | ) 8 | 9 | // NewCompactionManager creates a new compaction manager with the old API 10 | // This is kept for backward compatibility with existing code 11 | func NewCompactionManager(cfg *config.Config, sstableDir string) *DefaultCompactionCoordinator { 12 | // Create tombstone tracker with default 24-hour retention 13 | tombstones := NewTombstoneTracker(24 * time.Hour) 14 | 15 | // Create file tracker 16 | fileTracker := NewFileTracker() 17 | 18 | // Create compaction executor 19 | executor := NewCompactionExecutor(cfg, sstableDir, tombstones) 20 | 21 | // Create tiered compaction strategy 22 | strategy := NewTieredCompactionStrategy(cfg, sstableDir, executor) 23 | 24 | // Return the new coordinator 25 | return NewCompactionCoordinator(cfg, sstableDir, CompactionCoordinatorOptions{ 26 | Strategy: strategy, 27 | Executor: executor, 28 | FileTracker: fileTracker, 29 | TombstoneManager: tombstones, 30 | CompactionInterval: cfg.CompactionInterval, 31 | }) 32 | } 33 | 34 | // Temporary alias types for backward compatibility 35 | type CompactionManager = DefaultCompactionCoordinator 36 | type Compactor = BaseCompactionStrategy 37 | type TieredCompactor = TieredCompactionStrategy 38 | 39 | // NewCompactor creates a new compactor with the old API (backward compatibility) 40 | func NewCompactor(cfg *config.Config, sstableDir string, tracker *TombstoneTracker) *BaseCompactionStrategy { 41 | return NewBaseCompactionStrategy(cfg, sstableDir) 42 | } 43 | 44 | // NewTieredCompactor creates a new tiered compactor with the old API (backward compatibility) 45 | func NewTieredCompactor(cfg *config.Config, sstableDir string, tracker *TombstoneTracker) *TieredCompactionStrategy { 46 | executor := NewCompactionExecutor(cfg, sstableDir, tracker) 47 | return NewTieredCompactionStrategy(cfg, sstableDir, executor) 48 | } 49 | -------------------------------------------------------------------------------- /pkg/sstable/block/block_reader.go: -------------------------------------------------------------------------------- 1 | package block 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | 7 | "github.com/cespare/xxhash/v2" 8 | ) 9 | 10 | // Reader provides methods to read data from a serialized block 11 | type Reader struct { 12 | data []byte 13 | restartPoints []uint32 14 | numRestarts uint32 15 | checksum uint64 16 | } 17 | 18 | // NewReader creates a new block reader 19 | func NewReader(data []byte) (*Reader, error) { 20 | if len(data) < BlockFooterSize { 21 | return nil, fmt.Errorf("block data too small: %d bytes", len(data)) 22 | } 23 | 24 | // Read footer 25 | footerOffset := len(data) - BlockFooterSize 26 | numRestarts := binary.LittleEndian.Uint32(data[footerOffset : footerOffset+4]) 27 | checksum := binary.LittleEndian.Uint64(data[footerOffset+4:]) 28 | 29 | // Verify checksum - the checksum covers everything except the checksum itself 30 | computedChecksum := xxhash.Sum64(data[:len(data)-8]) 31 | if computedChecksum != checksum { 32 | return nil, fmt.Errorf("block checksum mismatch: expected %d, got %d", 33 | checksum, computedChecksum) 34 | } 35 | 36 | // Read restart points 37 | restartOffset := footerOffset - int(numRestarts)*4 38 | if restartOffset < 0 { 39 | return nil, fmt.Errorf("invalid restart points offset") 40 | } 41 | 42 | restartPoints := make([]uint32, numRestarts) 43 | for i := uint32(0); i < numRestarts; i++ { 44 | restartPoints[i] = binary.LittleEndian.Uint32( 45 | data[restartOffset+int(i)*4:]) 46 | } 47 | 48 | reader := &Reader{ 49 | data: data, 50 | restartPoints: restartPoints, 51 | numRestarts: numRestarts, 52 | checksum: checksum, 53 | } 54 | 55 | return reader, nil 56 | } 57 | 58 | // Iterator returns an iterator for the block 59 | func (r *Reader) Iterator() *Iterator { 60 | // Calculate the data end position (everything before the restart points array) 61 | dataEnd := len(r.data) - BlockFooterSize - 4*len(r.restartPoints) 62 | 63 | return &Iterator{ 64 | reader: r, 65 | currentPos: 0, 66 | currentKey: nil, 67 | currentVal: nil, 68 | restartIdx: 0, 69 | initialized: false, 70 | dataEnd: uint32(dataEnd), 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | Welcome, and thank you for your interest in contributing. This project is governed with a clear vision and strong direction. The following guidelines exist to keep contributions productive, focused, and aligned with the project's goals. 4 | 5 | 1. **Project Governance** 6 | 7 | This is not a democracy. The project is maintained by a *benevolent dictator*--that’s me. Final decisions rest with the maintainer. Suggestions are welcome, but proposals that conflict with the core philosophy or goals of the project may be closed without further discussion. 8 | 9 | 2. **Pull Requests** 10 | 11 | - **Feature Suggestions**: Open to ideas, but keep them aligned with the project's direction. Off-mission PRs will be closed. 12 | - **Bug Fixes**: Fixes must include tests that directly verifies the issue described in the report. If the bug can't be demonstrated, it doesn't exist (or at least, it's not our problem). 13 | - **Test Coverage**: PRs that improve test coverage are encouraged. If there's a need to refactor code to enable testing, that's acceptable--but any touched area must remain fully covered. Incomplete refactors or those introducing new test gaps will be rejected. 14 | 15 | 3. **Code Style & Quality** 16 | 17 | - Follow existing code patterns. If you think something should be improved, propose it, but don't go on a "style crusade." 18 | - Consistency and clarity matter more than cleverness, always. 19 | 20 | 4. **Communication** 21 | 22 | - Be respectful by default. We're here to build, not babysit. 23 | - If someone is being willfully obtuse or disruptive, use your judgement. Free speech is respected, but consequences follow unproductive behavior. 24 | 25 | 5. **Philosophy** 26 | 27 | This project prioritizes: 28 | 29 | - Simplicity over abstraction 30 | - Practicality over purity. 31 | - Results over ceremony. 32 | 33 | Stay focused, keep your contributions sharp, and understand the mission. If that sounds like your style, you're more than welcome here. 34 | 35 | P.S. These contribution guidelines were 35 lines long when written the first time. If they're still not 35 lines long, then it's safe to assume that someone has done something that required we clarify things for those with a lower IQ than ours. 36 | -------------------------------------------------------------------------------- /pkg/compaction/compaction.go: -------------------------------------------------------------------------------- 1 | package compaction 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | 7 | "github.com/KevoDB/kevo/pkg/sstable" 8 | ) 9 | 10 | // SSTableInfo represents metadata about an SSTable file 11 | type SSTableInfo struct { 12 | // Path of the SSTable file 13 | Path string 14 | 15 | // Level number (0 to N) 16 | Level int 17 | 18 | // Sequence number for the file within its level 19 | Sequence uint64 20 | 21 | // Timestamp when the file was created 22 | Timestamp int64 23 | 24 | // Approximate size of the file in bytes 25 | Size int64 26 | 27 | // Estimated key count (may be approximate) 28 | KeyCount int 29 | 30 | // First key in the SSTable 31 | FirstKey []byte 32 | 33 | // Last key in the SSTable 34 | LastKey []byte 35 | 36 | // Reader for the SSTable 37 | Reader *sstable.Reader 38 | } 39 | 40 | // Overlaps checks if this SSTable's key range overlaps with another SSTable 41 | func (s *SSTableInfo) Overlaps(other *SSTableInfo) bool { 42 | // If either SSTable has no keys, they don't overlap 43 | if len(s.FirstKey) == 0 || len(s.LastKey) == 0 || 44 | len(other.FirstKey) == 0 || len(other.LastKey) == 0 { 45 | return false 46 | } 47 | 48 | // Check for overlap: not (s ends before other starts OR s starts after other ends) 49 | // s.LastKey < other.FirstKey || s.FirstKey > other.LastKey 50 | return !(bytes.Compare(s.LastKey, other.FirstKey) < 0 || 51 | bytes.Compare(s.FirstKey, other.LastKey) > 0) 52 | } 53 | 54 | // KeyRange returns a string representation of the key range in this SSTable 55 | func (s *SSTableInfo) KeyRange() string { 56 | return fmt.Sprintf("[%s, %s]", 57 | string(s.FirstKey), string(s.LastKey)) 58 | } 59 | 60 | // String returns a string representation of the SSTable info 61 | func (s *SSTableInfo) String() string { 62 | return fmt.Sprintf("L%d-%06d-%020d.sst Size:%d Keys:%d Range:%s", 63 | s.Level, s.Sequence, s.Timestamp, s.Size, s.KeyCount, s.KeyRange()) 64 | } 65 | 66 | // CompactionTask represents a set of SSTables to be compacted 67 | type CompactionTask struct { 68 | // Input SSTables to compact, grouped by level 69 | InputFiles map[int][]*SSTableInfo 70 | 71 | // Target level for compaction output 72 | TargetLevel int 73 | 74 | // Output file path template 75 | OutputPathTemplate string 76 | } 77 | -------------------------------------------------------------------------------- /pkg/sstable/integration_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | "testing" 7 | ) 8 | 9 | // TestIntegration performs a basic integration test between Writer and Reader 10 | func TestIntegration(t *testing.T) { 11 | // Create a temporary directory for the test 12 | tempDir := t.TempDir() 13 | sstablePath := filepath.Join(tempDir, "test-integration.sst") 14 | 15 | // Create a new SSTable writer 16 | writer, err := NewWriter(sstablePath) 17 | if err != nil { 18 | t.Fatalf("Failed to create SSTable writer: %v", err) 19 | } 20 | 21 | // Add some key-value pairs 22 | numEntries := 100 23 | keyValues := make(map[string]string, numEntries) 24 | 25 | for i := 0; i < numEntries; i++ { 26 | key := fmt.Sprintf("key%05d", i) 27 | value := fmt.Sprintf("value%05d", i) 28 | keyValues[key] = value 29 | 30 | err := writer.Add([]byte(key), []byte(value)) 31 | if err != nil { 32 | t.Fatalf("Failed to add entry: %v", err) 33 | } 34 | } 35 | 36 | // Finish writing 37 | err = writer.Finish() 38 | if err != nil { 39 | t.Fatalf("Failed to finish SSTable: %v", err) 40 | } 41 | 42 | // Open the SSTable for reading 43 | reader, err := OpenReader(sstablePath) 44 | if err != nil { 45 | t.Fatalf("Failed to open SSTable: %v", err) 46 | } 47 | defer reader.Close() 48 | 49 | // Verify the number of entries 50 | if reader.GetKeyCount() != numEntries { 51 | t.Errorf("Expected %d entries, got %d", numEntries, reader.GetKeyCount()) 52 | } 53 | 54 | // Test GetKeyCount method 55 | if reader.GetKeyCount() != numEntries { 56 | t.Errorf("GetKeyCount returned %d, expected %d", reader.GetKeyCount(), numEntries) 57 | } 58 | 59 | // First test direct key retrieval 60 | missingKeys := 0 61 | for key, expectedValue := range keyValues { 62 | // Test direct Get 63 | value, err := reader.Get([]byte(key)) 64 | if err != nil { 65 | t.Errorf("Failed to get key %s via Get(): %v", key, err) 66 | missingKeys++ 67 | continue 68 | } 69 | 70 | if string(value) != expectedValue { 71 | t.Errorf("Value mismatch for key %s via Get(): expected %s, got %s", 72 | key, expectedValue, value) 73 | } 74 | } 75 | 76 | if missingKeys > 0 { 77 | t.Errorf("%d keys could not be retrieved via direct Get", missingKeys) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /pkg/engine/storage/retry.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "math/rand" 5 | "time" 6 | 7 | "github.com/KevoDB/kevo/pkg/wal" 8 | ) 9 | 10 | // RetryConfig defines parameters for retry operations 11 | type RetryConfig struct { 12 | MaxRetries int // Maximum number of retries 13 | InitialBackoff time.Duration // Initial backoff duration 14 | MaxBackoff time.Duration // Maximum backoff duration 15 | } 16 | 17 | // DefaultRetryConfig returns default retry configuration 18 | func DefaultRetryConfig() *RetryConfig { 19 | return &RetryConfig{ 20 | MaxRetries: 3, 21 | InitialBackoff: 5 * time.Millisecond, 22 | MaxBackoff: 50 * time.Millisecond, 23 | } 24 | } 25 | 26 | // Commented out due to duplicate declaration with the one in manager.go 27 | // func (m *Manager) RetryOnWALRotating(operation func() error) error { 28 | // config := DefaultRetryConfig() 29 | // return m.RetryWithConfig(operation, config, isWALRotating) 30 | // } 31 | 32 | // RetryWithConfig retries an operation with the given configuration 33 | func (m *Manager) RetryWithConfig(operation func() error, config *RetryConfig, isRetryable func(error) bool) error { 34 | backoff := config.InitialBackoff 35 | 36 | for i := 0; i <= config.MaxRetries; i++ { 37 | // Attempt the operation 38 | err := operation() 39 | if err == nil { 40 | return nil 41 | } 42 | 43 | // Check if we should retry 44 | if !isRetryable(err) || i == config.MaxRetries { 45 | return err 46 | } 47 | 48 | // Add some jitter to the backoff 49 | jitter := time.Duration(rand.Int63n(int64(backoff / 10))) 50 | backoff = backoff + jitter 51 | 52 | // Wait before retrying 53 | time.Sleep(backoff) 54 | 55 | // Increase backoff for next attempt, but cap it 56 | backoff = 2 * backoff 57 | if backoff > config.MaxBackoff { 58 | backoff = config.MaxBackoff 59 | } 60 | } 61 | 62 | // Should never get here, but just in case 63 | return nil 64 | } 65 | 66 | // isWALRotating checks if the error is due to WAL rotation or closure 67 | func isWALRotating(err error) bool { 68 | // Both ErrWALRotating and ErrWALClosed can occur during WAL rotation 69 | // Since WAL rotation is a normal operation, we should retry in both cases 70 | return err == wal.ErrWALRotating || err == wal.ErrWALClosed 71 | } 72 | -------------------------------------------------------------------------------- /pkg/transaction/interface.go: -------------------------------------------------------------------------------- 1 | package transaction 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | 7 | "github.com/KevoDB/kevo/pkg/common/iterator" 8 | ) 9 | 10 | // TransactionMode defines the transaction access mode (ReadOnly or ReadWrite) 11 | type TransactionMode int 12 | 13 | const ( 14 | // ReadOnly transactions only read from the database 15 | ReadOnly TransactionMode = iota 16 | 17 | // ReadWrite transactions can both read and write to the database 18 | ReadWrite 19 | ) 20 | 21 | // Transaction represents a database transaction that provides ACID guarantees 22 | // This matches the interfaces.Transaction interface from pkg/engine/interfaces/transaction.go 23 | type Transaction interface { 24 | // Core operations 25 | Get(key []byte) ([]byte, error) 26 | Put(key, value []byte) error 27 | Delete(key []byte) error 28 | 29 | // Iterator access 30 | NewIterator() iterator.Iterator 31 | NewRangeIterator(startKey, endKey []byte) iterator.Iterator 32 | 33 | // Transaction management 34 | Commit() error 35 | Rollback() error 36 | IsReadOnly() bool 37 | } 38 | 39 | // TransactionManager handles transaction lifecycle 40 | // This matches the interfaces.TransactionManager interface from pkg/engine/interfaces/transaction.go 41 | type TransactionManager interface { 42 | // Create a new transaction 43 | BeginTransaction(readOnly bool) (Transaction, error) 44 | 45 | // Get the lock used for transaction isolation 46 | GetRWLock() *sync.RWMutex 47 | 48 | // Transaction statistics 49 | IncrementTxCompleted() 50 | IncrementTxAborted() 51 | GetTransactionStats() map[string]interface{} 52 | } 53 | 54 | // Registry manages transaction lifecycle and connections 55 | // This matches the interfaces.TxRegistry interface from pkg/engine/interfaces/transaction.go 56 | type Registry interface { 57 | // Begin starts a new transaction 58 | Begin(ctx context.Context, eng interface{}, readOnly bool) (string, error) 59 | 60 | // Get retrieves a transaction by ID 61 | Get(txID string) (Transaction, bool) 62 | 63 | // Remove removes a transaction from the registry 64 | Remove(txID string) 65 | 66 | // CleanupConnection cleans up all transactions for a given connection 67 | CleanupConnection(connectionID string) 68 | 69 | // GracefulShutdown performs cleanup on shutdown 70 | GracefulShutdown(ctx context.Context) error 71 | } 72 | -------------------------------------------------------------------------------- /pkg/common/iterator/adapter_pattern.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | // This file documents the recommended adapter pattern for iterator implementations. 4 | // 5 | // Guidelines for Iterator Adapters: 6 | // 7 | // 1. Naming Convention: 8 | // - Use the suffix "IteratorAdapter" for adapter types 9 | // - Use "New[SourceType]IteratorAdapter" for constructor functions 10 | // 11 | // 2. Implementation Pattern: 12 | // - Store the source iterator as a field 13 | // - Implement the Iterator interface by delegating to the source 14 | // - Add any necessary conversion or transformation logic 15 | // - For nil/error handling, be defensive and check validity 16 | // 17 | // 3. Performance Considerations: 18 | // - Avoid unnecessary copying of keys/values when possible 19 | // - Consider buffer reuse for frequently allocated memory 20 | // - Use read-write locks instead of full mutexes where appropriate 21 | // 22 | // 4. Adapter Location: 23 | // - Implement adapters within the package that owns the source type 24 | // - For example, memtable adapters should be in the memtable package 25 | // 26 | // Example: 27 | // 28 | // // ExampleAdapter adapts a SourceIterator to the common Iterator interface 29 | // type ExampleAdapter struct { 30 | // source SourceIterator 31 | // } 32 | // 33 | // func NewExampleAdapter(source SourceIterator) *ExampleAdapter { 34 | // return &ExampleAdapter{source: source} 35 | // } 36 | // 37 | // func (a *ExampleAdapter) SeekToFirst() { 38 | // a.source.SeekToFirst() 39 | // } 40 | // 41 | // func (a *ExampleAdapter) SeekToLast() { 42 | // a.source.SeekToLast() 43 | // } 44 | // 45 | // func (a *ExampleAdapter) Seek(target []byte) bool { 46 | // return a.source.Seek(target) 47 | // } 48 | // 49 | // func (a *ExampleAdapter) Next() bool { 50 | // return a.source.Next() 51 | // } 52 | // 53 | // func (a *ExampleAdapter) Key() []byte { 54 | // if !a.Valid() { 55 | // return nil 56 | // } 57 | // return a.source.Key() 58 | // } 59 | // 60 | // func (a *ExampleAdapter) Value() []byte { 61 | // if !a.Valid() { 62 | // return nil 63 | // } 64 | // return a.source.Value() 65 | // } 66 | // 67 | // func (a *ExampleAdapter) Valid() bool { 68 | // return a.source != nil && a.source.Valid() 69 | // } 70 | // 71 | // func (a *ExampleAdapter) IsTombstone() bool { 72 | // return a.Valid() && a.source.IsTombstone() 73 | // } 74 | -------------------------------------------------------------------------------- /pkg/transport/common_test.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestBasicRequest(t *testing.T) { 9 | // Test creating a request 10 | payload := []byte("test payload") 11 | req := NewRequest(TypeGet, payload) 12 | 13 | // Test Type method 14 | if req.Type() != TypeGet { 15 | t.Errorf("Expected type %s, got %s", TypeGet, req.Type()) 16 | } 17 | 18 | // Test Payload method 19 | if string(req.Payload()) != string(payload) { 20 | t.Errorf("Expected payload %s, got %s", string(payload), string(req.Payload())) 21 | } 22 | } 23 | 24 | func TestBasicResponse(t *testing.T) { 25 | // Test creating a response with no error 26 | payload := []byte("test response") 27 | resp := NewResponse(TypeGet, payload, nil) 28 | 29 | // Test Type method 30 | if resp.Type() != TypeGet { 31 | t.Errorf("Expected type %s, got %s", TypeGet, resp.Type()) 32 | } 33 | 34 | // Test Payload method 35 | if string(resp.Payload()) != string(payload) { 36 | t.Errorf("Expected payload %s, got %s", string(payload), string(resp.Payload())) 37 | } 38 | 39 | // Test Error method 40 | if resp.Error() != nil { 41 | t.Errorf("Expected nil error, got %v", resp.Error()) 42 | } 43 | 44 | // Test creating a response with an error 45 | testErr := errors.New("test error") 46 | resp = NewResponse(TypeGet, payload, testErr) 47 | 48 | if resp.Error() != testErr { 49 | t.Errorf("Expected error %v, got %v", testErr, resp.Error()) 50 | } 51 | } 52 | 53 | func TestNewErrorResponse(t *testing.T) { 54 | // Test creating an error response 55 | testErr := errors.New("test error") 56 | resp := NewErrorResponse(testErr) 57 | 58 | // Test Type method 59 | if resp.Type() != TypeError { 60 | t.Errorf("Expected type %s, got %s", TypeError, resp.Type()) 61 | } 62 | 63 | // Test Payload method - should contain error message 64 | if string(resp.Payload()) != testErr.Error() { 65 | t.Errorf("Expected payload %s, got %s", testErr.Error(), string(resp.Payload())) 66 | } 67 | 68 | // Test Error method 69 | if resp.Error() != testErr { 70 | t.Errorf("Expected error %v, got %v", testErr, resp.Error()) 71 | } 72 | 73 | // Test with nil error 74 | resp = NewErrorResponse(nil) 75 | 76 | if resp.Type() != TypeError { 77 | t.Errorf("Expected type %s, got %s", TypeError, resp.Type()) 78 | } 79 | 80 | if len(resp.Payload()) != 0 { 81 | t.Errorf("Expected empty payload, got %s", string(resp.Payload())) 82 | } 83 | 84 | if resp.Error() != nil { 85 | t.Errorf("Expected nil error, got %v", resp.Error()) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /pkg/sstable/bloom_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | ) 10 | 11 | func TestBasicBloomFilter(t *testing.T) { 12 | // Create a temporary directory 13 | tempDir, err := os.MkdirTemp("", "bloom_test") 14 | if err != nil { 15 | t.Fatalf("Failed to create temp dir: %v", err) 16 | } 17 | defer os.RemoveAll(tempDir) 18 | 19 | // Create an SSTable with bloom filters enabled 20 | sst := filepath.Join(tempDir, "test_bloom.sst") 21 | 22 | // Create the writer with bloom filters enabled 23 | options := DefaultWriterOptions() 24 | options.EnableBloomFilter = true 25 | writer, err := NewWriterWithOptions(sst, options) 26 | if err != nil { 27 | t.Fatalf("Failed to create writer: %v", err) 28 | } 29 | 30 | // Add just a few keys 31 | keys := []string{ 32 | "apple", 33 | "banana", 34 | "cherry", 35 | "date", 36 | "elderberry", 37 | } 38 | 39 | for _, key := range keys { 40 | value := fmt.Sprintf("value-%s", key) 41 | if err := writer.Add([]byte(key), []byte(value)); err != nil { 42 | t.Fatalf("Failed to add key %s: %v", key, err) 43 | } 44 | } 45 | 46 | // Finish writing 47 | if err := writer.Finish(); err != nil { 48 | t.Fatalf("Failed to finish writer: %v", err) 49 | } 50 | 51 | // Open the reader 52 | reader, err := OpenReader(sst) 53 | if err != nil { 54 | t.Fatalf("Failed to open reader: %v", err) 55 | } 56 | defer reader.Close() 57 | 58 | // Check that reader has bloom filters 59 | if !reader.hasBloomFilter { 60 | t.Errorf("Reader does not have bloom filters even though they were enabled") 61 | } 62 | 63 | // Check that all keys can be found 64 | for _, key := range keys { 65 | expectedValue := []byte(fmt.Sprintf("value-%s", key)) 66 | value, err := reader.Get([]byte(key)) 67 | if err != nil { 68 | t.Errorf("Failed to find key %s: %v", key, err) 69 | continue 70 | } 71 | 72 | if !bytes.Equal(value, expectedValue) { 73 | t.Errorf("Value mismatch for key %s: got %q, expected %q", key, value, expectedValue) 74 | } else { 75 | t.Logf("Successfully found key %s", key) 76 | } 77 | } 78 | 79 | // Check that non-existent keys are not found 80 | nonExistentKeys := []string{ 81 | "fig", 82 | "grape", 83 | "honeydew", 84 | } 85 | 86 | for _, key := range nonExistentKeys { 87 | _, err := reader.Get([]byte(key)) 88 | if err != ErrNotFound { 89 | t.Errorf("Expected ErrNotFound for key %s, got: %v", key, err) 90 | } else { 91 | t.Logf("Correctly reported key %s as not found", key) 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /pkg/memtable/iterator_adapter.go: -------------------------------------------------------------------------------- 1 | package memtable 2 | 3 | // No imports needed 4 | 5 | // IteratorAdapter adapts a memtable.Iterator to the common Iterator interface 6 | type IteratorAdapter struct { 7 | iter *Iterator 8 | } 9 | 10 | // NewIteratorAdapter creates a new adapter for a memtable iterator 11 | func NewIteratorAdapter(iter *Iterator) *IteratorAdapter { 12 | return &IteratorAdapter{iter: iter} 13 | } 14 | 15 | // SeekToFirst positions the iterator at the first key 16 | func (a *IteratorAdapter) SeekToFirst() { 17 | a.iter.SeekToFirst() 18 | } 19 | 20 | // SeekToLast positions the iterator at the last key 21 | func (a *IteratorAdapter) SeekToLast() { 22 | a.iter.SeekToFirst() 23 | 24 | // If no items, return early 25 | if !a.iter.Valid() { 26 | return 27 | } 28 | 29 | // Store the last key we've seen 30 | var lastKey []byte 31 | 32 | // Scan to find the last element 33 | for a.iter.Valid() { 34 | lastKey = a.iter.Key() 35 | a.iter.Next() 36 | } 37 | 38 | // Re-position at the last key we found 39 | if lastKey != nil { 40 | a.iter.Seek(lastKey) 41 | } 42 | } 43 | 44 | // Seek positions the iterator at the first key >= target 45 | func (a *IteratorAdapter) Seek(target []byte) bool { 46 | a.iter.Seek(target) 47 | return a.iter.Valid() 48 | } 49 | 50 | // Next advances the iterator to the next key 51 | func (a *IteratorAdapter) Next() bool { 52 | if !a.Valid() { 53 | return false 54 | } 55 | a.iter.Next() 56 | return a.iter.Valid() 57 | } 58 | 59 | // Key returns the current key 60 | func (a *IteratorAdapter) Key() []byte { 61 | if !a.Valid() { 62 | return nil 63 | } 64 | return a.iter.Key() 65 | } 66 | 67 | // Value returns the current value 68 | func (a *IteratorAdapter) Value() []byte { 69 | if !a.Valid() { 70 | return nil 71 | } 72 | 73 | // Check if this is a tombstone (deletion marker) 74 | if a.iter.IsTombstone() { 75 | // This ensures that during compaction, we know this is a deletion marker 76 | return nil 77 | } 78 | 79 | return a.iter.Value() 80 | } 81 | 82 | // Valid returns true if the iterator is positioned at a valid entry 83 | func (a *IteratorAdapter) Valid() bool { 84 | return a.iter != nil && a.iter.Valid() 85 | } 86 | 87 | // IsTombstone returns true if the current entry is a deletion marker 88 | func (a *IteratorAdapter) IsTombstone() bool { 89 | return a.iter != nil && a.iter.IsTombstone() 90 | } 91 | 92 | // SequenceNumber returns the sequence number of the current entry 93 | func (a *IteratorAdapter) SequenceNumber() uint64 { 94 | if !a.Valid() || a.iter.Entry() == nil { 95 | return 0 96 | } 97 | return a.iter.Entry().seqNum 98 | } 99 | -------------------------------------------------------------------------------- /pkg/transport/metrics_extended.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "sync/atomic" 5 | "time" 6 | ) 7 | 8 | // Metrics struct extensions for server metrics 9 | type ServerMetrics struct { 10 | Metrics 11 | ServerStarted uint64 12 | ServerErrored uint64 13 | ServerStopped uint64 14 | } 15 | 16 | // ConnectionStatus represents the status of a connection 17 | type ConnectionStatus struct { 18 | Connected bool 19 | LastActivity time.Time 20 | ErrorCount int 21 | RequestCount int 22 | LatencyAvg time.Duration 23 | } 24 | 25 | // ExtendedMetricsCollector extends the basic metrics collector with server metrics 26 | type ExtendedMetricsCollector struct { 27 | BasicMetricsCollector 28 | serverStarted uint64 29 | serverErrored uint64 30 | serverStopped uint64 31 | } 32 | 33 | // NewMetrics creates a new extended metrics collector with a given transport name 34 | func NewMetrics(transport string) *ExtendedMetricsCollector { 35 | return &ExtendedMetricsCollector{ 36 | BasicMetricsCollector: BasicMetricsCollector{ 37 | avgLatencyByType: make(map[string]time.Duration), 38 | requestCountByType: make(map[string]uint64), 39 | }, 40 | } 41 | } 42 | 43 | // ServerStarted increments the server started counter 44 | func (c *ExtendedMetricsCollector) ServerStarted() { 45 | atomic.AddUint64(&c.serverStarted, 1) 46 | } 47 | 48 | // ServerErrored increments the server errored counter 49 | func (c *ExtendedMetricsCollector) ServerErrored() { 50 | atomic.AddUint64(&c.serverErrored, 1) 51 | } 52 | 53 | // ServerStopped increments the server stopped counter 54 | func (c *ExtendedMetricsCollector) ServerStopped() { 55 | atomic.AddUint64(&c.serverStopped, 1) 56 | } 57 | 58 | // ConnectionOpened records a connection opened event 59 | func (c *ExtendedMetricsCollector) ConnectionOpened() { 60 | atomic.AddUint64(&c.connections, 1) 61 | } 62 | 63 | // ConnectionFailed records a connection failed event 64 | func (c *ExtendedMetricsCollector) ConnectionFailed() { 65 | atomic.AddUint64(&c.connectionFailures, 1) 66 | } 67 | 68 | // ConnectionClosed records a connection closed event 69 | func (c *ExtendedMetricsCollector) ConnectionClosed() { 70 | atomic.AddUint64(&c.connections, ^uint64(0)) // Decrement active connections count 71 | } 72 | 73 | // GetExtendedMetrics returns the current extended metrics 74 | func (c *ExtendedMetricsCollector) GetExtendedMetrics() ServerMetrics { 75 | baseMetrics := c.GetMetrics() 76 | 77 | return ServerMetrics{ 78 | Metrics: baseMetrics, 79 | ServerStarted: atomic.LoadUint64(&c.serverStarted), 80 | ServerErrored: atomic.LoadUint64(&c.serverErrored), 81 | ServerStopped: atomic.LoadUint64(&c.serverStopped), 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /pkg/transaction/mock_stats_test.go: -------------------------------------------------------------------------------- 1 | package transaction 2 | 3 | import ( 4 | "sync/atomic" 5 | "time" 6 | 7 | "github.com/KevoDB/kevo/pkg/stats" 8 | ) 9 | 10 | // StatsCollectorMock is a simple stats collector for testing 11 | type StatsCollectorMock struct { 12 | txCompleted atomic.Int64 13 | txAborted atomic.Int64 14 | } 15 | 16 | // GetStats returns all statistics 17 | func (s *StatsCollectorMock) GetStats() map[string]interface{} { 18 | return map[string]interface{}{ 19 | "tx_completed": s.txCompleted.Load(), 20 | "tx_aborted": s.txAborted.Load(), 21 | } 22 | } 23 | 24 | // GetStatsFiltered returns statistics filtered by prefix 25 | func (s *StatsCollectorMock) GetStatsFiltered(prefix string) map[string]interface{} { 26 | return s.GetStats() // No filtering in mock 27 | } 28 | 29 | // TrackOperation records a single operation 30 | func (s *StatsCollectorMock) TrackOperation(op stats.OperationType) { 31 | // No-op for the mock 32 | } 33 | 34 | // TrackOperationWithLatency records an operation with its latency 35 | func (s *StatsCollectorMock) TrackOperationWithLatency(op stats.OperationType, latencyNs uint64) { 36 | // No-op for the mock 37 | } 38 | 39 | // TrackError increments the counter for the specified error type 40 | func (s *StatsCollectorMock) TrackError(errorType string) { 41 | // No-op for the mock 42 | } 43 | 44 | // TrackBytes adds the specified number of bytes to the read or write counter 45 | func (s *StatsCollectorMock) TrackBytes(isWrite bool, bytes uint64) { 46 | // No-op for the mock 47 | } 48 | 49 | // TrackMemTableSize records the current memtable size 50 | func (s *StatsCollectorMock) TrackMemTableSize(size uint64) { 51 | // No-op for the mock 52 | } 53 | 54 | // TrackFlush increments the flush counter 55 | func (s *StatsCollectorMock) TrackFlush() { 56 | // No-op for the mock 57 | } 58 | 59 | // TrackCompaction increments the compaction counter 60 | func (s *StatsCollectorMock) TrackCompaction() { 61 | // No-op for the mock 62 | } 63 | 64 | // StartRecovery initializes recovery statistics 65 | func (s *StatsCollectorMock) StartRecovery() time.Time { 66 | return time.Now() 67 | } 68 | 69 | // FinishRecovery completes recovery statistics 70 | func (s *StatsCollectorMock) FinishRecovery(startTime time.Time, filesRecovered, entriesRecovered, corruptedEntries uint64) { 71 | // No-op for the mock 72 | } 73 | 74 | // IncrementTxCompleted increments the completed transaction counter 75 | func (s *StatsCollectorMock) IncrementTxCompleted() { 76 | s.txCompleted.Add(1) 77 | } 78 | 79 | // IncrementTxAborted increments the aborted transaction counter 80 | func (s *StatsCollectorMock) IncrementTxAborted() { 81 | s.txAborted.Add(1) 82 | } 83 | -------------------------------------------------------------------------------- /pkg/compaction/file_tracker.go: -------------------------------------------------------------------------------- 1 | package compaction 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sync" 7 | ) 8 | 9 | // DefaultFileTracker is the default implementation of FileTracker 10 | type DefaultFileTracker struct { 11 | // Map of file path -> true for files that have been obsoleted by compaction 12 | obsoleteFiles map[string]bool 13 | 14 | // Map of file path -> true for files that are currently being compacted 15 | pendingFiles map[string]bool 16 | 17 | // Mutex for file tracking maps 18 | filesMu sync.RWMutex 19 | } 20 | 21 | // NewFileTracker creates a new file tracker 22 | func NewFileTracker() *DefaultFileTracker { 23 | return &DefaultFileTracker{ 24 | obsoleteFiles: make(map[string]bool), 25 | pendingFiles: make(map[string]bool), 26 | } 27 | } 28 | 29 | // MarkFileObsolete marks a file as obsolete (can be deleted) 30 | func (f *DefaultFileTracker) MarkFileObsolete(path string) { 31 | f.filesMu.Lock() 32 | defer f.filesMu.Unlock() 33 | 34 | f.obsoleteFiles[path] = true 35 | } 36 | 37 | // MarkFilePending marks a file as being used in a compaction 38 | func (f *DefaultFileTracker) MarkFilePending(path string) { 39 | f.filesMu.Lock() 40 | defer f.filesMu.Unlock() 41 | 42 | f.pendingFiles[path] = true 43 | } 44 | 45 | // UnmarkFilePending removes the pending mark from a file 46 | func (f *DefaultFileTracker) UnmarkFilePending(path string) { 47 | f.filesMu.Lock() 48 | defer f.filesMu.Unlock() 49 | 50 | delete(f.pendingFiles, path) 51 | } 52 | 53 | // IsFileObsolete checks if a file is marked as obsolete 54 | func (f *DefaultFileTracker) IsFileObsolete(path string) bool { 55 | f.filesMu.RLock() 56 | defer f.filesMu.RUnlock() 57 | 58 | return f.obsoleteFiles[path] 59 | } 60 | 61 | // IsFilePending checks if a file is marked as pending compaction 62 | func (f *DefaultFileTracker) IsFilePending(path string) bool { 63 | f.filesMu.RLock() 64 | defer f.filesMu.RUnlock() 65 | 66 | return f.pendingFiles[path] 67 | } 68 | 69 | // CleanupObsoleteFiles removes files that are no longer needed 70 | func (f *DefaultFileTracker) CleanupObsoleteFiles() error { 71 | f.filesMu.Lock() 72 | defer f.filesMu.Unlock() 73 | 74 | // Safely remove obsolete files that aren't pending 75 | for path := range f.obsoleteFiles { 76 | // Skip files that are still being used in a compaction 77 | if f.pendingFiles[path] { 78 | continue 79 | } 80 | 81 | // Try to delete the file 82 | if err := os.Remove(path); err != nil { 83 | if !os.IsNotExist(err) { 84 | return fmt.Errorf("failed to delete obsolete file %s: %w", path, err) 85 | } 86 | // If the file doesn't exist, remove it from our tracking 87 | delete(f.obsoleteFiles, path) 88 | } else { 89 | // Successfully deleted, remove from tracking 90 | delete(f.obsoleteFiles, path) 91 | } 92 | } 93 | 94 | return nil 95 | } 96 | -------------------------------------------------------------------------------- /pkg/transport/common.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | // Standard request/response type constants 8 | const ( 9 | TypeGet = "get" 10 | TypePut = "put" 11 | TypeDelete = "delete" 12 | TypeBatchWrite = "batch_write" 13 | TypeScan = "scan" 14 | TypeBeginTx = "begin_tx" 15 | TypeCommitTx = "commit_tx" 16 | TypeRollbackTx = "rollback_tx" 17 | TypeTxGet = "tx_get" 18 | TypeTxPut = "tx_put" 19 | TypeTxDelete = "tx_delete" 20 | TypeTxScan = "tx_scan" 21 | TypeGetStats = "get_stats" 22 | TypeCompact = "compact" 23 | TypeError = "error" 24 | ) 25 | 26 | // Common errors 27 | var ( 28 | ErrInvalidRequest = errors.New("invalid request") 29 | ErrInvalidPayload = errors.New("invalid payload") 30 | ErrNotConnected = errors.New("not connected to server") 31 | ErrTimeout = errors.New("operation timed out") 32 | ) 33 | 34 | // BasicRequest implements the Request interface 35 | type BasicRequest struct { 36 | RequestType string 37 | RequestData []byte 38 | } 39 | 40 | // Type returns the type of the request 41 | func (r *BasicRequest) Type() string { 42 | return r.RequestType 43 | } 44 | 45 | // Payload returns the payload of the request 46 | func (r *BasicRequest) Payload() []byte { 47 | return r.RequestData 48 | } 49 | 50 | // NewRequest creates a new request with the given type and payload 51 | func NewRequest(requestType string, data []byte) Request { 52 | return &BasicRequest{ 53 | RequestType: requestType, 54 | RequestData: data, 55 | } 56 | } 57 | 58 | // BasicResponse implements the Response interface 59 | type BasicResponse struct { 60 | ResponseType string 61 | ResponseData []byte 62 | ResponseErr error 63 | } 64 | 65 | // Type returns the type of the response 66 | func (r *BasicResponse) Type() string { 67 | return r.ResponseType 68 | } 69 | 70 | // Payload returns the payload of the response 71 | func (r *BasicResponse) Payload() []byte { 72 | return r.ResponseData 73 | } 74 | 75 | // Error returns any error associated with the response 76 | func (r *BasicResponse) Error() error { 77 | return r.ResponseErr 78 | } 79 | 80 | // NewResponse creates a new response with the given type, payload, and error 81 | func NewResponse(responseType string, data []byte, err error) Response { 82 | return &BasicResponse{ 83 | ResponseType: responseType, 84 | ResponseData: data, 85 | ResponseErr: err, 86 | } 87 | } 88 | 89 | // NewErrorResponse creates a new error response 90 | func NewErrorResponse(err error) Response { 91 | var msg []byte 92 | if err != nil { 93 | msg = []byte(err.Error()) 94 | } 95 | return &BasicResponse{ 96 | ResponseType: TypeError, 97 | ResponseData: msg, 98 | ResponseErr: err, 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /pkg/engine/compat.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | 7 | "github.com/KevoDB/kevo/pkg/common/iterator" 8 | ) 9 | 10 | // Compatibility layer for the legacy engine API 11 | 12 | // LegacyTransaction interface is kept for backward compatibility 13 | type LegacyTransaction interface { 14 | Get(key []byte) ([]byte, error) 15 | Put(key, value []byte) error 16 | Delete(key []byte) error 17 | NewIterator() iterator.Iterator 18 | NewRangeIterator(startKey, endKey []byte) iterator.Iterator 19 | Commit() error 20 | Rollback() error 21 | IsReadOnly() bool 22 | } 23 | 24 | // LegacyTransactionCreator is kept for backward compatibility 25 | type LegacyTransactionCreator interface { 26 | CreateTransaction(engine interface{}, readOnly bool) (LegacyTransaction, error) 27 | } 28 | 29 | var ( 30 | // legacyTransactionCreatorFunc holds the function that creates transactions 31 | legacyTransactionCreatorFunc LegacyTransactionCreator 32 | transactionCreatorMu sync.RWMutex 33 | ) 34 | 35 | // RegisterTransactionCreator registers a function that can create transactions 36 | // This is kept for backward compatibility 37 | func RegisterTransactionCreator(creator LegacyTransactionCreator) { 38 | transactionCreatorMu.Lock() 39 | defer transactionCreatorMu.Unlock() 40 | legacyTransactionCreatorFunc = creator 41 | } 42 | 43 | // GetRegisteredTransactionCreator returns the registered transaction creator 44 | // This is for internal use by the engine facade 45 | func GetRegisteredTransactionCreator() LegacyTransactionCreator { 46 | transactionCreatorMu.RLock() 47 | defer transactionCreatorMu.RUnlock() 48 | return legacyTransactionCreatorFunc 49 | } 50 | 51 | // CreateTransactionWithCreator creates a transaction using the registered creator 52 | // This is for internal use by the engine facade 53 | func CreateTransactionWithCreator(engine interface{}, readOnly bool) (LegacyTransaction, error) { 54 | transactionCreatorMu.RLock() 55 | creator := legacyTransactionCreatorFunc 56 | transactionCreatorMu.RUnlock() 57 | 58 | if creator == nil { 59 | return nil, errors.New("no transaction creator registered") 60 | } 61 | 62 | return creator.CreateTransaction(engine, readOnly) 63 | } 64 | 65 | // GetRWLock is a compatibility method for the engine facade 66 | // It returns a sync.RWMutex for use by the legacy transaction code 67 | func (e *EngineFacade) GetRWLock() *sync.RWMutex { 68 | // Forward to the transaction manager's lock 69 | return e.txManager.GetRWLock() 70 | } 71 | 72 | // IncrementTxCompleted is a compatibility method for the engine facade 73 | func (e *EngineFacade) IncrementTxCompleted() { 74 | e.txManager.IncrementTxCompleted() 75 | } 76 | 77 | // IncrementTxAborted is a compatibility method for the engine facade 78 | func (e *EngineFacade) IncrementTxAborted() { 79 | e.txManager.IncrementTxAborted() 80 | } 81 | -------------------------------------------------------------------------------- /pkg/replication/info_provider.go: -------------------------------------------------------------------------------- 1 | package replication 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | const ( 8 | ReplicationModeStandalone = "standalone" 9 | ReplicationModePrimary = "primary" 10 | ReplicationModeReplica = "replica" 11 | ) 12 | 13 | // ReplicationNodeInfo contains information about a node in the replication topology 14 | type ReplicationNodeInfo struct { 15 | Address string // Host:port of the node 16 | LastSequence uint64 // Last applied sequence number 17 | Available bool // Whether the node is available 18 | Region string // Optional region information 19 | Meta map[string]string // Additional metadata 20 | } 21 | 22 | // GetNodeInfo exposes replication topology information to the client service 23 | func (m *Manager) GetNodeInfo() (string, string, []ReplicationNodeInfo, uint64, bool) { 24 | // Return information about the current node and replication topology 25 | var role string 26 | var primaryAddr string 27 | var replicas []ReplicationNodeInfo 28 | var lastSequence uint64 29 | var readOnly bool 30 | 31 | // Safety check - the manager itself cannot be nil here (as this is a method on it), 32 | // but we need to make sure we have valid internal state 33 | m.mu.RLock() 34 | defer m.mu.RUnlock() 35 | 36 | // Check if we have a valid configuration 37 | if m.config == nil { 38 | fmt.Printf("DEBUG[GetNodeInfo]: Replication manager has nil config\n") 39 | // Return safe default values if config is nil 40 | return "standalone", "", nil, 0, false 41 | } 42 | 43 | fmt.Printf("DEBUG[GetNodeInfo]: Replication mode: %s, Enabled: %v\n", 44 | m.config.Mode, m.config.Enabled) 45 | 46 | // Set role 47 | role = m.config.Mode 48 | 49 | // Set primary address 50 | if role == ReplicationModeReplica { 51 | primaryAddr = m.config.PrimaryAddr 52 | } else if role == ReplicationModePrimary { 53 | primaryAddr = m.config.ListenAddr 54 | } 55 | 56 | // Set last sequence 57 | if role == ReplicationModePrimary && m.primary != nil { 58 | lastSequence = m.primary.GetLastSequence() 59 | } else if role == ReplicationModeReplica && m.replica != nil { 60 | lastSequence = m.replica.GetLastAppliedSequence() 61 | } 62 | 63 | // Gather replica information 64 | if role == ReplicationModePrimary && m.primary != nil { 65 | // Get replica sessions from primary 66 | replicas = m.primary.GetReplicaInfo() 67 | } else if role == ReplicationModeReplica { 68 | // Add self as a replica 69 | replicas = append(replicas, ReplicationNodeInfo{ 70 | Address: m.config.ListenAddr, 71 | LastSequence: lastSequence, 72 | Available: true, 73 | Region: "", 74 | Meta: map[string]string{}, 75 | }) 76 | } 77 | 78 | // Check for a valid engine before calling IsReadOnly 79 | if m.engine != nil { 80 | readOnly = m.engine.IsReadOnly() 81 | } 82 | 83 | return role, primaryAddr, replicas, lastSequence, readOnly 84 | } 85 | -------------------------------------------------------------------------------- /pkg/memtable/recovery.go: -------------------------------------------------------------------------------- 1 | package memtable 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/KevoDB/kevo/pkg/config" 7 | "github.com/KevoDB/kevo/pkg/wal" 8 | ) 9 | 10 | // RecoveryOptions contains options for MemTable recovery 11 | type RecoveryOptions struct { 12 | // MaxSequenceNumber is the maximum sequence number to recover 13 | // Entries with sequence numbers greater than this will be ignored 14 | MaxSequenceNumber uint64 15 | 16 | // MaxMemTables is the maximum number of MemTables to create during recovery 17 | // If more MemTables would be needed, an error is returned 18 | MaxMemTables int 19 | 20 | // MemTableSize is the maximum size of each MemTable 21 | MemTableSize int64 22 | } 23 | 24 | // DefaultRecoveryOptions returns the default recovery options 25 | func DefaultRecoveryOptions(cfg *config.Config) *RecoveryOptions { 26 | return &RecoveryOptions{ 27 | MaxSequenceNumber: ^uint64(0), // Max uint64 28 | MaxMemTables: cfg.MaxMemTables, 29 | MemTableSize: cfg.MemTableSize, 30 | } 31 | } 32 | 33 | // RecoverFromWAL rebuilds MemTables from the write-ahead log 34 | // Returns a list of recovered MemTables, the maximum sequence number seen, and stats 35 | func RecoverFromWAL(cfg *config.Config, opts *RecoveryOptions) ([]*MemTable, uint64, error) { 36 | if opts == nil { 37 | opts = DefaultRecoveryOptions(cfg) 38 | } 39 | 40 | // Create the first MemTable 41 | memTables := []*MemTable{NewMemTable()} 42 | var maxSeqNum uint64 43 | 44 | // Function to process each WAL entry 45 | entryHandler := func(entry *wal.Entry) error { 46 | // Skip entries with sequence numbers beyond our max 47 | if entry.SequenceNumber > opts.MaxSequenceNumber { 48 | return nil 49 | } 50 | 51 | // Update the max sequence number 52 | if entry.SequenceNumber > maxSeqNum { 53 | maxSeqNum = entry.SequenceNumber 54 | } 55 | 56 | // Get the current memtable 57 | current := memTables[len(memTables)-1] 58 | 59 | // Check if we should create a new memtable based on size 60 | if current.ApproximateSize() >= opts.MemTableSize { 61 | // Make sure we don't exceed the max number of memtables 62 | if len(memTables) >= opts.MaxMemTables { 63 | return fmt.Errorf("maximum number of memtables (%d) exceeded during recovery", opts.MaxMemTables) 64 | } 65 | 66 | // Mark the current memtable as immutable 67 | current.SetImmutable() 68 | 69 | // Create a new memtable 70 | current = NewMemTable() 71 | memTables = append(memTables, current) 72 | } 73 | 74 | // Process the entry 75 | return current.ProcessWALEntry(entry) 76 | } 77 | 78 | // Replay the WAL directory 79 | _, err := wal.ReplayWALDir(cfg.WALDir, entryHandler) 80 | if err != nil { 81 | return nil, 0, fmt.Errorf("failed to replay WAL: %w", err) 82 | } 83 | 84 | // Stats will be captured in the engine directly 85 | 86 | // maxSeqNum now properly tracks the actual highest sequence number from WAL replay 87 | 88 | return memTables, maxSeqNum, nil 89 | } 90 | -------------------------------------------------------------------------------- /pkg/engine/iterator/factory.go: -------------------------------------------------------------------------------- 1 | package iterator 2 | 3 | import ( 4 | "github.com/KevoDB/kevo/pkg/common/iterator" 5 | "github.com/KevoDB/kevo/pkg/common/iterator/bounded" 6 | "github.com/KevoDB/kevo/pkg/common/iterator/composite" 7 | "github.com/KevoDB/kevo/pkg/memtable" 8 | "github.com/KevoDB/kevo/pkg/sstable" 9 | ) 10 | 11 | // Factory provides methods to create iterators for the storage engine 12 | type Factory struct{} 13 | 14 | // NewFactory creates a new iterator factory 15 | func NewFactory() *Factory { 16 | return &Factory{} 17 | } 18 | 19 | // CreateIterator creates a hierarchical iterator that combines 20 | // memtables and sstables in the correct priority order 21 | func (f *Factory) CreateIterator( 22 | memTables []*memtable.MemTable, 23 | ssTables []*sstable.Reader, 24 | ) iterator.Iterator { 25 | return f.createBaseIterator(memTables, ssTables) 26 | } 27 | 28 | // CreateRangeIterator creates an iterator limited to a specific key range 29 | func (f *Factory) CreateRangeIterator( 30 | memTables []*memtable.MemTable, 31 | ssTables []*sstable.Reader, 32 | startKey, endKey []byte, 33 | ) iterator.Iterator { 34 | baseIter := f.createBaseIterator(memTables, ssTables) 35 | return bounded.NewBoundedIterator(baseIter, startKey, endKey) 36 | } 37 | 38 | // createBaseIterator creates the base hierarchical iterator 39 | func (f *Factory) createBaseIterator( 40 | memTables []*memtable.MemTable, 41 | ssTables []*sstable.Reader, 42 | ) iterator.Iterator { 43 | // If there are no sources, return an empty iterator 44 | if len(memTables) == 0 && len(ssTables) == 0 { 45 | return newEmptyIterator() 46 | } 47 | 48 | // Create individual iterators in newest-to-oldest order 49 | iterators := make([]iterator.Iterator, 0, len(memTables)+len(ssTables)) 50 | 51 | // Add memtable iterators (newest to oldest) 52 | for _, mt := range memTables { 53 | iterators = append(iterators, memtable.NewIteratorAdapter(mt.NewIterator())) 54 | } 55 | 56 | // Add sstable iterators (newest to oldest) 57 | for i := len(ssTables) - 1; i >= 0; i-- { 58 | iterators = append(iterators, sstable.NewIteratorAdapter(ssTables[i].NewIterator())) 59 | } 60 | 61 | // Create hierarchical iterator 62 | return composite.NewHierarchicalIterator(iterators) 63 | } 64 | 65 | // newEmptyIterator creates an iterator that contains no entries 66 | func newEmptyIterator() iterator.Iterator { 67 | return &emptyIterator{} 68 | } 69 | 70 | // Simple empty iterator implementation 71 | type emptyIterator struct{} 72 | 73 | func (e *emptyIterator) SeekToFirst() {} 74 | func (e *emptyIterator) SeekToLast() {} 75 | func (e *emptyIterator) Seek(target []byte) bool { return false } 76 | func (e *emptyIterator) Next() bool { return false } 77 | func (e *emptyIterator) Key() []byte { return nil } 78 | func (e *emptyIterator) Value() []byte { return nil } 79 | func (e *emptyIterator) Valid() bool { return false } 80 | func (e *emptyIterator) IsTombstone() bool { return false } 81 | -------------------------------------------------------------------------------- /pkg/compaction/interfaces.go: -------------------------------------------------------------------------------- 1 | package compaction 2 | 3 | // CompactionStrategy defines the interface for selecting files for compaction 4 | type CompactionStrategy interface { 5 | // SelectCompaction selects files for compaction and returns a CompactionTask 6 | SelectCompaction() (*CompactionTask, error) 7 | 8 | // CompactRange selects files within a key range for compaction 9 | CompactRange(minKey, maxKey []byte) error 10 | 11 | // LoadSSTables reloads SSTable information from disk 12 | LoadSSTables() error 13 | 14 | // Close closes any resources held by the strategy 15 | Close() error 16 | } 17 | 18 | // CompactionExecutor defines the interface for executing compaction tasks 19 | type CompactionExecutor interface { 20 | // CompactFiles performs the actual compaction of the input files 21 | CompactFiles(task *CompactionTask) ([]string, error) 22 | 23 | // DeleteCompactedFiles removes the input files that were successfully compacted 24 | DeleteCompactedFiles(filePaths []string) error 25 | } 26 | 27 | // FileTracker defines the interface for tracking file states during compaction 28 | type FileTracker interface { 29 | // MarkFileObsolete marks a file as obsolete (can be deleted) 30 | MarkFileObsolete(path string) 31 | 32 | // MarkFilePending marks a file as being used in a compaction 33 | MarkFilePending(path string) 34 | 35 | // UnmarkFilePending removes the pending mark from a file 36 | UnmarkFilePending(path string) 37 | 38 | // IsFileObsolete checks if a file is marked as obsolete 39 | IsFileObsolete(path string) bool 40 | 41 | // IsFilePending checks if a file is marked as pending compaction 42 | IsFilePending(path string) bool 43 | 44 | // CleanupObsoleteFiles removes files that are no longer needed 45 | CleanupObsoleteFiles() error 46 | } 47 | 48 | // TombstoneManager defines the interface for tracking and managing tombstones 49 | type TombstoneManager interface { 50 | // AddTombstone records a key deletion 51 | AddTombstone(key []byte) 52 | 53 | // ForcePreserveTombstone marks a tombstone to be preserved indefinitely 54 | ForcePreserveTombstone(key []byte) 55 | 56 | // ShouldKeepTombstone checks if a tombstone should be preserved during compaction 57 | ShouldKeepTombstone(key []byte) bool 58 | 59 | // CollectGarbage removes expired tombstone records 60 | CollectGarbage() 61 | } 62 | 63 | // CompactionCoordinator defines the interface for coordinating compaction processes 64 | type CompactionCoordinator interface { 65 | // Start begins background compaction 66 | Start() error 67 | 68 | // Stop halts background compaction 69 | Stop() error 70 | 71 | // TriggerCompaction forces a compaction cycle 72 | TriggerCompaction() error 73 | 74 | // CompactRange triggers compaction on a specific key range 75 | CompactRange(minKey, maxKey []byte) error 76 | 77 | // TrackTombstone adds a key to the tombstone tracker 78 | TrackTombstone(key []byte) 79 | 80 | // GetCompactionStats returns statistics about the compaction state 81 | GetCompactionStats() map[string]interface{} 82 | } 83 | -------------------------------------------------------------------------------- /cmd/storage-bench/README.md: -------------------------------------------------------------------------------- 1 | # Storage Benchmark Utility 2 | 3 | This utility benchmarks the performance of the Kevo storage engine under various workloads. 4 | 5 | ## Usage 6 | 7 | ```bash 8 | go run ./cmd/storage-bench/... [flags] 9 | ``` 10 | 11 | ### Available Flags 12 | 13 | - `-type`: Type of benchmark to run (write, read, scan, mixed, tune, or all) [default: all] 14 | - `-duration`: Duration to run each benchmark [default: 10s] 15 | - `-keys`: Number of keys to use [default: 100000] 16 | - `-value-size`: Size of values in bytes [default: 100] 17 | - `-data-dir`: Directory to store benchmark data [default: ./benchmark-data] 18 | - `-sequential`: Use sequential keys instead of random [default: false] 19 | - `-cpu-profile`: Write CPU profile to file [optional] 20 | - `-mem-profile`: Write memory profile to file [optional] 21 | - `-results`: File to write results to (in addition to stdout) [optional] 22 | - `-tune`: Run configuration tuning benchmarks [default: false] 23 | 24 | ## Example Commands 25 | 26 | Run all benchmarks with default settings: 27 | ```bash 28 | go run ./cmd/storage-bench/... 29 | ``` 30 | 31 | Run only write benchmark with 1 million keys and 1KB values for 30 seconds: 32 | ```bash 33 | go run ./cmd/storage-bench/... -type=write -keys=1000000 -value-size=1024 -duration=30s 34 | ``` 35 | 36 | Run read and scan benchmarks with sequential keys: 37 | ```bash 38 | go run ./cmd/storage-bench/... -type=read,scan -sequential 39 | ``` 40 | 41 | Run with profiling enabled: 42 | ```bash 43 | go run ./cmd/storage-bench/... -cpu-profile=cpu.prof -mem-profile=mem.prof 44 | ``` 45 | 46 | Run configuration tuning benchmarks: 47 | ```bash 48 | go run ./cmd/storage-bench/... -tune 49 | ``` 50 | 51 | ## Benchmark Types 52 | 53 | 1. **Write Benchmark**: Measures throughput and latency of key-value writes 54 | 2. **Read Benchmark**: Measures throughput and latency of key lookups 55 | 3. **Scan Benchmark**: Measures performance of range scans 56 | 4. **Mixed Benchmark**: Simulates real-world workload with 75% reads, 25% writes 57 | 5. **Compaction Benchmark**: Tests compaction throughput and overhead (available through code API) 58 | 6. **Tuning Benchmark**: Tests different configuration parameters to find optimal settings 59 | 60 | ## Result Interpretation 61 | 62 | Benchmark results include: 63 | - Operations per second (throughput) 64 | - Average latency per operation 65 | - Hit rate for read operations 66 | - Throughput in MB/s for compaction 67 | - Memory usage statistics 68 | 69 | ## Configuration Tuning 70 | 71 | The tuning benchmark tests various configuration parameters including: 72 | - `MemTableSize`: Sizes tested: 16MB, 32MB 73 | - `SSTableBlockSize`: Sizes tested: 8KB, 16KB 74 | - `WALSyncMode`: Modes tested: None, Batch 75 | - `CompactionRatio`: Ratios tested: 10.0, 20.0 76 | 77 | Tuning results are saved to: 78 | - `tuning_results.json`: Detailed benchmark metrics for each configuration 79 | - `recommendations.md`: Markdown file with performance analysis and optimal configuration recommendations 80 | 81 | The recommendations include: 82 | - Optimal settings for write-heavy workloads 83 | - Optimal settings for read-heavy workloads 84 | - Balanced settings for mixed workloads 85 | - Additional configuration advice 86 | 87 | ## Profiling 88 | 89 | Use the `-cpu-profile` and `-mem-profile` flags to generate profiling data that can be analyzed with: 90 | 91 | ```bash 92 | go tool pprof cpu.prof 93 | go tool pprof mem.prof 94 | ``` -------------------------------------------------------------------------------- /pkg/client/utils.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "math" 7 | "math/rand" 8 | "time" 9 | ) 10 | 11 | // RetryableFunc is a function that can be retried 12 | type RetryableFunc func() error 13 | 14 | // Errors that can occur during client operations 15 | var ( 16 | // ErrNotConnected indicates the client is not connected to the server 17 | ErrNotConnected = errors.New("not connected to server") 18 | 19 | // ErrInvalidOptions indicates invalid client options 20 | ErrInvalidOptions = errors.New("invalid client options") 21 | 22 | // ErrTimeout indicates a request timed out 23 | ErrTimeout = errors.New("request timed out") 24 | 25 | // ErrKeyNotFound indicates a key was not found 26 | ErrKeyNotFound = errors.New("key not found") 27 | 28 | // ErrTransactionConflict indicates a transaction conflict occurred 29 | ErrTransactionConflict = errors.New("transaction conflict detected") 30 | ) 31 | 32 | // IsRetryableError returns true if the error is considered retryable 33 | func IsRetryableError(err error) bool { 34 | if err == nil { 35 | return false 36 | } 37 | 38 | // These errors are considered transient and can be retried 39 | if errors.Is(err, ErrTimeout) || errors.Is(err, context.DeadlineExceeded) { 40 | return true 41 | } 42 | 43 | // Other errors are considered permanent 44 | return false 45 | } 46 | 47 | // RetryWithBackoff executes a function with exponential backoff and jitter 48 | func RetryWithBackoff( 49 | ctx context.Context, 50 | fn RetryableFunc, 51 | maxRetries int, 52 | initialBackoff time.Duration, 53 | maxBackoff time.Duration, 54 | backoffFactor float64, 55 | jitter float64, 56 | ) error { 57 | var err error 58 | backoff := initialBackoff 59 | 60 | for attempt := 0; attempt <= maxRetries; attempt++ { 61 | // Execute the function 62 | err = fn() 63 | if err == nil { 64 | return nil 65 | } 66 | 67 | // Check if the error is retryable 68 | if !IsRetryableError(err) { 69 | return err 70 | } 71 | 72 | // Check if we've reached the retry limit 73 | if attempt >= maxRetries { 74 | return err 75 | } 76 | 77 | // Calculate next backoff with jitter 78 | jitterRange := float64(backoff) * jitter 79 | jitterAmount := int64(rand.Float64() * jitterRange) 80 | sleepTime := backoff + time.Duration(jitterAmount) 81 | 82 | // Check context before sleeping 83 | select { 84 | case <-ctx.Done(): 85 | return ctx.Err() 86 | case <-time.After(sleepTime): 87 | // Continue with next attempt 88 | } 89 | 90 | // Increase backoff for next attempt 91 | backoff = time.Duration(float64(backoff) * backoffFactor) 92 | if backoff > maxBackoff { 93 | backoff = maxBackoff 94 | } 95 | } 96 | 97 | return err 98 | } 99 | 100 | // CalculateExponentialBackoff calculates the backoff time for a given attempt 101 | func CalculateExponentialBackoff( 102 | attempt int, 103 | initialBackoff time.Duration, 104 | maxBackoff time.Duration, 105 | backoffFactor float64, 106 | jitter float64, 107 | ) time.Duration { 108 | backoff := initialBackoff * time.Duration(math.Pow(backoffFactor, float64(attempt))) 109 | if backoff > maxBackoff { 110 | backoff = maxBackoff 111 | } 112 | 113 | if jitter > 0 { 114 | jitterRange := float64(backoff) * jitter 115 | jitterAmount := int64(rand.Float64() * jitterRange) 116 | backoff = backoff + time.Duration(jitterAmount) 117 | } 118 | 119 | return backoff 120 | } 121 | -------------------------------------------------------------------------------- /pkg/sstable/bench_bloom_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | ) 9 | 10 | func BenchmarkBloomFilterGet(b *testing.B) { 11 | // Test with and without bloom filters 12 | for _, enableBloomFilter := range []bool{false, true} { 13 | name := "WithoutBloomFilter" 14 | if enableBloomFilter { 15 | name = "WithBloomFilter" 16 | } 17 | 18 | b.Run(name, func(b *testing.B) { 19 | // Create temporary directory for the test 20 | tmpDir, err := os.MkdirTemp("", "sstable_bloom_benchmark") 21 | if err != nil { 22 | b.Fatalf("Failed to create temp dir: %v", err) 23 | } 24 | defer os.RemoveAll(tmpDir) 25 | 26 | // Create SSTable file path 27 | tablePath := filepath.Join(tmpDir, fmt.Sprintf("bench_%s.sst", name)) 28 | 29 | // Create writer with or without bloom filters 30 | options := DefaultWriterOptions() 31 | options.EnableBloomFilter = enableBloomFilter 32 | writer, err := NewWriterWithOptions(tablePath, options) 33 | if err != nil { 34 | b.Fatalf("Failed to create writer: %v", err) 35 | } 36 | 37 | // Insert some known keys 38 | // Use fewer keys for faster benchmarking 39 | const numKeys = 1000 40 | 41 | // Create sorted keys (SSTable requires sorted keys) 42 | keys := make([]string, numKeys) 43 | for i := 0; i < numKeys; i++ { 44 | keys[i] = fmt.Sprintf("key%08d", i) 45 | } 46 | 47 | // Add them to the SSTable 48 | for _, key := range keys { 49 | value := []byte(fmt.Sprintf("val-%s", key)) 50 | if err := writer.Add([]byte(key), value); err != nil { 51 | b.Fatalf("Failed to add key %s: %v", key, err) 52 | } 53 | } 54 | 55 | // Finish writing 56 | if err := writer.Finish(); err != nil { 57 | b.Fatalf("Failed to finish writer: %v", err) 58 | } 59 | 60 | // Open reader 61 | reader, err := OpenReader(tablePath) 62 | if err != nil { 63 | b.Fatalf("Failed to open reader: %v", err) 64 | } 65 | defer reader.Close() 66 | 67 | // Test a few specific lookups to ensure the table was written correctly 68 | for i := 0; i < 5; i++ { 69 | testKey := []byte(fmt.Sprintf("key%08d", i)) 70 | expectedValue := []byte(fmt.Sprintf("val-key%08d", i)) 71 | 72 | val, err := reader.Get(testKey) 73 | if err != nil { 74 | b.Fatalf("Verification failed: couldn't find key %s: %v", testKey, err) 75 | } 76 | 77 | if string(val) != string(expectedValue) { 78 | b.Fatalf("Value mismatch for key %s: got %q, expected %q", testKey, val, expectedValue) 79 | } 80 | 81 | b.Logf("Successfully verified key: %s", testKey) 82 | } 83 | 84 | // Reset timer for the benchmark 85 | b.ResetTimer() 86 | 87 | // Run benchmark - alternate between existing and non-existing keys 88 | for i := 0; i < b.N; i++ { 89 | var key []byte 90 | if i%2 == 0 { 91 | // Existing key 92 | keyIdx := i % numKeys 93 | key = []byte(fmt.Sprintf("key%08d", keyIdx)) 94 | 95 | // Should find this key 96 | _, err := reader.Get(key) 97 | if err != nil { 98 | b.Fatalf("Failed to find existing key %s: %v", key, err) 99 | } 100 | } else { 101 | // Non-existing key - this is where bloom filters really help 102 | key = []byte(fmt.Sprintf("nonexistent%08d", i)) 103 | 104 | // Should not find this key 105 | _, err := reader.Get(key) 106 | if err != ErrNotFound { 107 | b.Fatalf("Expected ErrNotFound for key %s, got: %v", key, err) 108 | } 109 | } 110 | } 111 | }) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /pkg/transport/registry.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // registry implements the Registry interface 9 | type registry struct { 10 | mu sync.RWMutex 11 | clientFactories map[string]ClientFactory 12 | serverFactories map[string]ServerFactory 13 | } 14 | 15 | // NewRegistry creates a new transport registry 16 | func NewRegistry() Registry { 17 | return ®istry{ 18 | clientFactories: make(map[string]ClientFactory), 19 | serverFactories: make(map[string]ServerFactory), 20 | } 21 | } 22 | 23 | // DefaultRegistry is the default global registry instance 24 | var DefaultRegistry = NewRegistry() 25 | 26 | // RegisterClient adds a new client implementation to the registry 27 | func (r *registry) RegisterClient(name string, factory ClientFactory) { 28 | r.mu.Lock() 29 | defer r.mu.Unlock() 30 | r.clientFactories[name] = factory 31 | } 32 | 33 | // RegisterServer adds a new server implementation to the registry 34 | func (r *registry) RegisterServer(name string, factory ServerFactory) { 35 | r.mu.Lock() 36 | defer r.mu.Unlock() 37 | r.serverFactories[name] = factory 38 | } 39 | 40 | // CreateClient instantiates a client by name 41 | func (r *registry) CreateClient(name, endpoint string, options TransportOptions) (Client, error) { 42 | r.mu.RLock() 43 | factory, exists := r.clientFactories[name] 44 | r.mu.RUnlock() 45 | 46 | if !exists { 47 | return nil, fmt.Errorf("transport client %q not registered", name) 48 | } 49 | 50 | return factory(endpoint, options) 51 | } 52 | 53 | // CreateServer instantiates a server by name 54 | func (r *registry) CreateServer(name, address string, options TransportOptions) (Server, error) { 55 | r.mu.RLock() 56 | factory, exists := r.serverFactories[name] 57 | r.mu.RUnlock() 58 | 59 | if !exists { 60 | return nil, fmt.Errorf("transport server %q not registered", name) 61 | } 62 | 63 | return factory(address, options) 64 | } 65 | 66 | // ListTransports returns all available transport names 67 | func (r *registry) ListTransports() []string { 68 | r.mu.RLock() 69 | defer r.mu.RUnlock() 70 | 71 | // Get unique transport names 72 | names := make(map[string]struct{}) 73 | for name := range r.clientFactories { 74 | names[name] = struct{}{} 75 | } 76 | for name := range r.serverFactories { 77 | names[name] = struct{}{} 78 | } 79 | 80 | // Convert to slice 81 | result := make([]string, 0, len(names)) 82 | for name := range names { 83 | result = append(result, name) 84 | } 85 | 86 | return result 87 | } 88 | 89 | // Helper functions for global registry 90 | 91 | // RegisterClientTransport registers a client transport with the default registry 92 | func RegisterClientTransport(name string, factory ClientFactory) { 93 | DefaultRegistry.RegisterClient(name, factory) 94 | } 95 | 96 | // RegisterServerTransport registers a server transport with the default registry 97 | func RegisterServerTransport(name string, factory ServerFactory) { 98 | DefaultRegistry.RegisterServer(name, factory) 99 | } 100 | 101 | // GetClient creates a client using the default registry 102 | func GetClient(name, endpoint string, options TransportOptions) (Client, error) { 103 | return DefaultRegistry.CreateClient(name, endpoint, options) 104 | } 105 | 106 | // GetServer creates a server using the default registry 107 | func GetServer(name, address string, options TransportOptions) (Server, error) { 108 | return DefaultRegistry.CreateServer(name, address, options) 109 | } 110 | 111 | // AvailableTransports lists all available transports in the default registry 112 | func AvailableTransports() []string { 113 | return DefaultRegistry.ListTransports() 114 | } 115 | -------------------------------------------------------------------------------- /pkg/transport/metrics_test.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestBasicMetricsCollector(t *testing.T) { 10 | collector := NewMetricsCollector() 11 | 12 | // Test initial state 13 | metrics := collector.GetMetrics() 14 | if metrics.TotalRequests != 0 || 15 | metrics.SuccessfulRequests != 0 || 16 | metrics.FailedRequests != 0 || 17 | metrics.BytesSent != 0 || 18 | metrics.BytesReceived != 0 || 19 | metrics.Connections != 0 || 20 | metrics.ConnectionFailures != 0 || 21 | len(metrics.AvgLatencyByType) != 0 { 22 | t.Errorf("Initial metrics not initialized correctly: %+v", metrics) 23 | } 24 | 25 | // Test recording successful request 26 | startTime := time.Now().Add(-100 * time.Millisecond) // Simulate 100ms request 27 | collector.RecordRequest("get", startTime, nil) 28 | 29 | metrics = collector.GetMetrics() 30 | if metrics.TotalRequests != 1 { 31 | t.Errorf("Expected TotalRequests to be 1, got %d", metrics.TotalRequests) 32 | } 33 | if metrics.SuccessfulRequests != 1 { 34 | t.Errorf("Expected SuccessfulRequests to be 1, got %d", metrics.SuccessfulRequests) 35 | } 36 | if metrics.FailedRequests != 0 { 37 | t.Errorf("Expected FailedRequests to be 0, got %d", metrics.FailedRequests) 38 | } 39 | 40 | // Check average latency 41 | if avgLatency, exists := metrics.AvgLatencyByType["get"]; !exists { 42 | t.Error("Expected 'get' latency to exist") 43 | } else if avgLatency < 100*time.Millisecond { 44 | t.Errorf("Expected latency to be at least 100ms, got %v", avgLatency) 45 | } 46 | 47 | // Test recording failed request 48 | startTime = time.Now().Add(-200 * time.Millisecond) // Simulate 200ms request 49 | collector.RecordRequest("get", startTime, errors.New("test error")) 50 | 51 | metrics = collector.GetMetrics() 52 | if metrics.TotalRequests != 2 { 53 | t.Errorf("Expected TotalRequests to be 2, got %d", metrics.TotalRequests) 54 | } 55 | if metrics.SuccessfulRequests != 1 { 56 | t.Errorf("Expected SuccessfulRequests to be 1, got %d", metrics.SuccessfulRequests) 57 | } 58 | if metrics.FailedRequests != 1 { 59 | t.Errorf("Expected FailedRequests to be 1, got %d", metrics.FailedRequests) 60 | } 61 | 62 | // Test average latency calculation for multiple requests 63 | startTime = time.Now().Add(-300 * time.Millisecond) 64 | collector.RecordRequest("put", startTime, nil) 65 | 66 | startTime = time.Now().Add(-500 * time.Millisecond) 67 | collector.RecordRequest("put", startTime, nil) 68 | 69 | metrics = collector.GetMetrics() 70 | avgPutLatency := metrics.AvgLatencyByType["put"] 71 | 72 | // Expected avg is around (300ms + 500ms) / 2 = 400ms 73 | if avgPutLatency < 390*time.Millisecond || avgPutLatency > 410*time.Millisecond { 74 | t.Errorf("Expected average 'put' latency to be around 400ms, got %v", avgPutLatency) 75 | } 76 | 77 | // Test byte tracking 78 | collector.RecordSend(1000) 79 | collector.RecordReceive(2000) 80 | 81 | metrics = collector.GetMetrics() 82 | if metrics.BytesSent != 1000 { 83 | t.Errorf("Expected BytesSent to be 1000, got %d", metrics.BytesSent) 84 | } 85 | if metrics.BytesReceived != 2000 { 86 | t.Errorf("Expected BytesReceived to be 2000, got %d", metrics.BytesReceived) 87 | } 88 | 89 | // Test connection tracking 90 | collector.RecordConnection(true) 91 | collector.RecordConnection(false) 92 | collector.RecordConnection(true) 93 | 94 | metrics = collector.GetMetrics() 95 | if metrics.Connections != 2 { 96 | t.Errorf("Expected Connections to be 2, got %d", metrics.Connections) 97 | } 98 | if metrics.ConnectionFailures != 1 { 99 | t.Errorf("Expected ConnectionFailures to be 1, got %d", metrics.ConnectionFailures) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /pkg/client/replication_test.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | ) 7 | 8 | // Renamed from TestClientConnectWithTopology to avoid duplicate function name 9 | func TestClientConnectWithReplicationTopology(t *testing.T) { 10 | // Create mock client 11 | mock := newMockClient() 12 | mock.setResponse("GetNodeInfo", []byte(`{ 13 | "node_role": 0, 14 | "primary_address": "", 15 | "replicas": [], 16 | "last_sequence": 0, 17 | "read_only": false 18 | }`)) 19 | 20 | // Create and override client 21 | options := DefaultClientOptions() 22 | options.TransportType = "mock" 23 | client, err := NewClient(options) 24 | if err != nil { 25 | t.Fatalf("Failed to create client: %v", err) 26 | } 27 | 28 | // Replace the transport with our manually configured mock 29 | client.client = mock 30 | 31 | // Connect and discover topology 32 | err = client.Connect(context.Background()) 33 | if err != nil { 34 | t.Fatalf("Connect failed: %v", err) 35 | } 36 | 37 | // Verify node info was collected correctly 38 | if client.nodeInfo == nil { 39 | t.Fatal("Expected nodeInfo to be set") 40 | } 41 | if client.nodeInfo.Role != "standalone" { 42 | t.Errorf("Expected role to be standalone, got %s", client.nodeInfo.Role) 43 | } 44 | } 45 | 46 | // Test simple replica check 47 | func TestIsReplicaMethod(t *testing.T) { 48 | // Setup client with replica node info 49 | client := &Client{ 50 | options: DefaultClientOptions(), 51 | nodeInfo: &NodeInfo{ 52 | Role: "replica", 53 | PrimaryAddr: "primary:50051", 54 | }, 55 | } 56 | 57 | // Verify IsReplica returns true 58 | if !client.IsReplica() { 59 | t.Error("Expected IsReplica() to return true for a replica node") 60 | } 61 | 62 | // Verify IsPrimary returns false 63 | if client.IsPrimary() { 64 | t.Error("Expected IsPrimary() to return false for a replica node") 65 | } 66 | 67 | // Verify IsStandalone returns false 68 | if client.IsStandalone() { 69 | t.Error("Expected IsStandalone() to return false for a replica node") 70 | } 71 | } 72 | 73 | // Test simple primary check 74 | func TestIsPrimaryMethod(t *testing.T) { 75 | // Setup client with primary node info 76 | client := &Client{ 77 | options: DefaultClientOptions(), 78 | nodeInfo: &NodeInfo{ 79 | Role: "primary", 80 | }, 81 | } 82 | 83 | // Verify IsPrimary returns true 84 | if !client.IsPrimary() { 85 | t.Error("Expected IsPrimary() to return true for a primary node") 86 | } 87 | 88 | // Verify IsReplica returns false 89 | if client.IsReplica() { 90 | t.Error("Expected IsReplica() to return false for a primary node") 91 | } 92 | 93 | // Verify IsStandalone returns false 94 | if client.IsStandalone() { 95 | t.Error("Expected IsStandalone() to return false for a primary node") 96 | } 97 | } 98 | 99 | // Test simple standalone check 100 | func TestIsStandaloneMethod(t *testing.T) { 101 | // Setup client with standalone node info 102 | client := &Client{ 103 | options: DefaultClientOptions(), 104 | nodeInfo: &NodeInfo{ 105 | Role: "standalone", 106 | }, 107 | } 108 | 109 | // Verify IsStandalone returns true 110 | if !client.IsStandalone() { 111 | t.Error("Expected IsStandalone() to return true for a standalone node") 112 | } 113 | 114 | // Verify IsPrimary returns false 115 | if client.IsPrimary() { 116 | t.Error("Expected IsPrimary() to return false for a standalone node") 117 | } 118 | 119 | // Verify IsReplica returns false 120 | if client.IsReplica() { 121 | t.Error("Expected IsReplica() to return false for a standalone node") 122 | } 123 | 124 | // Test with nil nodeInfo should also return true for standalone 125 | client = &Client{ 126 | options: DefaultClientOptions(), 127 | nodeInfo: nil, 128 | } 129 | if !client.IsStandalone() { 130 | t.Error("Expected IsStandalone() to return true when nodeInfo is nil") 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /pkg/sstable/error_handling_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "errors" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | // TestFinishHandlesCloseErrors tests that Finish() properly handles Close() errors by 10 | // simulating a scenario where the file is closed prematurely 11 | func TestFinishHandlesCloseErrors(t *testing.T) { 12 | tempDir := t.TempDir() 13 | sstablePath := tempDir + "/test.sst" 14 | 15 | writer, err := NewWriter(sstablePath) 16 | if err != nil { 17 | t.Fatalf("Failed to create writer: %v", err) 18 | } 19 | 20 | // Add some data so Finish() has work to do 21 | if err := writer.Add([]byte("key1"), []byte("value1")); err != nil { 22 | t.Fatalf("Failed to add entry: %v", err) 23 | } 24 | 25 | // Manually close the underlying file to simulate a file descriptor error 26 | // This will cause subsequent operations to fail, and Close() to potentially fail too 27 | if writer.fileManager.file != nil { 28 | writer.fileManager.file.Close() 29 | // Set it to nil so our Close() won't try to close it again 30 | writer.fileManager.file = nil 31 | } 32 | 33 | // Finish should still complete without panicking, even if Close() fails 34 | err = writer.Finish() 35 | // We expect some kind of error due to the closed file 36 | if err == nil { 37 | t.Error("Expected Finish() to return an error when file operations fail") 38 | } 39 | } 40 | 41 | // TestFinishWithInvalidFile tests that the error handling works when file operations fail 42 | func TestFinishWithInvalidFile(t *testing.T) { 43 | tempDir := t.TempDir() 44 | sstablePath := tempDir + "/test.sst" 45 | 46 | writer, err := NewWriter(sstablePath) 47 | if err != nil { 48 | t.Fatalf("Failed to create writer: %v", err) 49 | } 50 | 51 | // Add some data 52 | if err := writer.Add([]byte("key1"), []byte("value1")); err != nil { 53 | t.Fatalf("Failed to add entry: %v", err) 54 | } 55 | 56 | // Create a scenario where both operations fail and Close() fails 57 | // We'll simulate this by manually closing the file to cause subsequent operations to fail 58 | if writer.fileManager.file != nil { 59 | writer.fileManager.file.Close() // This will cause writes to fail 60 | writer.fileManager.file = nil // Set to nil to avoid double-close 61 | } 62 | 63 | err = writer.Finish() 64 | if err == nil { 65 | t.Error("Expected Finish() to return an error when file operations fail") 66 | } 67 | } 68 | 69 | // TestCleanupWithInvalidFile tests that Cleanup() handles file errors properly 70 | func TestCleanupWithInvalidFile(t *testing.T) { 71 | tempDir := t.TempDir() 72 | 73 | // Create a FileManager with non-existent paths to trigger errors 74 | nonExistentFile := tempDir + "/nonexistent.tmp" 75 | 76 | fm := &FileManager{ 77 | path: tempDir + "/test.sst", 78 | tmpPath: nonExistentFile, // This file doesn't exist, so Remove() will fail 79 | file: nil, // No file to close 80 | } 81 | 82 | err := fm.Cleanup() 83 | if err == nil { 84 | t.Error("Expected Cleanup() to return an error when removing non-existent file") 85 | } 86 | 87 | // Should be able to unwrap to the underlying os.PathError 88 | if !errors.Is(err, os.ErrNotExist) { 89 | t.Errorf("Expected error to wrap os.ErrNotExist, got: %v", err) 90 | } 91 | } 92 | 93 | // TestSuccessfulCloseDoesNotAffectResult tests that successful Close() doesn't change success results 94 | func TestSuccessfulCloseDoesNotAffectResult(t *testing.T) { 95 | tempDir := t.TempDir() 96 | sstablePath := tempDir + "/test.sst" 97 | 98 | writer, err := NewWriter(sstablePath) 99 | if err != nil { 100 | t.Fatalf("Failed to create writer: %v", err) 101 | } 102 | 103 | // Add some data 104 | if err := writer.Add([]byte("key1"), []byte("value1")); err != nil { 105 | t.Fatalf("Failed to add entry: %v", err) 106 | } 107 | 108 | // This should succeed normally 109 | err = writer.Finish() 110 | if err != nil { 111 | t.Errorf("Expected Finish() to succeed, got error: %v", err) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 2 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 3 | github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= 4 | github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= 5 | github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= 6 | github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= 7 | github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= 8 | github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= 9 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 10 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 11 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 12 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 13 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 14 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 15 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 16 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 17 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 18 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 19 | github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 20 | github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 21 | go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= 22 | go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= 23 | go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= 24 | go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= 25 | go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= 26 | go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= 27 | go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= 28 | go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= 29 | go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= 30 | go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= 31 | go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= 32 | go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= 33 | golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= 34 | golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= 35 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 36 | golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= 37 | golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 38 | golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= 39 | golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= 40 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a h1:51aaUVRocpvUOSQKM6Q7VuoaktNIaMCLuhZB6DKksq4= 41 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a/go.mod h1:uRxBH1mhmO8PGhU89cMcHaXKZqO+OfakD8QQO0oYwlQ= 42 | google.golang.org/grpc v1.72.0 h1:S7UkcVa60b5AAQTaO6ZKamFp1zMZSU0fGDK2WZLbBnM= 43 | google.golang.org/grpc v1.72.0/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= 44 | google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= 45 | google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= 46 | -------------------------------------------------------------------------------- /pkg/transaction/manager.go: -------------------------------------------------------------------------------- 1 | package transaction 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | 8 | "github.com/KevoDB/kevo/pkg/stats" 9 | ) 10 | 11 | // Manager implements the TransactionManager interface 12 | type Manager struct { 13 | // Storage backend for transaction operations 14 | storage StorageBackend 15 | 16 | // Statistics collector 17 | stats stats.Collector 18 | 19 | // Transaction isolation lock 20 | txLock sync.RWMutex 21 | 22 | // Transaction counters 23 | txStarted atomic.Uint64 24 | txCompleted atomic.Uint64 25 | txAborted atomic.Uint64 26 | 27 | // TTL settings 28 | readOnlyTxTTL time.Duration 29 | readWriteTxTTL time.Duration 30 | idleTxTimeout time.Duration 31 | } 32 | 33 | // NewManager creates a new transaction manager with default TTL settings 34 | func NewManager(storage StorageBackend, stats stats.Collector) *Manager { 35 | return &Manager{ 36 | storage: storage, 37 | stats: stats, 38 | readOnlyTxTTL: 3 * time.Minute, // 3 minutes 39 | readWriteTxTTL: 1 * time.Minute, // 1 minute 40 | idleTxTimeout: 30 * time.Second, // 30 seconds 41 | } 42 | } 43 | 44 | // NewManagerWithTTL creates a new transaction manager with custom TTL settings 45 | func NewManagerWithTTL(storage StorageBackend, stats stats.Collector, readOnlyTTL, readWriteTTL, idleTimeout time.Duration) *Manager { 46 | return &Manager{ 47 | storage: storage, 48 | stats: stats, 49 | readOnlyTxTTL: readOnlyTTL, 50 | readWriteTxTTL: readWriteTTL, 51 | idleTxTimeout: idleTimeout, 52 | } 53 | } 54 | 55 | // BeginTransaction starts a new transaction 56 | func (m *Manager) BeginTransaction(readOnly bool) (Transaction, error) { 57 | // Track transaction start 58 | if m.stats != nil { 59 | m.stats.TrackOperation(stats.OpTxBegin) 60 | } 61 | m.txStarted.Add(1) 62 | 63 | // Convert to transaction mode 64 | mode := ReadWrite 65 | if readOnly { 66 | mode = ReadOnly 67 | } 68 | 69 | // Create a new transaction 70 | now := time.Now() 71 | 72 | // Set TTL based on transaction mode 73 | var ttl time.Duration 74 | if mode == ReadOnly { 75 | ttl = m.readOnlyTxTTL 76 | } else { 77 | ttl = m.readWriteTxTTL 78 | } 79 | 80 | tx := &TransactionImpl{ 81 | storage: m.storage, 82 | mode: mode, 83 | buffer: NewBuffer(), 84 | rwLock: &m.txLock, 85 | stats: m, 86 | creationTime: now, 87 | lastActiveTime: now, 88 | ttl: ttl, 89 | } 90 | 91 | // Set transaction as active 92 | tx.active.Store(true) 93 | 94 | // Acquire appropriate lock 95 | if mode == ReadOnly { 96 | m.txLock.RLock() 97 | tx.hasReadLock.Store(true) 98 | } else { 99 | m.txLock.Lock() 100 | tx.hasWriteLock.Store(true) 101 | } 102 | 103 | return tx, nil 104 | } 105 | 106 | // GetRWLock returns the transaction isolation lock 107 | func (m *Manager) GetRWLock() *sync.RWMutex { 108 | return &m.txLock 109 | } 110 | 111 | // IncrementTxCompleted increments the completed transaction counter 112 | func (m *Manager) IncrementTxCompleted() { 113 | m.txCompleted.Add(1) 114 | 115 | // Track the commit operation 116 | if m.stats != nil { 117 | m.stats.TrackOperation(stats.OpTxCommit) 118 | } 119 | } 120 | 121 | // IncrementTxAborted increments the aborted transaction counter 122 | func (m *Manager) IncrementTxAborted() { 123 | m.txAborted.Add(1) 124 | 125 | // Track the rollback operation 126 | if m.stats != nil { 127 | m.stats.TrackOperation(stats.OpTxRollback) 128 | } 129 | } 130 | 131 | // GetTransactionStats returns transaction statistics 132 | func (m *Manager) GetTransactionStats() map[string]interface{} { 133 | stats := make(map[string]interface{}) 134 | 135 | stats["tx_started"] = m.txStarted.Load() 136 | stats["tx_completed"] = m.txCompleted.Load() 137 | stats["tx_aborted"] = m.txAborted.Load() 138 | 139 | // Calculate active transactions 140 | active := m.txStarted.Load() - m.txCompleted.Load() - m.txAborted.Load() 141 | stats["tx_active"] = active 142 | 143 | return stats 144 | } 145 | -------------------------------------------------------------------------------- /pkg/replication/engine_applier.go: -------------------------------------------------------------------------------- 1 | package replication 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/KevoDB/kevo/pkg/common/log" 7 | "github.com/KevoDB/kevo/pkg/engine/interfaces" 8 | "github.com/KevoDB/kevo/pkg/wal" 9 | ) 10 | 11 | // EngineApplier implements the WALEntryApplier interface for applying 12 | // WAL entries to a database engine. 13 | type EngineApplier struct { 14 | engine interfaces.Engine 15 | } 16 | 17 | // NewEngineApplier creates a new engine applier 18 | func NewEngineApplier(engine interfaces.Engine) *EngineApplier { 19 | return &EngineApplier{ 20 | engine: engine, 21 | } 22 | } 23 | 24 | // Apply applies a WAL entry to the engine through its API 25 | // This bypasses the read-only check for replication purposes 26 | func (e *EngineApplier) Apply(entry *wal.Entry) error { 27 | log.Info("Replica applying WAL entry through engine API: seq=%d, type=%d, key=%s", 28 | entry.SequenceNumber, entry.Type, string(entry.Key)) 29 | 30 | // Check if engine is in read-only mode 31 | isReadOnly := false 32 | if checker, ok := e.engine.(interface{ IsReadOnly() bool }); ok { 33 | isReadOnly = checker.IsReadOnly() 34 | } 35 | 36 | // Handle application based on read-only status and operation type 37 | if isReadOnly { 38 | return e.applyInReadOnlyMode(entry) 39 | } 40 | 41 | return e.applyInNormalMode(entry) 42 | } 43 | 44 | // applyInReadOnlyMode applies a WAL entry in read-only mode 45 | func (e *EngineApplier) applyInReadOnlyMode(entry *wal.Entry) error { 46 | log.Info("Applying entry in read-only mode: seq=%d", entry.SequenceNumber) 47 | 48 | switch entry.Type { 49 | case wal.OpTypePut: 50 | // Try internal interface first 51 | if putter, ok := e.engine.(interface{ PutInternal(key, value []byte) error }); ok { 52 | return putter.PutInternal(entry.Key, entry.Value) 53 | } 54 | 55 | // Try temporarily disabling read-only mode 56 | if setter, ok := e.engine.(interface{ SetReadOnly(bool) }); ok { 57 | setter.SetReadOnly(false) 58 | err := e.engine.Put(entry.Key, entry.Value) 59 | setter.SetReadOnly(true) 60 | return err 61 | } 62 | 63 | // Fall back to normal operation which may fail 64 | return e.engine.Put(entry.Key, entry.Value) 65 | 66 | case wal.OpTypeDelete: 67 | // Try internal interface first 68 | if deleter, ok := e.engine.(interface{ DeleteInternal(key []byte) error }); ok { 69 | return deleter.DeleteInternal(entry.Key) 70 | } 71 | 72 | // Try temporarily disabling read-only mode 73 | if setter, ok := e.engine.(interface{ SetReadOnly(bool) }); ok { 74 | setter.SetReadOnly(false) 75 | err := e.engine.Delete(entry.Key) 76 | setter.SetReadOnly(true) 77 | return err 78 | } 79 | 80 | // Fall back to normal operation which may fail 81 | return e.engine.Delete(entry.Key) 82 | 83 | case wal.OpTypeMerge: 84 | // Handle merge as a put operation for compatibility 85 | if setter, ok := e.engine.(interface{ SetReadOnly(bool) }); ok { 86 | setter.SetReadOnly(false) 87 | err := e.engine.Put(entry.Key, entry.Value) 88 | setter.SetReadOnly(true) 89 | return err 90 | } 91 | return e.engine.Put(entry.Key, entry.Value) 92 | 93 | default: 94 | return fmt.Errorf("unsupported WAL entry type: %d", entry.Type) 95 | } 96 | } 97 | 98 | // applyInNormalMode applies a WAL entry in normal mode 99 | func (e *EngineApplier) applyInNormalMode(entry *wal.Entry) error { 100 | log.Info("Applying entry in normal mode: seq=%d", entry.SequenceNumber) 101 | 102 | switch entry.Type { 103 | case wal.OpTypePut: 104 | return e.engine.Put(entry.Key, entry.Value) 105 | 106 | case wal.OpTypeDelete: 107 | return e.engine.Delete(entry.Key) 108 | 109 | case wal.OpTypeMerge: 110 | // Handle merge as a put operation for compatibility 111 | return e.engine.Put(entry.Key, entry.Value) 112 | 113 | default: 114 | return fmt.Errorf("unsupported WAL entry type: %d", entry.Type) 115 | } 116 | } 117 | 118 | // Sync ensures all applied entries are persisted 119 | func (e *EngineApplier) Sync() error { 120 | // Force a flush of in-memory tables to ensure durability 121 | return e.engine.FlushImMemTables() 122 | } 123 | -------------------------------------------------------------------------------- /pkg/sstable/block/block_iterator_security_test.go: -------------------------------------------------------------------------------- 1 | // ABOUTME: This file contains security tests for block iterator delta encoding 2 | // ABOUTME: Tests protect against malicious delta encoding that could cause buffer overruns 3 | package block 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | // TestDeltaEncodingValidation_ExcessiveSharedLength tests protection against excessive shared length 10 | func TestDeltaEncodingValidation_ExcessiveSharedLength(t *testing.T) { 11 | currentKey := []byte("abc") // 3 bytes 12 | data := []byte("defgh") // 5 bytes available 13 | 14 | // Try to claim 5 bytes shared from a 3-byte key 15 | err := validateDeltaEncoding(5, 2, currentKey, data) 16 | if err == nil { 17 | t.Error("Expected validation to fail with excessive shared length") 18 | } 19 | } 20 | 21 | // TestDeltaEncodingValidation_NilCurrentKey tests protection against nil current key 22 | func TestDeltaEncodingValidation_NilCurrentKey(t *testing.T) { 23 | data := []byte("defgh") 24 | 25 | err := validateDeltaEncoding(1, 2, nil, data) 26 | if err == nil { 27 | t.Error("Expected validation to fail with nil current key") 28 | } 29 | } 30 | 31 | // TestDeltaEncodingValidation_InsufficientUnsharedData tests protection against insufficient unshared data 32 | func TestDeltaEncodingValidation_InsufficientUnsharedData(t *testing.T) { 33 | currentKey := []byte("abc") 34 | data := []byte("de") // Only 2 bytes available 35 | 36 | // Try to claim 5 bytes unshared from 2-byte data 37 | err := validateDeltaEncoding(2, 5, currentKey, data) 38 | if err == nil { 39 | t.Error("Expected validation to fail with insufficient unshared data") 40 | } 41 | } 42 | 43 | // TestDeltaEncodingValidation_ExcessiveKeyLength tests protection against memory exhaustion 44 | func TestDeltaEncodingValidation_ExcessiveKeyLength(t *testing.T) { 45 | currentKey := make([]byte, 40000) // 40KB key 46 | data := make([]byte, 40000) // 40KB data 47 | 48 | // Try to create a 80KB key (40KB shared + 40KB unshared) - exceeds 64KB limit 49 | err := validateDeltaEncoding(40000, 40000, currentKey, data) 50 | if err == nil { 51 | t.Error("Expected validation to fail with excessive key length") 52 | } 53 | } 54 | 55 | // TestDeltaEncodingValidation_ValidDeltaEncoding tests that valid delta encoding passes 56 | func TestDeltaEncodingValidation_ValidDeltaEncoding(t *testing.T) { 57 | currentKey := []byte("abcdef") 58 | data := []byte("ghijk") 59 | 60 | // Valid: 3 bytes shared + 2 bytes unshared = 5 bytes total 61 | err := validateDeltaEncoding(3, 2, currentKey, data) 62 | if err != nil { 63 | t.Errorf("Expected valid delta encoding to pass, got error: %v", err) 64 | } 65 | } 66 | 67 | // TestDeltaEncodingValidation_IntegerOverflow tests protection against integer overflow 68 | func TestDeltaEncodingValidation_IntegerOverflow(t *testing.T) { 69 | currentKey := make([]byte, 1000) 70 | data := make([]byte, 1000) 71 | 72 | // Try to cause integer overflow with large values 73 | err := validateDeltaEncoding(65535, 65535, currentKey, data) // MaxUint16 + MaxUint16 would overflow 74 | if err == nil { 75 | t.Error("Expected validation to fail with potential integer overflow") 76 | } 77 | } 78 | 79 | // TestDeltaEncodingValidation_ZeroSharedLength tests edge case with zero shared length 80 | func TestDeltaEncodingValidation_ZeroSharedLength(t *testing.T) { 81 | currentKey := []byte("abc") 82 | data := []byte("defgh") 83 | 84 | // Valid: 0 bytes shared + 3 bytes unshared = 3 bytes total 85 | err := validateDeltaEncoding(0, 3, currentKey, data) 86 | if err != nil { 87 | t.Errorf("Expected zero shared length to be valid, got error: %v", err) 88 | } 89 | } 90 | 91 | // TestDeltaEncodingValidation_MaxValidKeyLength tests boundary condition at max key length 92 | func TestDeltaEncodingValidation_MaxValidKeyLength(t *testing.T) { 93 | currentKey := make([]byte, 32000) // 32KB key 94 | data := make([]byte, 32000) // 32KB data 95 | 96 | // Valid: 32KB shared + 32KB unshared = 64KB total (exactly at limit) 97 | err := validateDeltaEncoding(32000, 32000, currentKey, data) 98 | if err != nil { 99 | t.Errorf("Expected max valid key length to pass, got error: %v", err) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /pkg/compaction/base_strategy.go: -------------------------------------------------------------------------------- 1 | package compaction 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "sort" 8 | "strings" 9 | 10 | "github.com/KevoDB/kevo/pkg/config" 11 | "github.com/KevoDB/kevo/pkg/sstable" 12 | ) 13 | 14 | // BaseCompactionStrategy provides common functionality for compaction strategies 15 | type BaseCompactionStrategy struct { 16 | // Configuration 17 | cfg *config.Config 18 | 19 | // SSTable directory 20 | sstableDir string 21 | 22 | // File information by level 23 | levels map[int][]*SSTableInfo 24 | } 25 | 26 | // NewBaseCompactionStrategy creates a new base compaction strategy 27 | func NewBaseCompactionStrategy(cfg *config.Config, sstableDir string) *BaseCompactionStrategy { 28 | return &BaseCompactionStrategy{ 29 | cfg: cfg, 30 | sstableDir: sstableDir, 31 | levels: make(map[int][]*SSTableInfo), 32 | } 33 | } 34 | 35 | // LoadSSTables scans the SSTable directory and loads metadata for all files 36 | func (s *BaseCompactionStrategy) LoadSSTables() error { 37 | // Clear existing data 38 | s.levels = make(map[int][]*SSTableInfo) 39 | 40 | // Read all files from the SSTable directory 41 | entries, err := os.ReadDir(s.sstableDir) 42 | if err != nil { 43 | if os.IsNotExist(err) { 44 | return nil // Directory doesn't exist yet 45 | } 46 | return fmt.Errorf("failed to read SSTable directory: %w", err) 47 | } 48 | 49 | // Parse filenames and collect information 50 | for _, entry := range entries { 51 | if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sst") { 52 | continue // Skip directories and non-SSTable files 53 | } 54 | 55 | // Parse filename to extract level, sequence, and timestamp 56 | // Filename format: level_sequence_timestamp.sst 57 | var level int 58 | var sequence uint64 59 | var timestamp int64 60 | 61 | if n, err := fmt.Sscanf(entry.Name(), "%d_%06d_%020d.sst", 62 | &level, &sequence, ×tamp); n != 3 || err != nil { 63 | // Skip files that don't match our naming pattern 64 | continue 65 | } 66 | 67 | // Get file info for size 68 | fi, err := entry.Info() 69 | if err != nil { 70 | return fmt.Errorf("failed to get file info for %s: %w", entry.Name(), err) 71 | } 72 | 73 | // Open the file to extract key range information 74 | path := filepath.Join(s.sstableDir, entry.Name()) 75 | reader, err := sstable.OpenReader(path) 76 | if err != nil { 77 | return fmt.Errorf("failed to open SSTable %s: %w", path, err) 78 | } 79 | 80 | // Create iterator to get first and last keys 81 | iter := reader.NewIterator() 82 | var firstKey, lastKey []byte 83 | 84 | // Get first key 85 | iter.SeekToFirst() 86 | if iter.Valid() { 87 | firstKey = append([]byte{}, iter.Key()...) 88 | } 89 | 90 | // Get last key 91 | iter.SeekToLast() 92 | if iter.Valid() { 93 | lastKey = append([]byte{}, iter.Key()...) 94 | } 95 | 96 | // Create SSTable info 97 | info := &SSTableInfo{ 98 | Path: path, 99 | Level: level, 100 | Sequence: sequence, 101 | Timestamp: timestamp, 102 | Size: fi.Size(), 103 | KeyCount: reader.GetKeyCount(), 104 | FirstKey: firstKey, 105 | LastKey: lastKey, 106 | Reader: reader, 107 | } 108 | 109 | // Add to appropriate level 110 | s.levels[level] = append(s.levels[level], info) 111 | } 112 | 113 | // Sort files within each level by sequence number 114 | for level, files := range s.levels { 115 | sort.Slice(files, func(i, j int) bool { 116 | return files[i].Sequence < files[j].Sequence 117 | }) 118 | s.levels[level] = files 119 | } 120 | 121 | return nil 122 | } 123 | 124 | // Close closes all open SSTable readers 125 | func (s *BaseCompactionStrategy) Close() error { 126 | var lastErr error 127 | 128 | for _, files := range s.levels { 129 | for _, file := range files { 130 | if file.Reader != nil { 131 | if err := file.Reader.Close(); err != nil && lastErr == nil { 132 | lastErr = err 133 | } 134 | file.Reader = nil 135 | } 136 | } 137 | } 138 | 139 | return lastErr 140 | } 141 | 142 | // GetLevelSize returns the total size of all files in a level 143 | func (s *BaseCompactionStrategy) GetLevelSize(level int) int64 { 144 | var size int64 145 | for _, file := range s.levels[level] { 146 | size += file.Size 147 | } 148 | return size 149 | } 150 | -------------------------------------------------------------------------------- /pkg/transport/registry_test.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | // mockClient implements the Client interface for testing 11 | type mockClient struct { 12 | connected bool 13 | endpoint string 14 | options TransportOptions 15 | } 16 | 17 | func (m *mockClient) Connect(ctx context.Context) error { 18 | m.connected = true 19 | return nil 20 | } 21 | 22 | func (m *mockClient) Close() error { 23 | m.connected = false 24 | return nil 25 | } 26 | 27 | func (m *mockClient) IsConnected() bool { 28 | return m.connected 29 | } 30 | 31 | func (m *mockClient) Status() TransportStatus { 32 | return TransportStatus{ 33 | Connected: m.connected, 34 | } 35 | } 36 | 37 | func (m *mockClient) Send(ctx context.Context, request Request) (Response, error) { 38 | if !m.connected { 39 | return nil, ErrNotConnected 40 | } 41 | return &BasicResponse{ 42 | ResponseType: request.Type() + "_response", 43 | ResponseData: []byte("mock response"), 44 | }, nil 45 | } 46 | 47 | func (m *mockClient) Stream(ctx context.Context) (Stream, error) { 48 | if !m.connected { 49 | return nil, ErrNotConnected 50 | } 51 | return nil, errors.New("streaming not implemented in mock") 52 | } 53 | 54 | // mockClientFactory creates a new mock client 55 | func mockClientFactory(endpoint string, options TransportOptions) (Client, error) { 56 | return &mockClient{ 57 | endpoint: endpoint, 58 | options: options, 59 | }, nil 60 | } 61 | 62 | // mockServer implements the Server interface for testing 63 | type mockServer struct { 64 | started bool 65 | address string 66 | options TransportOptions 67 | handler RequestHandler 68 | } 69 | 70 | func (m *mockServer) Start() error { 71 | m.started = true 72 | return nil 73 | } 74 | 75 | func (m *mockServer) Serve() error { 76 | m.started = true 77 | return nil 78 | } 79 | 80 | func (m *mockServer) Stop(ctx context.Context) error { 81 | m.started = false 82 | return nil 83 | } 84 | 85 | func (m *mockServer) SetRequestHandler(handler RequestHandler) { 86 | m.handler = handler 87 | } 88 | 89 | // mockServerFactory creates a new mock server 90 | func mockServerFactory(address string, options TransportOptions) (Server, error) { 91 | return &mockServer{ 92 | address: address, 93 | options: options, 94 | }, nil 95 | } 96 | 97 | // TestRegistry tests the transport registry 98 | func TestRegistry(t *testing.T) { 99 | registry := NewRegistry() 100 | 101 | // Register transports 102 | registry.RegisterClient("mock", mockClientFactory) 103 | registry.RegisterServer("mock", mockServerFactory) 104 | 105 | // Test listing transports 106 | transports := registry.ListTransports() 107 | if len(transports) != 1 || transports[0] != "mock" { 108 | t.Errorf("Expected [mock], got %v", transports) 109 | } 110 | 111 | // Test creating client 112 | client, err := registry.CreateClient("mock", "localhost:8080", TransportOptions{ 113 | Timeout: 5 * time.Second, 114 | }) 115 | if err != nil { 116 | t.Fatalf("Failed to create client: %v", err) 117 | } 118 | 119 | // Test client methods 120 | if client.IsConnected() { 121 | t.Error("Expected client to be disconnected initially") 122 | } 123 | 124 | err = client.Connect(context.Background()) 125 | if err != nil { 126 | t.Fatalf("Failed to connect: %v", err) 127 | } 128 | 129 | if !client.IsConnected() { 130 | t.Error("Expected client to be connected after Connect()") 131 | } 132 | 133 | // Test server creation 134 | server, err := registry.CreateServer("mock", "localhost:8080", TransportOptions{ 135 | Timeout: 5 * time.Second, 136 | }) 137 | if err != nil { 138 | t.Fatalf("Failed to create server: %v", err) 139 | } 140 | 141 | // Test server methods 142 | err = server.Start() 143 | if err != nil { 144 | t.Fatalf("Failed to start server: %v", err) 145 | } 146 | 147 | mockServer := server.(*mockServer) 148 | if !mockServer.started { 149 | t.Error("Expected server to be started") 150 | } 151 | 152 | // Test non-existent transport 153 | _, err = registry.CreateClient("nonexistent", "", TransportOptions{}) 154 | if err == nil { 155 | t.Error("Expected error creating non-existent client") 156 | } 157 | 158 | _, err = registry.CreateServer("nonexistent", "", TransportOptions{}) 159 | if err == nil { 160 | t.Error("Expected error creating non-existent server") 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /pkg/common/iterator/filtered/filtered.go: -------------------------------------------------------------------------------- 1 | // Package filtered provides iterators that filter keys based on different criteria 2 | package filtered 3 | 4 | import ( 5 | "bytes" 6 | 7 | "github.com/KevoDB/kevo/pkg/common/iterator" 8 | ) 9 | 10 | // KeyFilterFunc is a function type for filtering keys 11 | type KeyFilterFunc func(key []byte) bool 12 | 13 | // FilteredIterator wraps an iterator and applies a key filter 14 | type FilteredIterator struct { 15 | iter iterator.Iterator 16 | keyFilter KeyFilterFunc 17 | } 18 | 19 | // NewFilteredIterator creates a new iterator with a key filter 20 | func NewFilteredIterator(iter iterator.Iterator, filter KeyFilterFunc) *FilteredIterator { 21 | return &FilteredIterator{ 22 | iter: iter, 23 | keyFilter: filter, 24 | } 25 | } 26 | 27 | // Next advances to the next key that passes the filter 28 | func (fi *FilteredIterator) Next() bool { 29 | for fi.iter.Next() { 30 | if fi.keyFilter(fi.iter.Key()) { 31 | return true 32 | } 33 | } 34 | return false 35 | } 36 | 37 | // Key returns the current key 38 | func (fi *FilteredIterator) Key() []byte { 39 | return fi.iter.Key() 40 | } 41 | 42 | // Value returns the current value 43 | func (fi *FilteredIterator) Value() []byte { 44 | return fi.iter.Value() 45 | } 46 | 47 | // Valid returns true if the iterator is at a valid position 48 | func (fi *FilteredIterator) Valid() bool { 49 | return fi.iter.Valid() && fi.keyFilter(fi.iter.Key()) 50 | } 51 | 52 | // IsTombstone returns true if the current entry is a deletion marker 53 | func (fi *FilteredIterator) IsTombstone() bool { 54 | return fi.iter.IsTombstone() 55 | } 56 | 57 | // SeekToFirst positions at the first key that passes the filter 58 | func (fi *FilteredIterator) SeekToFirst() { 59 | fi.iter.SeekToFirst() 60 | 61 | // Advance to the first key that passes the filter 62 | if fi.iter.Valid() && !fi.keyFilter(fi.iter.Key()) { 63 | fi.Next() 64 | } 65 | } 66 | 67 | // SeekToLast positions at the last key that passes the filter 68 | func (fi *FilteredIterator) SeekToLast() { 69 | // This is a simplistic implementation that may not be efficient 70 | // For a production-quality implementation, we might want a more 71 | // sophisticated approach 72 | fi.iter.SeekToLast() 73 | 74 | // If we're at a valid position but it doesn't pass the filter, 75 | // we need to find the last key that does 76 | if fi.iter.Valid() && !fi.keyFilter(fi.iter.Key()) { 77 | // Inefficient but correct - scan from beginning to find last valid key 78 | var lastValidKey []byte 79 | fi.iter.SeekToFirst() 80 | 81 | for fi.iter.Valid() { 82 | if fi.keyFilter(fi.iter.Key()) { 83 | lastValidKey = make([]byte, len(fi.iter.Key())) 84 | copy(lastValidKey, fi.iter.Key()) 85 | } 86 | fi.iter.Next() 87 | } 88 | 89 | // If we found a valid key, seek to it 90 | if lastValidKey != nil { 91 | fi.iter.Seek(lastValidKey) 92 | } else { 93 | // No valid keys found 94 | fi.iter.SeekToFirst() 95 | // This will be invalid after the filter is applied 96 | } 97 | } 98 | } 99 | 100 | // Seek positions at the first key >= target that passes the filter 101 | func (fi *FilteredIterator) Seek(target []byte) bool { 102 | if !fi.iter.Seek(target) { 103 | return false 104 | } 105 | 106 | // If the current position doesn't pass the filter, find the next one that does 107 | if !fi.keyFilter(fi.iter.Key()) { 108 | return fi.Next() 109 | } 110 | 111 | return true 112 | } 113 | 114 | // PrefixFilterFunc creates a filter function for keys with a specific prefix 115 | func PrefixFilterFunc(prefix []byte) KeyFilterFunc { 116 | return func(key []byte) bool { 117 | return bytes.HasPrefix(key, prefix) 118 | } 119 | } 120 | 121 | // SuffixFilterFunc creates a filter function for keys with a specific suffix 122 | func SuffixFilterFunc(suffix []byte) KeyFilterFunc { 123 | return func(key []byte) bool { 124 | return bytes.HasSuffix(key, suffix) 125 | } 126 | } 127 | 128 | // PrefixIterator returns an iterator that filters keys by prefix 129 | func NewPrefixIterator(iter iterator.Iterator, prefix []byte) *FilteredIterator { 130 | return NewFilteredIterator(iter, PrefixFilterFunc(prefix)) 131 | } 132 | 133 | // SuffixIterator returns an iterator that filters keys by suffix 134 | func NewSuffixIterator(iter iterator.Iterator, suffix []byte) *FilteredIterator { 135 | return NewFilteredIterator(iter, SuffixFilterFunc(suffix)) 136 | } 137 | -------------------------------------------------------------------------------- /pkg/common/log/logger_test.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | func TestStandardLogger(t *testing.T) { 10 | // Create a buffer to capture output 11 | var buf bytes.Buffer 12 | 13 | // Create a logger with the buffer as output 14 | logger := NewStandardLogger( 15 | WithOutput(&buf), 16 | WithLevel(LevelDebug), 17 | ) 18 | 19 | // Test debug level 20 | logger.Debug("This is a debug message") 21 | if !strings.Contains(buf.String(), "[DEBUG]") || !strings.Contains(buf.String(), "This is a debug message") { 22 | t.Errorf("Debug logging failed, got: %s", buf.String()) 23 | } 24 | buf.Reset() 25 | 26 | // Test info level 27 | logger.Info("This is an info message") 28 | if !strings.Contains(buf.String(), "[INFO]") || !strings.Contains(buf.String(), "This is an info message") { 29 | t.Errorf("Info logging failed, got: %s", buf.String()) 30 | } 31 | buf.Reset() 32 | 33 | // Test warn level 34 | logger.Warn("This is a warning message") 35 | if !strings.Contains(buf.String(), "[WARN]") || !strings.Contains(buf.String(), "This is a warning message") { 36 | t.Errorf("Warn logging failed, got: %s", buf.String()) 37 | } 38 | buf.Reset() 39 | 40 | // Test error level 41 | logger.Error("This is an error message") 42 | if !strings.Contains(buf.String(), "[ERROR]") || !strings.Contains(buf.String(), "This is an error message") { 43 | t.Errorf("Error logging failed, got: %s", buf.String()) 44 | } 45 | buf.Reset() 46 | 47 | // Test with fields 48 | loggerWithFields := logger.WithFields(map[string]interface{}{ 49 | "component": "test", 50 | "count": 123, 51 | }) 52 | loggerWithFields.Info("Message with fields") 53 | output := buf.String() 54 | if !strings.Contains(output, "[INFO]") || 55 | !strings.Contains(output, "Message with fields") || 56 | !strings.Contains(output, "component=test") || 57 | !strings.Contains(output, "count=123") { 58 | t.Errorf("Logging with fields failed, got: %s", output) 59 | } 60 | buf.Reset() 61 | 62 | // Test with a single field 63 | loggerWithField := logger.WithField("module", "logger") 64 | loggerWithField.Info("Message with a field") 65 | output = buf.String() 66 | if !strings.Contains(output, "[INFO]") || 67 | !strings.Contains(output, "Message with a field") || 68 | !strings.Contains(output, "module=logger") { 69 | t.Errorf("Logging with a field failed, got: %s", output) 70 | } 71 | buf.Reset() 72 | 73 | // Test level filtering 74 | logger.SetLevel(LevelError) 75 | logger.Debug("This debug message should not appear") 76 | logger.Info("This info message should not appear") 77 | logger.Warn("This warning message should not appear") 78 | logger.Error("This error message should appear") 79 | output = buf.String() 80 | if strings.Contains(output, "should not appear") || 81 | !strings.Contains(output, "This error message should appear") { 82 | t.Errorf("Level filtering failed, got: %s", output) 83 | } 84 | buf.Reset() 85 | 86 | // Test formatted messages 87 | logger.SetLevel(LevelInfo) 88 | logger.Info("Formatted %s with %d params", "message", 2) 89 | if !strings.Contains(buf.String(), "Formatted message with 2 params") { 90 | t.Errorf("Formatted message failed, got: %s", buf.String()) 91 | } 92 | buf.Reset() 93 | 94 | // Test GetLevel 95 | if logger.GetLevel() != LevelInfo { 96 | t.Errorf("GetLevel failed, expected LevelInfo, got: %v", logger.GetLevel()) 97 | } 98 | } 99 | 100 | func TestDefaultLogger(t *testing.T) { 101 | // Save original default logger 102 | originalLogger := defaultLogger 103 | defer func() { 104 | defaultLogger = originalLogger 105 | }() 106 | 107 | // Create a buffer to capture output 108 | var buf bytes.Buffer 109 | 110 | // Set a new default logger 111 | SetDefaultLogger(NewStandardLogger( 112 | WithOutput(&buf), 113 | WithLevel(LevelInfo), 114 | )) 115 | 116 | // Test global functions 117 | Info("Global info message") 118 | if !strings.Contains(buf.String(), "[INFO]") || !strings.Contains(buf.String(), "Global info message") { 119 | t.Errorf("Global info logging failed, got: %s", buf.String()) 120 | } 121 | buf.Reset() 122 | 123 | // Test global with fields 124 | WithField("global", true).Info("Global with field") 125 | output := buf.String() 126 | if !strings.Contains(output, "[INFO]") || 127 | !strings.Contains(output, "Global with field") || 128 | !strings.Contains(output, "global=true") { 129 | t.Errorf("Global logging with field failed, got: %s", output) 130 | } 131 | buf.Reset() 132 | } 133 | -------------------------------------------------------------------------------- /pkg/replication/interfaces.go: -------------------------------------------------------------------------------- 1 | // Package replication implements primary-replica replication for Kevo database. 2 | package replication 3 | 4 | import ( 5 | "context" 6 | 7 | "github.com/KevoDB/kevo/pkg/wal" 8 | proto "github.com/KevoDB/kevo/proto/kevo/replication" 9 | ) 10 | 11 | // WALProvider abstracts access to the Write-Ahead Log 12 | type WALProvider interface { 13 | // GetEntriesFrom retrieves WAL entries starting from the given sequence number 14 | GetEntriesFrom(sequenceNumber uint64) ([]*wal.Entry, error) 15 | 16 | // GetNextSequence returns the next sequence number that will be assigned 17 | GetNextSequence() uint64 18 | 19 | // RegisterObserver registers a WAL observer for notifications 20 | RegisterObserver(id string, observer WALObserver) 21 | 22 | // UnregisterObserver removes a previously registered observer 23 | UnregisterObserver(id string) 24 | } 25 | 26 | // WALObserver defines how components observe WAL operations 27 | type WALObserver interface { 28 | // OnWALEntryWritten is called when a single WAL entry is written 29 | OnWALEntryWritten(entry *wal.Entry) 30 | 31 | // OnWALBatchWritten is called when a batch of WAL entries is written 32 | OnWALBatchWritten(startSeq uint64, entries []*wal.Entry) 33 | 34 | // OnWALSync is called when the WAL is synced to disk 35 | OnWALSync(upToSeq uint64) 36 | } 37 | 38 | // WALEntryApplier defines how components apply WAL entries 39 | type WALEntryApplier interface { 40 | // Apply applies a single WAL entry 41 | Apply(entry *wal.Entry) error 42 | 43 | // Sync ensures all applied entries are persisted 44 | Sync() error 45 | } 46 | 47 | // PrimaryNode defines the behavior of a primary node 48 | type PrimaryNode interface { 49 | // StreamWAL handles streaming WAL entries to replicas 50 | StreamWAL(req *proto.WALStreamRequest, stream proto.WALReplicationService_StreamWALServer) error 51 | 52 | // Acknowledge handles acknowledgments from replicas 53 | Acknowledge(ctx context.Context, req *proto.Ack) (*proto.AckResponse, error) 54 | 55 | // NegativeAcknowledge handles negative acknowledgments (retransmission requests) 56 | NegativeAcknowledge(ctx context.Context, req *proto.Nack) (*proto.NackResponse, error) 57 | 58 | // Close shuts down the primary node 59 | Close() error 60 | } 61 | 62 | // ReplicaNode defines the behavior of a replica node 63 | type ReplicaNode interface { 64 | // Start begins the replication process 65 | Start() error 66 | 67 | // Stop halts the replication process 68 | Stop() error 69 | 70 | // GetLastAppliedSequence returns the last successfully applied sequence 71 | GetLastAppliedSequence() uint64 72 | 73 | // GetCurrentState returns the current state of the replica 74 | GetCurrentState() ReplicaState 75 | 76 | // GetStateString returns a string representation of the current state 77 | GetStateString() string 78 | } 79 | 80 | // ReplicaState is defined in state.go 81 | 82 | // Batcher manages batching of WAL entries for transmission 83 | type Batcher interface { 84 | // Add adds a WAL entry to the current batch 85 | Add(entry *proto.WALEntry) bool 86 | 87 | // CreateResponse creates a WALStreamResponse from the current batch 88 | CreateResponse() *proto.WALStreamResponse 89 | 90 | // Count returns the number of entries in the current batch 91 | Count() int 92 | 93 | // Size returns the size of the current batch in bytes 94 | Size() int 95 | 96 | // Clear resets the batcher 97 | Clear() 98 | } 99 | 100 | // Compressor manages compression of WAL entries 101 | type Compressor interface { 102 | // Compress compresses data 103 | Compress(data []byte, codec proto.CompressionCodec) ([]byte, error) 104 | 105 | // Decompress decompresses data 106 | Decompress(data []byte, codec proto.CompressionCodec) ([]byte, error) 107 | 108 | // Close releases resources 109 | Close() error 110 | } 111 | 112 | // SessionManager manages replica sessions 113 | type SessionManager interface { 114 | // RegisterSession registers a new replica session 115 | RegisterSession(sessionID string, conn proto.WALReplicationService_StreamWALServer) 116 | 117 | // UnregisterSession removes a replica session 118 | UnregisterSession(sessionID string) 119 | 120 | // GetSession returns a replica session by ID 121 | GetSession(sessionID string) (proto.WALReplicationService_StreamWALServer, bool) 122 | 123 | // BroadcastBatch sends a batch to all active sessions 124 | BroadcastBatch(batch *proto.WALStreamResponse) int 125 | 126 | // CountSessions returns the number of active sessions 127 | CountSessions() int 128 | } 129 | -------------------------------------------------------------------------------- /pkg/transport/metrics.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | // MetricsCollector collects metrics for transport operations 10 | type MetricsCollector interface { 11 | // RecordRequest records metrics for a request 12 | RecordRequest(requestType string, startTime time.Time, err error) 13 | 14 | // RecordSend records metrics for bytes sent 15 | RecordSend(bytes int) 16 | 17 | // RecordReceive records metrics for bytes received 18 | RecordReceive(bytes int) 19 | 20 | // RecordConnection records a connection event 21 | RecordConnection(successful bool) 22 | 23 | // GetMetrics returns the current metrics 24 | GetMetrics() Metrics 25 | } 26 | 27 | // Metrics represents transport metrics 28 | type Metrics struct { 29 | TotalRequests uint64 30 | SuccessfulRequests uint64 31 | FailedRequests uint64 32 | BytesSent uint64 33 | BytesReceived uint64 34 | Connections uint64 35 | ConnectionFailures uint64 36 | AvgLatencyByType map[string]time.Duration 37 | } 38 | 39 | // BasicMetricsCollector is a simple implementation of MetricsCollector 40 | type BasicMetricsCollector struct { 41 | mu sync.RWMutex 42 | totalRequests uint64 43 | successfulRequests uint64 44 | failedRequests uint64 45 | bytesSent uint64 46 | bytesReceived uint64 47 | connections uint64 48 | connectionFailures uint64 49 | 50 | // Track average latency and count for each request type 51 | avgLatencyByType map[string]time.Duration 52 | requestCountByType map[string]uint64 53 | } 54 | 55 | // NewMetricsCollector creates a new metrics collector 56 | func NewMetricsCollector() MetricsCollector { 57 | return &BasicMetricsCollector{ 58 | avgLatencyByType: make(map[string]time.Duration), 59 | requestCountByType: make(map[string]uint64), 60 | } 61 | } 62 | 63 | // RecordRequest records metrics for a request 64 | func (c *BasicMetricsCollector) RecordRequest(requestType string, startTime time.Time, err error) { 65 | atomic.AddUint64(&c.totalRequests, 1) 66 | 67 | if err == nil { 68 | atomic.AddUint64(&c.successfulRequests, 1) 69 | } else { 70 | atomic.AddUint64(&c.failedRequests, 1) 71 | } 72 | 73 | // Update average latency for request type 74 | latency := time.Since(startTime) 75 | 76 | c.mu.Lock() 77 | defer c.mu.Unlock() 78 | 79 | currentAvg, exists := c.avgLatencyByType[requestType] 80 | currentCount, _ := c.requestCountByType[requestType] 81 | 82 | if exists { 83 | // Update running average - the common case for better branch prediction 84 | // new_avg = (old_avg * count + new_value) / (count + 1) 85 | totalDuration := currentAvg*time.Duration(currentCount) + latency 86 | newCount := currentCount + 1 87 | c.avgLatencyByType[requestType] = totalDuration / time.Duration(newCount) 88 | c.requestCountByType[requestType] = newCount 89 | } else { 90 | // First request of this type 91 | c.avgLatencyByType[requestType] = latency 92 | c.requestCountByType[requestType] = 1 93 | } 94 | } 95 | 96 | // RecordSend records metrics for bytes sent 97 | func (c *BasicMetricsCollector) RecordSend(bytes int) { 98 | atomic.AddUint64(&c.bytesSent, uint64(bytes)) 99 | } 100 | 101 | // RecordReceive records metrics for bytes received 102 | func (c *BasicMetricsCollector) RecordReceive(bytes int) { 103 | atomic.AddUint64(&c.bytesReceived, uint64(bytes)) 104 | } 105 | 106 | // RecordConnection records a connection event 107 | func (c *BasicMetricsCollector) RecordConnection(successful bool) { 108 | if successful { 109 | atomic.AddUint64(&c.connections, 1) 110 | } else { 111 | atomic.AddUint64(&c.connectionFailures, 1) 112 | } 113 | } 114 | 115 | // GetMetrics returns the current metrics 116 | func (c *BasicMetricsCollector) GetMetrics() Metrics { 117 | c.mu.RLock() 118 | defer c.mu.RUnlock() 119 | 120 | // Create a copy of the average latency map 121 | avgLatencyByType := make(map[string]time.Duration, len(c.avgLatencyByType)) 122 | for k, v := range c.avgLatencyByType { 123 | avgLatencyByType[k] = v 124 | } 125 | 126 | return Metrics{ 127 | TotalRequests: atomic.LoadUint64(&c.totalRequests), 128 | SuccessfulRequests: atomic.LoadUint64(&c.successfulRequests), 129 | FailedRequests: atomic.LoadUint64(&c.failedRequests), 130 | BytesSent: atomic.LoadUint64(&c.bytesSent), 131 | BytesReceived: atomic.LoadUint64(&c.bytesReceived), 132 | Connections: atomic.LoadUint64(&c.connections), 133 | ConnectionFailures: atomic.LoadUint64(&c.connectionFailures), 134 | AvgLatencyByType: avgLatencyByType, 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /proto/kevo/replication/replication.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package kevo.replication; 4 | 5 | option go_package = "github.com/KevoDB/kevo/pkg/replication/proto;replication_proto"; 6 | 7 | // WALReplicationService defines the gRPC service for Kevo's primary-replica replication protocol. 8 | // It enables replicas to stream WAL entries from a primary node in real-time, maintaining 9 | // a consistent, crash-resilient, and ordered copy of the data. 10 | service WALReplicationService { 11 | // StreamWAL allows replicas to request WAL entries starting from a specific sequence number. 12 | // The primary responds with a stream of WAL entries in strict logical order. 13 | rpc StreamWAL(WALStreamRequest) returns (stream WALStreamResponse); 14 | 15 | // Acknowledge allows replicas to inform the primary about entries that have been 16 | // successfully applied and persisted, enabling the primary to manage WAL retention. 17 | rpc Acknowledge(Ack) returns (AckResponse); 18 | 19 | // NegativeAcknowledge allows replicas to request retransmission 20 | // of entries when a gap is detected in the sequence numbers. 21 | rpc NegativeAcknowledge(Nack) returns (NackResponse); 22 | } 23 | 24 | // WALStreamRequest is sent by replicas to initiate or resume WAL streaming. 25 | message WALStreamRequest { 26 | // The sequence number to start streaming from (exclusive) 27 | uint64 start_sequence = 1; 28 | 29 | // Protocol version for negotiation and backward compatibility 30 | uint32 protocol_version = 2; 31 | 32 | // Whether the replica supports compressed payloads 33 | bool compression_supported = 3; 34 | 35 | // Preferred compression codec 36 | CompressionCodec preferred_codec = 4; 37 | 38 | // The network address (host:port) the replica is listening on 39 | string listener_address = 5; 40 | } 41 | 42 | // WALStreamResponse contains a batch of WAL entries sent from the primary to a replica. 43 | message WALStreamResponse { 44 | // The batch of WAL entries being streamed 45 | repeated WALEntry entries = 1; 46 | 47 | // Whether the payload is compressed 48 | bool compressed = 2; 49 | 50 | // The compression codec used if compressed is true 51 | CompressionCodec codec = 3; 52 | } 53 | 54 | // WALEntry represents a single entry from the WAL. 55 | message WALEntry { 56 | // The unique, monotonically increasing sequence number (Lamport clock) 57 | uint64 sequence_number = 1; 58 | 59 | // The serialized entry data 60 | bytes payload = 2; 61 | 62 | // The fragment type for handling large entries that span multiple messages 63 | FragmentType fragment_type = 3; 64 | 65 | // CRC32 checksum of the payload for data integrity verification 66 | uint32 checksum = 4; 67 | } 68 | 69 | // FragmentType indicates how a WAL entry is fragmented across multiple messages. 70 | enum FragmentType { 71 | // A complete, unfragmented entry 72 | FULL = 0; 73 | 74 | // The first fragment of a multi-fragment entry 75 | FIRST = 1; 76 | 77 | // A middle fragment of a multi-fragment entry 78 | MIDDLE = 2; 79 | 80 | // The last fragment of a multi-fragment entry 81 | LAST = 3; 82 | } 83 | 84 | // CompressionCodec defines the supported compression algorithms. 85 | enum CompressionCodec { 86 | // No compression 87 | NONE = 0; 88 | 89 | // ZSTD compression algorithm 90 | ZSTD = 1; 91 | 92 | // Snappy compression algorithm 93 | SNAPPY = 2; 94 | } 95 | 96 | // Ack is sent by replicas to acknowledge successful application and persistence 97 | // of WAL entries up to a specific sequence number. 98 | message Ack { 99 | // The highest sequence number that has been successfully 100 | // applied and persisted by the replica 101 | uint64 acknowledged_up_to = 1; 102 | } 103 | 104 | // AckResponse is sent by the primary in response to an Ack message. 105 | message AckResponse { 106 | // Whether the acknowledgment was processed successfully 107 | bool success = 1; 108 | 109 | // An optional message providing additional details 110 | string message = 2; 111 | } 112 | 113 | // Nack (Negative Acknowledgement) is sent by replicas when they detect 114 | // a gap in sequence numbers, requesting retransmission from a specific sequence. 115 | message Nack { 116 | // The sequence number from which to resend WAL entries 117 | uint64 missing_from_sequence = 1; 118 | } 119 | 120 | // NackResponse is sent by the primary in response to a Nack message. 121 | message NackResponse { 122 | // Whether the negative acknowledgment was processed successfully 123 | bool success = 1; 124 | 125 | // An optional message providing additional details 126 | string message = 2; 127 | } -------------------------------------------------------------------------------- /pkg/engine/storage/sequence_test.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/KevoDB/kevo/pkg/config" 10 | "github.com/KevoDB/kevo/pkg/stats" 11 | ) 12 | 13 | // TestSequenceNumberVersioning tests that sequence numbers are properly tracked 14 | // and used for version selection during recovery. 15 | func TestSequenceNumberVersioning(t *testing.T) { 16 | // Create temporary directories for the test 17 | tempDir, err := os.MkdirTemp("", "sequence-test-*") 18 | if err != nil { 19 | t.Fatalf("Failed to create temp directory: %v", err) 20 | } 21 | defer os.RemoveAll(tempDir) 22 | 23 | // Create subdirectories for SSTables and WAL 24 | sstDir := filepath.Join(tempDir, "sst") 25 | walDir := filepath.Join(tempDir, "wal") 26 | 27 | // Create configuration 28 | cfg := &config.Config{ 29 | Version: config.CurrentManifestVersion, 30 | SSTDir: sstDir, 31 | WALDir: walDir, 32 | MemTableSize: 1024 * 1024, // 1MB 33 | MemTablePoolCap: 2, 34 | MaxMemTables: 2, 35 | } 36 | 37 | // Create a stats collector 38 | statsCollector := stats.NewAtomicCollector() 39 | 40 | // Create a new storage manager 41 | manager, err := NewManager(cfg, statsCollector) 42 | if err != nil { 43 | t.Fatalf("Failed to create storage manager: %v", err) 44 | } 45 | defer manager.Close() 46 | 47 | // Step 1: Add a key with initial value 48 | testKey := []byte("test-key") 49 | initialValue := []byte("initial-value") 50 | err = manager.Put(testKey, initialValue) 51 | if err != nil { 52 | t.Fatalf("Failed to put initial value: %v", err) 53 | } 54 | 55 | // Verify the key is readable 56 | value, err := manager.Get(testKey) 57 | if err != nil { 58 | t.Fatalf("Failed to get value: %v", err) 59 | } 60 | if !bytes.Equal(initialValue, value) { 61 | t.Errorf("Expected initial value %s, got %s", initialValue, value) 62 | } 63 | 64 | // Step 2: Flush to create an SSTable 65 | err = manager.FlushMemTables() 66 | if err != nil { 67 | t.Fatalf("Failed to flush memtables: %v", err) 68 | } 69 | 70 | // Verify data is still accessible after flush 71 | value, err = manager.Get(testKey) 72 | if err != nil { 73 | t.Fatalf("Failed to get value after flush: %v", err) 74 | } 75 | if !bytes.Equal(initialValue, value) { 76 | t.Errorf("Expected initial value %s after flush, got %s", initialValue, value) 77 | } 78 | 79 | // Step 3: Update the key with a new value 80 | updatedValue := []byte("updated-value") 81 | err = manager.Put(testKey, updatedValue) 82 | if err != nil { 83 | t.Fatalf("Failed to put updated value: %v", err) 84 | } 85 | 86 | // Verify the updated value is readable 87 | value, err = manager.Get(testKey) 88 | if err != nil { 89 | t.Fatalf("Failed to get updated value: %v", err) 90 | } 91 | if !bytes.Equal(updatedValue, value) { 92 | t.Errorf("Expected updated value %s, got %s", updatedValue, value) 93 | } 94 | 95 | // Step 4: Flush again to create another SSTable with the updated value 96 | err = manager.FlushMemTables() 97 | if err != nil { 98 | t.Fatalf("Failed to flush memtables again: %v", err) 99 | } 100 | 101 | // Verify updated data is still accessible after second flush 102 | value, err = manager.Get(testKey) 103 | if err != nil { 104 | t.Fatalf("Failed to get value after second flush: %v", err) 105 | } 106 | if !bytes.Equal(updatedValue, value) { 107 | t.Errorf("Expected updated value %s after second flush, got %s", updatedValue, value) 108 | } 109 | 110 | // Get the last sequence number 111 | lastSeqNum := manager.lastSeqNum 112 | 113 | // Step 5: Close the manager and simulate a recovery scenario 114 | err = manager.Close() 115 | if err != nil { 116 | t.Fatalf("Failed to close manager: %v", err) 117 | } 118 | 119 | // Create a new manager to simulate recovery 120 | recoveredManager, err := NewManager(cfg, statsCollector) 121 | if err != nil { 122 | t.Fatalf("Failed to create recovered manager: %v", err) 123 | } 124 | defer recoveredManager.Close() 125 | 126 | // Verify the key still has the latest value after recovery 127 | recoveredValue, err := recoveredManager.Get(testKey) 128 | if err != nil { 129 | t.Fatalf("Failed to get value after recovery: %v", err) 130 | } 131 | if !bytes.Equal(updatedValue, recoveredValue) { 132 | t.Errorf("Expected updated value %s after recovery, got %s", updatedValue, recoveredValue) 133 | } 134 | 135 | // Verify the sequence number was properly recovered 136 | if recoveredManager.lastSeqNum < lastSeqNum { 137 | t.Errorf("Recovered sequence number %d is less than last known sequence number %d", 138 | recoveredManager.lastSeqNum, lastSeqNum) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /pkg/wal/overflow_test.go: -------------------------------------------------------------------------------- 1 | package wal 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | 7 | "github.com/KevoDB/kevo/pkg/config" 8 | ) 9 | 10 | // TestSequenceNumberOverflow tests that sequence number overflow is properly detected 11 | func TestSequenceNumberOverflow(t *testing.T) { 12 | tempDir := t.TempDir() 13 | 14 | cfg := &config.Config{ 15 | WALDir: tempDir, 16 | WALSyncMode: config.SyncNone, 17 | WALSyncBytes: 0, 18 | WALMaxSize: 1024 * 1024, 19 | } 20 | 21 | wal, err := NewWAL(cfg, tempDir) 22 | if err != nil { 23 | t.Fatalf("Failed to create WAL: %v", err) 24 | } 25 | defer wal.Close() 26 | 27 | // Set sequence number to near overflow 28 | wal.nextSequence = MaxSequenceNumber 29 | 30 | // This should trigger overflow error 31 | _, err = wal.Append(OpTypePut, []byte("test"), []byte("value")) 32 | if err != ErrSequenceOverflow { 33 | t.Errorf("Expected ErrSequenceOverflow, got: %v", err) 34 | } 35 | } 36 | 37 | // TestSequenceNumberOverflowBatch tests batch overflow detection 38 | func TestSequenceNumberOverflowBatch(t *testing.T) { 39 | tempDir := t.TempDir() 40 | 41 | cfg := &config.Config{ 42 | WALDir: tempDir, 43 | WALSyncMode: config.SyncNone, 44 | WALSyncBytes: 0, 45 | WALMaxSize: 1024 * 1024, 46 | } 47 | 48 | wal, err := NewWAL(cfg, tempDir) 49 | if err != nil { 50 | t.Fatalf("Failed to create WAL: %v", err) 51 | } 52 | defer wal.Close() 53 | 54 | // Set sequence number to exactly at overflow point 55 | wal.nextSequence = MaxSequenceNumber 56 | 57 | // Create a batch that would overflow (any batch will overflow at this point) 58 | entries := []*Entry{ 59 | {Key: []byte("key1"), Value: []byte("value1")}, 60 | } 61 | 62 | _, err = wal.AppendBatch(entries) 63 | if err != ErrSequenceOverflow { 64 | t.Errorf("Expected ErrSequenceOverflow for batch, got: %v", err) 65 | } 66 | } 67 | 68 | // TestSequenceNumberOverflowWithSequence tests AppendWithSequence overflow detection 69 | func TestSequenceNumberOverflowWithSequence(t *testing.T) { 70 | tempDir := t.TempDir() 71 | 72 | cfg := &config.Config{ 73 | WALDir: tempDir, 74 | WALSyncMode: config.SyncNone, 75 | WALSyncBytes: 0, 76 | WALMaxSize: 1024 * 1024, 77 | } 78 | 79 | wal, err := NewWAL(cfg, tempDir) 80 | if err != nil { 81 | t.Fatalf("Failed to create WAL: %v", err) 82 | } 83 | defer wal.Close() 84 | 85 | // Try to append with sequence number at overflow threshold 86 | _, err = wal.AppendWithSequence(OpTypePut, []byte("test"), []byte("value"), MaxSequenceNumber) 87 | if err != ErrSequenceOverflow { 88 | t.Errorf("Expected ErrSequenceOverflow for AppendWithSequence, got: %v", err) 89 | } 90 | } 91 | 92 | // TestSequenceNumberWarningThreshold tests that warnings are logged appropriately 93 | func TestSequenceNumberWarningThreshold(t *testing.T) { 94 | tempDir := t.TempDir() 95 | 96 | cfg := &config.Config{ 97 | WALDir: tempDir, 98 | WALSyncMode: config.SyncNone, 99 | WALSyncBytes: 0, 100 | WALMaxSize: 1024 * 1024, 101 | } 102 | 103 | wal, err := NewWAL(cfg, tempDir) 104 | if err != nil { 105 | t.Fatalf("Failed to create WAL: %v", err) 106 | } 107 | defer wal.Close() 108 | 109 | // Set sequence number to warning threshold 110 | wal.nextSequence = SequenceWarningThreshold 111 | 112 | // This should log a warning but succeed 113 | _, err = wal.Append(OpTypePut, []byte("test"), []byte("value")) 114 | if err != nil { 115 | t.Errorf("Expected no error at warning threshold, got: %v", err) 116 | } 117 | 118 | // Verify warning flag is set 119 | if !wal.overflowWarning { 120 | t.Error("Expected overflow warning flag to be set") 121 | } 122 | 123 | // Second call should not log again (no additional verification since we can't easily capture logs) 124 | _, err = wal.Append(OpTypePut, []byte("test2"), []byte("value2")) 125 | if err != nil { 126 | t.Errorf("Expected no error on second append, got: %v", err) 127 | } 128 | } 129 | 130 | // TestSequenceNumberConstants verifies our constants are reasonable 131 | func TestSequenceNumberConstants(t *testing.T) { 132 | // Verify MaxSequenceNumber is less than math.MaxUint64 133 | if MaxSequenceNumber >= math.MaxUint64 { 134 | t.Errorf("MaxSequenceNumber should be less than math.MaxUint64") 135 | } 136 | 137 | // Verify there's a reasonable safety margin 138 | if math.MaxUint64-MaxSequenceNumber != 1_000_000 { 139 | t.Errorf("Expected 1 million sequence safety margin, got: %d", math.MaxUint64-MaxSequenceNumber) 140 | } 141 | 142 | // Verify warning threshold is before max 143 | if SequenceWarningThreshold >= MaxSequenceNumber { 144 | t.Errorf("SequenceWarningThreshold should be less than MaxSequenceNumber") 145 | } 146 | 147 | // Verify warning margin 148 | if MaxSequenceNumber-SequenceWarningThreshold != 9_000_000 { 149 | t.Errorf("Expected 9 million sequence warning margin, got: %d", MaxSequenceNumber-SequenceWarningThreshold) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /pkg/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | ) 8 | 9 | func TestNewDefaultConfig(t *testing.T) { 10 | dbPath := "/tmp/testdb" 11 | cfg := NewDefaultConfig(dbPath) 12 | 13 | if cfg.Version != CurrentManifestVersion { 14 | t.Errorf("expected version %d, got %d", CurrentManifestVersion, cfg.Version) 15 | } 16 | 17 | if cfg.WALDir != filepath.Join(dbPath, "wal") { 18 | t.Errorf("expected WAL dir %s, got %s", filepath.Join(dbPath, "wal"), cfg.WALDir) 19 | } 20 | 21 | if cfg.SSTDir != filepath.Join(dbPath, "sst") { 22 | t.Errorf("expected SST dir %s, got %s", filepath.Join(dbPath, "sst"), cfg.SSTDir) 23 | } 24 | 25 | // Test default values 26 | if cfg.WALSyncMode != SyncImmediate { 27 | t.Errorf("expected WAL sync mode %d, got %d", SyncImmediate, cfg.WALSyncMode) 28 | } 29 | 30 | if cfg.MemTableSize != 32*1024*1024 { 31 | t.Errorf("expected memtable size %d, got %d", 32*1024*1024, cfg.MemTableSize) 32 | } 33 | } 34 | 35 | func TestConfigValidate(t *testing.T) { 36 | cfg := NewDefaultConfig("/tmp/testdb") 37 | 38 | // Valid config 39 | if err := cfg.Validate(); err != nil { 40 | t.Errorf("expected valid config, got error: %v", err) 41 | } 42 | 43 | // Test invalid configs 44 | testCases := []struct { 45 | name string 46 | mutate func(*Config) 47 | expected string 48 | }{ 49 | { 50 | name: "invalid version", 51 | mutate: func(c *Config) { 52 | c.Version = 0 53 | }, 54 | expected: "invalid configuration: invalid version 0", 55 | }, 56 | { 57 | name: "empty WAL dir", 58 | mutate: func(c *Config) { 59 | c.WALDir = "" 60 | }, 61 | expected: "invalid configuration: WAL directory not specified", 62 | }, 63 | { 64 | name: "empty SST dir", 65 | mutate: func(c *Config) { 66 | c.SSTDir = "" 67 | }, 68 | expected: "invalid configuration: SSTable directory not specified", 69 | }, 70 | { 71 | name: "zero memtable size", 72 | mutate: func(c *Config) { 73 | c.MemTableSize = 0 74 | }, 75 | expected: "invalid configuration: MemTable size must be positive", 76 | }, 77 | { 78 | name: "negative max memtables", 79 | mutate: func(c *Config) { 80 | c.MaxMemTables = -1 81 | }, 82 | expected: "invalid configuration: Max MemTables must be positive", 83 | }, 84 | { 85 | name: "zero block size", 86 | mutate: func(c *Config) { 87 | c.SSTableBlockSize = 0 88 | }, 89 | expected: "invalid configuration: SSTable block size must be positive", 90 | }, 91 | } 92 | 93 | for _, tc := range testCases { 94 | t.Run(tc.name, func(t *testing.T) { 95 | cfg := NewDefaultConfig("/tmp/testdb") 96 | tc.mutate(cfg) 97 | 98 | err := cfg.Validate() 99 | if err == nil { 100 | t.Fatal("expected error, got nil") 101 | } 102 | 103 | if err.Error() != tc.expected { 104 | t.Errorf("expected error %q, got %q", tc.expected, err.Error()) 105 | } 106 | }) 107 | } 108 | } 109 | 110 | func TestConfigManifestSaveLoad(t *testing.T) { 111 | // Create a temporary directory for the test 112 | tempDir, err := os.MkdirTemp("", "config_test") 113 | if err != nil { 114 | t.Fatalf("failed to create temp dir: %v", err) 115 | } 116 | defer os.RemoveAll(tempDir) 117 | 118 | // Create a config and save it 119 | cfg := NewDefaultConfig(tempDir) 120 | cfg.MemTableSize = 16 * 1024 * 1024 // 16MB 121 | cfg.CompactionThreads = 4 122 | 123 | if err := cfg.SaveManifest(tempDir); err != nil { 124 | t.Fatalf("failed to save manifest: %v", err) 125 | } 126 | 127 | // Load the config 128 | loadedCfg, err := LoadConfigFromManifest(tempDir) 129 | if err != nil { 130 | t.Fatalf("failed to load manifest: %v", err) 131 | } 132 | 133 | // Verify loaded config 134 | if loadedCfg.MemTableSize != cfg.MemTableSize { 135 | t.Errorf("expected memtable size %d, got %d", cfg.MemTableSize, loadedCfg.MemTableSize) 136 | } 137 | 138 | if loadedCfg.CompactionThreads != cfg.CompactionThreads { 139 | t.Errorf("expected compaction threads %d, got %d", cfg.CompactionThreads, loadedCfg.CompactionThreads) 140 | } 141 | 142 | // Test loading non-existent manifest 143 | nonExistentDir := filepath.Join(tempDir, "nonexistent") 144 | _, err = LoadConfigFromManifest(nonExistentDir) 145 | if err != ErrManifestNotFound { 146 | t.Errorf("expected ErrManifestNotFound, got %v", err) 147 | } 148 | } 149 | 150 | func TestConfigUpdate(t *testing.T) { 151 | cfg := NewDefaultConfig("/tmp/testdb") 152 | 153 | // Update config 154 | cfg.Update(func(c *Config) { 155 | c.MemTableSize = 64 * 1024 * 1024 // 64MB 156 | c.MaxMemTables = 8 157 | }) 158 | 159 | // Verify update 160 | if cfg.MemTableSize != 64*1024*1024 { 161 | t.Errorf("expected memtable size %d, got %d", 64*1024*1024, cfg.MemTableSize) 162 | } 163 | 164 | if cfg.MaxMemTables != 8 { 165 | t.Errorf("expected max memtables %d, got %d", 8, cfg.MaxMemTables) 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /pkg/transport/interface.go: -------------------------------------------------------------------------------- 1 | package transport 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "google.golang.org/grpc/keepalive" 8 | ) 9 | 10 | // CompressionType defines the compression algorithm used 11 | type CompressionType string 12 | 13 | // Standard compression options 14 | const ( 15 | CompressionNone CompressionType = "none" 16 | CompressionGzip CompressionType = "gzip" 17 | CompressionSnappy CompressionType = "snappy" 18 | ) 19 | 20 | // RetryPolicy defines how retries are handled 21 | type RetryPolicy struct { 22 | MaxRetries int 23 | InitialBackoff time.Duration 24 | MaxBackoff time.Duration 25 | BackoffFactor float64 26 | Jitter float64 27 | } 28 | 29 | // TransportOptions contains common configuration across all transport types 30 | type TransportOptions struct { 31 | Timeout time.Duration 32 | RetryPolicy RetryPolicy 33 | Compression CompressionType 34 | MaxMessageSize int 35 | TLSEnabled bool 36 | CertFile string 37 | KeyFile string 38 | CAFile string 39 | KeepaliveParams *keepalive.ClientParameters // Optional keepalive parameters for gRPC clients 40 | } 41 | 42 | // TransportStatus contains information about the current transport state 43 | type TransportStatus struct { 44 | Connected bool 45 | LastConnected time.Time 46 | LastError error 47 | BytesSent uint64 48 | BytesReceived uint64 49 | RTT time.Duration 50 | } 51 | 52 | // Request represents a generic request to the transport layer 53 | type Request interface { 54 | // Type returns the type of request 55 | Type() string 56 | 57 | // Payload returns the payload of the request 58 | Payload() []byte 59 | } 60 | 61 | // Response represents a generic response from the transport layer 62 | type Response interface { 63 | // Type returns the type of response 64 | Type() string 65 | 66 | // Payload returns the payload of the response 67 | Payload() []byte 68 | 69 | // Error returns any error associated with the response 70 | Error() error 71 | } 72 | 73 | // Stream represents a bidirectional stream of messages 74 | type Stream interface { 75 | // Send sends a request over the stream 76 | Send(request Request) error 77 | 78 | // Recv receives a response from the stream 79 | Recv() (Response, error) 80 | 81 | // Close closes the stream 82 | Close() error 83 | } 84 | 85 | // Client defines the client interface for any transport implementation 86 | type Client interface { 87 | // Connect establishes a connection to the server 88 | Connect(ctx context.Context) error 89 | 90 | // Close closes the connection 91 | Close() error 92 | 93 | // IsConnected returns whether the client is connected 94 | IsConnected() bool 95 | 96 | // Status returns the current status of the connection 97 | Status() TransportStatus 98 | 99 | // Send sends a request and waits for a response 100 | Send(ctx context.Context, request Request) (Response, error) 101 | 102 | // Stream opens a bidirectional stream 103 | Stream(ctx context.Context) (Stream, error) 104 | } 105 | 106 | // RequestHandler processes incoming requests 107 | type RequestHandler interface { 108 | // HandleRequest processes a request and returns a response 109 | HandleRequest(ctx context.Context, request Request) (Response, error) 110 | 111 | // HandleStream processes a bidirectional stream 112 | HandleStream(stream Stream) error 113 | } 114 | 115 | // Server defines the server interface for any transport implementation 116 | type Server interface { 117 | // Start starts the server and returns immediately 118 | Start() error 119 | 120 | // Serve starts the server and blocks until it's stopped 121 | Serve() error 122 | 123 | // Stop stops the server gracefully 124 | Stop(ctx context.Context) error 125 | 126 | // SetRequestHandler sets the handler for incoming requests 127 | SetRequestHandler(handler RequestHandler) 128 | } 129 | 130 | // ClientFactory creates a new client 131 | type ClientFactory func(endpoint string, options TransportOptions) (Client, error) 132 | 133 | // ServerFactory creates a new server 134 | type ServerFactory func(address string, options TransportOptions) (Server, error) 135 | 136 | // Registry keeps track of available transport implementations 137 | type Registry interface { 138 | // RegisterClient adds a new client implementation to the registry 139 | RegisterClient(name string, factory ClientFactory) 140 | 141 | // RegisterServer adds a new server implementation to the registry 142 | RegisterServer(name string, factory ServerFactory) 143 | 144 | // CreateClient instantiates a client by name 145 | CreateClient(name, endpoint string, options TransportOptions) (Client, error) 146 | 147 | // CreateServer instantiates a server by name 148 | CreateServer(name, address string, options TransportOptions) (Server, error) 149 | 150 | // ListTransports returns all available transport names 151 | ListTransports() []string 152 | } 153 | -------------------------------------------------------------------------------- /pkg/config/manifest_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestNewManifest(t *testing.T) { 9 | dbPath := "/tmp/testdb" 10 | cfg := NewDefaultConfig(dbPath) 11 | 12 | manifest, err := NewManifest(dbPath, cfg) 13 | if err != nil { 14 | t.Fatalf("failed to create manifest: %v", err) 15 | } 16 | 17 | if manifest.DBPath != dbPath { 18 | t.Errorf("expected DBPath %s, got %s", dbPath, manifest.DBPath) 19 | } 20 | 21 | if len(manifest.Entries) != 1 { 22 | t.Errorf("expected 1 entry, got %d", len(manifest.Entries)) 23 | } 24 | 25 | if manifest.Current == nil { 26 | t.Error("current entry is nil") 27 | } else if manifest.Current.Config != cfg { 28 | t.Error("current config does not match the provided config") 29 | } 30 | } 31 | 32 | func TestManifestUpdateConfig(t *testing.T) { 33 | dbPath := "/tmp/testdb" 34 | cfg := NewDefaultConfig(dbPath) 35 | 36 | manifest, err := NewManifest(dbPath, cfg) 37 | if err != nil { 38 | t.Fatalf("failed to create manifest: %v", err) 39 | } 40 | 41 | // Update config 42 | err = manifest.UpdateConfig(func(c *Config) { 43 | c.MemTableSize = 64 * 1024 * 1024 // 64MB 44 | c.MaxMemTables = 8 45 | }) 46 | if err != nil { 47 | t.Fatalf("failed to update config: %v", err) 48 | } 49 | 50 | // Verify entries count 51 | if len(manifest.Entries) != 2 { 52 | t.Errorf("expected 2 entries, got %d", len(manifest.Entries)) 53 | } 54 | 55 | // Verify updated config 56 | current := manifest.GetConfig() 57 | if current.MemTableSize != 64*1024*1024 { 58 | t.Errorf("expected memtable size %d, got %d", 64*1024*1024, current.MemTableSize) 59 | } 60 | if current.MaxMemTables != 8 { 61 | t.Errorf("expected max memtables %d, got %d", 8, current.MaxMemTables) 62 | } 63 | } 64 | 65 | func TestManifestFileTracking(t *testing.T) { 66 | dbPath := "/tmp/testdb" 67 | cfg := NewDefaultConfig(dbPath) 68 | 69 | manifest, err := NewManifest(dbPath, cfg) 70 | if err != nil { 71 | t.Fatalf("failed to create manifest: %v", err) 72 | } 73 | 74 | // Add files 75 | err = manifest.AddFile("sst/000001.sst", 1) 76 | if err != nil { 77 | t.Fatalf("failed to add file: %v", err) 78 | } 79 | 80 | err = manifest.AddFile("sst/000002.sst", 2) 81 | if err != nil { 82 | t.Fatalf("failed to add file: %v", err) 83 | } 84 | 85 | // Verify files 86 | files := manifest.GetFiles() 87 | if len(files) != 2 { 88 | t.Errorf("expected 2 files, got %d", len(files)) 89 | } 90 | 91 | if files["sst/000001.sst"] != 1 { 92 | t.Errorf("expected sequence number 1, got %d", files["sst/000001.sst"]) 93 | } 94 | 95 | if files["sst/000002.sst"] != 2 { 96 | t.Errorf("expected sequence number 2, got %d", files["sst/000002.sst"]) 97 | } 98 | 99 | // Remove file 100 | err = manifest.RemoveFile("sst/000001.sst") 101 | if err != nil { 102 | t.Fatalf("failed to remove file: %v", err) 103 | } 104 | 105 | // Verify files after removal 106 | files = manifest.GetFiles() 107 | if len(files) != 1 { 108 | t.Errorf("expected 1 file, got %d", len(files)) 109 | } 110 | 111 | if _, exists := files["sst/000001.sst"]; exists { 112 | t.Error("file should have been removed") 113 | } 114 | } 115 | 116 | func TestManifestSaveLoad(t *testing.T) { 117 | // Create a temporary directory for the test 118 | tempDir, err := os.MkdirTemp("", "manifest_test") 119 | if err != nil { 120 | t.Fatalf("failed to create temp dir: %v", err) 121 | } 122 | defer os.RemoveAll(tempDir) 123 | 124 | // Create a manifest 125 | cfg := NewDefaultConfig(tempDir) 126 | manifest, err := NewManifest(tempDir, cfg) 127 | if err != nil { 128 | t.Fatalf("failed to create manifest: %v", err) 129 | } 130 | 131 | // Update config 132 | err = manifest.UpdateConfig(func(c *Config) { 133 | c.MemTableSize = 64 * 1024 * 1024 // 64MB 134 | }) 135 | if err != nil { 136 | t.Fatalf("failed to update config: %v", err) 137 | } 138 | 139 | // Add some files 140 | err = manifest.AddFile("sst/000001.sst", 1) 141 | if err != nil { 142 | t.Fatalf("failed to add file: %v", err) 143 | } 144 | 145 | // Save the manifest 146 | if err := manifest.Save(); err != nil { 147 | t.Fatalf("failed to save manifest: %v", err) 148 | } 149 | 150 | // Load the manifest 151 | loadedManifest, err := LoadManifest(tempDir) 152 | if err != nil { 153 | t.Fatalf("failed to load manifest: %v", err) 154 | } 155 | 156 | // Verify entries count 157 | if len(loadedManifest.Entries) != len(manifest.Entries) { 158 | t.Errorf("expected %d entries, got %d", len(manifest.Entries), len(loadedManifest.Entries)) 159 | } 160 | 161 | // Verify config 162 | loadedConfig := loadedManifest.GetConfig() 163 | if loadedConfig.MemTableSize != 64*1024*1024 { 164 | t.Errorf("expected memtable size %d, got %d", 64*1024*1024, loadedConfig.MemTableSize) 165 | } 166 | 167 | // Verify files 168 | loadedFiles := loadedManifest.GetFiles() 169 | if len(loadedFiles) != 1 { 170 | t.Errorf("expected 1 file, got %d", len(loadedFiles)) 171 | } 172 | 173 | if loadedFiles["sst/000001.sst"] != 1 { 174 | t.Errorf("expected sequence number 1, got %d", loadedFiles["sst/000001.sst"]) 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /pkg/replication/primary_test.go: -------------------------------------------------------------------------------- 1 | package replication 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | "time" 8 | 9 | "github.com/KevoDB/kevo/pkg/config" 10 | "github.com/KevoDB/kevo/pkg/wal" 11 | proto "github.com/KevoDB/kevo/proto/kevo/replication" 12 | ) 13 | 14 | // TestPrimaryCreation tests that a primary can be created with a WAL 15 | func TestPrimaryCreation(t *testing.T) { 16 | // Create a temporary directory for the WAL 17 | tempDir, err := os.MkdirTemp("", "primary_creation_test") 18 | if err != nil { 19 | t.Fatalf("Failed to create temp dir: %v", err) 20 | } 21 | defer os.RemoveAll(tempDir) 22 | 23 | // Create a WAL 24 | cfg := config.NewDefaultConfig(tempDir) 25 | w, err := wal.NewWAL(cfg, filepath.Join(tempDir, "wal")) 26 | if err != nil { 27 | t.Fatalf("Failed to create WAL: %v", err) 28 | } 29 | defer w.Close() 30 | 31 | // Create a primary 32 | primary, err := NewPrimary(w, DefaultPrimaryConfig()) 33 | if err != nil { 34 | t.Fatalf("Failed to create primary: %v", err) 35 | } 36 | defer primary.Close() 37 | 38 | // Check that the primary was configured correctly 39 | if primary.wal != w { 40 | t.Errorf("Primary has incorrect WAL reference") 41 | } 42 | 43 | if primary.batcher == nil { 44 | t.Errorf("Primary has nil batcher") 45 | } 46 | 47 | if primary.compressor == nil { 48 | t.Errorf("Primary has nil compressor") 49 | } 50 | 51 | if primary.sessions == nil { 52 | t.Errorf("Primary has nil sessions map") 53 | } 54 | } 55 | 56 | // TestPrimaryWALObserver tests that the primary correctly observes WAL events 57 | func TestPrimaryWALObserver(t *testing.T) { 58 | t.Skip("Skipping flaky test - will need to improve test reliability separately") 59 | // Create a temporary directory for the WAL 60 | tempDir, err := os.MkdirTemp("", "primary_observer_test") 61 | if err != nil { 62 | t.Fatalf("Failed to create temp dir: %v", err) 63 | } 64 | defer os.RemoveAll(tempDir) 65 | 66 | // Create a WAL 67 | cfg := config.NewDefaultConfig(tempDir) 68 | w, err := wal.NewWAL(cfg, filepath.Join(tempDir, "wal")) 69 | if err != nil { 70 | t.Fatalf("Failed to create WAL: %v", err) 71 | } 72 | defer w.Close() 73 | 74 | // Create a primary 75 | primary, err := NewPrimary(w, DefaultPrimaryConfig()) 76 | if err != nil { 77 | t.Fatalf("Failed to create primary: %v", err) 78 | } 79 | defer primary.Close() 80 | 81 | // Write a single entry to the WAL 82 | key := []byte("test-key") 83 | value := []byte("test-value") 84 | seq, err := w.Append(wal.OpTypePut, key, value) 85 | if err != nil { 86 | t.Fatalf("Failed to append to WAL: %v", err) 87 | } 88 | if seq != 1 { 89 | t.Errorf("Expected sequence 1, got %d", seq) 90 | } 91 | 92 | // Allow some time for notifications to be processed 93 | time.Sleep(150 * time.Millisecond) 94 | 95 | // Verify the batcher has entries 96 | if primary.batcher.GetBatchCount() <= 0 { 97 | t.Errorf("Primary batcher did not receive WAL entry") 98 | } 99 | 100 | // Sync the WAL and verify the primary observes it 101 | lastSyncedBefore := primary.lastSyncedSeq 102 | err = w.Sync() 103 | if err != nil { 104 | t.Fatalf("Failed to sync WAL: %v", err) 105 | } 106 | 107 | // Allow more time for sync notification 108 | time.Sleep(150 * time.Millisecond) 109 | 110 | // Check that lastSyncedSeq was updated 111 | if primary.lastSyncedSeq <= lastSyncedBefore { 112 | t.Errorf("Primary did not update lastSyncedSeq after WAL sync") 113 | } 114 | } 115 | 116 | // TestPrimarySessionManagement tests session registration and management 117 | func TestPrimarySessionManagement(t *testing.T) { 118 | // Create a temporary directory for the WAL 119 | tempDir, err := os.MkdirTemp("", "primary_session_test") 120 | if err != nil { 121 | t.Fatalf("Failed to create temp dir: %v", err) 122 | } 123 | defer os.RemoveAll(tempDir) 124 | 125 | // Create a WAL 126 | cfg := config.NewDefaultConfig(tempDir) 127 | w, err := wal.NewWAL(cfg, filepath.Join(tempDir, "wal")) 128 | if err != nil { 129 | t.Fatalf("Failed to create WAL: %v", err) 130 | } 131 | defer w.Close() 132 | 133 | // Create a primary 134 | primary, err := NewPrimary(w, DefaultPrimaryConfig()) 135 | if err != nil { 136 | t.Fatalf("Failed to create primary: %v", err) 137 | } 138 | defer primary.Close() 139 | 140 | // Register a session 141 | session := &ReplicaSession{ 142 | ID: "test-session", 143 | StartSequence: 0, 144 | LastAckSequence: 0, 145 | Connected: true, 146 | Active: true, 147 | LastActivity: time.Now(), 148 | SupportedCodecs: []proto.CompressionCodec{proto.CompressionCodec_NONE}, 149 | } 150 | 151 | primary.registerReplicaSession(session) 152 | 153 | // Verify session was registered 154 | if len(primary.sessions) != 1 { 155 | t.Errorf("Expected 1 session, got %d", len(primary.sessions)) 156 | } 157 | 158 | // Unregister session 159 | primary.unregisterReplicaSession("test-session") 160 | 161 | // Verify session was unregistered 162 | if len(primary.sessions) != 0 { 163 | t.Errorf("Expected 0 sessions after unregistering, got %d", len(primary.sessions)) 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /pkg/sstable/footer/footer_test.go: -------------------------------------------------------------------------------- 1 | package footer 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "testing" 7 | ) 8 | 9 | func TestFooterEncodeDecode(t *testing.T) { 10 | // Create a footer 11 | f := NewFooter( 12 | 1000, // indexOffset 13 | 500, // indexSize 14 | 1234, // numEntries 15 | 100, // minKeyOffset 16 | 200, // maxKeyOffset 17 | 5000, // bloomFilterOffset 18 | 300, // bloomFilterSize 19 | ) 20 | 21 | // Encode the footer 22 | encoded := f.Encode() 23 | 24 | // The encoded data should be exactly FooterSize bytes 25 | if len(encoded) != FooterSize { 26 | t.Errorf("Encoded footer size is %d, expected %d", len(encoded), FooterSize) 27 | } 28 | 29 | // Decode the encoded data 30 | decoded, err := Decode(encoded) 31 | if err != nil { 32 | t.Fatalf("Failed to decode footer: %v", err) 33 | } 34 | 35 | // Verify fields match 36 | if decoded.Magic != f.Magic { 37 | t.Errorf("Magic mismatch: got %d, expected %d", decoded.Magic, f.Magic) 38 | } 39 | 40 | if decoded.Version != f.Version { 41 | t.Errorf("Version mismatch: got %d, expected %d", decoded.Version, f.Version) 42 | } 43 | 44 | if decoded.Timestamp != f.Timestamp { 45 | t.Errorf("Timestamp mismatch: got %d, expected %d", decoded.Timestamp, f.Timestamp) 46 | } 47 | 48 | if decoded.IndexOffset != f.IndexOffset { 49 | t.Errorf("IndexOffset mismatch: got %d, expected %d", decoded.IndexOffset, f.IndexOffset) 50 | } 51 | 52 | if decoded.IndexSize != f.IndexSize { 53 | t.Errorf("IndexSize mismatch: got %d, expected %d", decoded.IndexSize, f.IndexSize) 54 | } 55 | 56 | if decoded.NumEntries != f.NumEntries { 57 | t.Errorf("NumEntries mismatch: got %d, expected %d", decoded.NumEntries, f.NumEntries) 58 | } 59 | 60 | if decoded.MinKeyOffset != f.MinKeyOffset { 61 | t.Errorf("MinKeyOffset mismatch: got %d, expected %d", decoded.MinKeyOffset, f.MinKeyOffset) 62 | } 63 | 64 | if decoded.MaxKeyOffset != f.MaxKeyOffset { 65 | t.Errorf("MaxKeyOffset mismatch: got %d, expected %d", decoded.MaxKeyOffset, f.MaxKeyOffset) 66 | } 67 | 68 | if decoded.Checksum != f.Checksum { 69 | t.Errorf("Checksum mismatch: got %d, expected %d", decoded.Checksum, f.Checksum) 70 | } 71 | } 72 | 73 | func TestFooterWriteTo(t *testing.T) { 74 | // Create a footer 75 | f := NewFooter( 76 | 1000, // indexOffset 77 | 500, // indexSize 78 | 1234, // numEntries 79 | 100, // minKeyOffset 80 | 200, // maxKeyOffset 81 | 5000, // bloomFilterOffset 82 | 300, // bloomFilterSize 83 | ) 84 | 85 | // Write to a buffer 86 | var buf bytes.Buffer 87 | n, err := f.WriteTo(&buf) 88 | 89 | if err != nil { 90 | t.Fatalf("Failed to write footer: %v", err) 91 | } 92 | 93 | if n != int64(FooterSize) { 94 | t.Errorf("WriteTo wrote %d bytes, expected %d", n, FooterSize) 95 | } 96 | 97 | // Read back and verify 98 | data := buf.Bytes() 99 | decoded, err := Decode(data) 100 | 101 | if err != nil { 102 | t.Fatalf("Failed to decode footer: %v", err) 103 | } 104 | 105 | if decoded.Magic != f.Magic { 106 | t.Errorf("Magic mismatch after write/read") 107 | } 108 | 109 | if decoded.NumEntries != f.NumEntries { 110 | t.Errorf("NumEntries mismatch after write/read") 111 | } 112 | } 113 | 114 | func TestFooterCorruption(t *testing.T) { 115 | // Create a footer 116 | f := NewFooter( 117 | 1000, // indexOffset 118 | 500, // indexSize 119 | 1234, // numEntries 120 | 100, // minKeyOffset 121 | 200, // maxKeyOffset 122 | 5000, // bloomFilterOffset 123 | 300, // bloomFilterSize 124 | ) 125 | 126 | // Encode the footer 127 | encoded := f.Encode() 128 | 129 | // Corrupt the magic number 130 | corruptedMagic := make([]byte, len(encoded)) 131 | copy(corruptedMagic, encoded) 132 | binary.LittleEndian.PutUint64(corruptedMagic[0:], 0x1234567812345678) 133 | 134 | _, err := Decode(corruptedMagic) 135 | if err == nil { 136 | t.Errorf("Expected error when decoding footer with corrupt magic, but got none") 137 | } 138 | 139 | // Corrupt the checksum 140 | corruptedChecksum := make([]byte, len(encoded)) 141 | copy(corruptedChecksum, encoded) 142 | binary.LittleEndian.PutUint64(corruptedChecksum[44:], 0xBADBADBADBADBAD) 143 | 144 | _, err = Decode(corruptedChecksum) 145 | if err == nil { 146 | t.Errorf("Expected error when decoding footer with corrupt checksum, but got none") 147 | } 148 | 149 | // Truncated data 150 | truncated := encoded[:FooterSize-1] 151 | _, err = Decode(truncated) 152 | if err == nil { 153 | t.Errorf("Expected error when decoding truncated footer, but got none") 154 | } 155 | } 156 | 157 | func TestFooterVersionCheck(t *testing.T) { 158 | // Create a footer with the current version 159 | f := NewFooter(1000, 500, 1234, 100, 200, 5000, 300) 160 | 161 | // Create a modified version 162 | f.Version = 9999 163 | encoded := f.Encode() 164 | 165 | // Decode should still work since we don't verify version compatibility 166 | // in the Decode function directly 167 | decoded, err := Decode(encoded) 168 | if err != nil { 169 | t.Errorf("Unexpected error decoding footer with unknown version: %v", err) 170 | } 171 | 172 | if decoded.Version != 9999 { 173 | t.Errorf("Expected version 9999, got %d", decoded.Version) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /cmd/kevo/server_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | "github.com/KevoDB/kevo/pkg/engine" 10 | ) 11 | 12 | func TestTransactionManager(t *testing.T) { 13 | // Create a timeout context for the whole test 14 | _, cancel := context.WithTimeout(context.Background(), 5*time.Second) 15 | defer cancel() 16 | 17 | // Set up temporary directory for test 18 | tmpDir, err := os.MkdirTemp("", "kevo_test") 19 | if err != nil { 20 | t.Fatalf("Failed to create temporary directory: %v", err) 21 | } 22 | defer os.RemoveAll(tmpDir) 23 | 24 | // Create a test engine 25 | eng, err := engine.NewEngineFacade(tmpDir) 26 | if err != nil { 27 | t.Fatalf("Failed to create engine: %v", err) 28 | } 29 | defer eng.Close() 30 | 31 | // Get the transaction manager 32 | txManager := eng.GetTransactionManager() 33 | 34 | // Test read-write transaction 35 | rwTx, err := txManager.BeginTransaction(false) 36 | if err != nil { 37 | t.Fatalf("Failed to begin read-write transaction: %v", err) 38 | } 39 | if rwTx.IsReadOnly() { 40 | t.Fatal("Expected non-read-only transaction") 41 | } 42 | 43 | // Test committing the transaction 44 | if err := rwTx.Commit(); err != nil { 45 | t.Fatalf("Failed to commit transaction: %v", err) 46 | } 47 | 48 | // Test read-only transaction 49 | roTx, err := txManager.BeginTransaction(true) 50 | if err != nil { 51 | t.Fatalf("Failed to begin read-only transaction: %v", err) 52 | } 53 | if !roTx.IsReadOnly() { 54 | t.Fatal("Expected read-only transaction") 55 | } 56 | 57 | // Test rollback 58 | if err := roTx.Rollback(); err != nil { 59 | t.Fatalf("Failed to rollback transaction: %v", err) 60 | } 61 | } 62 | 63 | func TestServerStartup(t *testing.T) { 64 | // Skip if not running in an environment where we can bind to ports 65 | if os.Getenv("ENABLE_NETWORK_TESTS") != "1" { 66 | t.Skip("Skipping network test (set ENABLE_NETWORK_TESTS=1 to run)") 67 | } 68 | 69 | // Set up temporary directory for test 70 | tmpDir, err := os.MkdirTemp("", "kevo_server_test") 71 | if err != nil { 72 | t.Fatalf("Failed to create temporary directory: %v", err) 73 | } 74 | defer os.RemoveAll(tmpDir) 75 | 76 | // Create a test engine 77 | eng, err := engine.NewEngineFacade(tmpDir) 78 | if err != nil { 79 | t.Fatalf("Failed to create engine: %v", err) 80 | } 81 | defer eng.Close() 82 | 83 | // Create server with a random port 84 | config := Config{ 85 | ServerMode: true, 86 | ListenAddr: "localhost:0", // Let the OS assign a port 87 | DBPath: tmpDir, 88 | } 89 | server := NewServer(eng, config) 90 | 91 | // Start server (does not block) 92 | if err := server.Start(); err != nil { 93 | t.Fatalf("Failed to start server: %v", err) 94 | } 95 | 96 | // Check that the listener is active 97 | if server.listener == nil { 98 | t.Fatal("Server listener is nil after Start()") 99 | } 100 | 101 | // Get the assigned port - if this works, the listener is properly set up 102 | addr := server.listener.Addr().String() 103 | if addr == "" { 104 | t.Fatal("Server listener has no address") 105 | } 106 | t.Logf("Server listening on %s", addr) 107 | 108 | // Test shutdown 109 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 110 | defer cancel() 111 | if err := server.Shutdown(ctx); err != nil { 112 | t.Fatalf("Failed to shutdown server: %v", err) 113 | } 114 | } 115 | 116 | func TestGRPCServer(t *testing.T) { 117 | // Skip if not running in an environment where we can bind to ports 118 | if os.Getenv("ENABLE_NETWORK_TESTS") != "1" { 119 | t.Skip("Skipping network test (set ENABLE_NETWORK_TESTS=1 to run)") 120 | } 121 | 122 | // Create a temporary database for testing 123 | tempDBPath, err := os.MkdirTemp("", "kevo_grpc_test") 124 | if err != nil { 125 | t.Fatalf("Failed to create temporary directory: %v", err) 126 | } 127 | defer os.RemoveAll(tempDBPath) 128 | 129 | // Create engine 130 | eng, err := engine.NewEngineFacade(tempDBPath) 131 | if err != nil { 132 | t.Fatalf("Failed to create engine: %v", err) 133 | } 134 | defer eng.Close() 135 | 136 | // Create server configuration 137 | config := Config{ 138 | ServerMode: true, 139 | ListenAddr: "localhost:50052", // Use a different port for tests 140 | DBPath: tempDBPath, 141 | } 142 | 143 | // Create and start the server 144 | server := NewServer(eng, config) 145 | if err := server.Start(); err != nil { 146 | t.Fatalf("Failed to start server: %v", err) 147 | } 148 | 149 | // Run server in a goroutine 150 | go func() { 151 | if err := server.Serve(); err != nil { 152 | t.Logf("Server stopped: %v", err) 153 | } 154 | }() 155 | 156 | // Give the server a moment to start 157 | time.Sleep(200 * time.Millisecond) 158 | 159 | // Clean up at the end 160 | defer func() { 161 | shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) 162 | defer shutdownCancel() 163 | 164 | if err := server.Shutdown(shutdownCtx); err != nil { 165 | t.Logf("Failed to shut down server: %v", err) 166 | } 167 | }() 168 | 169 | // TODO: Add gRPC client tests here when client implementation is complete 170 | t.Log("gRPC server integration test scaffolding added") 171 | } 172 | -------------------------------------------------------------------------------- /pkg/sstable/reader_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | ) 9 | 10 | func TestReaderBasics(t *testing.T) { 11 | // Create a temporary directory for the test 12 | tempDir := t.TempDir() 13 | sstablePath := filepath.Join(tempDir, "test.sst") 14 | 15 | // Create a new SSTable writer 16 | writer, err := NewWriter(sstablePath) 17 | if err != nil { 18 | t.Fatalf("Failed to create SSTable writer: %v", err) 19 | } 20 | 21 | // Add some key-value pairs 22 | numEntries := 100 23 | keyValues := make(map[string]string, numEntries) 24 | 25 | for i := 0; i < numEntries; i++ { 26 | key := fmt.Sprintf("key%05d", i) 27 | value := fmt.Sprintf("value%05d", i) 28 | keyValues[key] = value 29 | 30 | err := writer.Add([]byte(key), []byte(value)) 31 | if err != nil { 32 | t.Fatalf("Failed to add entry: %v", err) 33 | } 34 | } 35 | 36 | // Finish writing 37 | err = writer.Finish() 38 | if err != nil { 39 | t.Fatalf("Failed to finish SSTable: %v", err) 40 | } 41 | 42 | // Open the SSTable for reading 43 | reader, err := OpenReader(sstablePath) 44 | if err != nil { 45 | t.Fatalf("Failed to open SSTable: %v", err) 46 | } 47 | defer reader.Close() 48 | 49 | // Verify the number of entries 50 | if reader.numEntries != uint32(numEntries) { 51 | t.Errorf("Expected %d entries, got %d", numEntries, reader.numEntries) 52 | } 53 | 54 | // Print file information 55 | t.Logf("SSTable file size: %d bytes", reader.ioManager.GetFileSize()) 56 | t.Logf("Index offset: %d", reader.indexOffset) 57 | t.Logf("Index size: %d", reader.indexSize) 58 | t.Logf("Entries in table: %d", reader.numEntries) 59 | 60 | // Check what's in the index 61 | indexIter := reader.indexBlock.Iterator() 62 | t.Log("Index entries:") 63 | count := 0 64 | for indexIter.SeekToFirst(); indexIter.Valid(); indexIter.Next() { 65 | if count < 10 { // Log the first 10 entries only 66 | indexValue := indexIter.Value() 67 | locator, err := ParseBlockLocator(indexIter.Key(), indexValue) 68 | if err != nil { 69 | t.Errorf("Failed to parse block locator: %v", err) 70 | continue 71 | } 72 | 73 | t.Logf(" Index key: %s, block offset: %d, block size: %d", 74 | string(locator.Key), locator.Offset, locator.Size) 75 | 76 | // Read the block and see what keys it contains 77 | blockReader, err := reader.blockFetcher.FetchBlock(locator.Offset, locator.Size) 78 | if err == nil { 79 | blockIter := blockReader.Iterator() 80 | t.Log(" Block contents:") 81 | keysInBlock := 0 82 | for blockIter.SeekToFirst(); blockIter.Valid() && keysInBlock < 10; blockIter.Next() { 83 | t.Logf(" Key: %s, Value: %s", 84 | string(blockIter.Key()), string(blockIter.Value())) 85 | keysInBlock++ 86 | } 87 | if keysInBlock >= 10 { 88 | t.Logf(" ... and more keys") 89 | } 90 | } 91 | } 92 | count++ 93 | } 94 | t.Logf("Total index entries: %d", count) 95 | 96 | // Read some keys 97 | for i := 0; i < numEntries; i += 10 { 98 | key := fmt.Sprintf("key%05d", i) 99 | expectedValue := keyValues[key] 100 | 101 | value, err := reader.Get([]byte(key)) 102 | if err != nil { 103 | t.Errorf("Failed to get key %s: %v", key, err) 104 | continue 105 | } 106 | 107 | if string(value) != expectedValue { 108 | t.Errorf("Value mismatch for key %s: expected %s, got %s", 109 | key, expectedValue, value) 110 | } 111 | } 112 | 113 | // Try to read a non-existent key 114 | _, err = reader.Get([]byte("nonexistent")) 115 | if err != ErrNotFound { 116 | t.Errorf("Expected ErrNotFound for non-existent key, got: %v", err) 117 | } 118 | } 119 | 120 | func TestReaderCorruption(t *testing.T) { 121 | // Create a temporary directory for the test 122 | tempDir := t.TempDir() 123 | sstablePath := filepath.Join(tempDir, "test.sst") 124 | 125 | // Create a new SSTable writer 126 | writer, err := NewWriter(sstablePath) 127 | if err != nil { 128 | t.Fatalf("Failed to create SSTable writer: %v", err) 129 | } 130 | 131 | // Add some key-value pairs 132 | for i := 0; i < 100; i++ { 133 | key := []byte(fmt.Sprintf("key%05d", i)) 134 | value := []byte(fmt.Sprintf("value%05d", i)) 135 | 136 | err := writer.Add(key, value) 137 | if err != nil { 138 | t.Fatalf("Failed to add entry: %v", err) 139 | } 140 | } 141 | 142 | // Finish writing 143 | err = writer.Finish() 144 | if err != nil { 145 | t.Fatalf("Failed to finish SSTable: %v", err) 146 | } 147 | 148 | // Corrupt the file 149 | file, err := os.OpenFile(sstablePath, os.O_RDWR, 0) 150 | if err != nil { 151 | t.Fatalf("Failed to open file for corruption: %v", err) 152 | } 153 | 154 | // Write some garbage at the end to corrupt the footer 155 | _, err = file.Seek(-8, os.SEEK_END) 156 | if err != nil { 157 | t.Fatalf("Failed to seek: %v", err) 158 | } 159 | 160 | _, err = file.Write([]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}) 161 | if err != nil { 162 | t.Fatalf("Failed to write garbage: %v", err) 163 | } 164 | 165 | file.Close() 166 | 167 | // Try to open the corrupted file 168 | _, err = OpenReader(sstablePath) 169 | if err == nil { 170 | t.Errorf("Expected error when opening corrupted file, but got none") 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /pkg/memtable/memtable.go: -------------------------------------------------------------------------------- 1 | package memtable 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | 8 | "github.com/KevoDB/kevo/pkg/wal" 9 | ) 10 | 11 | // MemTable is an in-memory table that stores key-value pairs 12 | // It is implemented using a skip list for efficient inserts and lookups 13 | type MemTable struct { 14 | skipList *SkipList 15 | nextSeqNum atomic.Uint64 16 | creationTime time.Time 17 | immutable atomic.Bool 18 | size int64 19 | mu sync.RWMutex 20 | } 21 | 22 | // NewMemTable creates a new memory table 23 | func NewMemTable() *MemTable { 24 | return &MemTable{ 25 | skipList: NewSkipList(), 26 | creationTime: time.Now(), 27 | } 28 | } 29 | 30 | // Put adds a key-value pair to the MemTable 31 | func (m *MemTable) Put(key, value []byte, seqNum uint64) { 32 | m.mu.Lock() 33 | defer m.mu.Unlock() 34 | 35 | if m.IsImmutable() { 36 | // Don't modify immutable memtables 37 | return 38 | } 39 | 40 | e := newEntry(key, value, TypeValue, seqNum) 41 | m.skipList.Insert(e) 42 | 43 | // Update maximum sequence number 44 | nextSeqNum := m.nextSeqNum.Load() 45 | if seqNum > nextSeqNum { 46 | m.nextSeqNum.Store(seqNum + 1) 47 | } 48 | } 49 | 50 | // Delete marks a key as deleted in the MemTable 51 | func (m *MemTable) Delete(key []byte, seqNum uint64) { 52 | m.mu.Lock() 53 | defer m.mu.Unlock() 54 | 55 | if m.IsImmutable() { 56 | // Don't modify immutable memtables 57 | return 58 | } 59 | 60 | e := newEntry(key, nil, TypeDeletion, seqNum) 61 | m.skipList.Insert(e) 62 | 63 | // Update maximum sequence number 64 | nextSeqNum := m.nextSeqNum.Load() 65 | if seqNum > nextSeqNum { 66 | m.nextSeqNum.Store(seqNum + 1) 67 | } 68 | } 69 | 70 | // Get retrieves the value associated with the given key 71 | // Returns (nil, true) if the key exists but has been deleted 72 | // Returns (nil, false) if the key does not exist 73 | // Returns (value, true) if the key exists and has a value 74 | func (m *MemTable) Get(key []byte) ([]byte, bool) { 75 | // Use atomic check for immutability first 76 | if m.IsImmutable() { 77 | // For immutable memtables, we can bypass the write lock completely 78 | e := m.skipList.Find(key) 79 | if e == nil { 80 | return nil, false 81 | } 82 | 83 | // Check if this is a deletion marker 84 | if e.valueType == TypeDeletion { 85 | return nil, true // Key exists but was deleted 86 | } 87 | 88 | return e.value, true 89 | } else { 90 | // For mutable memtables, we still need read lock protection 91 | // as the structure could be modified during reads 92 | m.mu.RLock() 93 | defer m.mu.RUnlock() 94 | 95 | e := m.skipList.Find(key) 96 | if e == nil { 97 | return nil, false 98 | } 99 | 100 | // Check if this is a deletion marker 101 | if e.valueType == TypeDeletion { 102 | return nil, true // Key exists but was deleted 103 | } 104 | 105 | return e.value, true 106 | } 107 | } 108 | 109 | // Contains checks if the key exists in the MemTable 110 | func (m *MemTable) Contains(key []byte) bool { 111 | // For immutable memtables, we can bypass the RWLock completely 112 | if m.IsImmutable() { 113 | return m.skipList.Find(key) != nil 114 | } else { 115 | // For mutable memtables, we still need read lock protection 116 | m.mu.RLock() 117 | defer m.mu.RUnlock() 118 | 119 | return m.skipList.Find(key) != nil 120 | } 121 | } 122 | 123 | // ApproximateSize returns the approximate size of the MemTable in bytes 124 | func (m *MemTable) ApproximateSize() int64 { 125 | return m.skipList.ApproximateSize() 126 | } 127 | 128 | // SetImmutable marks the MemTable as immutable 129 | // After this is called, no more modifications are allowed 130 | func (m *MemTable) SetImmutable() { 131 | m.immutable.Store(true) 132 | } 133 | 134 | // IsImmutable returns whether the MemTable is immutable 135 | func (m *MemTable) IsImmutable() bool { 136 | return m.immutable.Load() 137 | } 138 | 139 | // Age returns the age of the MemTable in seconds 140 | func (m *MemTable) Age() float64 { 141 | return time.Since(m.creationTime).Seconds() 142 | } 143 | 144 | // NewIterator returns an iterator for the MemTable 145 | func (m *MemTable) NewIterator() *Iterator { 146 | // For immutable memtables, we can bypass the lock 147 | if m.IsImmutable() { 148 | return m.skipList.NewIterator() 149 | } else { 150 | // For mutable memtables, capture current snapshot sequence number 151 | m.mu.RLock() 152 | snapshotSeq := m.nextSeqNum.Load() 153 | m.mu.RUnlock() 154 | 155 | return m.skipList.NewIteratorWithSnapshot(snapshotSeq) 156 | } 157 | } 158 | 159 | // GetNextSequenceNumber returns the next sequence number to use 160 | func (m *MemTable) GetNextSequenceNumber() uint64 { 161 | // For immutable memtables, nextSeqNum won't change 162 | if m.IsImmutable() { 163 | return m.nextSeqNum.Load() 164 | } else { 165 | // For mutable memtables, we need read lock 166 | m.mu.RLock() 167 | defer m.mu.RUnlock() 168 | return m.nextSeqNum.Load() 169 | } 170 | } 171 | 172 | // ProcessWALEntry processes a WAL entry and applies it to the MemTable 173 | func (m *MemTable) ProcessWALEntry(entry *wal.Entry) error { 174 | switch entry.Type { 175 | case wal.OpTypePut: 176 | m.Put(entry.Key, entry.Value, entry.SequenceNumber) 177 | case wal.OpTypeDelete: 178 | m.Delete(entry.Key, entry.SequenceNumber) 179 | } 180 | return nil 181 | } 182 | -------------------------------------------------------------------------------- /pkg/sstable/sstable_test.go: -------------------------------------------------------------------------------- 1 | package sstable 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | ) 9 | 10 | func TestBasics(t *testing.T) { 11 | // Create a temporary directory for the test 12 | tempDir := t.TempDir() 13 | sstablePath := filepath.Join(tempDir, "test.sst") 14 | 15 | // Create a new SSTable writer 16 | writer, err := NewWriter(sstablePath) 17 | if err != nil { 18 | t.Fatalf("Failed to create SSTable writer: %v", err) 19 | } 20 | 21 | // Add some key-value pairs 22 | numEntries := 100 23 | keyValues := make(map[string]string, numEntries) 24 | 25 | for i := 0; i < numEntries; i++ { 26 | key := fmt.Sprintf("key%05d", i) 27 | value := fmt.Sprintf("value%05d", i) 28 | keyValues[key] = value 29 | 30 | err := writer.Add([]byte(key), []byte(value)) 31 | if err != nil { 32 | t.Fatalf("Failed to add entry: %v", err) 33 | } 34 | } 35 | 36 | // Finish writing 37 | err = writer.Finish() 38 | if err != nil { 39 | t.Fatalf("Failed to finish SSTable: %v", err) 40 | } 41 | 42 | // Check that the file exists and has some data 43 | info, err := os.Stat(sstablePath) 44 | if err != nil { 45 | t.Fatalf("Failed to stat file: %v", err) 46 | } 47 | 48 | if info.Size() == 0 { 49 | t.Errorf("File is empty") 50 | } 51 | 52 | // Open the SSTable for reading 53 | reader, err := OpenReader(sstablePath) 54 | if err != nil { 55 | t.Fatalf("Failed to open SSTable: %v", err) 56 | } 57 | defer reader.Close() 58 | 59 | // Verify the number of entries 60 | if reader.numEntries != uint32(numEntries) { 61 | t.Errorf("Expected %d entries, got %d", numEntries, reader.numEntries) 62 | } 63 | 64 | // Print file information 65 | t.Logf("SSTable file size: %d bytes", reader.ioManager.GetFileSize()) 66 | t.Logf("Index offset: %d", reader.indexOffset) 67 | t.Logf("Index size: %d", reader.indexSize) 68 | t.Logf("Entries in table: %d", reader.numEntries) 69 | 70 | // Check what's in the index 71 | indexIter := reader.indexBlock.Iterator() 72 | t.Log("Index entries:") 73 | count := 0 74 | for indexIter.SeekToFirst(); indexIter.Valid(); indexIter.Next() { 75 | if count < 10 { // Log the first 10 entries only 76 | locator, err := ParseBlockLocator(indexIter.Key(), indexIter.Value()) 77 | if err != nil { 78 | t.Errorf("Failed to parse block locator: %v", err) 79 | continue 80 | } 81 | 82 | t.Logf(" Index key: %s, block offset: %d, block size: %d", 83 | string(locator.Key), locator.Offset, locator.Size) 84 | 85 | // Read the block and see what keys it contains 86 | blockReader, err := reader.blockFetcher.FetchBlock(locator.Offset, locator.Size) 87 | if err == nil { 88 | blockIter := blockReader.Iterator() 89 | t.Log(" Block contents:") 90 | keysInBlock := 0 91 | for blockIter.SeekToFirst(); blockIter.Valid() && keysInBlock < 10; blockIter.Next() { 92 | t.Logf(" Key: %s, Value: %s", 93 | string(blockIter.Key()), string(blockIter.Value())) 94 | keysInBlock++ 95 | } 96 | if keysInBlock >= 10 { 97 | t.Logf(" ... and more keys") 98 | } 99 | } 100 | } 101 | count++ 102 | } 103 | t.Logf("Total index entries: %d", count) 104 | 105 | // Read some keys 106 | for i := 0; i < numEntries; i += 10 { 107 | key := fmt.Sprintf("key%05d", i) 108 | expectedValue := keyValues[key] 109 | 110 | value, err := reader.Get([]byte(key)) 111 | if err != nil { 112 | t.Errorf("Failed to get key %s: %v", key, err) 113 | continue 114 | } 115 | 116 | if string(value) != expectedValue { 117 | t.Errorf("Value mismatch for key %s: expected %s, got %s", 118 | key, expectedValue, value) 119 | } 120 | } 121 | 122 | // Try to read a non-existent key 123 | _, err = reader.Get([]byte("nonexistent")) 124 | if err != ErrNotFound { 125 | t.Errorf("Expected ErrNotFound for non-existent key, got: %v", err) 126 | } 127 | } 128 | 129 | func TestCorruption(t *testing.T) { 130 | // Create a temporary directory for the test 131 | tempDir := t.TempDir() 132 | sstablePath := filepath.Join(tempDir, "test.sst") 133 | 134 | // Create a new SSTable writer 135 | writer, err := NewWriter(sstablePath) 136 | if err != nil { 137 | t.Fatalf("Failed to create SSTable writer: %v", err) 138 | } 139 | 140 | // Add some key-value pairs 141 | for i := 0; i < 100; i++ { 142 | key := []byte(fmt.Sprintf("key%05d", i)) 143 | value := []byte(fmt.Sprintf("value%05d", i)) 144 | 145 | err := writer.Add(key, value) 146 | if err != nil { 147 | t.Fatalf("Failed to add entry: %v", err) 148 | } 149 | } 150 | 151 | // Finish writing 152 | err = writer.Finish() 153 | if err != nil { 154 | t.Fatalf("Failed to finish SSTable: %v", err) 155 | } 156 | 157 | // Corrupt the file 158 | file, err := os.OpenFile(sstablePath, os.O_RDWR, 0) 159 | if err != nil { 160 | t.Fatalf("Failed to open file for corruption: %v", err) 161 | } 162 | 163 | // Write some garbage at the end to corrupt the footer 164 | _, err = file.Seek(-8, os.SEEK_END) 165 | if err != nil { 166 | t.Fatalf("Failed to seek: %v", err) 167 | } 168 | 169 | _, err = file.Write([]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}) 170 | if err != nil { 171 | t.Fatalf("Failed to write garbage: %v", err) 172 | } 173 | 174 | file.Close() 175 | 176 | // Try to open the corrupted file 177 | _, err = OpenReader(sstablePath) 178 | if err == nil { 179 | t.Errorf("Expected error when opening corrupted file, but got none") 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /cmd/storage-bench/report.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strconv" 9 | "time" 10 | ) 11 | 12 | // BenchmarkResult stores the results of a benchmark 13 | type BenchmarkResult struct { 14 | BenchmarkType string 15 | NumKeys int 16 | ValueSize int 17 | Mode string 18 | Operations int 19 | Duration float64 20 | Throughput float64 21 | Latency float64 22 | HitRate float64 // For read benchmarks 23 | EntriesPerSec float64 // For scan benchmarks 24 | ReadRatio float64 // For mixed benchmarks 25 | WriteRatio float64 // For mixed benchmarks 26 | Timestamp time.Time 27 | } 28 | 29 | // SaveResultCSV saves benchmark results to a CSV file 30 | func SaveResultCSV(results []BenchmarkResult, filename string) error { 31 | // Create directory if it doesn't exist 32 | dir := filepath.Dir(filename) 33 | if err := os.MkdirAll(dir, 0755); err != nil { 34 | return err 35 | } 36 | 37 | // Open file 38 | file, err := os.Create(filename) 39 | if err != nil { 40 | return err 41 | } 42 | defer file.Close() 43 | 44 | // Create CSV writer 45 | writer := csv.NewWriter(file) 46 | defer writer.Flush() 47 | 48 | // Write header 49 | header := []string{ 50 | "Timestamp", "BenchmarkType", "NumKeys", "ValueSize", "Mode", 51 | "Operations", "Duration", "Throughput", "Latency", "HitRate", 52 | "EntriesPerSec", "ReadRatio", "WriteRatio", 53 | } 54 | if err := writer.Write(header); err != nil { 55 | return err 56 | } 57 | 58 | // Write results 59 | for _, r := range results { 60 | record := []string{ 61 | r.Timestamp.Format(time.RFC3339), 62 | r.BenchmarkType, 63 | strconv.Itoa(r.NumKeys), 64 | strconv.Itoa(r.ValueSize), 65 | r.Mode, 66 | strconv.Itoa(r.Operations), 67 | fmt.Sprintf("%.2f", r.Duration), 68 | fmt.Sprintf("%.2f", r.Throughput), 69 | fmt.Sprintf("%.3f", r.Latency), 70 | fmt.Sprintf("%.2f", r.HitRate), 71 | fmt.Sprintf("%.2f", r.EntriesPerSec), 72 | fmt.Sprintf("%.1f", r.ReadRatio), 73 | fmt.Sprintf("%.1f", r.WriteRatio), 74 | } 75 | if err := writer.Write(record); err != nil { 76 | return err 77 | } 78 | } 79 | 80 | return nil 81 | } 82 | 83 | // LoadResultCSV loads benchmark results from a CSV file 84 | func LoadResultCSV(filename string) ([]BenchmarkResult, error) { 85 | // Open file 86 | file, err := os.Open(filename) 87 | if err != nil { 88 | return nil, err 89 | } 90 | defer file.Close() 91 | 92 | // Create CSV reader 93 | reader := csv.NewReader(file) 94 | records, err := reader.ReadAll() 95 | if err != nil { 96 | return nil, err 97 | } 98 | 99 | // Skip header 100 | if len(records) <= 1 { 101 | return []BenchmarkResult{}, nil 102 | } 103 | records = records[1:] 104 | 105 | // Parse results 106 | results := make([]BenchmarkResult, 0, len(records)) 107 | for _, record := range records { 108 | if len(record) < 13 { 109 | continue 110 | } 111 | 112 | timestamp, _ := time.Parse(time.RFC3339, record[0]) 113 | numKeys, _ := strconv.Atoi(record[2]) 114 | valueSize, _ := strconv.Atoi(record[3]) 115 | operations, _ := strconv.Atoi(record[5]) 116 | duration, _ := strconv.ParseFloat(record[6], 64) 117 | throughput, _ := strconv.ParseFloat(record[7], 64) 118 | latency, _ := strconv.ParseFloat(record[8], 64) 119 | hitRate, _ := strconv.ParseFloat(record[9], 64) 120 | entriesPerSec, _ := strconv.ParseFloat(record[10], 64) 121 | readRatio, _ := strconv.ParseFloat(record[11], 64) 122 | writeRatio, _ := strconv.ParseFloat(record[12], 64) 123 | 124 | result := BenchmarkResult{ 125 | Timestamp: timestamp, 126 | BenchmarkType: record[1], 127 | NumKeys: numKeys, 128 | ValueSize: valueSize, 129 | Mode: record[4], 130 | Operations: operations, 131 | Duration: duration, 132 | Throughput: throughput, 133 | Latency: latency, 134 | HitRate: hitRate, 135 | EntriesPerSec: entriesPerSec, 136 | ReadRatio: readRatio, 137 | WriteRatio: writeRatio, 138 | } 139 | results = append(results, result) 140 | } 141 | 142 | return results, nil 143 | } 144 | 145 | // PrintResultTable prints a formatted table of benchmark results 146 | func PrintResultTable(results []BenchmarkResult) { 147 | if len(results) == 0 { 148 | fmt.Println("No results to display") 149 | return 150 | } 151 | 152 | // Print header 153 | fmt.Println("+-----------------+--------+---------+------------+----------+----------+") 154 | fmt.Println("| Benchmark Type | Keys | ValSize | Throughput | Latency | Hit Rate |") 155 | fmt.Println("+-----------------+--------+---------+------------+----------+----------+") 156 | 157 | // Print results 158 | for _, r := range results { 159 | hitRateStr := "-" 160 | if r.BenchmarkType == "Read" { 161 | hitRateStr = fmt.Sprintf("%.2f%%", r.HitRate) 162 | } else if r.BenchmarkType == "Mixed" { 163 | hitRateStr = fmt.Sprintf("R:%.0f/W:%.0f", r.ReadRatio, r.WriteRatio) 164 | } 165 | 166 | latencyUnit := "µs" 167 | latency := r.Latency 168 | if latency > 1000 { 169 | latencyUnit = "ms" 170 | latency /= 1000 171 | } 172 | 173 | fmt.Printf("| %-15s | %6d | %7d | %10.2f | %6.2f%s | %8s |\n", 174 | r.BenchmarkType, 175 | r.NumKeys, 176 | r.ValueSize, 177 | r.Throughput, 178 | latency, latencyUnit, 179 | hitRateStr) 180 | } 181 | fmt.Println("+-----------------+--------+---------+------------+----------+----------+") 182 | } 183 | -------------------------------------------------------------------------------- /pkg/common/iterator/bounded/bounded.go: -------------------------------------------------------------------------------- 1 | package bounded 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/KevoDB/kevo/pkg/common/iterator" 7 | ) 8 | 9 | // BoundedIterator wraps an iterator and limits it to a specific key range 10 | type BoundedIterator struct { 11 | iterator.Iterator 12 | start []byte 13 | end []byte 14 | } 15 | 16 | // NewBoundedIterator creates a new bounded iterator 17 | func NewBoundedIterator(iter iterator.Iterator, startKey, endKey []byte) *BoundedIterator { 18 | bi := &BoundedIterator{ 19 | Iterator: iter, 20 | } 21 | 22 | // Make copies of the bounds to avoid external modification 23 | if startKey != nil { 24 | bi.start = make([]byte, len(startKey)) 25 | copy(bi.start, startKey) 26 | } 27 | 28 | if endKey != nil { 29 | bi.end = make([]byte, len(endKey)) 30 | copy(bi.end, endKey) 31 | } 32 | 33 | return bi 34 | } 35 | 36 | // SetBounds sets the start and end bounds for the iterator 37 | func (b *BoundedIterator) SetBounds(start, end []byte) { 38 | // Make copies of the bounds to avoid external modification 39 | if start != nil { 40 | b.start = make([]byte, len(start)) 41 | copy(b.start, start) 42 | } else { 43 | b.start = nil 44 | } 45 | 46 | if end != nil { 47 | b.end = make([]byte, len(end)) 48 | copy(b.end, end) 49 | } else { 50 | b.end = nil 51 | } 52 | 53 | // If we already have a valid position, check if it's still in bounds 54 | if b.Iterator.Valid() { 55 | b.checkBounds() 56 | } 57 | } 58 | 59 | // SeekToFirst positions at the first key in the bounded range 60 | func (b *BoundedIterator) SeekToFirst() { 61 | if b.start != nil { 62 | // If we have a start bound, seek to it 63 | b.Iterator.Seek(b.start) 64 | } else { 65 | // Otherwise seek to the first key 66 | b.Iterator.SeekToFirst() 67 | } 68 | b.checkBounds() 69 | } 70 | 71 | // SeekToLast positions at the last key in the bounded range 72 | func (b *BoundedIterator) SeekToLast() { 73 | if b.end != nil { 74 | // If we have an end bound, seek to it 75 | // The current implementation might not be efficient for finding the last 76 | // key before the end bound, but it works for now 77 | b.Iterator.Seek(b.end) 78 | 79 | // If we landed exactly at the end bound, back up one 80 | if b.Iterator.Valid() && bytes.Equal(b.Iterator.Key(), b.end) { 81 | // We need to back up because end is exclusive 82 | // This is inefficient but correct 83 | b.Iterator.SeekToFirst() 84 | 85 | // Scan to find the last key before the end bound 86 | var lastKey []byte 87 | for b.Iterator.Valid() && bytes.Compare(b.Iterator.Key(), b.end) < 0 { 88 | lastKey = b.Iterator.Key() 89 | b.Iterator.Next() 90 | } 91 | 92 | if lastKey != nil { 93 | b.Iterator.Seek(lastKey) 94 | } else { 95 | // No keys before the end bound 96 | b.Iterator.SeekToFirst() 97 | // This will be marked invalid by checkBounds 98 | } 99 | } 100 | } else { 101 | // No end bound, seek to the last key 102 | b.Iterator.SeekToLast() 103 | } 104 | 105 | // Verify we're within bounds 106 | b.checkBounds() 107 | } 108 | 109 | // Seek positions at the first key >= target within bounds 110 | func (b *BoundedIterator) Seek(target []byte) bool { 111 | // If target is before start bound, use start bound instead 112 | if b.start != nil && bytes.Compare(target, b.start) < 0 { 113 | target = b.start 114 | } 115 | 116 | // If target is at or after end bound, the seek will fail 117 | if b.end != nil && bytes.Compare(target, b.end) >= 0 { 118 | return false 119 | } 120 | 121 | if b.Iterator.Seek(target) { 122 | return b.checkBounds() 123 | } 124 | return false 125 | } 126 | 127 | // Next advances to the next key within bounds 128 | func (b *BoundedIterator) Next() bool { 129 | // First check if we're already at or beyond the end boundary 130 | if !b.checkBounds() { 131 | return false 132 | } 133 | 134 | // Then try to advance 135 | if !b.Iterator.Next() { 136 | return false 137 | } 138 | 139 | // Check if the new position is within bounds 140 | return b.checkBounds() 141 | } 142 | 143 | // Valid returns true if the iterator is positioned at a valid entry within bounds 144 | func (b *BoundedIterator) Valid() bool { 145 | return b.Iterator.Valid() && b.checkBounds() 146 | } 147 | 148 | // Key returns the current key if within bounds 149 | func (b *BoundedIterator) Key() []byte { 150 | if !b.Valid() { 151 | return nil 152 | } 153 | return b.Iterator.Key() 154 | } 155 | 156 | // Value returns the current value if within bounds 157 | func (b *BoundedIterator) Value() []byte { 158 | if !b.Valid() { 159 | return nil 160 | } 161 | return b.Iterator.Value() 162 | } 163 | 164 | // IsTombstone returns true if the current entry is a deletion marker 165 | func (b *BoundedIterator) IsTombstone() bool { 166 | if !b.Valid() { 167 | return false 168 | } 169 | return b.Iterator.IsTombstone() 170 | } 171 | 172 | // checkBounds verifies that the current position is within the bounds 173 | // Returns true if the position is valid and within bounds 174 | func (b *BoundedIterator) checkBounds() bool { 175 | if !b.Iterator.Valid() { 176 | return false 177 | } 178 | 179 | // Check if the current key is before the start bound 180 | if b.start != nil && bytes.Compare(b.Iterator.Key(), b.start) < 0 { 181 | return false 182 | } 183 | 184 | // Check if the current key is beyond the end bound 185 | if b.end != nil && bytes.Compare(b.Iterator.Key(), b.end) >= 0 { 186 | return false 187 | } 188 | 189 | return true 190 | } 191 | -------------------------------------------------------------------------------- /pkg/sstable/footer/footer.go: -------------------------------------------------------------------------------- 1 | package footer 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "io" 7 | "time" 8 | 9 | "github.com/cespare/xxhash/v2" 10 | ) 11 | 12 | const ( 13 | // FooterSize is the fixed size of the footer in bytes 14 | FooterSize = 68 15 | // FooterMagic is a magic number to verify we're reading a valid footer 16 | FooterMagic = uint64(0xFACEFEEDFACEFEED) 17 | // CurrentVersion is the current file format version 18 | CurrentVersion = uint32(2) // Updated for bloom filter support 19 | ) 20 | 21 | // Footer contains metadata for an SSTable file 22 | type Footer struct { 23 | // Magic number for integrity checking 24 | Magic uint64 25 | // Version of the file format 26 | Version uint32 27 | // Timestamp of when the file was created 28 | Timestamp int64 29 | // Offset where the index block starts 30 | IndexOffset uint64 31 | // Size of the index block in bytes 32 | IndexSize uint32 33 | // Total number of key/value pairs 34 | NumEntries uint32 35 | // Smallest key in the file 36 | MinKeyOffset uint32 37 | // Largest key in the file 38 | MaxKeyOffset uint32 39 | // Bloom filter offset (0 if no bloom filter) 40 | BloomFilterOffset uint64 41 | // Bloom filter size (0 if no bloom filter) 42 | BloomFilterSize uint32 43 | // Checksum of all footer fields excluding the checksum itself 44 | Checksum uint64 45 | } 46 | 47 | // NewFooter creates a new footer with the given parameters 48 | func NewFooter(indexOffset uint64, indexSize uint32, numEntries uint32, 49 | minKeyOffset, maxKeyOffset uint32, bloomFilterOffset uint64, bloomFilterSize uint32) *Footer { 50 | 51 | return &Footer{ 52 | Magic: FooterMagic, 53 | Version: CurrentVersion, 54 | Timestamp: time.Now().UnixNano(), 55 | IndexOffset: indexOffset, 56 | IndexSize: indexSize, 57 | NumEntries: numEntries, 58 | MinKeyOffset: minKeyOffset, 59 | MaxKeyOffset: maxKeyOffset, 60 | BloomFilterOffset: bloomFilterOffset, 61 | BloomFilterSize: bloomFilterSize, 62 | Checksum: 0, // Will be calculated during serialization 63 | } 64 | } 65 | 66 | // Encode serializes the footer to a byte slice 67 | func (f *Footer) Encode() []byte { 68 | result := make([]byte, FooterSize) 69 | 70 | // Encode all fields directly into the buffer 71 | binary.LittleEndian.PutUint64(result[0:8], f.Magic) 72 | binary.LittleEndian.PutUint32(result[8:12], f.Version) 73 | binary.LittleEndian.PutUint64(result[12:20], uint64(f.Timestamp)) 74 | binary.LittleEndian.PutUint64(result[20:28], f.IndexOffset) 75 | binary.LittleEndian.PutUint32(result[28:32], f.IndexSize) 76 | binary.LittleEndian.PutUint32(result[32:36], f.NumEntries) 77 | binary.LittleEndian.PutUint32(result[36:40], f.MinKeyOffset) 78 | binary.LittleEndian.PutUint32(result[40:44], f.MaxKeyOffset) 79 | binary.LittleEndian.PutUint64(result[44:52], f.BloomFilterOffset) 80 | binary.LittleEndian.PutUint32(result[52:56], f.BloomFilterSize) 81 | // 4 bytes of padding (56:60) 82 | 83 | // Calculate checksum of all fields excluding the checksum itself 84 | f.Checksum = xxhash.Sum64(result[:60]) 85 | binary.LittleEndian.PutUint64(result[60:], f.Checksum) 86 | 87 | return result 88 | } 89 | 90 | // WriteTo writes the footer to an io.Writer 91 | func (f *Footer) WriteTo(w io.Writer) (int64, error) { 92 | data := f.Encode() 93 | n, err := w.Write(data) 94 | return int64(n), err 95 | } 96 | 97 | // Decode parses a footer from a byte slice 98 | func Decode(data []byte) (*Footer, error) { 99 | if len(data) < FooterSize { 100 | return nil, fmt.Errorf("footer data too small: %d bytes, expected %d", 101 | len(data), FooterSize) 102 | } 103 | 104 | footer := &Footer{ 105 | Magic: binary.LittleEndian.Uint64(data[0:8]), 106 | Version: binary.LittleEndian.Uint32(data[8:12]), 107 | Timestamp: int64(binary.LittleEndian.Uint64(data[12:20])), 108 | IndexOffset: binary.LittleEndian.Uint64(data[20:28]), 109 | IndexSize: binary.LittleEndian.Uint32(data[28:32]), 110 | NumEntries: binary.LittleEndian.Uint32(data[32:36]), 111 | MinKeyOffset: binary.LittleEndian.Uint32(data[36:40]), 112 | MaxKeyOffset: binary.LittleEndian.Uint32(data[40:44]), 113 | } 114 | 115 | // Check version to determine how to decode the rest 116 | // Version 1: Original format without bloom filters 117 | // Version 2+: Format with bloom filters 118 | if footer.Version >= 2 { 119 | footer.BloomFilterOffset = binary.LittleEndian.Uint64(data[44:52]) 120 | footer.BloomFilterSize = binary.LittleEndian.Uint32(data[52:56]) 121 | // 4 bytes of padding (56:60) 122 | footer.Checksum = binary.LittleEndian.Uint64(data[60:]) 123 | } else { 124 | // Legacy format without bloom filters 125 | footer.BloomFilterOffset = 0 126 | footer.BloomFilterSize = 0 127 | footer.Checksum = binary.LittleEndian.Uint64(data[44:52]) 128 | } 129 | 130 | // Verify magic number 131 | if footer.Magic != FooterMagic { 132 | return nil, fmt.Errorf("invalid footer magic: %x, expected %x", 133 | footer.Magic, FooterMagic) 134 | } 135 | 136 | // Verify checksum based on version 137 | var expectedChecksum uint64 138 | if footer.Version >= 2 { 139 | expectedChecksum = xxhash.Sum64(data[:60]) 140 | } else { 141 | expectedChecksum = xxhash.Sum64(data[:44]) 142 | } 143 | 144 | if footer.Checksum != expectedChecksum { 145 | return nil, fmt.Errorf("footer checksum mismatch: file has %d, calculated %d", 146 | footer.Checksum, expectedChecksum) 147 | } 148 | 149 | return footer, nil 150 | } 151 | --------------------------------------------------------------------------------