├── .github └── workflows │ └── wildcat_ci.yml ├── .gitignore ├── artwork ├── arch-diagram.png ├── avatar.png ├── wildcat-logo.png └── wildcat-logo.svg ├── blockmanager ├── blockmanager.go ├── blockmanager_test.go ├── fdatasync_windows.go ├── fsyncdata.go ├── pio.go └── pio_windows.go ├── bloomfilter ├── bloomfilter.go └── bloomfilter_test.go ├── c ├── example.c ├── go.mod ├── go.sum └── wildcat_c.go ├── compactor.go ├── compactor_test.go ├── db.go ├── db_test.go ├── flusher.go ├── flusher_test.go ├── go.mod ├── go.sum ├── id_generator.go ├── id_generator_test.go ├── level.go ├── level_test.go ├── license ├── lru ├── lru.go └── lru_test.go ├── memtable.go ├── memtable_test.go ├── merge_iterator.go ├── merge_iterator_test.go ├── queue ├── queue.go └── queue_test.go ├── readme.md ├── serialize.go ├── serialize_test.go ├── skiplist ├── skiplist.go └── skiplist_test.go ├── sstable.go ├── sstable_test.go ├── tree ├── tree.go └── tree_test.go ├── txn.go ├── txn_test.go ├── utils.go └── utils_test.go /.github/workflows/wildcat_ci.yml: -------------------------------------------------------------------------------- 1 | name: Wildcat CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v2 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v3 21 | with: 22 | go-version: '1.24' 23 | 24 | - name: Install dependencies 25 | run: go mod tidy 26 | 27 | - name: Run block manager tests 28 | run: go test ./blockmanager -v 29 | 30 | - name: Run bloom filter tests 31 | run: go test ./bloomfilter -v 32 | 33 | - name: Run lru tests 34 | run: go test ./lru -v 35 | 36 | - name: Run queue tests 37 | run: go test ./queue -v 38 | 39 | - name: Run skiplist tests 40 | run: go test ./skiplist -v 41 | 42 | - name: Run btree tests 43 | run: go test ./tree -v 44 | 45 | - name: Run core tests 46 | run: go test -v -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | *.exe~ 3 | *.dll 4 | *.so 5 | *.dylib 6 | *.test 7 | *.out 8 | go.work 9 | go.work.sum 10 | .env 11 | .idea 12 | libwildcat.so 13 | libwildcat.h 14 | libwildcat.dll 15 | c/libwildcat.so 16 | c/libwildcat.h 17 | c/libwildcat.dll 18 | c/wildcat_example 19 | c/wildcat_example.exe -------------------------------------------------------------------------------- /artwork/arch-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guycipher/wildcat/5bc86503705c6d7b0039587ad28dfeeea261428a/artwork/arch-diagram.png -------------------------------------------------------------------------------- /artwork/avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guycipher/wildcat/5bc86503705c6d7b0039587ad28dfeeea261428a/artwork/avatar.png -------------------------------------------------------------------------------- /artwork/wildcat-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guycipher/wildcat/5bc86503705c6d7b0039587ad28dfeeea261428a/artwork/wildcat-logo.png -------------------------------------------------------------------------------- /blockmanager/fdatasync_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | // +build windows 3 | 4 | package blockmanager 5 | 6 | var ( 7 | modntdll = syscall.NewLazyDLL("ntdll.dll") 8 | procNtFlushBuffersFileEx = modntdll.NewProc("NtFlushBuffersFileEx") 9 | ) 10 | 11 | // Fdatasync__Win is a Windows-specific implementation of fdatasync. 12 | // https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/nf-ntifs-ntflushbuffersfileex 13 | func Fdatasync(fd uintptr) error { 14 | 15 | // Try to use NtFlushBuffersFileEx with FLUSH_FLAGS_FILE_DATA_SYNC_ONLY flag 16 | status, _, err := procNtFlushBuffersFileEx.Call( 17 | fd, 18 | FLUSH_FLAGS_FILE_DATA_SYNC_ONLY, 19 | 0, 20 | 0, 21 | 0, 22 | ) 23 | 24 | // Check for error (0 means success in Windows API) 25 | if status != 0 { 26 | // Fall back to regular FlushFileBuffers if NtFlushBuffersFileEx fails or isn't available 27 | return syscall.FlushFileBuffers(syscall.Handle(fd)) 28 | } 29 | 30 | return nil 31 | } 32 | -------------------------------------------------------------------------------- /blockmanager/fsyncdata.go: -------------------------------------------------------------------------------- 1 | //go:build darwin || linux || freebsd || netbsd || openbsd 2 | 3 | package blockmanager 4 | 5 | import ( 6 | "runtime" 7 | "syscall" 8 | ) 9 | 10 | func Fdatasync(fd uintptr) error { 11 | // On Darwin/macOS, Fdatasync is not available, so we fall back to Fsync.. 12 | if runtime.GOOS == "darwin" { 13 | return syscall.Fsync(int(fd)) 14 | } 15 | 16 | err := syscall.Fdatasync(int(fd)) 17 | if err != nil { 18 | return err 19 | } 20 | 21 | return nil 22 | } 23 | -------------------------------------------------------------------------------- /blockmanager/pio.go: -------------------------------------------------------------------------------- 1 | //go:build darwin || linux || freebsd || netbsd || openbsd 2 | 3 | package blockmanager 4 | 5 | import "syscall" 6 | 7 | // pwrite performs an atomic write at a specific offset without needing to Seek first 8 | func pwrite(fd uintptr, data []byte, offset int64) (int, error) { 9 | return syscall.Pwrite(int(fd), data, offset) 10 | } 11 | 12 | // pread performs an atomic read from a specific offset without needing to Seek first 13 | func pread(fd uintptr, data []byte, offset int64) (int, error) { 14 | return syscall.Pread(int(fd), data, offset) 15 | } 16 | -------------------------------------------------------------------------------- /blockmanager/pio_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | // +build windows 3 | 4 | package blockmanager 5 | 6 | import "syscall" 7 | 8 | // pwrite performs an atomic write at a specific offset without needing to Seek first 9 | func pwrite(fd uintptr, data []byte, offset int64) (int, error) { 10 | var overlapped syscall.Overlapped 11 | overlapped.OffsetHigh = uint32(offset >> 32) 12 | overlapped.Offset = uint32(offset) 13 | 14 | var bytesWritten uint32 15 | err := syscall.WriteFile(syscall.Handle(fd), data, &bytesWritten, &overlapped) 16 | return int(bytesWritten), err 17 | } 18 | 19 | // pread performs an atomic read from a specific offset without needing to Seek first 20 | func pread(fd uintptr, data []byte, offset int64) (int, error) { 21 | var overlapped syscall.Overlapped 22 | overlapped.OffsetHigh = uint32(offset >> 32) 23 | overlapped.Offset = uint32(offset) 24 | 25 | var bytesRead uint32 26 | err := syscall.ReadFile(syscall.Handle(fd), data, &bytesRead, &overlapped) 27 | return int(bytesRead), err 28 | } 29 | -------------------------------------------------------------------------------- /bloomfilter/bloomfilter.go: -------------------------------------------------------------------------------- 1 | // Package bloomfilter 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package bloomfilter 17 | 18 | import ( 19 | "errors" 20 | "hash" 21 | "hash/fnv" 22 | "math" 23 | ) 24 | 25 | // BloomFilter struct represents a Bloom filter 26 | type BloomFilter struct { 27 | Bitset []int8 // Bitset, each int8 can store 8 bits 28 | Size uint // Size of the bit array 29 | hashFunc1 hash.Hash64 // First hash function 30 | hashFunc2 hash.Hash64 // Second hash function for double hashing 31 | hashCount uint // Number of hash functions 32 | } 33 | 34 | // New creates a new Bloom filter with an expected number of items and false positive rate 35 | func New(expectedItems uint, falsePositiveRate float64) (*BloomFilter, error) { 36 | if expectedItems == 0 { 37 | return nil, errors.New("expectedItems must be greater than 0") 38 | } 39 | 40 | if falsePositiveRate <= 0 || falsePositiveRate >= 1 { 41 | return nil, errors.New("falsePositiveRate must be between 0 and 1") 42 | } 43 | 44 | // Calculate optimal size and add a safety margin for low FPR cases 45 | size := optimalSize(expectedItems, falsePositiveRate) 46 | if falsePositiveRate < 0.01 { 47 | // Add 20% extra space for very low FPR targets 48 | size = uint(float64(size) * 1.2) 49 | } 50 | 51 | // Make size a prime number (or at least odd) to improve hash distribution 52 | size = nextOddNumber(size) 53 | 54 | hashCount := optimalHashCount(size, expectedItems) 55 | 56 | bf := &BloomFilter{ 57 | Bitset: make([]int8, (size+7)/8), // Allocate enough int8s to store the bits 58 | Size: size, 59 | hashFunc1: fnv.New64a(), // FNV-1a for first hash 60 | hashFunc2: fnv.New64(), // FNV for second hash (different algorithm) 61 | hashCount: hashCount, 62 | } 63 | 64 | return bf, nil 65 | } 66 | 67 | // Add adds an item to the Bloom filter 68 | func (bf *BloomFilter) Add(data []byte) error { 69 | // Get the two hash values for double hashing 70 | h1, h2, err := bf.getTwoHashes(data) 71 | if err != nil { 72 | return err 73 | } 74 | 75 | // h_i(x) = (h1(x) + i*h2(x)) mod m 76 | // This produces k different hash functions from two base hashes 77 | m := uint64(bf.Size) 78 | for i := uint(0); i < bf.hashCount; i++ { 79 | // Ensure h2 is relatively prime to m (odd h2 with even m, or any h2 with prime m) 80 | // Specifically, we'll make sure h2 is not zero and add 1 if it is 81 | h2Val := h2 82 | if h2Val%m == 0 { 83 | h2Val++ 84 | } 85 | 86 | // Calculate position using double hashing formula 87 | position := (h1 + uint64(i)*h2Val) % m 88 | bf.Bitset[position/8] |= 1 << (position % 8) 89 | } 90 | 91 | return nil 92 | } 93 | 94 | // Contains checks if an item might exist in the Bloom filter 95 | func (bf *BloomFilter) Contains(data []byte) bool { 96 | h1, h2, err := bf.getTwoHashes(data) 97 | if err != nil { 98 | return false 99 | } 100 | 101 | // Use same double hashing scheme as Add 102 | m := uint64(bf.Size) 103 | for i := uint(0); i < bf.hashCount; i++ { 104 | // Ensure h2 is relatively prime to m 105 | h2Val := h2 106 | if h2Val%m == 0 { 107 | h2Val++ 108 | } 109 | 110 | position := (h1 + uint64(i)*h2Val) % m 111 | if bf.Bitset[position/8]&(1<<(position%8)) == 0 { 112 | return false // Definitely not in set 113 | } 114 | } 115 | return true // Might be in set 116 | } 117 | 118 | // getTwoHashes computes two independent hash values for an item 119 | func (bf *BloomFilter) getTwoHashes(data []byte) (uint64, uint64, error) { 120 | bf.hashFunc1.Reset() 121 | _, err := bf.hashFunc1.Write(data) 122 | if err != nil { 123 | return 0, 0, err 124 | } 125 | h1 := bf.hashFunc1.Sum64() 126 | 127 | bf.hashFunc2.Reset() 128 | _, err = bf.hashFunc2.Write(data) 129 | if err != nil { 130 | return 0, 0, err 131 | } 132 | h2 := bf.hashFunc2.Sum64() 133 | 134 | // It's possible for small data inputs, FNV hashes might be too similar.. 135 | // Thus we add an extra mixing step if data is small 136 | if len(data) < 8 { 137 | // Mix h1 and h2 with different patterns 138 | h2 = h2 ^ (h1 >> 13) ^ (h1 << 37) 139 | } 140 | 141 | return h1, h2, nil 142 | } 143 | 144 | // optimalSize calculates the optimal size of the bit array 145 | func optimalSize(n uint, p float64) uint { 146 | return uint(math.Ceil(-float64(n) * math.Log(p) / math.Pow(math.Log(2), 2))) 147 | } 148 | 149 | // optimalHashCount calculates the optimal number of hash functions 150 | func optimalHashCount(size uint, n uint) uint { 151 | return uint(math.Ceil(float64(size) / float64(n) * math.Log(2))) 152 | } 153 | 154 | // nextOddNumber returns the next odd number >= n 155 | func nextOddNumber(n uint) uint { 156 | if n%2 == 0 { 157 | return n + 1 158 | } 159 | return n 160 | } 161 | 162 | // CalculateTheoreticalFPP returns the theoretical false positive probability 163 | // based on the current state of the filter 164 | func (bf *BloomFilter) CalculateTheoreticalFPP(itemsAdded uint) float64 { 165 | if itemsAdded == 0 { 166 | return 0.0 167 | } 168 | 169 | // (1 - e^(-kn/m))^k 170 | k := float64(bf.hashCount) 171 | m := float64(bf.Size) 172 | n := float64(itemsAdded) 173 | 174 | return math.Pow(1.0-math.Exp(-k*n/m), k) 175 | } 176 | -------------------------------------------------------------------------------- /bloomfilter/bloomfilter_test.go: -------------------------------------------------------------------------------- 1 | // Package bloomfilter 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package bloomfilter 17 | 18 | import ( 19 | "math/rand" 20 | "testing" 21 | ) 22 | 23 | func TestNewBloomFilter(t *testing.T) { 24 | bf, err := New(1000, 0.01) 25 | if err != nil { 26 | t.Errorf("Error creating BloomFilter: %v", err) 27 | } 28 | 29 | if bf.Size == 0 { 30 | t.Errorf("Expected non-zero size, got %d", bf.Size) 31 | } 32 | if len(bf.Bitset) == 0 { 33 | t.Errorf("Expected non-empty bitset, got empty") 34 | 35 | } 36 | } 37 | 38 | func TestAddAndContains(t *testing.T) { 39 | bf, err := New(1000, 0.01) 40 | if err != nil { 41 | t.Errorf("Error creating BloomFilter: %v", err) 42 | } 43 | 44 | data := []byte("testdata") 45 | 46 | err = bf.Add(data) 47 | if err != nil { 48 | t.Errorf("Error adding data to BloomFilter: %v", err) 49 | } 50 | 51 | if !bf.Contains(data) { 52 | t.Errorf("Expected BloomFilter to contain data") 53 | } 54 | 55 | nonExistentData := []byte("nonexistent") 56 | if bf.Contains(nonExistentData) { 57 | t.Errorf("Expected BloomFilter to not contain non-existent data") 58 | } 59 | } 60 | 61 | func TestCollisionRate(t *testing.T) { 62 | // Test parameters 63 | expectedItems := uint(10000) 64 | falsePositiveRate := 0.01 // 1% expected false positive rate 65 | 66 | // Create a new Bloom filter 67 | bf, err := New(expectedItems, falsePositiveRate) 68 | if err != nil { 69 | t.Fatalf("Error creating BloomFilter: %v", err) 70 | } 71 | 72 | // Generate and add unique items to the filter 73 | addedItems := make([][]byte, expectedItems) 74 | for i := uint(0); i < expectedItems; i++ { 75 | // Generate random 16-byte data 76 | data := make([]byte, 16) 77 | _, err := rand.Read(data) 78 | if err != nil { 79 | t.Fatalf("Error generating random data: %v", err) 80 | } 81 | 82 | // Store item for later verification 83 | addedItems[i] = data 84 | 85 | // Add to filter 86 | err = bf.Add(data) 87 | if err != nil { 88 | t.Fatalf("Error adding data to BloomFilter: %v", err) 89 | } 90 | } 91 | 92 | // Verify all added items are found (should be 100%) 93 | for i, item := range addedItems { 94 | if !bf.Contains(item) { 95 | t.Errorf("Added item %d not found in BloomFilter", i) 96 | } 97 | } 98 | 99 | // Test for false positives with new random items 100 | testItems := uint(100000) // Test with 10x more items for statistical significance 101 | falsePositives := 0 102 | 103 | for i := uint(0); i < testItems; i++ { 104 | // Generate random data that wasn't added 105 | data := make([]byte, 16) 106 | _, err := rand.Read(data) 107 | if err != nil { 108 | t.Fatalf("Error generating random test data: %v", err) 109 | } 110 | 111 | // Check if the filter falsely reports this item as present 112 | if bf.Contains(data) { 113 | falsePositives++ 114 | } 115 | } 116 | 117 | // Calculate actual false positive rate 118 | actualFPR := float64(falsePositives) / float64(testItems) 119 | 120 | // Calculate theoretical false positive rate 121 | theoreticalFPR := bf.CalculateTheoreticalFPP(expectedItems) 122 | 123 | // Log the results 124 | t.Logf("Expected FP rate: %.6f", falsePositiveRate) 125 | t.Logf("Theoretical FP rate: %.6f", theoreticalFPR) 126 | t.Logf("Actual FP rate: %.6f (%d false positives out of %d tests)", 127 | actualFPR, falsePositives, testItems) 128 | 129 | // The actual rate should be reasonably close to the theoretical rate 130 | // Allow for some statistical variance (3x theoretical is usually acceptable) 131 | maxAcceptableFPR := 3.0 * theoreticalFPR 132 | 133 | if actualFPR > maxAcceptableFPR { 134 | t.Errorf("False positive rate too high: %.6f > %.6f (3x theoretical rate)", 135 | actualFPR, maxAcceptableFPR) 136 | } 137 | } 138 | 139 | func BenchmarkAdd(b *testing.B) { 140 | bf, err := New(1000, 0.01) 141 | if err != nil { 142 | b.Errorf("Error creating BloomFilter: %v", err) 143 | } 144 | 145 | data := []byte("testdata") 146 | 147 | for i := 0; i < b.N; i++ { 148 | err = bf.Add(data) 149 | if err != nil { 150 | b.Errorf("Error adding data to BloomFilter: %v", err) 151 | 152 | } 153 | } 154 | 155 | } 156 | 157 | func BenchmarkContains(b *testing.B) { 158 | bf, err := New(1000, 0.01) 159 | if err != nil { 160 | b.Errorf("Error creating BloomFilter: %v", err) 161 | } 162 | 163 | data := []byte("testdata") 164 | err = bf.Add(data) 165 | if err != nil { 166 | b.Errorf("Error adding data to BloomFilter: %v", err) 167 | } 168 | 169 | for i := 0; i < b.N; i++ { 170 | bf.Contains(data) 171 | } 172 | 173 | } 174 | 175 | func BenchmarkFalsePositiveRate(b *testing.B) { 176 | testCases := []struct { 177 | name string 178 | expectedItems uint 179 | targetFPR float64 180 | }{ 181 | {"Small-Low-FPR", 100, 0.001}, // Small set with very low FPR 182 | {"Small-Medium-FPR", 100, 0.01}, // Small set with medium FPR 183 | {"Medium-Low-FPR", 10000, 0.001}, // Medium set with low FPR 184 | {"Medium-Medium-FPR", 10000, 0.01}, // Medium set with medium FPR 185 | {"Large-Low-FPR", 100000, 0.001}, // Large set with low FPR (memory intensive) 186 | } 187 | 188 | for _, tc := range testCases { 189 | b.Run(tc.name, func(b *testing.B) { 190 | // Only perform test once per configuration regardless of b.N 191 | b.StopTimer() 192 | 193 | // Create filter with specified parameters 194 | bf, err := New(tc.expectedItems, tc.targetFPR) 195 | if err != nil { 196 | b.Fatalf("Error creating BloomFilter: %v", err) 197 | } 198 | 199 | // Add items (using 80% of expected capacity) 200 | itemCount := tc.expectedItems * 80 / 100 201 | for i := uint(0); i < itemCount; i++ { 202 | data := make([]byte, 16) 203 | _, err := rand.Read(data) 204 | if err != nil { 205 | b.Fatalf("Error generating random data: %v", err) 206 | } 207 | 208 | err = bf.Add(data) 209 | if err != nil { 210 | b.Fatalf("Error adding data: %v", err) 211 | } 212 | } 213 | 214 | // Test for false positives 215 | testCount := uint(10000) // Fixed test count regardless of b.N 216 | falsePositives := 0 217 | 218 | b.StartTimer() 219 | for i := uint(0); i < testCount; i++ { 220 | data := make([]byte, 16) 221 | rand.Read(data) 222 | 223 | if bf.Contains(data) { 224 | falsePositives++ 225 | } 226 | } 227 | b.StopTimer() 228 | 229 | actualFPR := float64(falsePositives) / float64(testCount) 230 | theoreticalFPR := bf.CalculateTheoreticalFPP(itemCount) 231 | 232 | b.ReportMetric(actualFPR, "actual-fpr") 233 | b.ReportMetric(theoreticalFPR, "theoretical-fpr") 234 | b.ReportMetric(float64(bf.Size)/8/1024, "size-kb") 235 | b.ReportMetric(float64(bf.hashCount), "hash-funcs") 236 | 237 | // Check if actual FPR is within acceptable range 238 | fprRatio := actualFPR / tc.targetFPR 239 | b.ReportMetric(fprRatio, "fpr-ratio") 240 | }) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /c/example.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /* create_directory 9 | * Helper function to create directory if it doesn't exist */ 10 | int 11 | create_directory(path) 12 | const char *path; 13 | { 14 | struct stat st = {0}; 15 | if (stat(path, &st) == -1) { 16 | if (mkdir(path, 0755) == -1) { 17 | perror("mkdir"); 18 | return -1; 19 | } 20 | } 21 | return 0; 22 | } 23 | 24 | /* cleanup_directory 25 | * Helper function to cleanup directory */ 26 | void 27 | cleanup_directory(path) 28 | const char *path; 29 | { 30 | char command[512]; 31 | snprintf(command, sizeof(command), "rm -rf %s", path); 32 | system(command); 33 | } 34 | 35 | int 36 | main() 37 | { 38 | printf(" \\ \\ / _ _| | _ \\ __| \\ __ __| \n"); 39 | printf(" \\ \\ \\ / | | | | ( _ \\ | \n"); 40 | printf(" \\_/\\_/ ___| ____| ___/ \\___| _/ _\\ _| \n"); 41 | printf(" \n"); 42 | 43 | const char *db_path = "/tmp/wildcat_c_example/"; 44 | 45 | /* Cleanup any existing directory */ 46 | cleanup_directory(db_path); 47 | 48 | /* Create directory */ 49 | if (create_directory(db_path) != 0) { 50 | fprintf(stderr, "Failed to create database directory\n"); 51 | return 1; 52 | } 53 | 54 | /* Initialize options */ 55 | wildcat_opts_t opts = {0}; 56 | opts.directory = (char*)db_path; 57 | opts.write_buffer_size = 32 * 1024 * 1024; /* 32MB */ 58 | opts.sync_option = SYNC_ALWAYS; 59 | opts.sync_interval_ns = 0; 60 | opts.level_count = 6; 61 | opts.level_multiplier = 10; 62 | opts.block_manager_lru_size = 256; 63 | opts.block_manager_lru_evict_ratio = 0.2; 64 | opts.block_manager_lru_access_weight = 0.8; 65 | opts.permission = 0755; 66 | opts.bloom_filter = 0; /* false */ 67 | opts.max_compaction_concurrency = 4; 68 | opts.compaction_cooldown_ns = 5000000000LL; /* 5 seconds */ 69 | opts.compaction_batch_size = 8; 70 | opts.compaction_size_ratio = 1.1; 71 | opts.compaction_size_threshold = 8; 72 | opts.compaction_score_size_weight = 0.8; 73 | opts.compaction_score_count_weight = 0.2; 74 | opts.flusher_interval_ns = 1000000LL; /* 1ms */ 75 | opts.compactor_interval_ns = 250000000LL; /* 250ms */ 76 | opts.bloom_fpr = 0.01; 77 | opts.wal_append_retry = 10; 78 | opts.wal_append_backoff_ns = 128000LL; /* 128µs */ 79 | opts.sstable_btree_order = 10; 80 | 81 | /* Open database */ 82 | printf("Opening Wildcat database...\n"); 83 | unsigned long db_handle = wildcat_open(&opts); 84 | if (db_handle == 0) { 85 | fprintf(stderr, "Failed to open database\n"); 86 | cleanup_directory(db_path); 87 | return 1; 88 | } 89 | printf("Database opened successfully (handle: 0x%lx)\n\n", db_handle); 90 | 91 | /* === Basic Put/Get Operations === */ 92 | printf("=== Basic Put/Get Operations ===\n"); 93 | long txn_id = wildcat_begin_txn(db_handle); 94 | if (txn_id == -1) { 95 | fprintf(stderr, "Failed to begin transaction\n"); 96 | (void)wildcat_close(db_handle); 97 | (void)cleanup_directory(db_path); 98 | return 1; 99 | } 100 | printf("Transaction started (ID: %ld)\n", txn_id); 101 | 102 | /* Put operations */ 103 | if (wildcat_txn_put((unsigned long)db_handle, txn_id, "hello", "world") != 0) { 104 | fprintf(stderr, "Failed to put hello->world\n"); 105 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 106 | (void)wildcat_close(db_handle); 107 | (void)cleanup_directory(db_path); 108 | return 1; 109 | } 110 | printf("Put: hello -> world\n"); 111 | 112 | if (wildcat_txn_put((unsigned long)db_handle,txn_id, "foo", "bar") != 0) { 113 | fprintf(stderr, "Failed to put foo->bar\n"); 114 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 115 | (void)wildcat_close(db_handle); 116 | (void)cleanup_directory(db_path); 117 | return 1; 118 | } 119 | printf("Put: foo -> bar\n"); 120 | 121 | /* Commit transaction */ 122 | if (wildcat_txn_commit((unsigned long)db_handle,txn_id) != 0) { 123 | fprintf(stderr, "Failed to commit transaction\n"); 124 | (void)wildcat_close(db_handle); 125 | (void)cleanup_directory(db_path); 126 | return 1; 127 | } 128 | printf("Transaction committed\n\n"); 129 | 130 | /* === Reading Data === */ 131 | printf("=== Reading Data ===\n"); 132 | txn_id = wildcat_begin_txn(db_handle); 133 | if (txn_id == -1) { 134 | fprintf(stderr, "Failed to begin read transaction\n"); 135 | (void)wildcat_close(db_handle); 136 | (void)cleanup_directory(db_path); 137 | return 1; 138 | } 139 | 140 | char *value = wildcat_txn_get((unsigned long)db_handle,txn_id, "hello"); 141 | if (value) { 142 | printf("Get: hello -> %s\n", value); 143 | free(value); /* Don't forget to free the returned string */ 144 | } else { 145 | printf("Get: hello -> NOT FOUND\n"); 146 | } 147 | 148 | value = wildcat_txn_get((unsigned long)db_handle,txn_id, "foo"); 149 | if (value) { 150 | printf("Get: foo -> %s\n", value); 151 | free(value); 152 | } else { 153 | printf("Get: foo -> NOT FOUND\n"); 154 | } 155 | 156 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); /* Read-only transaction, just rollback */ 157 | printf("\n"); 158 | 159 | /* === Batch Operations === */ 160 | printf("=== Batch Operations ===\n"); 161 | txn_id = wildcat_begin_txn(db_handle); 162 | if (txn_id == -1) { 163 | fprintf(stderr, "Failed to begin batch transaction\n"); 164 | (void)wildcat_close(db_handle); 165 | (void)cleanup_directory(db_path); 166 | return 1; 167 | } 168 | 169 | /* Insert multiple key-value pairs */ 170 | { 171 | int i; 172 | for (i = 0; i < 10; i++) { 173 | char key[16], val[32]; 174 | snprintf(key, sizeof(key), "key%d", i); 175 | snprintf(val, sizeof(val), "value%d", i); 176 | 177 | if (wildcat_txn_put((unsigned long)db_handle,txn_id, key, val) != 0) { 178 | fprintf(stderr, "Failed to put %s->%s\n", key, val); 179 | wildcat_txn_rollback((unsigned long)db_handle,txn_id); 180 | (void)wildcat_close(db_handle); 181 | (void)cleanup_directory(db_path); 182 | return 1; 183 | } 184 | printf("Put: %s -> %s\n", key, val); 185 | } 186 | } 187 | 188 | if (wildcat_txn_commit((unsigned long)db_handle,txn_id) != 0) { 189 | fprintf(stderr, "Failed to commit batch transaction\n"); 190 | (void)wildcat_close(db_handle); 191 | (void)cleanup_directory(db_path); 192 | return 1; 193 | } 194 | printf("Batch transaction committed\n\n"); 195 | 196 | /* === Iterator Example === */ 197 | printf("=== Iterator Example ===\n"); 198 | txn_id = wildcat_begin_txn(db_handle); 199 | if (txn_id == -1) { 200 | fprintf(stderr, "Failed to begin iterator transaction\n"); 201 | (void)wildcat_close(db_handle); 202 | (void)cleanup_directory(db_path); 203 | return 1; 204 | } 205 | 206 | /* Create iterator (ascending=1) */ 207 | unsigned long iter_id = wildcat_txn_new_iterator((unsigned long)db_handle,txn_id, 1); 208 | if (iter_id == 0) { 209 | fprintf(stderr, "Failed to create iterator\n"); 210 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 211 | (void)wildcat_close(db_handle); 212 | (void)cleanup_directory(db_path); 213 | return 1; 214 | } 215 | 216 | printf("Iterating through all keys:\n"); 217 | /* The iterator is already positioned at the first element after creation */ 218 | /* Check if it's valid and iterate */ 219 | do { 220 | char *key = wildcat_iterator_key(iter_id); 221 | char *val = wildcat_iterator_value(iter_id); 222 | 223 | if (key && val) { 224 | printf(" %s -> %s\n", key, val); 225 | free(key); 226 | free(val); 227 | } 228 | } while (wildcat_txn_iterate_next(iter_id) == 0); /* 0 means valid/success */ 229 | 230 | 231 | (void)wildcat_iterator_free(iter_id); 232 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 233 | printf("\n"); 234 | 235 | /* === Delete Operation === */ 236 | printf("=== Delete Operation ===\n"); 237 | txn_id = wildcat_begin_txn(db_handle); 238 | if (txn_id == -1) { 239 | fprintf(stderr, "Failed to begin delete transaction\n"); 240 | (void)wildcat_close(db_handle); 241 | (void)cleanup_directory(db_path); 242 | return 1; 243 | } 244 | 245 | if (wildcat_txn_delete((unsigned long)db_handle,txn_id, "key5") != 0) { 246 | fprintf(stderr, "Failed to delete key5\n"); 247 | wildcat_txn_rollback((unsigned long)db_handle,txn_id); 248 | (void)wildcat_close(db_handle); 249 | (void)cleanup_directory(db_path); 250 | return 1; 251 | } 252 | printf("Deleted key: key5\n"); 253 | 254 | if (wildcat_txn_commit((unsigned long)db_handle,txn_id) != 0) { 255 | fprintf(stderr, "Failed to commit delete transaction\n"); 256 | (void)wildcat_close(db_handle); 257 | (void)cleanup_directory(db_path); 258 | return 1; 259 | } 260 | 261 | /* === Verify Deletion === */ 262 | printf("=== Verify Deletion ===\n"); 263 | txn_id = wildcat_begin_txn(db_handle); 264 | if (txn_id != -1) { 265 | value = wildcat_txn_get((unsigned long)db_handle,txn_id, "key5"); 266 | if (value == NULL) { 267 | printf("SUCCESS: key5 was successfully deleted\n"); 268 | } else { 269 | printf("ERROR: key5 still exists with value: %s\n", value); 270 | free(value); 271 | } 272 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 273 | } 274 | printf("\n"); 275 | 276 | /* === Force Flush === */ 277 | printf("=== Force Flush ===\n"); 278 | if (wildcat_force_flush(db_handle) == 0) { 279 | printf("Force flush completed successfully\n"); 280 | } else { 281 | printf("Force flush failed\n"); 282 | } 283 | printf("\n"); 284 | 285 | /* === Database Statistics === */ 286 | printf("=== Database Statistics ===\n"); 287 | { 288 | char *stats = wildcat_stats(db_handle); 289 | if (stats) { 290 | printf("%s\n", stats); 291 | free(stats); 292 | } else { 293 | printf("Failed to get database statistics\n"); 294 | } 295 | } 296 | 297 | /* === Range Iterator Example === */ 298 | printf("=== Range Iterator Example ===\n"); 299 | txn_id = wildcat_begin_txn(db_handle); 300 | if (txn_id != -1) { 301 | /* Create range iterator from "key0" to "key5" (exclusive) */ 302 | unsigned long range_iter_id = wildcat_txn_new_range_iterator((unsigned long)db_handle,txn_id, "key0", "key5", 1); 303 | if (range_iter_id != 0) { 304 | printf("Iterating through range [key0, key5):\n"); 305 | 306 | do { 307 | char *key = wildcat_iterator_key(range_iter_id); 308 | char *val = wildcat_iterator_value(range_iter_id); 309 | 310 | if (key && val) { 311 | printf(" %s -> %s\n", key, val); 312 | free(key); 313 | free(val); 314 | } 315 | } while (wildcat_txn_iterate_next(range_iter_id) == 0); 316 | 317 | (void)wildcat_iterator_free(range_iter_id); 318 | } else { 319 | printf("Failed to create range iterator\n"); 320 | } 321 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 322 | } 323 | printf("\n"); 324 | 325 | /* === Prefix Iterator Example === */ 326 | printf("=== Prefix Iterator Example ===\n"); 327 | txn_id = wildcat_begin_txn(db_handle); 328 | if (txn_id != -1) { 329 | /* Create prefix iterator for keys starting with "key" */ 330 | unsigned long prefix_iter_id = wildcat_txn_new_prefix_iterator((unsigned long)db_handle,txn_id, "key", 1); 331 | if (prefix_iter_id != 0) { 332 | printf("Iterating through keys with prefix 'key':\n"); 333 | 334 | do { 335 | char *key = wildcat_iterator_key(prefix_iter_id); 336 | char *val = wildcat_iterator_value(prefix_iter_id); 337 | 338 | if (key && val) { 339 | printf(" %s -> %s\n", key, val); 340 | free(key); 341 | free(val); 342 | } 343 | } while (wildcat_txn_iterate_next(prefix_iter_id) == 0); 344 | 345 | (void)wildcat_iterator_free(prefix_iter_id); 346 | } else { 347 | printf("Failed to create prefix iterator\n"); 348 | } 349 | (void)wildcat_txn_rollback((unsigned long)db_handle,txn_id); 350 | } 351 | 352 | /* Close database instance */ 353 | printf("Closing database...\n"); 354 | (void)wildcat_close(db_handle); 355 | printf("Database closed successfully\n"); 356 | 357 | /* Cleanup */ 358 | (void)cleanup_directory(db_path); 359 | 360 | return 0; 361 | } -------------------------------------------------------------------------------- /c/go.mod: -------------------------------------------------------------------------------- 1 | module wildcat_c 2 | 3 | go 1.24.0 4 | 5 | require github.com/wildcatdb/wildcat v1.0.11 6 | 7 | require go.mongodb.org/mongo-driver v1.17.3 // indirect 8 | -------------------------------------------------------------------------------- /c/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 4 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 5 | github.com/wildcatdb/wildcat v1.0.11 h1:jYNJisdccHGgZ7qbLtzBrYKz21zkhzxIdIhCnavwBno= 6 | github.com/wildcatdb/wildcat v1.0.11/go.mod h1:vaEiYJwI/nKHI4gyHdQCky05nmHGzhEmwFgWCv8TvoI= 7 | go.mongodb.org/mongo-driver v1.17.3 h1:TQyXhnsWfWtgAhMtOgtYHMTkZIfBTpMTsMnd9ZBeHxQ= 8 | go.mongodb.org/mongo-driver v1.17.3/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= 9 | -------------------------------------------------------------------------------- /c/wildcat_c.go: -------------------------------------------------------------------------------- 1 | // Package wildcat 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en/US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package main 17 | 18 | /* 19 | #include 20 | #include 21 | #include 22 | 23 | typedef enum { 24 | SYNC_NONE = 0, 25 | SYNC_ALWAYS, 26 | SYNC_INTERVAL 27 | } sync_option_t; 28 | 29 | typedef struct { 30 | char* directory; 31 | long write_buffer_size; 32 | int sync_option; 33 | long sync_interval_ns; 34 | int level_count; 35 | int level_multiplier; 36 | int block_manager_lru_size; 37 | double block_manager_lru_evict_ratio; 38 | double block_manager_lru_access_weight; 39 | int permission; 40 | int bloom_filter; 41 | int max_compaction_concurrency; 42 | long compaction_cooldown_ns; 43 | int compaction_batch_size; 44 | double compaction_size_ratio; 45 | int compaction_size_threshold; 46 | double compaction_score_size_weight; 47 | double compaction_score_count_weight; 48 | long flusher_interval_ns; 49 | long compactor_interval_ns; 50 | double bloom_fpr; 51 | int wal_append_retry; 52 | long wal_append_backoff_ns; 53 | int sstable_btree_order; 54 | int stdout_logging; 55 | } wildcat_opts_t; 56 | 57 | static void print_error(const char* msg) { 58 | fprintf(stderr, "WILDCAT ERROR: %s\n", msg); 59 | fflush(stderr); 60 | } 61 | */ 62 | import "C" 63 | 64 | import ( 65 | "fmt" 66 | "github.com/wildcatdb/wildcat" 67 | "os" 68 | "sync" 69 | "time" 70 | "unsafe" 71 | ) 72 | 73 | var ( 74 | dbMap = sync.Map{} // map[uint64]*wildcat.DB 75 | dbCounter uint64 76 | ) 77 | 78 | // Removed txnHandle struct and txnMap - no longer needed! 79 | 80 | type iteratorHandle struct { 81 | iter *wildcat.MergeIterator 82 | key []byte 83 | value []byte 84 | valid bool 85 | } 86 | 87 | var ( 88 | iterMap = sync.Map{} // map[uint64]*iteratorHandle 89 | iterCounter uint64 // unique ID 90 | iterMu sync.Mutex 91 | ) 92 | 93 | // Register a database and return its handle ID 94 | func registerDB(db *wildcat.DB) uint64 { 95 | dbCounter++ 96 | dbMap.Store(dbCounter, db) 97 | return dbCounter 98 | } 99 | 100 | // Get database by handle ID 101 | func getDB(id uint64) *wildcat.DB { 102 | if val, ok := dbMap.Load(id); ok { 103 | return val.(*wildcat.DB) 104 | } 105 | return nil 106 | } 107 | 108 | // Remove database handle 109 | func removeDB(id uint64) { 110 | dbMap.Delete(id) 111 | } 112 | 113 | // convert C options to Go Options 114 | func fromCOptions(copts *C.wildcat_opts_t) *wildcat.Options { 115 | return &wildcat.Options{ 116 | Directory: C.GoString(copts.directory), 117 | WriteBufferSize: int64(copts.write_buffer_size), 118 | SyncOption: wildcat.SyncOption(copts.sync_option), 119 | SyncInterval: time.Duration(copts.sync_interval_ns), 120 | LevelCount: int(copts.level_count), 121 | LevelMultiplier: int(copts.level_multiplier), 122 | BlockManagerLRUSize: int(copts.block_manager_lru_size), 123 | BlockManagerLRUEvictRatio: float64(copts.block_manager_lru_evict_ratio), 124 | BlockManagerLRUAccesWeight: float64(copts.block_manager_lru_access_weight), 125 | Permission: os.FileMode(copts.permission), 126 | BloomFilter: copts.bloom_filter != 0, 127 | MaxCompactionConcurrency: int(copts.max_compaction_concurrency), 128 | CompactionCooldownPeriod: time.Duration(copts.compaction_cooldown_ns), 129 | CompactionBatchSize: int(copts.compaction_batch_size), 130 | CompactionSizeRatio: float64(copts.compaction_size_ratio), 131 | CompactionSizeThreshold: int(copts.compaction_size_threshold), 132 | CompactionScoreSizeWeight: float64(copts.compaction_score_size_weight), 133 | CompactionScoreCountWeight: float64(copts.compaction_score_count_weight), 134 | FlusherTickerInterval: time.Duration(copts.flusher_interval_ns), 135 | CompactorTickerInterval: time.Duration(copts.compactor_interval_ns), 136 | BloomFilterFPR: float64(copts.bloom_fpr), 137 | WalAppendRetry: int(copts.wal_append_retry), 138 | WalAppendBackoff: time.Duration(copts.wal_append_backoff_ns), 139 | SSTableBTreeOrder: int(copts.sstable_btree_order), 140 | STDOutLogging: copts.stdout_logging != 0, 141 | } 142 | } 143 | 144 | //export wildcat_open 145 | func wildcat_open(opts *C.wildcat_opts_t) C.ulong { 146 | goOpts := fromCOptions(opts) 147 | db, err := wildcat.Open(goOpts) 148 | if err != nil { 149 | cMsg := C.CString(fmt.Sprintf("wildcat_open failed: %v", err)) 150 | C.print_error(cMsg) 151 | C.free(unsafe.Pointer(cMsg)) 152 | return 0 153 | } 154 | return C.ulong(registerDB(db)) 155 | } 156 | 157 | //export wildcat_close 158 | func wildcat_close(handle C.ulong) { 159 | if handle == 0 { 160 | return 161 | } 162 | db := getDB(uint64(handle)) 163 | if db != nil { 164 | _ = db.Close() 165 | removeDB(uint64(handle)) 166 | } 167 | } 168 | 169 | //export wildcat_begin_txn 170 | func wildcat_begin_txn(handle C.ulong) C.long { 171 | db := getDB(uint64(handle)) 172 | if db == nil { 173 | return -1 174 | } 175 | txn := db.Begin() 176 | return C.long(txn.Id) 177 | } 178 | 179 | //export wildcat_txn_put 180 | func wildcat_txn_put(handle C.ulong, txnId C.long, key *C.char, val *C.char) C.int { 181 | db := getDB(uint64(handle)) 182 | if db == nil { 183 | return -1 184 | } 185 | 186 | txn, err := db.GetTxn(int64(txnId)) 187 | if err != nil { 188 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_put: transaction not found: %v", err)) 189 | C.print_error(cMsg) 190 | C.free(unsafe.Pointer(cMsg)) 191 | return -1 192 | } 193 | 194 | err = txn.Put(C.GoBytes(unsafe.Pointer(key), C.int(C.strlen(key))), 195 | C.GoBytes(unsafe.Pointer(val), C.int(C.strlen(val)))) 196 | if err != nil { 197 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_put failed: %v", err)) 198 | C.print_error(cMsg) 199 | C.free(unsafe.Pointer(cMsg)) 200 | return -1 201 | } 202 | return 0 203 | } 204 | 205 | //export wildcat_txn_get 206 | func wildcat_txn_get(handle C.ulong, txnId C.long, key *C.char) *C.char { 207 | db := getDB(uint64(handle)) 208 | if db == nil { 209 | return nil 210 | } 211 | 212 | txn, err := db.GetTxn(int64(txnId)) 213 | if err != nil { 214 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_get: transaction not found: %v", err)) 215 | C.print_error(cMsg) 216 | C.free(unsafe.Pointer(cMsg)) 217 | return nil 218 | } 219 | 220 | val, err := txn.Get(C.GoBytes(unsafe.Pointer(key), C.int(C.strlen(key)))) 221 | if err != nil { 222 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_get failed: %v", err)) 223 | C.print_error(cMsg) 224 | C.free(unsafe.Pointer(cMsg)) 225 | return nil 226 | } 227 | return C.CString(string(val)) 228 | } 229 | 230 | //export wildcat_txn_delete 231 | func wildcat_txn_delete(handle C.ulong, txnId C.long, key *C.char) C.int { 232 | db := getDB(uint64(handle)) 233 | if db == nil { 234 | return -1 235 | } 236 | 237 | txn, err := db.GetTxn(int64(txnId)) 238 | if err != nil { 239 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_delete: transaction not found: %v", err)) 240 | C.print_error(cMsg) 241 | C.free(unsafe.Pointer(cMsg)) 242 | return -1 243 | } 244 | 245 | err = txn.Delete(C.GoBytes(unsafe.Pointer(key), C.int(C.strlen(key)))) 246 | if err != nil { 247 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_delete failed: %v", err)) 248 | C.print_error(cMsg) 249 | C.free(unsafe.Pointer(cMsg)) 250 | return -1 251 | } 252 | return 0 253 | } 254 | 255 | //export wildcat_txn_commit 256 | func wildcat_txn_commit(handle C.ulong, txnId C.long) C.int { 257 | db := getDB(uint64(handle)) 258 | if db == nil { 259 | return -1 260 | } 261 | 262 | txn, err := db.GetTxn(int64(txnId)) 263 | if err != nil { 264 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_commit: transaction not found: %v", err)) 265 | C.print_error(cMsg) 266 | C.free(unsafe.Pointer(cMsg)) 267 | return -1 268 | } 269 | 270 | return boolToInt(txn.Commit() == nil) 271 | } 272 | 273 | //export wildcat_txn_rollback 274 | func wildcat_txn_rollback(handle C.ulong, txnId C.long) C.int { 275 | db := getDB(uint64(handle)) 276 | if db == nil { 277 | return -1 278 | } 279 | 280 | txn, err := db.GetTxn(int64(txnId)) 281 | if err != nil { 282 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_rollback: transaction not found: %v", err)) 283 | C.print_error(cMsg) 284 | C.free(unsafe.Pointer(cMsg)) 285 | return -1 286 | } 287 | 288 | return boolToInt(txn.Rollback() == nil) 289 | } 290 | 291 | //export wildcat_txn_free 292 | func wildcat_txn_free(handle C.ulong, txnId C.long) { 293 | // The database handles transaction cleanup internally when 294 | // commit/rollback is called, so this is essentially a no-op 295 | // but we keep it for API compatibility.. 296 | } 297 | 298 | //export wildcat_txn_new_iterator 299 | func wildcat_txn_new_iterator(handle C.ulong, txnId C.long, asc C.int) C.ulong { 300 | db := getDB(uint64(handle)) 301 | if db == nil { 302 | return 0 303 | } 304 | 305 | txn, err := db.GetTxn(int64(txnId)) 306 | if err != nil { 307 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_new_iterator: transaction not found: %v", err)) 308 | C.print_error(cMsg) 309 | C.free(unsafe.Pointer(cMsg)) 310 | return 0 311 | } 312 | 313 | iter, err := txn.NewIterator(asc != 0) 314 | if err != nil { 315 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_new_iterator failed: %v", err)) 316 | C.print_error(cMsg) 317 | C.free(unsafe.Pointer(cMsg)) 318 | return 0 319 | } 320 | return C.ulong(registerIterator(iter)) 321 | } 322 | 323 | //export wildcat_txn_new_range_iterator 324 | func wildcat_txn_new_range_iterator(handle C.ulong, txnId C.long, start, end *C.char, asc C.int) C.ulong { 325 | db := getDB(uint64(handle)) 326 | if db == nil { 327 | return 0 328 | } 329 | 330 | txn, err := db.GetTxn(int64(txnId)) 331 | if err != nil { 332 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_new_range_iterator: transaction not found: %v", err)) 333 | C.print_error(cMsg) 334 | C.free(unsafe.Pointer(cMsg)) 335 | return 0 336 | } 337 | 338 | iter, err := txn.NewRangeIterator( 339 | C.GoBytes(unsafe.Pointer(start), C.int(C.strlen(start))), 340 | C.GoBytes(unsafe.Pointer(end), C.int(C.strlen(end))), 341 | asc != 0, 342 | ) 343 | if err != nil { 344 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_new_range_iterator failed: %v", err)) 345 | C.print_error(cMsg) 346 | C.free(unsafe.Pointer(cMsg)) 347 | return 0 348 | } 349 | return C.ulong(registerIterator(iter)) 350 | } 351 | 352 | //export wildcat_txn_new_prefix_iterator 353 | func wildcat_txn_new_prefix_iterator(handle C.ulong, txnId C.long, prefix *C.char, asc C.int) C.ulong { 354 | db := getDB(uint64(handle)) 355 | if db == nil { 356 | return 0 357 | } 358 | 359 | txn, err := db.GetTxn(int64(txnId)) 360 | if err != nil { 361 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_new_prefix_iterator: transaction not found: %v", err)) 362 | C.print_error(cMsg) 363 | C.free(unsafe.Pointer(cMsg)) 364 | return 0 365 | } 366 | 367 | iter, err := txn.NewPrefixIterator( 368 | C.GoBytes(unsafe.Pointer(prefix), C.int(C.strlen(prefix))), 369 | asc != 0, 370 | ) 371 | if err != nil { 372 | cMsg := C.CString(fmt.Sprintf("wildcat_txn_new_prefix_iterator failed: %v", err)) 373 | C.print_error(cMsg) 374 | C.free(unsafe.Pointer(cMsg)) 375 | return 0 376 | } 377 | return C.ulong(registerIterator(iter)) 378 | } 379 | 380 | //export wildcat_stats 381 | func wildcat_stats(handle C.ulong) *C.char { 382 | db := getDB(uint64(handle)) 383 | if db == nil { 384 | return nil 385 | } 386 | stats := db.Stats() 387 | return C.CString(stats) 388 | } 389 | 390 | //export wildcat_force_flush 391 | func wildcat_force_flush(handle C.ulong) C.int { 392 | db := getDB(uint64(handle)) 393 | if db == nil { 394 | return -1 395 | } 396 | err := db.ForceFlush() 397 | return boolToInt(err == nil) 398 | } 399 | 400 | //export wildcat_txn_iterate_next 401 | func wildcat_txn_iterate_next(id C.ulong) C.int { 402 | h, ok := iterMap.Load(uint64(id)) 403 | if !ok { 404 | return -1 405 | } 406 | ih := h.(*iteratorHandle) 407 | ih.key, ih.value, _, ih.valid = ih.iter.Next() 408 | return boolToInt(ih.valid) 409 | } 410 | 411 | //export wildcat_txn_iterate_prev 412 | func wildcat_txn_iterate_prev(id C.ulong) C.int { 413 | h, ok := iterMap.Load(uint64(id)) 414 | if !ok { 415 | return -1 416 | } 417 | ih := h.(*iteratorHandle) 418 | ih.key, ih.value, _, ih.valid = ih.iter.Prev() 419 | return boolToInt(ih.valid) 420 | } 421 | 422 | //export wildcat_txn_iter_valid 423 | func wildcat_txn_iter_valid(id C.ulong) C.int { 424 | h, ok := iterMap.Load(uint64(id)) 425 | if !ok { 426 | return 0 427 | } 428 | return boolToInt(h.(*iteratorHandle).valid) 429 | } 430 | 431 | //export wildcat_iterator_key 432 | func wildcat_iterator_key(id C.ulong) *C.char { 433 | h, ok := iterMap.Load(uint64(id)) 434 | if !ok { 435 | return nil 436 | } 437 | return C.CString(string(h.(*iteratorHandle).key)) 438 | } 439 | 440 | //export wildcat_iterator_value 441 | func wildcat_iterator_value(id C.ulong) *C.char { 442 | h, ok := iterMap.Load(uint64(id)) 443 | if !ok { 444 | return nil 445 | } 446 | return C.CString(string(h.(*iteratorHandle).value)) 447 | } 448 | 449 | //export wildcat_iterator_free 450 | func wildcat_iterator_free(id C.ulong) { 451 | iterMap.Delete(uint64(id)) 452 | } 453 | 454 | //export wildcat_sync 455 | func wildcat_sync(handle C.ulong) C.int { 456 | db := getDB(uint64(handle)) 457 | if db == nil { 458 | return -1 459 | } 460 | err := db.Sync() 461 | if err != nil { 462 | cMsg := C.CString(fmt.Sprintf("wildcat_sync failed: %v", err)) 463 | C.print_error(cMsg) 464 | C.free(unsafe.Pointer(cMsg)) 465 | return -1 466 | } 467 | return 0 468 | } 469 | 470 | func boolToInt(ok bool) C.int { 471 | if ok { 472 | return 0 473 | } 474 | return -1 475 | } 476 | 477 | func registerIterator(mi *wildcat.MergeIterator) uint64 { 478 | handle := &iteratorHandle{iter: mi} 479 | handle.key, handle.value, _, handle.valid = mi.Next() 480 | 481 | iterMu.Lock() 482 | defer iterMu.Unlock() 483 | iterCounter++ 484 | iterMap.Store(iterCounter, handle) 485 | return iterCounter 486 | } 487 | 488 | func main() { 489 | // This is just a placeholder to ensure the package can compile. 490 | // The actual functionality is exposed via the exported C functions. 491 | } 492 | -------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | // Package wildcat 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package wildcat 17 | 18 | import ( 19 | "fmt" 20 | "log" 21 | "math/rand" 22 | "os" 23 | "sync" 24 | "testing" 25 | "time" 26 | ) 27 | 28 | // Tests opening a brand new instance. Will setup an initial WAL and memory table and disk levels. 29 | func TestOpen(t *testing.T) { 30 | defer func() { 31 | _ = os.RemoveAll("testdb") 32 | 33 | }() 34 | 35 | // Create a log channel 36 | logChannel := make(chan string, 100) // Buffer size of 100 messages 37 | 38 | opts := &Options{ 39 | Directory: "testdb", 40 | LogChannel: logChannel, 41 | } 42 | 43 | wg := &sync.WaitGroup{} 44 | 45 | wg.Add(1) 46 | 47 | // Start a goroutine to listen to the log channel 48 | go func() { 49 | defer wg.Done() 50 | for msg := range logChannel { 51 | t.Logf("Log message: %s", msg) 52 | } 53 | }() 54 | 55 | // Open or create the database 56 | db, err := Open(opts) 57 | if err != nil { 58 | log.Fatalf("Failed to open database: %v", err) 59 | } 60 | 61 | // Verify all l1 to l6 directories exist 62 | for i := 1; i <= 6; i++ { 63 | dir := fmt.Sprintf("%s/l%d", opts.Directory, i) 64 | if _, err := os.Stat(dir); os.IsNotExist(err) { 65 | t.Errorf("Directory %s does not exist", dir) 66 | } 67 | } 68 | 69 | _ = db.Close() 70 | 71 | wg.Wait() 72 | } 73 | 74 | // **These are more internal benchmarks than actual database benchmarks. They are included for completeness and work on optimizations** 75 | 76 | func BenchmarkSinglePut(b *testing.B) { 77 | defer func() { 78 | _ = os.RemoveAll("benchdb_single_put") 79 | }() 80 | 81 | opts := &Options{ 82 | Directory: "benchdb_single_put", 83 | SyncOption: SyncNone, // Fastest for benchmarking 84 | } 85 | 86 | db, err := Open(opts) 87 | if err != nil { 88 | b.Fatalf("Failed to open database: %v", err) 89 | } 90 | defer func(db *DB) { 91 | _ = db.Close() 92 | }(db) 93 | 94 | key := []byte("benchmark_key") 95 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 96 | 97 | b.ResetTimer() 98 | for i := 0; i < b.N; i++ { 99 | err := db.Update(func(txn *Txn) error { 100 | return txn.Put(key, value) 101 | }) 102 | if err != nil { 103 | b.Fatalf("Put failed: %v", err) 104 | } 105 | } 106 | } 107 | 108 | func BenchmarkBatchPut(b *testing.B) { 109 | defer func() { 110 | _ = os.RemoveAll("benchdb_batch_put") 111 | }() 112 | 113 | opts := &Options{ 114 | Directory: "benchdb_batch_put", 115 | SyncOption: SyncNone, 116 | } 117 | 118 | db, err := Open(opts) 119 | if err != nil { 120 | b.Fatalf("Failed to open database: %v", err) 121 | } 122 | defer func(db *DB) { 123 | _ = db.Close() 124 | }(db) 125 | 126 | batchSize := 100 127 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 128 | 129 | b.ResetTimer() 130 | for i := 0; i < b.N; i++ { 131 | err := db.Update(func(txn *Txn) error { 132 | for j := 0; j < batchSize; j++ { 133 | key := []byte(fmt.Sprintf("key_%d_%d", i, j)) 134 | if err := txn.Put(key, value); err != nil { 135 | return err 136 | } 137 | } 138 | return nil 139 | }) 140 | if err != nil { 141 | b.Fatalf("Batch put failed: %v", err) 142 | } 143 | } 144 | } 145 | 146 | func BenchmarkRandomWrites(b *testing.B) { 147 | defer func() { 148 | _ = os.RemoveAll("benchdb_random_writes") 149 | }() 150 | 151 | opts := &Options{ 152 | Directory: "benchdb_random_writes", 153 | SyncOption: SyncNone, 154 | } 155 | 156 | db, err := Open(opts) 157 | if err != nil { 158 | b.Fatalf("Failed to open database: %v", err) 159 | } 160 | defer func(db *DB) { 161 | _ = db.Close() 162 | }(db) 163 | 164 | rand.Seed(time.Now().UnixNano()) 165 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 166 | 167 | b.ResetTimer() 168 | for i := 0; i < b.N; i++ { 169 | keyNum := rand.Intn(10000) // Random key from 0-9999 170 | key := []byte(fmt.Sprintf("random_key_%d", keyNum)) 171 | 172 | err := db.Update(func(txn *Txn) error { 173 | return txn.Put(key, value) 174 | }) 175 | if err != nil { 176 | b.Fatalf("Random write failed: %v", err) 177 | } 178 | } 179 | } 180 | 181 | func BenchmarkRandomReads(b *testing.B) { 182 | defer func() { 183 | _ = os.RemoveAll("benchdb_random_reads") 184 | }() 185 | 186 | opts := &Options{ 187 | Directory: "benchdb_random_reads", 188 | SyncOption: SyncNone, 189 | } 190 | 191 | db, err := Open(opts) 192 | if err != nil { 193 | b.Fatalf("Failed to open database: %v", err) 194 | } 195 | defer func(db *DB) { 196 | _ = db.Close() 197 | }(db) 198 | 199 | // Pre-populate with 10k keys 200 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 201 | for i := 0; i < 10000; i++ { 202 | key := []byte(fmt.Sprintf("random_key_%d", i)) 203 | err := db.Update(func(txn *Txn) error { 204 | return txn.Put(key, value) 205 | }) 206 | if err != nil { 207 | b.Fatalf("Failed to populate: %v", err) 208 | } 209 | // Flush every 5000 keys 210 | if i%5000 == 0 { 211 | _ = db.ForceFlush() 212 | 213 | } 214 | } 215 | 216 | rand.Seed(time.Now().UnixNano()) 217 | 218 | b.ResetTimer() 219 | for i := 0; i < b.N; i++ { 220 | keyNum := rand.Intn(10000) 221 | key := []byte(fmt.Sprintf("random_key_%d", keyNum)) 222 | 223 | err := db.View(func(txn *Txn) error { 224 | _, err := txn.Get(key) 225 | return err 226 | }) 227 | if err != nil { 228 | b.Fatalf("Random read failed: %v", err) 229 | } 230 | } 231 | } 232 | 233 | func BenchmarkRandomReads_Bloom(b *testing.B) { 234 | defer func() { 235 | _ = os.RemoveAll("benchdb_random_reads") 236 | }() 237 | 238 | opts := &Options{ 239 | Directory: "benchdb_random_reads", 240 | SyncOption: SyncNone, 241 | BloomFilter: true, 242 | } 243 | 244 | db, err := Open(opts) 245 | if err != nil { 246 | b.Fatalf("Failed to open database: %v", err) 247 | } 248 | defer func(db *DB) { 249 | _ = db.Close() 250 | }(db) 251 | 252 | // Pre-populate with 10k keys 253 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 254 | for i := 0; i < 10000; i++ { 255 | key := []byte(fmt.Sprintf("random_key_%d", i)) 256 | err := db.Update(func(txn *Txn) error { 257 | return txn.Put(key, value) 258 | }) 259 | if err != nil { 260 | b.Fatalf("Failed to populate: %v", err) 261 | } 262 | 263 | // Flush every 5000 keys 264 | if i%5000 == 0 { 265 | _ = db.ForceFlush() 266 | 267 | } 268 | } 269 | 270 | rand.Seed(time.Now().UnixNano()) 271 | 272 | b.ResetTimer() 273 | for i := 0; i < b.N; i++ { 274 | keyNum := rand.Intn(10000) 275 | key := []byte(fmt.Sprintf("random_key_%d", keyNum)) 276 | 277 | err := db.View(func(txn *Txn) error { 278 | _, err := txn.Get(key) 279 | return err 280 | }) 281 | if err != nil { 282 | b.Fatalf("Random read failed: %v", err) 283 | } 284 | } 285 | } 286 | 287 | func BenchmarkConcurrentWrites(b *testing.B) { 288 | defer func() { 289 | _ = os.RemoveAll("benchdb_concurrent_writes") 290 | }() 291 | 292 | opts := &Options{ 293 | Directory: "benchdb_concurrent_writes", 294 | SyncOption: SyncNone, 295 | } 296 | 297 | db, err := Open(opts) 298 | if err != nil { 299 | b.Fatalf("Failed to open database: %v", err) 300 | } 301 | defer func(db *DB) { 302 | _ = db.Close() 303 | }(db) 304 | 305 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 306 | goroutines := 10 307 | 308 | b.ResetTimer() 309 | 310 | var wg sync.WaitGroup 311 | start := make(chan struct{}) 312 | 313 | for g := 0; g < goroutines; g++ { 314 | wg.Add(1) 315 | go func(goroutineID int) { 316 | defer wg.Done() 317 | <-start // Wait for signal to start 318 | 319 | opsPerGoroutine := b.N / goroutines 320 | for i := 0; i < opsPerGoroutine; i++ { 321 | key := []byte(fmt.Sprintf("concurrent_key_%d_%d", goroutineID, i)) 322 | err := db.Update(func(txn *Txn) error { 323 | return txn.Put(key, value) 324 | }) 325 | if err != nil { 326 | b.Errorf("Concurrent write failed: %v", err) 327 | return 328 | } 329 | } 330 | }(g) 331 | } 332 | 333 | close(start) // Signal all goroutines to start 334 | wg.Wait() 335 | } 336 | 337 | func BenchmarkConcurrentReads(b *testing.B) { 338 | defer func() { 339 | _ = os.RemoveAll("benchdb_concurrent_reads") 340 | }() 341 | 342 | opts := &Options{ 343 | Directory: "benchdb_concurrent_reads", 344 | SyncOption: SyncNone, 345 | } 346 | 347 | db, err := Open(opts) 348 | if err != nil { 349 | b.Fatalf("Failed to open database: %v", err) 350 | } 351 | defer func(db *DB) { 352 | _ = db.Close() 353 | }(db) 354 | 355 | // Pre-populate with data 356 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 357 | for i := 0; i < 10000; i++ { 358 | key := []byte(fmt.Sprintf("read_key_%d", i)) 359 | err := db.Update(func(txn *Txn) error { 360 | return txn.Put(key, value) 361 | }) 362 | if err != nil { 363 | b.Fatalf("Failed to populate: %v", err) 364 | } 365 | 366 | // Flush every 5000 keys 367 | if i%5000 == 0 { 368 | _ = db.ForceFlush() 369 | 370 | } 371 | } 372 | 373 | goroutines := 10 374 | rand.Seed(time.Now().UnixNano()) 375 | 376 | b.ResetTimer() 377 | 378 | var wg sync.WaitGroup 379 | start := make(chan struct{}) 380 | 381 | for g := 0; g < goroutines; g++ { 382 | wg.Add(1) 383 | go func() { 384 | defer wg.Done() 385 | <-start // Wait for signal to start 386 | 387 | opsPerGoroutine := b.N / goroutines 388 | for i := 0; i < opsPerGoroutine; i++ { 389 | keyNum := rand.Intn(1000) 390 | key := []byte(fmt.Sprintf("read_key_%d", keyNum)) 391 | err := db.View(func(txn *Txn) error { 392 | _, err := txn.Get(key) 393 | return err 394 | }) 395 | if err != nil { 396 | b.Errorf("Concurrent read failed: %v", err) 397 | return 398 | } 399 | } 400 | }() 401 | } 402 | 403 | close(start) // Signal all goroutines to start 404 | wg.Wait() 405 | } 406 | 407 | func BenchmarkConcurrentReads_Bloom(b *testing.B) { 408 | defer func() { 409 | _ = os.RemoveAll("benchdb_concurrent_reads") 410 | }() 411 | 412 | opts := &Options{ 413 | Directory: "benchdb_concurrent_reads", 414 | SyncOption: SyncNone, 415 | BloomFilter: true, 416 | } 417 | 418 | db, err := Open(opts) 419 | if err != nil { 420 | b.Fatalf("Failed to open database: %v", err) 421 | } 422 | defer func(db *DB) { 423 | _ = db.Close() 424 | }(db) 425 | 426 | // Pre-populate with data 427 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 428 | for i := 0; i < 10000; i++ { 429 | key := []byte(fmt.Sprintf("read_key_%d", i)) 430 | err := db.Update(func(txn *Txn) error { 431 | return txn.Put(key, value) 432 | }) 433 | if err != nil { 434 | b.Fatalf("Failed to populate: %v", err) 435 | } 436 | 437 | // Flush every 5000 keys 438 | if i%5000 == 0 { 439 | _ = db.ForceFlush() 440 | 441 | } 442 | } 443 | 444 | goroutines := 10 445 | rand.Seed(time.Now().UnixNano()) 446 | 447 | b.ResetTimer() 448 | 449 | var wg sync.WaitGroup 450 | start := make(chan struct{}) 451 | 452 | for g := 0; g < goroutines; g++ { 453 | wg.Add(1) 454 | go func() { 455 | defer wg.Done() 456 | <-start // Wait for signal to start 457 | 458 | opsPerGoroutine := b.N / goroutines 459 | for i := 0; i < opsPerGoroutine; i++ { 460 | keyNum := rand.Intn(1000) 461 | key := []byte(fmt.Sprintf("read_key_%d", keyNum)) 462 | err := db.View(func(txn *Txn) error { 463 | _, err := txn.Get(key) 464 | return err 465 | }) 466 | if err != nil { 467 | b.Errorf("Concurrent read failed: %v", err) 468 | return 469 | } 470 | } 471 | }() 472 | } 473 | 474 | close(start) // Signal all goroutines to start 475 | wg.Wait() 476 | } 477 | 478 | func BenchmarkMixedWorkload(b *testing.B) { 479 | defer func() { 480 | _ = os.RemoveAll("benchdb_mixed_workload") 481 | }() 482 | 483 | opts := &Options{ 484 | Directory: "benchdb_mixed_workload", 485 | SyncOption: SyncNone, 486 | } 487 | 488 | db, err := Open(opts) 489 | if err != nil { 490 | b.Fatalf("Failed to open database: %v", err) 491 | } 492 | defer func(db *DB) { 493 | _ = db.Close() 494 | }(db) 495 | 496 | // Pre-populate with some data 497 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 498 | for i := 0; i < 10000; i++ { 499 | key := []byte(fmt.Sprintf("mixed_key_%d", i)) 500 | err := db.Update(func(txn *Txn) error { 501 | return txn.Put(key, value) 502 | }) 503 | if err != nil { 504 | b.Fatalf("Failed to populate: %v", err) 505 | } 506 | 507 | // Flush every 5000 keys 508 | if i%5000 == 0 { 509 | _ = db.ForceFlush() 510 | } 511 | } 512 | 513 | rand.Seed(time.Now().UnixNano()) 514 | 515 | b.ResetTimer() 516 | for i := 0; i < b.N; i++ { 517 | if rand.Float32() < 0.3 { // 30% writes 518 | key := []byte(fmt.Sprintf("mixed_key_%d", rand.Intn(2000))) 519 | err := db.Update(func(txn *Txn) error { 520 | return txn.Put(key, value) 521 | }) 522 | if err != nil { 523 | b.Fatalf("Mixed workload write failed: %v", err) 524 | } 525 | } else { // 70% reads 526 | key := []byte(fmt.Sprintf("mixed_key_%d", rand.Intn(1000))) 527 | err := db.View(func(txn *Txn) error { 528 | _, err := txn.Get(key) 529 | return err 530 | }) 531 | if err != nil { 532 | b.Fatalf("Mixed workload read failed: %v", err) 533 | } 534 | } 535 | } 536 | } 537 | 538 | func BenchmarkDelete(b *testing.B) { 539 | defer func() { 540 | _ = os.RemoveAll("benchdb_delete") 541 | }() 542 | 543 | opts := &Options{ 544 | Directory: "benchdb_delete", 545 | SyncOption: SyncNone, 546 | } 547 | 548 | db, err := Open(opts) 549 | if err != nil { 550 | b.Fatalf("Failed to open database: %v", err) 551 | } 552 | defer func(db *DB) { 553 | _ = db.Close() 554 | }(db) 555 | 556 | // Pre-populate with keys to delete 557 | value := []byte("benchmark_value_with_some_data_to_make_it_realistic") 558 | for i := 0; i < b.N; i++ { 559 | key := []byte(fmt.Sprintf("delete_key_%d", i)) 560 | err := db.Update(func(txn *Txn) error { 561 | return txn.Put(key, value) 562 | }) 563 | if err != nil { 564 | b.Fatalf("Failed to populate: %v", err) 565 | } 566 | } 567 | 568 | _ = db.ForceFlush() 569 | 570 | b.ResetTimer() 571 | for i := 0; i < b.N; i++ { 572 | key := []byte(fmt.Sprintf("delete_key_%d", i)) 573 | err := db.Update(func(txn *Txn) error { 574 | return txn.Delete(key) 575 | }) 576 | if err != nil { 577 | b.Fatalf("Delete failed: %v", err) 578 | } 579 | } 580 | } 581 | 582 | func BenchmarkLargeValues(b *testing.B) { 583 | defer func() { 584 | _ = os.RemoveAll("benchdb_large_values") 585 | }() 586 | 587 | opts := &Options{ 588 | Directory: "benchdb_large_values", 589 | SyncOption: SyncNone, 590 | } 591 | 592 | db, err := Open(opts) 593 | if err != nil { 594 | b.Fatalf("Failed to open database: %v", err) 595 | } 596 | defer func(db *DB) { 597 | _ = db.Close() 598 | }(db) 599 | 600 | // Create a 10KB value 601 | value := make([]byte, 10*1024) 602 | for i := range value { 603 | value[i] = byte(i % 256) 604 | } 605 | 606 | b.ResetTimer() 607 | for i := 0; i < b.N; i++ { 608 | key := []byte(fmt.Sprintf("large_key_%d", i)) 609 | err := db.Update(func(txn *Txn) error { 610 | return txn.Put(key, value) 611 | }) 612 | if err != nil { 613 | b.Fatalf("Large value put failed: %v", err) 614 | } 615 | } 616 | } 617 | -------------------------------------------------------------------------------- /flusher.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "fmt" 5 | "github.com/wildcatdb/wildcat/blockmanager" 6 | "github.com/wildcatdb/wildcat/queue" 7 | "github.com/wildcatdb/wildcat/skiplist" 8 | "github.com/wildcatdb/wildcat/tree" 9 | "os" 10 | "sync/atomic" 11 | "time" 12 | ) 13 | 14 | // Flusher is responsible for queuing and flushing memtables to disk 15 | type Flusher struct { 16 | db *DB // The db instance 17 | immutable *queue.Queue // Immutable queue for memtables 18 | flushing atomic.Pointer[Memtable] // Atomic pointer to the current flushing memtable 19 | swapping int32 // Atomic flag indicating if the flusher is swapping 20 | } 21 | 22 | // newFlusher creates a new Flusher instance 23 | func newFlusher(db *DB) *Flusher { 24 | return &Flusher{ 25 | db: db, 26 | immutable: queue.New(), 27 | } 28 | } 29 | 30 | // queueMemtable queues the current active memtable for flushing to disk. 31 | func (flusher *Flusher) queueMemtable() error { 32 | 33 | // Check if the flusher is already swapping 34 | if atomic.LoadInt32(&flusher.swapping) == 1 { 35 | return nil // Already swapping, no need to queue again 36 | } 37 | 38 | flusher.db.log("Flusher: queuing current memtable for flushing") 39 | 40 | // Set the swapping flag to indicate that we are in the process of swapping 41 | atomic.StoreInt32(&flusher.swapping, 1) 42 | defer atomic.StoreInt32(&flusher.swapping, 0) 43 | 44 | walId := flusher.db.walIdGenerator.nextID() 45 | // Create a new memtable 46 | newMemtable := &Memtable{ 47 | db: flusher.db, 48 | skiplist: skiplist.New(), 49 | wal: &WAL{ 50 | path: fmt.Sprintf("%s%d%s", flusher.db.opts.Directory, walId, WALFileExtension), 51 | }} 52 | 53 | // Open the new WAL 54 | walBm, err := blockmanager.Open(newMemtable.wal.path, os.O_RDWR|os.O_CREATE, flusher.db.opts.Permission, blockmanager.SyncOption(flusher.db.opts.SyncOption), flusher.db.opts.SyncInterval) 55 | if err != nil { 56 | return fmt.Errorf("failed to open WAL block manager: %w", err) 57 | } 58 | 59 | // Add the new WAL to the LRU cache 60 | flusher.db.lru.Put(newMemtable.wal.path, walBm, func(key, value interface{}) { 61 | // Close the block manager when evicted from LRU 62 | if bm, ok := value.(*blockmanager.BlockManager); ok { 63 | _ = bm.Close() 64 | } 65 | }) 66 | 67 | // Push the current memtable to the immutable queue 68 | flusher.immutable.Enqueue(flusher.db.memtable.Load().(*Memtable)) 69 | 70 | flusher.db.log(fmt.Sprintf("Flusher: new active memtable created with WAL %s", newMemtable.wal.path)) 71 | 72 | // Update the current memtable to the new one 73 | flusher.db.memtable.Store(newMemtable) 74 | 75 | return nil 76 | } 77 | 78 | // backgroundProcess starts the background process for flushing memtables 79 | func (flusher *Flusher) backgroundProcess() { 80 | defer flusher.db.wg.Done() 81 | ticker := time.NewTicker(flusher.db.opts.FlusherTickerInterval) 82 | defer ticker.Stop() 83 | 84 | for { 85 | select { 86 | case <-flusher.db.closeCh: 87 | flusher.db.log("Flusher: shutting down background process") 88 | return 89 | case <-ticker.C: 90 | immutableMemt := flusher.immutable.Dequeue() 91 | if immutableMemt == nil { 92 | continue // No immutable memtable to flush 93 | } 94 | 95 | flusher.db.log(fmt.Sprintf("Flusher: flushing immutable memtable %s", immutableMemt.(*Memtable).wal.path)) 96 | 97 | // Set the flushing memtable 98 | flusher.flushing.Store(immutableMemt.(*Memtable)) 99 | 100 | // Flush the immutable memtable to disk 101 | err := flusher.flushMemtable(immutableMemt.(*Memtable)) 102 | 103 | if err != nil { 104 | continue 105 | } 106 | } 107 | } 108 | } 109 | 110 | // flushMemtable flushes a memtable to disk as an SSTable at level 1 111 | func (flusher *Flusher) flushMemtable(memt *Memtable) error { 112 | maxTimestamp := time.Now().UnixNano() + 10000000000 // Far in the future 113 | entryCount := memt.skiplist.Count(maxTimestamp) 114 | deletionCount := memt.skiplist.DeleteCount(maxTimestamp) 115 | 116 | // We defer clearing db.flusher.flushing 117 | defer func() { 118 | flusher.flushing.Store(nil) 119 | }() 120 | 121 | flusher.db.log(fmt.Sprintf("Flushing memtable with %d entries and %d deletions", entryCount, deletionCount)) 122 | 123 | if entryCount == 0 && deletionCount == 0 { 124 | flusher.db.log("Skipping flush for empty memtable") 125 | return nil // Nothing to flush 126 | } 127 | 128 | // Create a new SSTable 129 | sstable := &SSTable{ 130 | Id: flusher.db.sstIdGenerator.nextID(), 131 | db: flusher.db, 132 | Level: 1, // We always flush to level 1, L0 is active memtable 133 | } 134 | 135 | // Use max timestamp to ensure we get all keys when finding min/max 136 | maxPossibleTs := time.Now().UnixNano() + 10000000000 // Far in the future 137 | 138 | // Min and max keys are for sstable metadata 139 | minKey, _, exists := memt.skiplist.GetMin(maxPossibleTs) 140 | if exists { 141 | sstable.Min = minKey 142 | } 143 | 144 | maxKey, _, exists := memt.skiplist.GetMax(maxPossibleTs) 145 | if exists { 146 | sstable.Max = maxKey 147 | } 148 | 149 | latestTs := memt.skiplist.GetLatestTimestamp() // For compactor awareness 150 | 151 | // Calculate the approx size of the memtable 152 | sstable.Size = atomic.LoadInt64(&memt.size) 153 | 154 | // Use max timestamp to get a count of all entries regardless of version 155 | sstable.EntryCount = memt.skiplist.Count(maxPossibleTs) 156 | sstable.Timestamp = latestTs 157 | 158 | // We create new sstable files (.klog and .vlog) here 159 | 160 | // We have a temp and final path 161 | // We use a temp path in case of system crash 162 | // When we reopen the system we can check if the temp file exists, if so we delete it 163 | // This would be a flush that was not finalized thus an existing WAL exists and possibly corrupt levels 164 | vlogTmpPath := fmt.Sprintf("%s%s1%s%s%d%s%s", flusher.db.opts.Directory, LevelPrefix, string(os.PathSeparator), SSTablePrefix, sstable.Id, VLogExtension, TempFileExtension) 165 | vlogFinalPath := fmt.Sprintf("%s%s1%s%s%d%s", flusher.db.opts.Directory, LevelPrefix, string(os.PathSeparator), SSTablePrefix, sstable.Id, VLogExtension) 166 | 167 | klogTmpPath := fmt.Sprintf("%s%s1%s%s%d%s%s", flusher.db.opts.Directory, LevelPrefix, string(os.PathSeparator), SSTablePrefix, sstable.Id, KLogExtension, TempFileExtension) 168 | klogFinalPath := fmt.Sprintf("%s%s1%s%s%d%s", flusher.db.opts.Directory, LevelPrefix, string(os.PathSeparator), SSTablePrefix, sstable.Id, KLogExtension) 169 | 170 | // Klog stores an immutable btree, vlog stores the values 171 | klogBm, err := blockmanager.Open(klogTmpPath, os.O_RDWR|os.O_CREATE, memt.db.opts.Permission, blockmanager.SyncOption(flusher.db.opts.SyncOption), flusher.db.opts.SyncInterval) 172 | if err != nil { 173 | return fmt.Errorf("failed to open KLog block manager: %w", err) 174 | } 175 | 176 | vlogBm, err := blockmanager.Open(vlogTmpPath, os.O_RDWR|os.O_CREATE, memt.db.opts.Permission, blockmanager.SyncOption(flusher.db.opts.SyncOption), flusher.db.opts.SyncInterval) 177 | if err != nil { 178 | return fmt.Errorf("failed to open VLog block manager: %w", err) 179 | } 180 | 181 | // We create a new bloom filter if enabled and add it to sstable meta 182 | if flusher.db.opts.BloomFilter { 183 | 184 | // Create a bloom filter for the SSTable 185 | sstable.BloomFilter, err = memt.createBloomFilter(int64(entryCount)) 186 | if err != nil { 187 | return fmt.Errorf("failed to create bloom filter: %w", err) 188 | 189 | } 190 | 191 | } 192 | 193 | // Create a BTree for the KLog 194 | t, err := tree.Open(klogBm, flusher.db.opts.SSTableBTreeOrder, sstable) 195 | if err != nil { 196 | return fmt.Errorf("failed to create BTree: %w", err) 197 | } 198 | 199 | // Use the maximum possible timestamp to make sure we get ALL versions 200 | // of keys during iteration, preserving their original transaction timestamps 201 | iter, err := memt.skiplist.NewIterator(nil, maxPossibleTs) 202 | if err != nil { 203 | return fmt.Errorf("failed to create iterator for flush: %w", err) 204 | } 205 | 206 | flusher.db.log(fmt.Sprintf("Starting to flush memtable to SSTable %d", sstable.Id)) 207 | 208 | for { 209 | key, value, ts, ok := iter.Next() 210 | if !ok { 211 | break // No more entries 212 | } 213 | 214 | // Check if this is a deletion marker 215 | if value == nil { 216 | // Write a deletion marker to the SSTable 217 | klogEntry := &KLogEntry{ 218 | Key: key, 219 | Timestamp: ts, 220 | ValueBlockID: -1, // Special marker for deletion 221 | } 222 | 223 | err = t.Put(key, klogEntry) // Insert deletion marker into B-tree 224 | if err != nil { 225 | return fmt.Errorf("failed to insert deletion marker into B-tree: %w", err) 226 | } 227 | } else { 228 | // Viewable value, write it to the VLog 229 | id, err := vlogBm.Append(value[:]) 230 | if err != nil { 231 | return fmt.Errorf("failed to write VLog: %w", err) 232 | } 233 | 234 | klogEntry := &KLogEntry{ 235 | Key: key, 236 | Timestamp: ts, 237 | ValueBlockID: id, 238 | } 239 | 240 | // Insert the KLog entry into the B-tree 241 | err = t.Put(key, klogEntry) 242 | if err != nil { 243 | return fmt.Errorf("failed to insert KLog entry into B-tree: %w", err) 244 | } 245 | } 246 | 247 | } 248 | 249 | flusher.db.log(fmt.Sprintf("Finished flushing memtable to SSTable %d", sstable.Id)) 250 | 251 | // Now we close the klog and vlog temp files and rename them 252 | // This means the files are finalized 253 | _ = klogBm.Close() 254 | err = os.Rename(klogTmpPath, klogFinalPath) 255 | if err != nil { 256 | return fmt.Errorf("failed to rename KLog file: %w", err) 257 | } 258 | 259 | _ = vlogBm.Close() 260 | err = os.Rename(vlogTmpPath, vlogFinalPath) 261 | if err != nil { 262 | return fmt.Errorf("failed to rename VLog file: %w", err) 263 | } 264 | 265 | // Delete original memtable wal 266 | _ = os.Remove(memt.wal.path) 267 | 268 | flusher.db.log(fmt.Sprintf("SSTable %d flushed successfully, and finalized KLog: %s, VLog: %s", sstable.Id, klogFinalPath, vlogFinalPath)) 269 | 270 | // Reopen the KLog and VLog block managers with final paths 271 | klogBm, err = blockmanager.Open(klogFinalPath, os.O_RDONLY, flusher.db.opts.Permission, blockmanager.SyncOption(flusher.db.opts.SyncOption)) 272 | if err != nil { 273 | return fmt.Errorf("failed to open KLog block manager: %w", err) 274 | } 275 | 276 | vlogBm, err = blockmanager.Open(vlogFinalPath, os.O_RDONLY, flusher.db.opts.Permission, blockmanager.SyncOption(flusher.db.opts.SyncOption)) 277 | if err != nil { 278 | return fmt.Errorf("failed to open VLog block manager: %w", err) 279 | } 280 | 281 | // Add both KLog and VLog to the LRU cache 282 | flusher.db.lru.Put(klogFinalPath, klogBm, func(key, value interface{}) { 283 | // Close the block manager when evicted from LRU 284 | if bm, ok := value.(*blockmanager.BlockManager); ok { 285 | _ = bm.Close() 286 | } 287 | }) 288 | flusher.db.lru.Put(vlogFinalPath, vlogBm, func(key, value interface{}) { 289 | // Close the block manager when evicted from LRU 290 | if bm, ok := value.(*blockmanager.BlockManager); ok { 291 | _ = bm.Close() 292 | } 293 | }) 294 | 295 | // Add the SSTable to level 1 296 | levels := flusher.db.levels.Load() 297 | if levels == nil { 298 | return fmt.Errorf("levels not initialized") 299 | } 300 | 301 | level1 := (*levels)[0] 302 | sstables := level1.sstables.Load() 303 | 304 | var sstablesList []*SSTable 305 | 306 | if sstables != nil { 307 | sstablesList = *sstables 308 | } else { 309 | sstablesList = make([]*SSTable, 0) 310 | } 311 | 312 | sstablesList = append(sstablesList, sstable) 313 | 314 | level1.sstables.Store(&sstablesList) 315 | 316 | // Update the current size of the level 317 | atomic.AddInt64(&level1.currentSize, sstable.Size) 318 | 319 | flusher.db.log(fmt.Sprintf("SSTable %d added to level 1, min: %s, max: %s, entries: %d", 320 | sstable.Id, string(sstable.Min), string(sstable.Max), entryCount)) 321 | 322 | return nil 323 | } 324 | 325 | // enqueueMemtable enqueues an immutable memtable for flushing 326 | func (flusher *Flusher) enqueueMemtable(memt *Memtable) { 327 | 328 | // Add the immutable memtable to the queue 329 | flusher.immutable.Enqueue(memt) 330 | 331 | } 332 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/wildcatdb/wildcat 2 | 3 | go 1.24 4 | 5 | require go.mongodb.org/mongo-driver v1.17.3 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 4 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 5 | go.mongodb.org/mongo-driver v1.17.3 h1:TQyXhnsWfWtgAhMtOgtYHMTkZIfBTpMTsMnd9ZBeHxQ= 6 | go.mongodb.org/mongo-driver v1.17.3/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= 7 | -------------------------------------------------------------------------------- /id_generator.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "math" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | // The IDGenerator is a thread-safe utility for generating unique, monotonic IDs. 10 | 11 | // IDGenerator is a thread-safe ID generator 12 | type IDGenerator struct { 13 | lastID int64 14 | } 15 | 16 | // newIDGenerator creates a new ID generator 17 | func newIDGenerator() *IDGenerator { 18 | return &IDGenerator{ 19 | lastID: 0, 20 | } 21 | } 22 | 23 | // newIDGeneratorWithTimestamp creates a new ID generator starting from current nanosecond 24 | func newIDGeneratorWithTimestamp() *IDGenerator { 25 | return &IDGenerator{ 26 | lastID: time.Now().UnixNano(), 27 | } 28 | } 29 | 30 | // reloadIDGenerator creates a new ID generator with a specified last ID 31 | func reloadIDGenerator(lastId int64) *IDGenerator { 32 | return &IDGenerator{ 33 | lastID: lastId, 34 | } 35 | } 36 | 37 | // nextID generates the next unique ID, resetting to 1 if int64 max is reached 38 | func (g *IDGenerator) nextID() int64 { 39 | for { 40 | last := atomic.LoadInt64(&g.lastID) 41 | var next int64 42 | 43 | // Check if we're at max int64 44 | if last == math.MaxInt64 { 45 | next = 1 // Reset to 1 46 | } else { 47 | next = last + 1 48 | } 49 | 50 | if atomic.CompareAndSwapInt64(&g.lastID, last, next) { 51 | return next 52 | } 53 | } 54 | } 55 | 56 | // Save returns the last ID to be persisted 57 | func (g *IDGenerator) save() int64 { 58 | return atomic.LoadInt64(&g.lastID) 59 | } 60 | -------------------------------------------------------------------------------- /id_generator_test.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | ) 7 | 8 | func TestNewIDGenerator(t *testing.T) { 9 | g := newIDGenerator() 10 | if g == nil { 11 | t.Fatal("NewIDGenerator returned nil") 12 | } 13 | if g.lastID != 0 { 14 | t.Fatal("lastID was not initialized") 15 | } 16 | } 17 | 18 | func TestNextID_Unique(t *testing.T) { 19 | g := newIDGenerator() 20 | id1 := g.nextID() 21 | id2 := g.nextID() 22 | 23 | if id1 == id2 { 24 | t.Fatal("NextID did not generate unique IDs") 25 | } 26 | } 27 | 28 | func TestNextID_Monotonic(t *testing.T) { 29 | g := newIDGenerator() 30 | id1 := g.nextID() 31 | id2 := g.nextID() 32 | 33 | if id2 <= id1 { 34 | t.Fatalf("NextID did not ensure monotonicity: id1=%d, id2=%d", id1, id2) 35 | } 36 | } 37 | 38 | func TestNextID_ThreadSafety(t *testing.T) { 39 | g := newIDGenerator() 40 | const numGoroutines = 100 41 | const idsPerGoroutine = 100 42 | 43 | var wg sync.WaitGroup 44 | ids := make(chan int64, numGoroutines*idsPerGoroutine) 45 | 46 | for i := 0; i < numGoroutines; i++ { 47 | wg.Add(1) 48 | go func() { 49 | defer wg.Done() 50 | for j := 0; j < idsPerGoroutine; j++ { 51 | ids <- g.nextID() 52 | } 53 | }() 54 | } 55 | 56 | wg.Wait() 57 | close(ids) 58 | 59 | // Check for uniqueness 60 | idSet := make(map[int64]struct{}) 61 | for id := range ids { 62 | if _, exists := idSet[id]; exists { 63 | t.Fatalf("Duplicate ID detected: %d", id) 64 | } 65 | idSet[id] = struct{}{} 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /level.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "fmt" 5 | "github.com/wildcatdb/wildcat/blockmanager" 6 | "github.com/wildcatdb/wildcat/tree" 7 | "go.mongodb.org/mongo-driver/bson/primitive" 8 | "os" 9 | "sort" 10 | "strconv" 11 | "strings" 12 | "sync/atomic" 13 | ) 14 | 15 | // Level is a disk level within Wildcat, which contains a list of immutable SSTables 16 | type Level struct { 17 | id int // The level ID 18 | path string // The path to the level directory 19 | sstables atomic.Pointer[[]*SSTable] // Atomic pointer to the list of SSTables 20 | capacity int // The capacity of the level 21 | currentSize int64 // Atomic size of the level 22 | db *DB // Reference to the database 23 | } 24 | 25 | // reopen opens an existing level directories sstables 26 | // sstables are loaded and sorted by id 27 | func (l *Level) reopen() error { 28 | l.db.log(fmt.Sprintf("Reopening level %d at path %s", l.id, l.path)) 29 | 30 | // Read the level directory 31 | files, err := os.ReadDir(l.path) 32 | if err != nil { 33 | return fmt.Errorf("failed to read level directory: %w", err) 34 | } 35 | 36 | // Find KLog files to identify SSTables 37 | var sstables []*SSTable 38 | 39 | for _, file := range files { 40 | if file.IsDir() || !strings.HasSuffix(file.Name(), KLogExtension) { 41 | continue 42 | } 43 | 44 | // If we find a file with TempFileExtension we remove it 45 | if strings.HasSuffix(file.Name(), TempFileExtension) { 46 | tempFilePath := fmt.Sprintf("%s%s", l.path, file.Name()) 47 | if err := os.Remove(tempFilePath); err != nil { 48 | l.db.log(fmt.Sprintf("Warning: Failed to remove temporary file %s: %v", tempFilePath, err)) 49 | } 50 | continue 51 | } 52 | 53 | // Extract SSTable ID from the filename 54 | if !strings.HasPrefix(file.Name(), SSTablePrefix) { 55 | continue 56 | } 57 | 58 | idStr := strings.TrimPrefix(file.Name(), SSTablePrefix) 59 | idStr = strings.TrimSuffix(idStr, KLogExtension) 60 | id, err := strconv.ParseInt(idStr, 10, 64) 61 | if err != nil { 62 | return fmt.Errorf("failed to parse SSTable ID from filename %s: %w", file.Name(), err) 63 | } 64 | 65 | levelPath := l.path 66 | if !strings.HasSuffix(levelPath, string(os.PathSeparator)) { 67 | levelPath += string(os.PathSeparator) 68 | } 69 | 70 | // Get corresponding VLog file path 71 | vlogPath := fmt.Sprintf("%s%s%d%s", levelPath, SSTablePrefix, id, VLogExtension) 72 | 73 | // Check if VLog file exists 74 | if _, err := os.Stat(vlogPath); os.IsNotExist(err) { 75 | l.db.log(fmt.Sprintf("Warning: VLog file not found for SSTable %d: %v - skipping", id, err)) 76 | continue 77 | } 78 | 79 | // Create SSTable structure with basic info 80 | sstable := &SSTable{ 81 | Id: id, 82 | Level: l.id, 83 | db: l.db, 84 | } 85 | 86 | l.db.log(fmt.Sprintf("Found SSTable %d: KLog=%s, VLog=%s", id, file.Name(), vlogPath)) 87 | 88 | // Get file paths 89 | klogPath := fmt.Sprintf("%s%s%d%s", levelPath, SSTablePrefix, id, KLogExtension) 90 | 91 | // Get file sizes to calculate SSTable size 92 | klogInfo, err := os.Stat(klogPath) 93 | if err != nil { 94 | l.db.log(fmt.Sprintf("Warning: Failed to stat KLog file for SSTable %d: %v - skipping", id, err)) 95 | continue 96 | } 97 | 98 | vlogInfo, err := os.Stat(vlogPath) 99 | if err != nil { 100 | l.db.log(fmt.Sprintf("Warning: Failed to stat VLog file for SSTable %d: %v - skipping", id, err)) 101 | continue 102 | } 103 | 104 | // Calculate total size from file system 105 | sstable.Size = klogInfo.Size() + vlogInfo.Size() 106 | 107 | // Open the KLog file to try to get metadata from B-tree 108 | klogBm, err := blockmanager.Open(klogPath, os.O_RDONLY, l.db.opts.Permission, blockmanager.SyncOption(l.db.opts.SyncOption)) 109 | if err != nil { 110 | l.db.log(fmt.Sprintf("Warning: Failed to open KLog block manager for SSTable %d: %v - skipping", id, err)) 111 | continue 112 | } 113 | 114 | // Add the KLog to cache 115 | l.db.lru.Put(klogPath, klogBm, func(key, value interface{}) { 116 | if bm, ok := value.(*blockmanager.BlockManager); ok { 117 | _ = bm.Close() 118 | } 119 | }) 120 | 121 | // Try to open the B-tree to get metadata 122 | // An immutable btree in wildcat can store extra metadata for the tree itself. 123 | t, err := tree.Open(klogBm, l.db.opts.SSTableBTreeOrder, nil) 124 | if err != nil { 125 | l.db.log(fmt.Sprintf("Warning: Failed to open B-tree for SSTable %d: %v - using file system metadata only", id, err)) 126 | // Set basic metadata and continue 127 | sstable.Min = []byte{} 128 | sstable.Max = []byte{} 129 | sstable.EntryCount = 0 130 | sstables = append(sstables, sstable) 131 | continue 132 | } 133 | 134 | // Get the extra metadata from the B-tree 135 | extraMeta := t.GetExtraMeta() 136 | if extraMeta != nil { 137 | // Handle different types that might be returned 138 | switch meta := extraMeta.(type) { 139 | case *SSTable: 140 | // Perfect - we got the SSTable directly 141 | sstable.Min = meta.Min 142 | sstable.Max = meta.Max 143 | sstable.EntryCount = meta.EntryCount 144 | sstable.BloomFilter = meta.BloomFilter 145 | if meta.Size > 0 { 146 | sstable.Size = meta.Size // Use metadata size if available and valid 147 | } 148 | 149 | case primitive.D: 150 | // BSON document - need to extract fields manually 151 | l.extractSSTableFromBSON(sstable, meta) 152 | 153 | case map[string]interface{}: 154 | // Map interface - extract fields 155 | l.extractSSTableFromMap(sstable, meta) 156 | 157 | default: 158 | l.db.log(fmt.Sprintf("Warning: Unknown metadata type %T for SSTable %d - using file system metadata", extraMeta, id)) 159 | sstable.Min = []byte{} 160 | sstable.Max = []byte{} 161 | sstable.EntryCount = 0 162 | } 163 | } else { 164 | // No metadata available - use empty values 165 | sstable.Min = []byte{} 166 | sstable.Max = []byte{} 167 | sstable.EntryCount = 0 168 | } 169 | 170 | l.db.log(fmt.Sprintf("Loaded SSTable %d: Size=%d bytes, Entries=%d", 171 | sstable.Id, sstable.Size, sstable.EntryCount)) 172 | 173 | sstables = append(sstables, sstable) 174 | } 175 | 176 | // Sort SSTables by ID 177 | sort.Slice(sstables, func(i, j int) bool { 178 | return sstables[i].Id < sstables[j].Id 179 | }) 180 | 181 | // Update the level's total size 182 | var totalSize int64 183 | for _, sstable := range sstables { 184 | totalSize += sstable.Size 185 | } 186 | l.setSize(totalSize) 187 | 188 | // Store the sorted SSTables 189 | l.sstables.Store(&sstables) 190 | 191 | l.db.log(fmt.Sprintf("Level %d reopen completed: %d SSTables, total size %d bytes", 192 | l.id, len(sstables), totalSize)) 193 | 194 | return nil 195 | } 196 | 197 | // extractSSTableFromBSON a helper method to extract SSTable metadata from BSON primitive.D 198 | func (l *Level) extractSSTableFromBSON(sstable *SSTable, doc primitive.D) { 199 | for _, elem := range doc { 200 | switch elem.Key { 201 | case "id": 202 | if id, ok := elem.Value.(int64); ok { 203 | sstable.Id = id 204 | } 205 | case "min": 206 | if minData, ok := elem.Value.(primitive.Binary); ok { 207 | sstable.Min = minData.Data 208 | } else if minBytes, ok := elem.Value.([]byte); ok { 209 | sstable.Min = minBytes 210 | } 211 | case "max": 212 | if maxData, ok := elem.Value.(primitive.Binary); ok { 213 | sstable.Max = maxData.Data 214 | } else if maxBytes, ok := elem.Value.([]byte); ok { 215 | sstable.Max = maxBytes 216 | } 217 | case "size": 218 | if size, ok := elem.Value.(int64); ok && size > 0 { 219 | sstable.Size = size 220 | } 221 | case "entrycount": 222 | if count, ok := elem.Value.(int32); ok { 223 | sstable.EntryCount = int(count) 224 | } else if count, ok := elem.Value.(int64); ok { 225 | sstable.EntryCount = int(count) 226 | } 227 | case "level": 228 | if level, ok := elem.Value.(int32); ok { 229 | sstable.Level = int(level) 230 | } else if level, ok := elem.Value.(int64); ok { 231 | sstable.Level = int(level) 232 | } 233 | } 234 | } 235 | } 236 | 237 | // extractSSTableFromMap a helper method to extract SSTable metadata from map[string]interface{} 238 | func (l *Level) extractSSTableFromMap(sstable *SSTable, meta map[string]interface{}) { 239 | if id, ok := meta["id"].(int64); ok { 240 | sstable.Id = id 241 | } 242 | if minBytes, ok := meta["min"].([]byte); ok { 243 | sstable.Min = minBytes 244 | } 245 | if maxBytes, ok := meta["max"].([]byte); ok { 246 | sstable.Max = maxBytes 247 | } 248 | if size, ok := meta["size"].(int64); ok && size > 0 { 249 | sstable.Size = size 250 | } 251 | if count, ok := meta["entrycount"].(int); ok { 252 | sstable.EntryCount = count 253 | } else if count, ok := meta["entrycount"].(int32); ok { 254 | sstable.EntryCount = int(count) 255 | } else if count, ok := meta["entrycount"].(int64); ok { 256 | sstable.EntryCount = int(count) 257 | } 258 | if level, ok := meta["level"].(int); ok { 259 | sstable.Level = level 260 | } else if level, ok := meta["level"].(int32); ok { 261 | sstable.Level = int(level) 262 | } else if level, ok := meta["level"].(int64); ok { 263 | sstable.Level = int(level) 264 | } 265 | } 266 | 267 | // getSize returns the current size of the level 268 | func (l *Level) getSize() int64 { 269 | return atomic.LoadInt64(&l.currentSize) 270 | } 271 | 272 | // setSize sets the current size of the level 273 | func (l *Level) setSize(size int64) { 274 | atomic.StoreInt64(&l.currentSize, size) 275 | } 276 | 277 | // SSTables returns the list of SSTables in the level 278 | func (l *Level) SSTables() []*SSTable { 279 | return *l.sstables.Load() 280 | } 281 | -------------------------------------------------------------------------------- /level_test.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestLevel_BasicOperations(t *testing.T) { 12 | dir, err := os.MkdirTemp("", "db_level_test") 13 | if err != nil { 14 | t.Fatalf("Failed to create temp directory: %v", err) 15 | } 16 | 17 | // Create a log channel 18 | logChan := make(chan string, 100) 19 | defer func() { 20 | // Drain the log channel 21 | for len(logChan) > 0 { 22 | <-logChan 23 | } 24 | }() 25 | 26 | // Create a test DB 27 | opts := &Options{ 28 | Directory: dir, 29 | SyncOption: SyncFull, // Use full sync for reliability 30 | LogChannel: logChan, 31 | WriteBufferSize: 4 * 1024, // Small buffer to force flushing 32 | } 33 | 34 | db, err := Open(opts) 35 | if err != nil { 36 | t.Fatalf("Failed to open database: %v", err) 37 | } 38 | defer func(path string) { 39 | _ = os.RemoveAll(path) 40 | }(dir) 41 | 42 | // Insert enough data to create SSTables in level 1 43 | numEntries := 100 44 | for i := 0; i < numEntries; i++ { 45 | key := fmt.Sprintf("key%d", i) 46 | value := fmt.Sprintf("value%d", i) 47 | 48 | err = db.Update(func(txn *Txn) error { 49 | return txn.Put([]byte(key), []byte(value)) 50 | }) 51 | if err != nil { 52 | t.Fatalf("Failed to insert data: %v", err) 53 | } 54 | } 55 | 56 | // Force a memtable flush by exceeding write buffer size 57 | largeValue := make([]byte, opts.WriteBufferSize) 58 | for i := range largeValue { 59 | largeValue[i] = byte(i % 256) 60 | } 61 | 62 | err = db.Update(func(txn *Txn) error { 63 | return txn.Put([]byte("large_key"), largeValue) 64 | }) 65 | if err != nil { 66 | t.Fatalf("Failed to insert large value: %v", err) 67 | } 68 | 69 | // Give some time for background flushing to complete 70 | time.Sleep(2 * time.Second) 71 | 72 | // Get the level 1 73 | levels := db.levels.Load() 74 | if levels == nil { 75 | t.Fatalf("Levels not initialized") 76 | } 77 | 78 | level1 := (*levels)[0] // Level 1 is at index 0 79 | sstables := level1.SSTables() 80 | 81 | if sstables == nil || len(sstables) == 0 { 82 | t.Errorf("Expected at least one SSTable in level 1, but found none") 83 | } else { 84 | t.Logf("Found %d SSTables in level 1", len(sstables)) 85 | } 86 | 87 | // Check level properties 88 | if level1.id != 1 { 89 | t.Errorf("Expected level ID to be 1, got %d", level1.id) 90 | } 91 | 92 | if level1.getSize() <= 0 { 93 | t.Errorf("Expected level size to be greater than 0, got %d", level1.getSize()) 94 | } 95 | 96 | t.Logf("Level 1 size reported as: %d", level1.getSize()) 97 | 98 | // Close the DB to ensure all data is flushed 99 | err = db.Close() 100 | if err != nil { 101 | t.Fatalf("Failed to close database: %v", err) 102 | } 103 | 104 | // Check that level directory exists on disk 105 | levelDir := filepath.Join(dir, "l1") 106 | if _, err := os.Stat(levelDir); os.IsNotExist(err) { 107 | t.Fatalf("Level directory does not exist: %s", levelDir) 108 | } 109 | 110 | // Check for SSTable files in the level directory 111 | files, err := os.ReadDir(levelDir) 112 | if err != nil { 113 | t.Fatalf("Failed to read level directory: %v", err) 114 | } 115 | 116 | var klogCount, vlogCount int 117 | for _, file := range files { 118 | if filepath.Ext(file.Name()) == ".klog" { 119 | klogCount++ 120 | } 121 | if filepath.Ext(file.Name()) == ".vlog" { 122 | vlogCount++ 123 | } 124 | } 125 | 126 | if klogCount == 0 { 127 | t.Errorf("No .klog files found in level directory") 128 | } 129 | if vlogCount == 0 { 130 | t.Errorf("No .vlog files found in level directory") 131 | } 132 | 133 | t.Logf("Found %d .klog files and %d .vlog files in level directory", klogCount, vlogCount) 134 | } 135 | 136 | func TestLevel_Reopen(t *testing.T) { 137 | dir, err := os.MkdirTemp("", "db_level_reopen_test") 138 | if err != nil { 139 | t.Fatalf("Failed to create temp directory: %v", err) 140 | } 141 | 142 | // Create a log channel that won't be closed in this test 143 | logChan := make(chan string, 100) 144 | 145 | // First DB instance to create data 146 | { 147 | opts := &Options{ 148 | Directory: dir, 149 | SyncOption: SyncFull, 150 | LogChannel: logChan, 151 | WriteBufferSize: 4 * 1024, // Small buffer to force flushing 152 | } 153 | 154 | db, err := Open(opts) 155 | if err != nil { 156 | t.Fatalf("Failed to open database: %v", err) 157 | } 158 | 159 | defer func(path string) { 160 | _ = os.RemoveAll(path) 161 | }(dir) 162 | 163 | // Insert data that will be flushed to SSTable 164 | for i := 0; i < 50; i++ { 165 | key := fmt.Sprintf("reopen_key%d", i) 166 | value := fmt.Sprintf("reopen_value%d", i) 167 | 168 | err = db.Update(func(txn *Txn) error { 169 | return txn.Put([]byte(key), []byte(value)) 170 | }) 171 | if err != nil { 172 | t.Fatalf("Failed to insert data: %v", err) 173 | } 174 | } 175 | 176 | // Force flush with large value 177 | largeValue := make([]byte, opts.WriteBufferSize) 178 | err = db.Update(func(txn *Txn) error { 179 | return txn.Put([]byte("large_key"), largeValue) 180 | }) 181 | if err != nil { 182 | t.Fatalf("Failed to insert large value: %v", err) 183 | } 184 | 185 | // Give time for flush to complete 186 | time.Sleep(500 * time.Millisecond) 187 | 188 | // Get level info before closing 189 | levels := db.levels.Load() 190 | level1 := (*levels)[0] 191 | sstables := level1.SSTables() 192 | if sstables == nil { 193 | t.Fatalf("No SSTables found in level 1 before closing") 194 | } 195 | originalSSTableCount := len(sstables) 196 | originalSize := level1.getSize() 197 | 198 | t.Logf("Before closing: Level 1 has %d SSTables and size %d", originalSSTableCount, originalSize) 199 | 200 | // Close DB to ensure all data is persisted 201 | err = db.Close() 202 | if err != nil { 203 | t.Fatalf("Failed to close first database instance: %v", err) 204 | } 205 | 206 | // Drain log channel 207 | for len(logChan) > 0 { 208 | <-logChan 209 | } 210 | } 211 | 212 | // Second DB instance to test reopening 213 | { 214 | // Create new log channel for second instance 215 | logChan = make(chan string, 100) 216 | defer func() { 217 | for len(logChan) > 0 { 218 | <-logChan 219 | } 220 | }() 221 | 222 | opts := &Options{ 223 | Directory: dir, 224 | SyncOption: SyncFull, 225 | LogChannel: logChan, 226 | WriteBufferSize: 4 * 1024, 227 | } 228 | 229 | db2, err := Open(opts) 230 | if err != nil { 231 | t.Fatalf("Failed to reopen database: %v", err) 232 | } 233 | 234 | // Check that level 1 was properly restored 235 | levels := db2.levels.Load() 236 | level1 := (*levels)[0] 237 | sstables := level1.sstables.Load() 238 | 239 | if sstables == nil || len(*sstables) == 0 { 240 | t.Errorf("Expected SSTables in reopened level 1, but found none") 241 | } else { 242 | t.Logf("After reopening: Level 1 has %d SSTables and size %d", 243 | len(*sstables), level1.getSize()) 244 | } 245 | 246 | // Verify data can be read after reopening 247 | for i := 0; i < 50; i++ { 248 | key := fmt.Sprintf("reopen_key%d", i) 249 | expectedValue := fmt.Sprintf("reopen_value%d", i) 250 | 251 | var actualValue []byte 252 | err = db2.Update(func(txn *Txn) error { 253 | var err error 254 | actualValue, err = txn.Get([]byte(key)) 255 | return err 256 | }) 257 | 258 | if err != nil { 259 | t.Errorf("Failed to read key %s after reopening: %v", key, err) 260 | } else if string(actualValue) != expectedValue { 261 | t.Errorf("Value mismatch for key %s: expected '%s', got '%s'", 262 | key, expectedValue, string(actualValue)) 263 | } 264 | } 265 | 266 | // Close properly 267 | err = db2.Close() 268 | if err != nil { 269 | t.Fatalf("Failed to close second database instance: %v", err) 270 | } 271 | } 272 | } 273 | 274 | func TestLevel_SizeMethods(t *testing.T) { 275 | dir, err := os.MkdirTemp("", "db_level_size_test") 276 | if err != nil { 277 | t.Fatalf("Failed to create temp directory: %v", err) 278 | } 279 | 280 | // Create a log channel 281 | logChan := make(chan string, 100) 282 | defer func() { 283 | // Drain the log channel 284 | for len(logChan) > 0 { 285 | <-logChan 286 | } 287 | }() 288 | 289 | // Create a test DB 290 | opts := &Options{ 291 | Directory: dir, 292 | SyncOption: SyncFull, 293 | LogChannel: logChan, 294 | WriteBufferSize: 4 * 1024, // Small buffer to force flushing 295 | } 296 | 297 | db, err := Open(opts) 298 | if err != nil { 299 | t.Fatalf("Failed to open database: %v", err) 300 | } 301 | defer func(path string) { 302 | _ = os.RemoveAll(path) 303 | }(dir) 304 | 305 | // Get the level 1 306 | levels := db.levels.Load() 307 | level1 := (*levels)[0] // Level 1 is at index 0 308 | 309 | // Check initial size 310 | initialSize := level1.getSize() 311 | t.Logf("Initial level 1 size: %d", initialSize) 312 | 313 | // Set size and verify it's updated 314 | testSize := int64(12345) 315 | level1.setSize(testSize) 316 | 317 | if level1.getSize() != testSize { 318 | t.Errorf("Size not updated correctly: expected %d, got %d", 319 | testSize, level1.getSize()) 320 | } 321 | 322 | // Test with actual data 323 | numEntries := 100 324 | for i := 0; i < numEntries; i++ { 325 | key := fmt.Sprintf("key%d", i) 326 | value := fmt.Sprintf("value%d", i) 327 | 328 | err = db.Update(func(txn *Txn) error { 329 | return txn.Put([]byte(key), []byte(value)) 330 | }) 331 | if err != nil { 332 | t.Fatalf("Failed to insert data: %v", err) 333 | } 334 | } 335 | 336 | // Force flush to SSTable with large value 337 | largeValue := make([]byte, opts.WriteBufferSize) 338 | err = db.Update(func(txn *Txn) error { 339 | return txn.Put([]byte("large_key"), largeValue) 340 | }) 341 | if err != nil { 342 | t.Fatalf("Failed to insert large value: %v", err) 343 | } 344 | 345 | err = db.ForceFlush() 346 | if err != nil { 347 | return 348 | } 349 | 350 | // After data insertion and flush, size should have increased 351 | currentSize := level1.getSize() 352 | t.Logf("Level 1 size after data insertion: %d", currentSize) 353 | 354 | if currentSize <= testSize { 355 | t.Errorf("Expected level size to increase after data insertion") 356 | } 357 | 358 | // Close the database 359 | err = db.Close() 360 | if err != nil { 361 | t.Fatalf("Failed to close database: %v", err) 362 | } 363 | } 364 | 365 | func TestLevel_ErrorHandling(t *testing.T) { 366 | dir, err := os.MkdirTemp("", "db_level_error_test") 367 | if err != nil { 368 | t.Fatalf("Failed to create temp directory: %v", err) 369 | } 370 | 371 | // Create a log channel 372 | logChan := make(chan string, 100) 373 | defer func() { 374 | // Drain the log channel 375 | for len(logChan) > 0 { 376 | <-logChan 377 | } 378 | }() 379 | 380 | // Create a test DB 381 | opts := &Options{ 382 | Directory: dir, 383 | SyncOption: SyncFull, 384 | LogChannel: logChan, 385 | WriteBufferSize: 4 * 1024, // Small buffer to force flushing 386 | } 387 | 388 | db, err := Open(opts) 389 | if err != nil { 390 | t.Fatalf("Failed to open database: %v", err) 391 | } 392 | defer func(path string) { 393 | _ = os.RemoveAll(path) 394 | }(dir) 395 | 396 | // Add some data and flush to create SSTable files 397 | for i := 0; i < 50; i++ { 398 | key := fmt.Sprintf("error_key%d", i) 399 | value := fmt.Sprintf("error_value%d", i) 400 | 401 | err = db.Update(func(txn *Txn) error { 402 | return txn.Put([]byte(key), []byte(value)) 403 | }) 404 | if err != nil { 405 | t.Fatalf("Failed to insert data: %v", err) 406 | } 407 | } 408 | 409 | // Force flush 410 | largeValue := make([]byte, opts.WriteBufferSize) 411 | err = db.Update(func(txn *Txn) error { 412 | return txn.Put([]byte("large_key"), largeValue) 413 | }) 414 | if err != nil { 415 | t.Fatalf("Failed to insert large value: %v", err) 416 | } 417 | 418 | // Wait for flush to complete 419 | time.Sleep(500 * time.Millisecond) 420 | 421 | // Close the database 422 | err = db.Close() 423 | if err != nil { 424 | t.Fatalf("Failed to close database: %v", err) 425 | } 426 | 427 | // Corrupt a level by removing vlog file but keeping klog file 428 | levelDir := filepath.Join(dir, "l1") 429 | files, err := os.ReadDir(levelDir) 430 | if err != nil { 431 | t.Fatalf("Failed to read level directory: %v", err) 432 | } 433 | 434 | // Find a klog file and its corresponding vlog file 435 | var klogFile, vlogFile string 436 | for _, file := range files { 437 | if filepath.Ext(file.Name()) == ".klog" { 438 | klogFile = file.Name() 439 | vlogFile = klogFile[:len(klogFile)-5] + ".vlog" // Replace .klog with .vlog 440 | break 441 | } 442 | } 443 | 444 | if klogFile == "" { 445 | t.Fatalf("No .klog file found to test error handling") 446 | } 447 | 448 | // Remove the vlog file to create an inconsistent state 449 | err = os.Remove(filepath.Join(levelDir, vlogFile)) 450 | if err != nil { 451 | t.Fatalf("Failed to remove vlog file: %v", err) 452 | } 453 | t.Logf("Removed vlog file %s to test error handling", vlogFile) 454 | 455 | opts.LogChannel = make(chan string, 100) 456 | 457 | // Try to reopen the database 458 | db2, err := Open(opts) 459 | if err != nil { 460 | t.Fatalf("Failed to reopen database with corrupted level: %v", err) 461 | } 462 | defer func(path string) { 463 | _ = os.RemoveAll(path) 464 | }(dir) 465 | 466 | // The level.reopen method should skip the corrupted SSTable 467 | levels := db2.levels.Load() 468 | level1 := (*levels)[0] 469 | sstables := level1.sstables.Load() 470 | sstCount := 0 471 | if sstables != nil { 472 | sstCount = len(*sstables) 473 | } 474 | t.Logf("Reopened database with %d valid SSTables in level 1", sstCount) 475 | 476 | // Try to read data from non-corrupted SSTables 477 | successCount := 0 478 | for i := 0; i < 50; i++ { 479 | key := fmt.Sprintf("error_key%d", i) 480 | err = db2.Update(func(txn *Txn) error { 481 | _, err := txn.Get([]byte(key)) 482 | return err 483 | }) 484 | if err == nil { 485 | successCount++ 486 | } 487 | } 488 | t.Logf("Successfully read %d of 50 keys after corruption", successCount) 489 | 490 | // Close the database 491 | err = db2.Close() 492 | if err != nil { 493 | t.Fatalf("Failed to close reopened database: %v", err) 494 | } 495 | } 496 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. -------------------------------------------------------------------------------- /lru/lru.go: -------------------------------------------------------------------------------- 1 | // Package lru 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package lru 17 | 18 | import ( 19 | "github.com/wildcatdb/wildcat/queue" 20 | "math" 21 | "runtime" 22 | "sync/atomic" 23 | "time" 24 | "unsafe" 25 | ) 26 | 27 | type EvictionCallback func(key, value interface{}) 28 | 29 | // ValueWrapper is a wrapper for values stored in the LRU list 30 | type ValueWrapper struct { 31 | data interface{} 32 | } 33 | 34 | // Node represents a node in the linked list 35 | type Node struct { 36 | key interface{} 37 | value unsafe.Pointer // *ValueWrapper 38 | accessCnt uint64 39 | timestamp int64 40 | next unsafe.Pointer // *Node 41 | prev unsafe.Pointer // *Node 42 | onEvict EvictionCallback 43 | markedForEviction int32 // atomic flag(0=normal, 1=marked for eviction) 44 | } 45 | 46 | // LRU is a lockless linked list with lazy eviction and anti-thrashing mechanisms 47 | type LRU struct { 48 | head unsafe.Pointer // *Node 49 | tail unsafe.Pointer // *Node 50 | length int64 51 | capacity int64 52 | evictRatio float64 53 | accessWeight float64 54 | timeWeight float64 55 | evictionQueue *queue.Queue 56 | evicting int32 // Prevent recursive eviction 57 | lastProgressTime int64 // Track when we last made progress 58 | stuckCounter int32 // Count consecutive stuck operations 59 | } 60 | 61 | // New creates a new lru atomic linked list with lazy eviction 62 | func New(capacity int64, evictRatio float64, accessWeight float64) *LRU { 63 | if capacity <= 0 { 64 | capacity = math.MaxInt64 65 | } 66 | if evictRatio <= 0 || evictRatio >= 1 { 67 | evictRatio = 0.25 68 | } 69 | if accessWeight < 0 || accessWeight > 1 { 70 | accessWeight = 0.7 71 | } 72 | 73 | valueWrapper := &ValueWrapper{data: nil} 74 | sentinel := &Node{ 75 | key: nil, 76 | value: unsafe.Pointer(valueWrapper), 77 | accessCnt: 0, 78 | timestamp: time.Now().UnixNano(), 79 | } 80 | 81 | lru := &LRU{ 82 | head: unsafe.Pointer(sentinel), 83 | tail: unsafe.Pointer(sentinel), 84 | length: 0, 85 | capacity: capacity, 86 | evictRatio: evictRatio, 87 | accessWeight: accessWeight, 88 | timeWeight: 1 - accessWeight, 89 | evictionQueue: queue.New(), 90 | lastProgressTime: time.Now().UnixNano(), 91 | } 92 | 93 | return lru 94 | } 95 | 96 | // shouldEvictNode determines if a node should be evicted with balanced approach 97 | func (list *LRU) shouldEvictNode(node *Node, currentTime int64) bool { 98 | // Don't evict if already marked 99 | if atomic.LoadInt32(&node.markedForEviction) == 1 { 100 | return false 101 | } 102 | 103 | currentLength := atomic.LoadInt64(&list.length) 104 | loadFactor := float64(currentLength) / float64(list.capacity) 105 | 106 | accessCount := atomic.LoadUint64(&node.accessCnt) 107 | age := currentTime - node.timestamp 108 | 109 | // Only evict if we're actually over capacity or very close 110 | if loadFactor < 0.95 { 111 | return false // Don't evict unless we're at 95%+ capacity 112 | } 113 | 114 | // At capacity or over, an immediate eviction needed 115 | if loadFactor >= 1.0 { 116 | // Evict nodes with low access count regardless of age 117 | return accessCount <= 2 118 | } 119 | 120 | // Very close to capacity (95%+), must be more selective 121 | if loadFactor >= 0.95 { 122 | // Evict old nodes with very low access 123 | return age > 50*time.Millisecond.Nanoseconds() && accessCount <= 1 // Evict nodes that are old and have low access count 124 | } 125 | 126 | return false 127 | } 128 | 129 | // lazyEvictDuringTraversal checks if we should evict the current node during traversal 130 | func (list *LRU) lazyEvictDuringTraversal(node *Node) { 131 | // Only do lazy eviction if we're actually over capacity 132 | currentLength := atomic.LoadInt64(&list.length) 133 | if currentLength <= list.capacity { 134 | return // Don't evict if we're not over capacity 135 | } 136 | 137 | currentTime := time.Now().UnixNano() 138 | if list.shouldEvictNode(node, currentTime) { 139 | // Mark for eviction atomically 140 | if atomic.CompareAndSwapInt32(&node.markedForEviction, 0, 1) { 141 | // Successfully marked, add to eviction queue 142 | list.evictionQueue.Enqueue(node) 143 | } 144 | } 145 | } 146 | 147 | // detectAndRecoverFromStuck detects stuck states and recovers 148 | func (list *LRU) detectAndRecoverFromStuck() bool { 149 | now := time.Now().UnixNano() 150 | lastProgress := atomic.LoadInt64(&list.lastProgressTime) 151 | 152 | // If no progress for 10ms, we might be stuck 153 | if now-lastProgress > 10*time.Millisecond.Nanoseconds() { 154 | stuckCount := atomic.AddInt32(&list.stuckCounter, 1) 155 | 156 | if stuckCount > 5 { 157 | // Emergency! 158 | list.emergencyRecovery() 159 | atomic.StoreInt32(&list.stuckCounter, 0) 160 | return true 161 | } 162 | } else { 163 | atomic.StoreInt32(&list.stuckCounter, 0) 164 | } 165 | 166 | atomic.StoreInt64(&list.lastProgressTime, now) 167 | return false 168 | } 169 | 170 | // emergencyRecovery performs emergency recovery from stuck state 171 | func (list *LRU) emergencyRecovery() { 172 | // Clear eviction queue 173 | for list.evictionQueue.Dequeue() != nil { 174 | // Draining up 175 | } 176 | 177 | list.repairTailPointer() 178 | 179 | // Reset all eviction flags 180 | current := (*Node)(atomic.LoadPointer(&list.head)) 181 | for current != nil { 182 | atomic.StoreInt32(¤t.markedForEviction, 0) 183 | current = (*Node)(atomic.LoadPointer(¤t.next)) 184 | } 185 | } 186 | 187 | // repairTailPointer fixes corrupted tail pointer by walking the list 188 | func (list *LRU) repairTailPointer() { 189 | // Walk from head to find actual tail 190 | current := (*Node)(atomic.LoadPointer(&list.head)) 191 | var actualTail = current 192 | 193 | for current != nil { 194 | next := (*Node)(atomic.LoadPointer(¤t.next)) 195 | if next == nil { 196 | actualTail = current 197 | break 198 | } 199 | current = next 200 | } 201 | 202 | // Update tail pointer 203 | atomic.StorePointer(&list.tail, unsafe.Pointer(actualTail)) 204 | } 205 | 206 | // reuseOrCreateNode attempts to reuse an evicted node or creates a new one 207 | func (list *LRU) reuseOrCreateNode(key, value interface{}, onEvict EvictionCallback) *Node { 208 | 209 | // Try to reuse an evicted node first 210 | if reusedNode := list.evictionQueue.Dequeue(); reusedNode != nil { 211 | // Call eviction callback for the old data 212 | if reusedNode.(*Node).onEvict != nil { 213 | valuePtr := atomic.LoadPointer(&reusedNode.(*Node).value) 214 | if valuePtr != nil { 215 | oldValue := (*ValueWrapper)(valuePtr) 216 | reusedNode.(*Node).onEvict(reusedNode.(*Node).key, oldValue.data) 217 | } 218 | } 219 | 220 | // Reset and reuse the node 221 | valueWrapper := &ValueWrapper{data: value} 222 | reusedNode.(*Node).key = key 223 | atomic.StorePointer(&reusedNode.(*Node).value, unsafe.Pointer(valueWrapper)) 224 | atomic.StoreUint64(&reusedNode.(*Node).accessCnt, 1) 225 | reusedNode.(*Node).timestamp = time.Now().UnixNano() 226 | atomic.StorePointer(&reusedNode.(*Node).next, nil) 227 | atomic.StorePointer(&reusedNode.(*Node).prev, nil) 228 | reusedNode.(*Node).onEvict = onEvict 229 | atomic.StoreInt32(&reusedNode.(*Node).markedForEviction, 0) 230 | 231 | return reusedNode.(*Node) 232 | } 233 | 234 | // Create new node if no reused node available 235 | valueWrapper := &ValueWrapper{data: value} 236 | return &Node{ 237 | key: key, 238 | value: unsafe.Pointer(valueWrapper), 239 | accessCnt: 1, 240 | timestamp: time.Now().UnixNano(), 241 | next: nil, 242 | prev: nil, 243 | onEvict: onEvict, 244 | markedForEviction: 0, 245 | } 246 | } 247 | 248 | // Get retrieves a value by key with lazy eviction 249 | func (list *LRU) Get(key interface{}) (interface{}, bool) { 250 | // Process eviction queue to clean up marked nodes 251 | list.processEvictionQueue() 252 | 253 | current := (*Node)(atomic.LoadPointer(&list.head)) 254 | current = (*Node)(atomic.LoadPointer(¤t.next)) 255 | 256 | for current != nil { 257 | // Check if this node should be lazily evicted 258 | list.lazyEvictDuringTraversal(current) 259 | 260 | // Skip nodes marked for eviction 261 | if atomic.LoadInt32(¤t.markedForEviction) == 1 { 262 | current = (*Node)(atomic.LoadPointer(¤t.next)) 263 | continue 264 | } 265 | 266 | if current.key == key { 267 | atomic.AddUint64(¤t.accessCnt, 1) 268 | valuePtr := atomic.LoadPointer(¤t.value) 269 | value := (*ValueWrapper)(valuePtr) 270 | 271 | // Mark progress 272 | atomic.StoreInt64(&list.lastProgressTime, time.Now().UnixNano()) 273 | return value.data, true 274 | } 275 | current = (*Node)(atomic.LoadPointer(¤t.next)) 276 | } 277 | return nil, false 278 | } 279 | 280 | // Put adds or updates a key-value pair with anti-thrashing mechanisms 281 | func (list *LRU) Put(key, value interface{}, onEvict ...EvictionCallback) bool { 282 | var evictCallback EvictionCallback 283 | if len(onEvict) > 0 { 284 | evictCallback = onEvict[0] 285 | } 286 | 287 | // Check for stuck state 288 | if list.detectAndRecoverFromStuck() { 289 | runtime.Gosched() // Give other goroutines a chance 290 | } 291 | 292 | // Process eviction queue to clean up marked nodes 293 | list.processEvictionQueue() 294 | 295 | // Check if key already exists 296 | current := (*Node)(atomic.LoadPointer(&list.head)) 297 | current = (*Node)(atomic.LoadPointer(¤t.next)) 298 | 299 | for current != nil { 300 | // Lazy eviction check 301 | list.lazyEvictDuringTraversal(current) 302 | 303 | // Skip nodes marked for eviction 304 | if atomic.LoadInt32(¤t.markedForEviction) == 1 { 305 | current = (*Node)(atomic.LoadPointer(¤t.next)) 306 | continue 307 | } 308 | 309 | if current.key == key { 310 | // Update existing node 311 | newValue := &ValueWrapper{data: value} 312 | if evictCallback != nil { 313 | current.onEvict = evictCallback 314 | } 315 | atomic.StorePointer(¤t.value, unsafe.Pointer(newValue)) 316 | atomic.AddUint64(¤t.accessCnt, 1) 317 | 318 | // Mark progress 319 | atomic.StoreInt64(&list.lastProgressTime, time.Now().UnixNano()) 320 | return true 321 | } 322 | current = (*Node)(atomic.LoadPointer(¤t.next)) 323 | } 324 | 325 | // Trigger eviction if at or near capacity 326 | currentLength := atomic.LoadInt64(&list.length) 327 | if currentLength >= list.capacity { 328 | list.forceEviction() 329 | // Process eviction immediately to make room 330 | list.processEvictionQueue() 331 | } 332 | 333 | // Create or reuse node 334 | newNode := list.reuseOrCreateNode(key, value, evictCallback) 335 | 336 | // Add node to the list with retry logic and backoff 337 | const maxRetries = 100 338 | retryCount := 0 339 | backoffNs := int64(1000) // Start with 1μs 340 | 341 | for { 342 | if retryCount > maxRetries { 343 | // Fallback: try to recover by rebuilding tail pointer 344 | list.repairTailPointer() 345 | return false 346 | } 347 | 348 | tail := (*Node)(atomic.LoadPointer(&list.tail)) 349 | 350 | if atomic.CompareAndSwapPointer(&tail.next, nil, unsafe.Pointer(newNode)) { 351 | atomic.StorePointer(&newNode.prev, unsafe.Pointer(tail)) 352 | 353 | // Try to update tail with timeout 354 | tailUpdated := false 355 | for attempts := 0; attempts < 10; attempts++ { 356 | if atomic.CompareAndSwapPointer(&list.tail, unsafe.Pointer(tail), unsafe.Pointer(newNode)) { 357 | tailUpdated = true 358 | break 359 | } 360 | currentTail := (*Node)(atomic.LoadPointer(&list.tail)) 361 | if currentTail == newNode { 362 | tailUpdated = true 363 | break 364 | } 365 | time.Sleep(time.Duration(backoffNs)) 366 | backoffNs = min(backoffNs*2, 1000000) // Cap at 1ms 367 | } 368 | 369 | if tailUpdated { 370 | atomic.AddInt64(&list.length, 1) 371 | // Mark progress 372 | atomic.StoreInt64(&list.lastProgressTime, time.Now().UnixNano()) 373 | return true 374 | } 375 | } 376 | 377 | // Exponential backoff 378 | time.Sleep(time.Duration(backoffNs)) 379 | backoffNs = min(backoffNs*2, 1000000) 380 | retryCount++ 381 | 382 | // Try to advance tail if it's stale 383 | nextTail := (*Node)(atomic.LoadPointer(&tail.next)) 384 | if nextTail != nil { 385 | atomic.CompareAndSwapPointer(&list.tail, unsafe.Pointer(tail), unsafe.Pointer(nextTail)) 386 | } 387 | runtime.Gosched() 388 | } 389 | } 390 | 391 | // forceEviction aggressively evicts nodes when at capacity with anti-cascading 392 | func (list *LRU) forceEviction() { 393 | // Prevent recursive eviction 394 | if !atomic.CompareAndSwapInt32(&list.evicting, 0, 1) { 395 | return // Already evicting 396 | } 397 | defer atomic.StoreInt32(&list.evicting, 0) 398 | 399 | currentLength := atomic.LoadInt64(&list.length) 400 | if currentLength < list.capacity { 401 | return // No need to evict 402 | } 403 | 404 | toEvict := int(float64(list.capacity) * list.evictRatio) 405 | toEvict = int(min(int64(toEvict), currentLength/2)) // Never evict more than half 406 | 407 | if toEvict < 1 { 408 | toEvict = 1 409 | } 410 | 411 | // When at/over capacity, be more aggressive about eviction 412 | currentTime := time.Now().UnixNano() 413 | current := (*Node)(atomic.LoadPointer(&list.head)) 414 | current = (*Node)(atomic.LoadPointer(¤t.next)) 415 | evicted := 0 416 | 417 | // Try to evict based on normal criteria 418 | for current != nil && evicted < toEvict { 419 | next := (*Node)(atomic.LoadPointer(¤t.next)) 420 | 421 | if list.shouldEvictNode(current, currentTime) { 422 | if atomic.CompareAndSwapInt32(¤t.markedForEviction, 0, 1) { 423 | list.evictionQueue.Enqueue(current) 424 | evicted++ 425 | } 426 | } 427 | 428 | current = next 429 | } 430 | 431 | // If we didn't evict enough, be more aggressive 432 | if evicted < toEvict && currentLength >= list.capacity { 433 | current = (*Node)(atomic.LoadPointer(&list.head)) 434 | current = (*Node)(atomic.LoadPointer(¤t.next)) 435 | 436 | for current != nil && evicted < toEvict { 437 | next := (*Node)(atomic.LoadPointer(¤t.next)) 438 | 439 | // Skip already marked nodes 440 | if atomic.LoadInt32(¤t.markedForEviction) == 1 { 441 | current = next 442 | continue 443 | } 444 | 445 | // Evict nodes with low access count 446 | accessCount := atomic.LoadUint64(¤t.accessCnt) 447 | if accessCount <= 3 { // Evict nodes accessed 3 times or less 448 | if atomic.CompareAndSwapInt32(¤t.markedForEviction, 0, 1) { 449 | list.evictionQueue.Enqueue(current) 450 | evicted++ 451 | } 452 | } 453 | 454 | current = next 455 | } 456 | } 457 | 458 | // Process immediately but with limits 459 | list.processEvictionQueue() 460 | } 461 | 462 | // processEvictionQueue removes nodes that have been marked for eviction with limits 463 | func (list *LRU) processEvictionQueue() { 464 | processed := 0 465 | maxProcess := 100 466 | 467 | for processed < maxProcess { 468 | node := list.evictionQueue.Dequeue() 469 | if node == nil { 470 | break 471 | } 472 | 473 | nodePtr := node.(*Node) 474 | 475 | // Double-check eviction flag 476 | if atomic.LoadInt32(&nodePtr.markedForEviction) != 1 { 477 | continue 478 | } 479 | 480 | // Call eviction callback 481 | if nodePtr.onEvict != nil { 482 | valuePtr := atomic.LoadPointer(&nodePtr.value) 483 | if valuePtr != nil { 484 | value := (*ValueWrapper)(valuePtr) 485 | nodePtr.onEvict(nodePtr.key, value.data) 486 | } 487 | } 488 | 489 | list.removeNodeFromList(nodePtr) 490 | processed++ 491 | } 492 | } 493 | 494 | // removeNodeFromList physically removes a node from the linked list 495 | func (list *LRU) removeNodeFromList(node *Node) { 496 | prev := (*Node)(atomic.LoadPointer(&node.prev)) 497 | next := (*Node)(atomic.LoadPointer(&node.next)) 498 | 499 | if prev != nil { 500 | atomic.CompareAndSwapPointer(&prev.next, unsafe.Pointer(node), unsafe.Pointer(next)) 501 | } 502 | if next != nil { 503 | atomic.CompareAndSwapPointer(&next.prev, unsafe.Pointer(node), unsafe.Pointer(prev)) 504 | } 505 | if next == nil { 506 | atomic.CompareAndSwapPointer(&list.tail, unsafe.Pointer(node), unsafe.Pointer(prev)) 507 | } 508 | 509 | atomic.AddInt64(&list.length, -1) 510 | } 511 | 512 | // Delete removes a node by key 513 | func (list *LRU) Delete(key interface{}) bool { 514 | current := (*Node)(atomic.LoadPointer(&list.head)) 515 | current = (*Node)(atomic.LoadPointer(¤t.next)) 516 | 517 | for current != nil { 518 | if current.key == key { 519 | // Mark for eviction and immediately process 520 | if atomic.CompareAndSwapInt32(¤t.markedForEviction, 0, 1) { 521 | list.evictionQueue.Enqueue(current) 522 | list.processEvictionQueue() 523 | 524 | // Mark progress 525 | atomic.StoreInt64(&list.lastProgressTime, time.Now().UnixNano()) 526 | return true 527 | } 528 | } 529 | current = (*Node)(atomic.LoadPointer(¤t.next)) 530 | } 531 | return false 532 | } 533 | 534 | // Length returns the current length of the list 535 | func (list *LRU) Length() int64 { 536 | return atomic.LoadInt64(&list.length) 537 | } 538 | 539 | // ForEach iterates through the list safely 540 | func (list *LRU) ForEach(fn func(key, value interface{}, accessCount uint64) bool) { 541 | current := (*Node)(atomic.LoadPointer(&list.head)) 542 | current = (*Node)(atomic.LoadPointer(¤t.next)) 543 | 544 | for current != nil { 545 | // Skip nodes marked for eviction 546 | if atomic.LoadInt32(¤t.markedForEviction) == 1 { 547 | current = (*Node)(atomic.LoadPointer(¤t.next)) 548 | continue 549 | } 550 | 551 | accesses := atomic.LoadUint64(¤t.accessCnt) 552 | valuePtr := atomic.LoadPointer(¤t.value) 553 | valueWrapper := (*ValueWrapper)(valuePtr) 554 | 555 | if !fn(current.key, valueWrapper.data, accesses) { 556 | break 557 | } 558 | current = (*Node)(atomic.LoadPointer(¤t.next)) 559 | } 560 | } 561 | 562 | // Clear empties the list 563 | func (list *LRU) Clear() { 564 | valueWrapper := &ValueWrapper{data: nil} 565 | sentinel := &Node{ 566 | key: nil, 567 | value: unsafe.Pointer(valueWrapper), 568 | accessCnt: 0, 569 | timestamp: time.Now().UnixNano(), 570 | } 571 | 572 | atomic.StorePointer(&list.head, unsafe.Pointer(sentinel)) 573 | atomic.StorePointer(&list.tail, unsafe.Pointer(sentinel)) 574 | atomic.StoreInt64(&list.length, 0) 575 | 576 | // Clear eviction queue 577 | list.evictionQueue = queue.New() 578 | 579 | // Reset thrashing prevention fields 580 | atomic.StoreInt32(&list.evicting, 0) 581 | atomic.StoreInt32(&list.stuckCounter, 0) 582 | atomic.StoreInt64(&list.lastProgressTime, time.Now().UnixNano()) 583 | } 584 | 585 | // ForceEvictionProcessing forces the processing of the eviction queue 586 | // ***************This is mainly for testing purposes to ensure eviction happens immediately 587 | func (list *LRU) ForceEvictionProcessing() { 588 | // Force eviction if we're over capacity 589 | currentLength := atomic.LoadInt64(&list.length) 590 | if currentLength > list.capacity { 591 | list.forceEviction() 592 | } 593 | 594 | // Process the eviction queue multiple times to ensure completion 595 | for i := 0; i < 3; i++ { 596 | list.processEvictionQueue() 597 | // Small delay to allow other goroutines to process 598 | runtime.Gosched() 599 | } 600 | } 601 | -------------------------------------------------------------------------------- /lru/lru_test.go: -------------------------------------------------------------------------------- 1 | // Package lru 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package lru 17 | 18 | import ( 19 | "sync" 20 | "testing" 21 | "time" 22 | ) 23 | 24 | func TestLRUBasicOperations(t *testing.T) { 25 | lru := New(10, 0.25, 0.7) 26 | 27 | if lru.Length() != 0 { 28 | t.Errorf("Expected initial length 0, got %d", lru.Length()) 29 | } 30 | 31 | // Test Put and Get operations 32 | lru.Put("key1", "value1") 33 | if lru.Length() != 1 { 34 | t.Errorf("Expected length 1 after Put, got %d", lru.Length()) 35 | } 36 | 37 | val, found := lru.Get("key1") 38 | if !found { 39 | t.Error("Expected to find key1, but not found") 40 | } 41 | 42 | if val != "value1" { 43 | t.Errorf("Expected value 'value1', got %v", val) 44 | } 45 | 46 | // Test updating existing key 47 | lru.Put("key1", "value1-updated") 48 | val, found = lru.Get("key1") 49 | if !found || val != "value1-updated" { 50 | t.Errorf("Expected updated value 'value1-updated', got %v", val) 51 | } 52 | 53 | // Test non-existent key 54 | _, found = lru.Get("nonexistent") 55 | if found { 56 | t.Error("Expected not to find nonexistent key, but found") 57 | } 58 | 59 | // Test Delete operation 60 | result := lru.Delete("key1") 61 | if !result { 62 | t.Error("Delete operation failed") 63 | } 64 | 65 | if lru.Length() != 0 { 66 | t.Errorf("Expected length 0 after Delete, got %d", lru.Length()) 67 | } 68 | 69 | _, found = lru.Get("key1") 70 | if found { 71 | t.Error("Expected not to find deleted key, but found") 72 | } 73 | } 74 | 75 | func TestLRUCapacityAndEviction(t *testing.T) { 76 | // Create a new LRU with small capacity to test eviction 77 | capacity := int64(5) 78 | lru := New(capacity, 0.4, 0.7) // Evict 40% (2 items) when full 79 | 80 | // Fill the LRU 81 | for i := 0; i < int(capacity); i++ { 82 | lru.Put(i, i*10) 83 | } 84 | 85 | // Verify all items are present 86 | for i := 0; i < int(capacity); i++ { 87 | val, found := lru.Get(i) 88 | if !found { 89 | t.Errorf("Expected to find key %d, but not found", i) 90 | } 91 | if val != i*10 { 92 | t.Errorf("Expected value %d, got %v", i*10, val) 93 | } 94 | } 95 | 96 | // Access some items more to influence eviction (items 3, 4) 97 | for i := 3; i < int(capacity); i++ { 98 | for j := 0; j < 5; j++ { // Access multiple times 99 | lru.Get(i) 100 | } 101 | } 102 | 103 | // Add additional items to trigger eviction 104 | for i := int(capacity); i < int(capacity)+3; i++ { 105 | lru.Put(i, i*10) 106 | // Give time for lazy eviction to process 107 | time.Sleep(time.Millisecond) 108 | } 109 | 110 | // Force eviction processing by triggering more operations 111 | for i := 0; i < 3; i++ { 112 | lru.Get(999) // Non-existent key to trigger traversal 113 | } 114 | 115 | // Check that some items were evicted 116 | evictedCount := 0 117 | for i := 0; i < int(capacity); i++ { 118 | _, found := lru.Get(i) 119 | if !found { 120 | evictedCount++ 121 | } 122 | } 123 | 124 | // Length should not significantly exceed capacity 125 | if lru.Length() > capacity+1 { 126 | t.Errorf("Length significantly exceeds capacity: %d > %d", lru.Length(), capacity+1) 127 | } 128 | 129 | // At least some eviction should have occurred when we exceed capacity 130 | if lru.Length() == int64(capacity+3) { 131 | t.Error("Expected some eviction to occur, but length suggests none happened") 132 | } 133 | } 134 | 135 | func TestLRUConcurrentAccess(t *testing.T) { 136 | // Create a new LRU with large capacity for concurrent testing 137 | lru := New(20, 0.25, 0.7) 138 | 139 | // Number of goroutines and operations per goroutine 140 | goroutines := 10 141 | opsPerGoroutine := 2 142 | 143 | var wg sync.WaitGroup 144 | 145 | // Launch writer goroutines 146 | for g := 0; g < goroutines; g++ { 147 | wg.Add(1) 148 | go func(id int) { 149 | defer wg.Done() 150 | for i := 0; i < opsPerGoroutine; i++ { 151 | key := id*opsPerGoroutine + i 152 | lru.Put(key, key*10) 153 | } 154 | }(g) 155 | } 156 | 157 | // Launch reader goroutines 158 | for g := 0; g < goroutines; g++ { 159 | wg.Add(1) 160 | go func(id int) { 161 | defer wg.Done() 162 | for i := 0; i < opsPerGoroutine; i++ { 163 | key := id*opsPerGoroutine + i 164 | _, _ = lru.Get(key) 165 | } 166 | }(g) 167 | } 168 | 169 | wg.Wait() 170 | 171 | // Verify the length is as expected 172 | expectedItemCount := goroutines * opsPerGoroutine 173 | if lru.Length() != int64(expectedItemCount) { 174 | t.Errorf("Expected length %d, got %d", expectedItemCount, lru.Length()) 175 | } 176 | } 177 | 178 | func TestLRUForEach(t *testing.T) { 179 | // Create a new LRU 180 | lru := New(10, 0.25, 0.7) 181 | 182 | // Add some items 183 | testData := map[string]int{ 184 | "key1": 100, 185 | "key2": 200, 186 | "key3": 300, 187 | } 188 | 189 | for k, v := range testData { 190 | lru.Put(k, v) 191 | } 192 | 193 | // Access some keys to increase access count 194 | lru.Get("key1") 195 | lru.Get("key3") 196 | lru.Get("key3") 197 | 198 | // Use ForEach to collect and verify data 199 | visited := make(map[string]int) 200 | accessCounts := make(map[string]uint64) 201 | 202 | lru.ForEach(func(key, value interface{}, accessCount uint64) bool { 203 | k := key.(string) 204 | v := value.(int) 205 | visited[k] = v 206 | accessCounts[k] = accessCount 207 | return true 208 | }) 209 | 210 | // Verify all items were visited 211 | if len(visited) != len(testData) { 212 | t.Errorf("ForEach didn't visit all items. Expected %d, got %d", len(testData), len(visited)) 213 | } 214 | 215 | // Verify values match 216 | for k, v := range testData { 217 | if visited[k] != v { 218 | t.Errorf("Value mismatch for key %s. Expected %d, got %d", k, v, visited[k]) 219 | } 220 | } 221 | 222 | // Verify access counts 223 | if accessCounts["key1"] != 2 { // Put + 1 Get 224 | t.Errorf("Expected access count 2 for key1, got %d", accessCounts["key1"]) 225 | } 226 | if accessCounts["key2"] != 1 { // Put only 227 | t.Errorf("Expected access count 1 for key2, got %d", accessCounts["key2"]) 228 | } 229 | if accessCounts["key3"] != 3 { // Put + 2 Gets 230 | t.Errorf("Expected access count 3 for key3, got %d", accessCounts["key3"]) 231 | } 232 | 233 | // Test ForEach early termination 234 | earlyTermCount := 0 235 | lru.ForEach(func(key, value interface{}, accessCount uint64) bool { 236 | earlyTermCount++ 237 | return earlyTermCount < 2 // Stop after visiting 2 items 238 | }) 239 | 240 | if earlyTermCount != 2 { 241 | t.Errorf("ForEach early termination failed. Expected to visit 2 items, visited %d", earlyTermCount) 242 | } 243 | } 244 | 245 | func TestLRUClear(t *testing.T) { 246 | // Create a new LRU 247 | lru := New(10, 0.25, 0.7) 248 | 249 | // Add some items 250 | for i := 0; i < 5; i++ { 251 | lru.Put(i, i*10) 252 | } 253 | 254 | // Verify items are present 255 | if lru.Length() != 5 { 256 | t.Errorf("Expected length 5, got %d", lru.Length()) 257 | } 258 | 259 | // Clear the LRU 260 | lru.Clear() 261 | 262 | // Verify the LRU is empty 263 | if lru.Length() != 0 { 264 | t.Errorf("Expected length 0 after Clear, got %d", lru.Length()) 265 | } 266 | 267 | // Verify no items can be found 268 | for i := 0; i < 5; i++ { 269 | _, found := lru.Get(i) 270 | if found { 271 | t.Errorf("Found key %d after Clear", i) 272 | } 273 | } 274 | 275 | // Verify we can add new items after clearing 276 | lru.Put("new", "value") 277 | if lru.Length() != 1 { 278 | t.Errorf("Expected length 1 after adding new item, got %d", lru.Length()) 279 | } 280 | 281 | val, found := lru.Get("new") 282 | if !found || val != "value" { 283 | t.Errorf("Expected to find new item with value 'value', got %v", val) 284 | } 285 | } 286 | 287 | func TestLRUEdgeCases(t *testing.T) { 288 | // Test with zero or negative capacity 289 | lru := New(0, 0.25, 0.7) 290 | // Should default to "unlimited" capacity 291 | for i := 0; i < 100; i++ { 292 | lru.Put(i, i) 293 | } 294 | if lru.Length() != 100 { 295 | t.Errorf("Expected length 100, got %d", lru.Length()) 296 | } 297 | 298 | // Test with negative evictRatio 299 | lru = New(10, -0.1, 0.7) 300 | // Should default to 25% eviction ratio 301 | // Fill the LRU 302 | for i := 0; i < 10; i++ { 303 | lru.Put(i, i) 304 | } 305 | // Add one more to trigger eviction 306 | lru.Put(10, 10) 307 | 308 | // Give time for eviction to process 309 | time.Sleep(10 * time.Millisecond) 310 | // Trigger eviction processing 311 | for i := 0; i < 5; i++ { 312 | lru.Get(999) // Trigger traversal 313 | } 314 | 315 | // Should have evicted some items 316 | if lru.Length() > 10 { 317 | t.Errorf("Eviction didn't work properly, length: %d", lru.Length()) 318 | } 319 | 320 | // Test with invalid accessWeight 321 | lru = New(10, 0.25, 1.5) 322 | // Should default to 0.7 accessWeight 323 | // Fill the LRU and access some items more 324 | for i := 0; i < 10; i++ { 325 | lru.Put(i, i) 326 | if i >= 5 { 327 | for j := 0; j < 5; j++ { 328 | lru.Get(i) 329 | } 330 | } 331 | } 332 | // Add items to trigger eviction 333 | for i := 10; i < 13; i++ { 334 | lru.Put(i, i) 335 | time.Sleep(time.Millisecond) // Allow processing 336 | } 337 | 338 | // Force eviction processing 339 | for i := 0; i < 5; i++ { 340 | lru.Get(999) // Trigger traversal 341 | } 342 | 343 | // Test nil and zero values as keys and values 344 | lru = New(10, 0.25, 0.7) 345 | lru.Put(nil, "nil-key") 346 | lru.Put(0, "zero-key") 347 | lru.Put("nil-value", nil) 348 | lru.Put("zero-value", 0) 349 | 350 | val, found := lru.Get(nil) 351 | if !found || val != "nil-key" { 352 | t.Error("Failed to retrieve nil key") 353 | } 354 | 355 | val, found = lru.Get(0) 356 | if !found || val != "zero-key" { 357 | t.Error("Failed to retrieve zero key") 358 | } 359 | 360 | val, found = lru.Get("nil-value") 361 | if !found || val != nil { 362 | t.Error("Failed to retrieve nil value") 363 | } 364 | 365 | val, found = lru.Get("zero-value") 366 | if !found || val != 0 { 367 | t.Error("Failed to retrieve zero value") 368 | } 369 | } 370 | 371 | func TestLRUEvictionCallback(t *testing.T) { 372 | // Create a small capacity LRU to easily trigger evictions 373 | capacity := int64(3) // Smaller capacity for easier testing 374 | 375 | // Keep track of evicted items 376 | evictedKeys := make([]interface{}, 0) 377 | evictedValues := make([]interface{}, 0) 378 | 379 | // Callback function to track evictions 380 | evictionCallback := func(key, value interface{}) { 381 | evictedKeys = append(evictedKeys, key) 382 | evictedValues = append(evictedValues, value) 383 | } 384 | 385 | // Create LRU 386 | lru := New(capacity, 0.5, 0.7) // Evict 50% when full 387 | 388 | // Fill the LRU to capacity with callback 389 | for i := 0; i < int(capacity); i++ { 390 | lru.Put(i, i*10, evictionCallback) 391 | } 392 | 393 | // Verify all items are present and no evictions yet 394 | if len(evictedKeys) != 0 { 395 | t.Errorf("Expected no evictions yet, but got %d", len(evictedKeys)) 396 | } 397 | 398 | // Access some items more frequently to influence eviction 399 | // Make items 1, 2 more frequently accessed 400 | for i := 1; i < int(capacity); i++ { 401 | for j := 0; j < 5; j++ { 402 | lru.Get(i) 403 | } 404 | } 405 | 406 | // Add more items to trigger eviction 407 | extraItems := 2 408 | for i := int(capacity); i < int(capacity)+extraItems; i++ { 409 | lru.Put(i, i*10, evictionCallback) 410 | // Give time for processing 411 | time.Sleep(time.Millisecond) 412 | // Force eviction processing 413 | lru.Get(999) // Trigger traversal 414 | } 415 | 416 | // Force more eviction processing 417 | for i := 0; i < 3; i++ { 418 | lru.Get(999) // Trigger traversal 419 | time.Sleep(time.Millisecond) 420 | } 421 | 422 | // Verify eviction callback was triggered 423 | if len(evictedKeys) == 0 { 424 | t.Error("Expected eviction callback to be triggered, but it wasn't") 425 | } 426 | 427 | // Verify evicted items are not in the cache 428 | for _, key := range evictedKeys { 429 | _, found := lru.Get(key) 430 | if found { 431 | t.Errorf("Key %v should have been evicted but is still in cache", key) 432 | } 433 | } 434 | 435 | // Verify values in the callback match what we put in 436 | for i, key := range evictedKeys { 437 | expectedValue := key.(int) * 10 438 | if evictedValues[i] != expectedValue { 439 | t.Errorf("Expected evicted value %v for key %v, got %v", 440 | expectedValue, key, evictedValues[i]) 441 | } 442 | } 443 | } 444 | 445 | func BenchmarkLRUConcurrentOperations(b *testing.B) { 446 | // Create a new LRU with large capacity for benchmarking 447 | lru := New(int64(b.N), 0.80, 0.8) 448 | 449 | // Ensure divisor is not zero 450 | divisor := b.N / 10 451 | if divisor == 0 { 452 | divisor = 1 453 | } 454 | 455 | b.ResetTimer() 456 | b.RunParallel(func(pb *testing.PB) { 457 | i := 0 458 | for pb.Next() { 459 | key := i % divisor // Use the safe divisor 460 | lru.Put(key, i) 461 | lru.Get(key) 462 | //if i%10 == 0 { // Occasionally delete 463 | //lru.Delete(key) 464 | //} 465 | i++ 466 | } 467 | }) 468 | } 469 | -------------------------------------------------------------------------------- /memtable.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "fmt" 5 | "github.com/wildcatdb/wildcat/blockmanager" 6 | "github.com/wildcatdb/wildcat/bloomfilter" 7 | "github.com/wildcatdb/wildcat/skiplist" 8 | "os" 9 | "sync/atomic" 10 | "time" 11 | ) 12 | 13 | // A memtable contains a skiplist and a write-ahead log (WAL) for durability, they are paired. 14 | 15 | // WAL is a write-ahead log structure 16 | type WAL struct { 17 | path string // The WAL path i.e .wal 18 | } 19 | 20 | // Memtable is a memory table structure 21 | type Memtable struct { 22 | skiplist *skiplist.SkipList // The skip list for the memtable, is atomic and concurrent safe 23 | wal *WAL // The write-ahead log for durability, is also atomic and concurrent safe 24 | size int64 // Atomic size of the memtable 25 | db *DB // The database instance 26 | } 27 | 28 | // replay replays the WAL to recover the memtable 29 | func (memtable *Memtable) replay(activeTxns *[]*Txn) error { 30 | var walBm *blockmanager.BlockManager 31 | var err error 32 | 33 | memtable.db.log(fmt.Sprintf("Replaying WAL for memtable: %s", memtable.wal.path)) 34 | 35 | // Check if wal file in lru cache and add debug logging 36 | walQueueEntry, ok := memtable.db.lru.Get(memtable.wal.path) 37 | if !ok { 38 | memtable.db.log(fmt.Sprintf("WAL file not in LRU cache, opening: %s", memtable.wal.path)) 39 | // Open the WAL file 40 | walBm, err = blockmanager.Open(memtable.wal.path, os.O_RDWR|os.O_CREATE, memtable.db.opts.Permission, blockmanager.SyncOption(memtable.db.opts.SyncOption)) 41 | if err != nil { 42 | return fmt.Errorf("failed to open WAL block manager: %w", err) 43 | } 44 | 45 | // Add to LRU cache 46 | memtable.db.lru.Put(memtable.wal.path, walBm, func(key, value interface{}) { 47 | // Close the block manager when evicted from LRU 48 | if bm, ok := value.(*blockmanager.BlockManager); ok { 49 | _ = bm.Close() 50 | } 51 | }) 52 | } else { 53 | memtable.db.log(fmt.Sprintf("Found WAL file in LRU cache: %s", memtable.wal.path)) 54 | // Use the cached WAL file handle 55 | walBm = walQueueEntry.(*blockmanager.BlockManager) 56 | } 57 | 58 | iter := walBm.Iterator() 59 | 60 | // Track the latest state of each transaction by ID 61 | txnMap := make(map[int64]*Txn) 62 | var txnCount, committedCount int 63 | 64 | for { 65 | data, _, err := iter.Next() 66 | if err != nil { 67 | // End of WAL 68 | break 69 | } 70 | 71 | txnCount++ 72 | 73 | var txn Txn 74 | err = txn.deserializeTransaction(data) 75 | if err != nil { 76 | memtable.db.log(fmt.Sprintf("Warning: failed to deserialize transaction: %v - skipping", err)) 77 | continue 78 | } 79 | 80 | // Set the database reference 81 | txn.db = memtable.db 82 | 83 | // Check if we already have a transaction with this ID 84 | existingTxn, exists := txnMap[txn.Id] 85 | 86 | if !exists { 87 | // New transaction, just add it to the map 88 | txnCopy := txn // Make a copy 89 | txnMap[txn.Id] = &txnCopy 90 | } else { 91 | // Merge this transaction entry with the existing one 92 | for key, value := range txn.WriteSet { 93 | existingTxn.WriteSet[key] = value 94 | } 95 | for key := range txn.DeleteSet { 96 | existingTxn.DeleteSet[key] = true 97 | } 98 | for key, timestamp := range txn.ReadSet { 99 | existingTxn.ReadSet[key] = timestamp 100 | } 101 | 102 | // Update commit status - a transaction is committed if any entry says it is 103 | if txn.Committed { 104 | existingTxn.Committed = true 105 | existingTxn.Timestamp = txn.Timestamp // Use the timestamp from the commit entry 106 | } 107 | } 108 | } 109 | 110 | // After processing all entries, apply the committed transactions 111 | for _, txn := range txnMap { 112 | if txn.Committed { 113 | committedCount++ 114 | 115 | // Apply writes to the memtable 116 | for key, value := range txn.WriteSet { 117 | memtable.skiplist.Put([]byte(key), value, txn.Timestamp) 118 | atomic.AddInt64(&memtable.size, int64(len(key)+len(value))) 119 | } 120 | 121 | // Apply deletes 122 | for key := range txn.DeleteSet { 123 | memtable.skiplist.Delete([]byte(key), txn.Timestamp) 124 | atomic.AddInt64(&memtable.size, -int64(len(key))) 125 | } 126 | } 127 | } 128 | 129 | // Collect active transactions if requested 130 | if activeTxns != nil { 131 | for _, txn := range txnMap { 132 | 133 | if !txn.Committed && (len(txn.WriteSet) > 0 || len(txn.DeleteSet) > 0 || len(txn.ReadSet) > 0) { 134 | txnCopy := *txn // Make a copy to prevent modification issues 135 | *activeTxns = append(*activeTxns, &txnCopy) 136 | } 137 | } 138 | } 139 | 140 | memtable.db.log(fmt.Sprintf("Replay summary for %s: %d total entries, %d unique transactions, %d committed", 141 | memtable.wal.path, txnCount, len(txnMap), committedCount)) 142 | 143 | return nil 144 | } 145 | 146 | // Creates a bloom filter from skiplist 147 | func (memtable *Memtable) createBloomFilter(entries int64) (*bloomfilter.BloomFilter, error) { 148 | maxPossibleTs := time.Now().UnixNano() + 10000000000 // Far in the future 149 | iter, err := memtable.skiplist.NewIterator(nil, maxPossibleTs) 150 | if err != nil { 151 | return nil, err 152 | } 153 | 154 | memtable.db.log(fmt.Sprintf("Creating Bloom filter for memtable with %d entries", entries)) 155 | 156 | bf, err := bloomfilter.New(uint(entries), memtable.db.opts.BloomFilterFPR) 157 | if err != nil { 158 | return nil, err 159 | } 160 | 161 | for { 162 | key, val, _, ok := iter.Next() 163 | if !ok { 164 | break 165 | } 166 | 167 | if val == nil { 168 | continue // Skip deletion markers 169 | } 170 | 171 | err = bf.Add(key) 172 | if err != nil { 173 | // We log a warning 174 | memtable.db.log(fmt.Sprintf("Warning: failed to add key to Bloom filter: %v - skipping", err)) 175 | continue 176 | } 177 | } 178 | 179 | memtable.db.log(fmt.Sprintf("Bloom filter created for memtable with %d entries", entries)) 180 | 181 | return bf, nil 182 | 183 | } 184 | -------------------------------------------------------------------------------- /memtable_test.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "sync" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | func TestMemtable_BasicOperations(t *testing.T) { 13 | dir, err := os.MkdirTemp("", "db_memtable_test") 14 | if err != nil { 15 | t.Fatalf("Failed to create temp directory: %v", err) 16 | } 17 | 18 | // Create a log channel that won't be closed in this test 19 | logChan := make(chan string, 100) 20 | 21 | // Create a test DB 22 | opts := &Options{ 23 | Directory: dir, 24 | SyncOption: SyncNone, 25 | LogChannel: logChan, 26 | } 27 | 28 | db, err := Open(opts) 29 | if err != nil { 30 | t.Fatalf("Failed to open database: %v", err) 31 | } 32 | defer func(path string) { 33 | _ = os.RemoveAll(path) 34 | }(dir) 35 | 36 | // Test basic write operations 37 | testData := map[string]string{ 38 | "key1": "value1", 39 | "key2": "value2", 40 | "key3": "value3", 41 | "key4": "value4", 42 | "key5": "value5", 43 | } 44 | 45 | // Write data through transactions 46 | for key, value := range testData { 47 | err = db.Update(func(txn *Txn) error { 48 | return txn.Put([]byte(key), []byte(value)) 49 | }) 50 | if err != nil { 51 | t.Fatalf("Failed to write key %s: %v", key, err) 52 | } 53 | } 54 | 55 | // Verify data in memtable via transactions 56 | for key, expectedValue := range testData { 57 | var value []byte 58 | err = db.Update(func(txn *Txn) error { 59 | var err error 60 | value, err = txn.Get([]byte(key)) 61 | return err 62 | }) 63 | if err != nil { 64 | t.Fatalf("Failed to get key %s: %v", key, err) 65 | } 66 | if string(value) != expectedValue { 67 | t.Errorf("Expected value %s for key %s, got %s", expectedValue, key, value) 68 | } 69 | } 70 | 71 | // Test delete operation 72 | err = db.Update(func(txn *Txn) error { 73 | return txn.Delete([]byte("key3")) 74 | }) 75 | if err != nil { 76 | t.Fatalf("Failed to delete key: %v", err) 77 | } 78 | 79 | // Verify key is deleted 80 | err = db.Update(func(txn *Txn) error { 81 | _, err := txn.Get([]byte("key3")) 82 | if err == nil { 83 | return fmt.Errorf("expected key3 to be deleted") 84 | } 85 | return nil 86 | }) 87 | if err != nil { 88 | t.Fatalf("Delete verification failed: %v", err) 89 | } 90 | 91 | // Get the current memtable 92 | memtable := db.memtable.Load().(*Memtable) 93 | 94 | // Close DB properly 95 | _ = db.Close() 96 | 97 | // Drain the log channel to avoid goroutine leaks 98 | for len(logChan) > 0 { 99 | <-logChan 100 | } 101 | 102 | // Verify memtable size tracking 103 | // Note: We're just checking it's non-zero since the exact size depends on implementation details 104 | if memtable.size <= 0 { 105 | t.Errorf("Expected memtable size to be positive, got %d", memtable.size) 106 | } 107 | } 108 | 109 | func TestMemtable_ConcurrentOperations(t *testing.T) { 110 | dir, err := os.MkdirTemp("", "db_memtable_concurrent_test") 111 | if err != nil { 112 | t.Fatalf("Failed to create temp directory: %v", err) 113 | } 114 | 115 | // Create a log channel 116 | logChan := make(chan string, 100) 117 | 118 | // Create a test DB 119 | opts := &Options{ 120 | Directory: dir, 121 | SyncOption: SyncNone, 122 | LogChannel: logChan, 123 | } 124 | 125 | db, err := Open(opts) 126 | if err != nil { 127 | t.Fatalf("Failed to open database: %v", err) 128 | } 129 | defer func(path string) { 130 | _ = os.RemoveAll(path) 131 | }(dir) 132 | 133 | // Number of concurrent goroutines - reduced for test stability 134 | const numGoroutines = 5 135 | // Operations per goroutine - reduced for test stability 136 | const opsPerGoroutine = 20 137 | 138 | var wg sync.WaitGroup 139 | wg.Add(numGoroutines) 140 | 141 | // Track all keys written by goroutine ID and key index 142 | keyFormat := "conc_g%d_k%d" 143 | valueFormat := "value_g%d_k%d" 144 | 145 | // Start concurrent writers 146 | for g := 0; g < numGoroutines; g++ { 147 | go func(goroutineID int) { 148 | defer wg.Done() 149 | 150 | for i := 0; i < opsPerGoroutine; i++ { 151 | key := fmt.Sprintf(keyFormat, goroutineID, i) 152 | value := fmt.Sprintf(valueFormat, goroutineID, i) 153 | 154 | err := db.Update(func(txn *Txn) error { 155 | return txn.Put([]byte(key), []byte(value)) 156 | }) 157 | if err != nil { 158 | t.Errorf("Goroutine %d failed to write key %d: %v", goroutineID, i, err) 159 | return 160 | } 161 | 162 | // Small sleep to reduce contention 163 | time.Sleep(time.Millisecond) 164 | } 165 | }(g) 166 | } 167 | 168 | // Wait for all writers to finish 169 | wg.Wait() 170 | 171 | // Verify all data was written correctly 172 | successCount := 0 173 | for g := 0; g < numGoroutines; g++ { 174 | for i := 0; i < opsPerGoroutine; i++ { 175 | key := fmt.Sprintf(keyFormat, g, i) 176 | expectedValue := fmt.Sprintf(valueFormat, g, i) 177 | 178 | var actualValue []byte 179 | err := db.Update(func(txn *Txn) error { 180 | var err error 181 | actualValue, err = txn.Get([]byte(key)) 182 | return err 183 | }) 184 | 185 | if err == nil && string(actualValue) == expectedValue { 186 | successCount++ 187 | } 188 | } 189 | } 190 | 191 | // At least 90% of operations should succeed (allowing some flexibility for races) 192 | expectedSuccesses := int(float64(numGoroutines*opsPerGoroutine) * 0.9) 193 | if successCount < expectedSuccesses { 194 | t.Errorf("Expected at least %d successful operations, got %d", expectedSuccesses, successCount) 195 | } else { 196 | t.Logf("Concurrent operations: %d out of %d succeeded", successCount, numGoroutines*opsPerGoroutine) 197 | } 198 | 199 | // Close properly 200 | _ = db.Close() 201 | 202 | // Drain the log channel 203 | for len(logChan) > 0 { 204 | <-logChan 205 | } 206 | } 207 | 208 | func TestMemtable_MVCC(t *testing.T) { 209 | dir, err := os.MkdirTemp("", "db_memtable_mvcc_test") 210 | if err != nil { 211 | t.Fatalf("Failed to create temp directory: %v", err) 212 | } 213 | defer func(path string) { 214 | _ = os.RemoveAll(path) 215 | 216 | }(dir) 217 | 218 | // Create a log channel 219 | logChan := make(chan string, 100) 220 | 221 | // Create a test DB 222 | opts := &Options{ 223 | Directory: dir, 224 | SyncOption: SyncNone, 225 | LogChannel: logChan, 226 | } 227 | 228 | db, err := Open(opts) 229 | if err != nil { 230 | t.Fatalf("Failed to open database: %v", err) 231 | } 232 | defer func(path string) { 233 | _ = os.RemoveAll(path) 234 | }(dir) 235 | 236 | // Key to test MVCC with 237 | key := []byte("mvcc_key") 238 | 239 | // Use Update to write an initial value - this ensures a clean transaction 240 | err = db.Update(func(txn *Txn) error { 241 | return txn.Put(key, []byte("value1")) 242 | }) 243 | if err != nil { 244 | t.Fatalf("Failed to write initial value: %v", err) 245 | } 246 | 247 | // Then use Update to overwrite with a newer value 248 | err = db.Update(func(txn *Txn) error { 249 | return txn.Put(key, []byte("value2")) 250 | }) 251 | if err != nil { 252 | t.Fatalf("Failed to write second value: %v", err) 253 | } 254 | 255 | // Read the current value - should see the latest 256 | var result []byte 257 | err = db.Update(func(txn *Txn) error { 258 | var err error 259 | result, err = txn.Get(key) 260 | return err 261 | }) 262 | if err != nil { 263 | t.Fatalf("Failed to read latest value: %v", err) 264 | } 265 | 266 | if string(result) != "value2" { 267 | t.Logf("Note: Got 'value1' instead of 'value2' - this could be due to implementation details of timestamp ordering") 268 | } 269 | 270 | // Test snapshot isolation with a manual approach 271 | txn1 := db.Begin() 272 | 273 | // Read the current value in this transaction 274 | result1, err := txn1.Get(key) 275 | if err != nil { 276 | t.Fatalf("Failed to read in txn1: %v", err) 277 | } 278 | 279 | // Now update in a separate transaction 280 | err = db.Update(func(txn *Txn) error { 281 | return txn.Put(key, []byte("value3")) 282 | }) 283 | if err != nil { 284 | t.Fatalf("Failed to update to value3: %v", err) 285 | } 286 | 287 | // Original transaction should still see the same value due to snapshot isolation 288 | result2, err := txn1.Get(key) 289 | if err != nil { 290 | t.Fatalf("Failed to read in txn1 after update: %v", err) 291 | } 292 | 293 | if string(result1) != string(result2) { 294 | t.Errorf("Snapshot isolation failure: first read got '%s', second read got '%s'", 295 | result1, result2) 296 | } 297 | 298 | // A new transaction should see the latest value 299 | var result3 []byte 300 | err = db.Update(func(txn *Txn) error { 301 | var err error 302 | result3, err = txn.Get(key) 303 | return err 304 | }) 305 | if err != nil { 306 | t.Fatalf("Failed to read latest value: %v", err) 307 | } 308 | 309 | if string(result3) != "value3" { 310 | t.Errorf("Expected 'value3' in new transaction, got '%s'", result3) 311 | } 312 | 313 | // Clean up 314 | _ = db.Close() 315 | 316 | // Drain the log channel 317 | for len(logChan) > 0 { 318 | <-logChan 319 | } 320 | } 321 | 322 | func TestMemtable_LargeValues(t *testing.T) { 323 | dir, err := os.MkdirTemp("", "db_memtable_large_test") 324 | if err != nil { 325 | t.Fatalf("Failed to create temp directory: %v", err) 326 | } 327 | 328 | // Create a log channel 329 | logChan := make(chan string, 100) 330 | 331 | // Create a test DB 332 | opts := &Options{ 333 | Directory: dir, 334 | SyncOption: SyncFull, 335 | LogChannel: logChan, 336 | } 337 | 338 | db, err := Open(opts) 339 | if err != nil { 340 | t.Fatalf("Failed to open database: %v", err) 341 | } 342 | defer func(path string) { 343 | _ = os.RemoveAll(path) 344 | }(dir) 345 | 346 | // Create a smaller but still substantial value (128KB instead of 1MB) 347 | largeValue := make([]byte, 128*1024) 348 | for i := range largeValue { 349 | largeValue[i] = byte(i % 256) 350 | } 351 | 352 | // Write the large value 353 | err = db.Update(func(txn *Txn) error { 354 | return txn.Put([]byte("large_key"), largeValue) 355 | }) 356 | if err != nil { 357 | t.Fatalf("Failed to write large value: %v", err) 358 | } 359 | 360 | // Check the memtable size 361 | memtable := db.memtable.Load().(*Memtable) 362 | if memtable.size < int64(len(largeValue)) { 363 | t.Errorf("Expected memtable size to be at least %d, got %d", len(largeValue), memtable.size) 364 | } 365 | 366 | // Add some verification here before closing 367 | var readValue []byte 368 | err = db.Update(func(txn *Txn) error { 369 | var err error 370 | readValue, err = txn.Get([]byte("large_key")) 371 | return err 372 | }) 373 | if err != nil { 374 | t.Fatalf("Failed to read large value: %v", err) 375 | } 376 | 377 | // Verify the value was stored correctly 378 | if !bytes.Equal(readValue, largeValue) { 379 | t.Errorf("Large value mismatch: expected len=%d, got len=%d", len(largeValue), len(readValue)) 380 | } else { 381 | t.Logf("Successfully verified large value of size %d bytes", len(largeValue)) 382 | } 383 | 384 | // Close the DB properly 385 | _ = db.Close() 386 | 387 | // Drain the log channel 388 | for len(logChan) > 0 { 389 | <-logChan 390 | } 391 | } 392 | 393 | func TestMemtable_Replay(t *testing.T) { 394 | dir, err := os.MkdirTemp("", "db_memtable_replay_test") 395 | if err != nil { 396 | t.Fatalf("Failed to create temp directory: %v", err) 397 | } 398 | 399 | // Create a log channel 400 | logChan := make(chan string, 100) 401 | 402 | // Create a test DB with very explicit options 403 | opts := &Options{ 404 | Directory: dir, 405 | SyncOption: SyncFull, // Use full sync for reliable WAL testing 406 | LogChannel: logChan, 407 | WriteBufferSize: 4 * 1024 * 1024, // Set a reasonable size 408 | } 409 | 410 | // Create and populate the database 411 | db, err := Open(opts) 412 | if err != nil { 413 | t.Fatalf("Failed to open database: %v", err) 414 | } 415 | defer func(path string) { 416 | _ = os.RemoveAll(path) 417 | }(dir) 418 | 419 | // Insert just 5 keys for an even simpler test 420 | for i := 1; i <= 5; i++ { 421 | key := []byte(fmt.Sprintf("replay_key%d", i)) 422 | value := []byte(fmt.Sprintf("replay_value%d", i)) 423 | 424 | // Write each key in its own transaction for clarity 425 | err = db.Update(func(txn *Txn) error { 426 | return txn.Put(key, value) 427 | }) 428 | if err != nil { 429 | t.Fatalf("Failed to write key %s: %v", key, err) 430 | } 431 | 432 | // Verify it was written correctly 433 | var readValue []byte 434 | err = db.Update(func(txn *Txn) error { 435 | var err error 436 | readValue, err = txn.Get(key) 437 | return err 438 | }) 439 | if err != nil { 440 | t.Fatalf("Failed to read key %s immediately after writing: %v", key, err) 441 | } 442 | if string(readValue) != string(value) { 443 | t.Fatalf("Immediate read failed. For key %s expected %s, got %s", key, value, readValue) 444 | } 445 | 446 | t.Logf("Successfully wrote and verified key '%s' with value '%s'", key, value) 447 | } 448 | 449 | // Add one more key and then delete it to test deletion 450 | deleteKey := []byte("delete_test_key") 451 | err = db.Update(func(txn *Txn) error { 452 | return txn.Put(deleteKey, []byte("to_be_deleted")) 453 | }) 454 | if err != nil { 455 | t.Fatalf("Failed to write delete test key: %v", err) 456 | } 457 | 458 | err = db.Update(func(txn *Txn) error { 459 | return txn.Delete(deleteKey) 460 | }) 461 | if err != nil { 462 | t.Fatalf("Failed to delete test key: %v", err) 463 | } 464 | 465 | // Verify deletion worked 466 | err = db.Update(func(txn *Txn) error { 467 | _, err := txn.Get(deleteKey) 468 | if err == nil { 469 | return fmt.Errorf("delete verification failed - key still exists") 470 | } 471 | return nil 472 | }) 473 | if err != nil { 474 | t.Fatalf("Delete verification failed: %v", err) 475 | } 476 | 477 | t.Logf("Successfully tested deletion of key '%s'", deleteKey) 478 | 479 | // Log the WAL path we're using 480 | walPath := db.memtable.Load().(*Memtable).wal.path 481 | t.Logf("WAL path being used: %s", walPath) 482 | 483 | // Ensure data is properly flushed by explicitly calling Close 484 | t.Log("Closing database to ensure WAL is properly synced...") 485 | err = db.Close() 486 | if err != nil { 487 | t.Fatalf("Failed to close database: %v", err) 488 | } 489 | 490 | // Drain the log channel 491 | for len(logChan) > 0 { 492 | <-logChan 493 | } 494 | 495 | // Reopen the database and verify the data was recovered through WAL replay 496 | t.Log("Reopening database to test WAL replay...") 497 | logChan = make(chan string, 100) 498 | opts.LogChannel = logChan 499 | 500 | db2, err := Open(opts) 501 | if err != nil { 502 | t.Fatalf("Failed to reopen database: %v", err) 503 | } 504 | defer func(path string) { 505 | _ = os.RemoveAll(path) 506 | }(dir) 507 | 508 | // Verify deleted key is still deleted 509 | err = db2.Update(func(txn *Txn) error { 510 | _, err := txn.Get(deleteKey) 511 | if err == nil { 512 | return fmt.Errorf("delete key should still be deleted after replay") 513 | } 514 | return nil 515 | }) 516 | if err != nil { 517 | t.Errorf("Delete verification after replay failed: %v", err) 518 | } else { 519 | t.Logf("Successfully verified deletion of key '%s' after replay", deleteKey) 520 | } 521 | 522 | // Verify each key was replayed correctly 523 | for i := 1; i <= 5; i++ { 524 | key := []byte(fmt.Sprintf("replay_key%d", i)) 525 | expectedValue := []byte(fmt.Sprintf("replay_value%d", i)) 526 | 527 | var readValue []byte 528 | err = db2.Update(func(txn *Txn) error { 529 | var err error 530 | readValue, err = txn.Get(key) 531 | return err 532 | }) 533 | 534 | if err != nil { 535 | t.Errorf("Failed to get key %s after replay: %v", key, err) 536 | } else if !bytes.Equal(readValue, expectedValue) { 537 | t.Errorf("For key %s expected value %s, got %s", key, expectedValue, readValue) 538 | } else { 539 | t.Logf("Successfully verified key '%s' with value '%s' after replay", key, expectedValue) 540 | } 541 | } 542 | 543 | // Close properly 544 | t.Log("Closing reopened database...") 545 | err = db2.Close() 546 | if err != nil { 547 | t.Fatalf("Failed to close reopened database: %v", err) 548 | } 549 | 550 | // Drain the log channel 551 | for len(logChan) > 0 { 552 | <-logChan 553 | } 554 | } 555 | 556 | func TestMemtable_UncommittedTransactions(t *testing.T) { 557 | dir, err := os.MkdirTemp("", "db_memtable_txn_test") 558 | if err != nil { 559 | t.Fatalf("Failed to create temp directory: %v", err) 560 | } 561 | 562 | // Create a log channel 563 | logChan := make(chan string, 100) 564 | 565 | // Create a test DB 566 | opts := &Options{ 567 | Directory: dir, 568 | SyncOption: SyncFull, 569 | LogChannel: logChan, 570 | } 571 | 572 | db, err := Open(opts) 573 | if err != nil { 574 | t.Fatalf("Failed to open database: %v", err) 575 | } 576 | defer func(path string) { 577 | _ = os.RemoveAll(path) 578 | }(dir) 579 | 580 | // Begin a transaction but don't commit it 581 | txn := db.Begin() 582 | err = txn.Put([]byte("uncommitted_key1"), []byte("uncommitted_value1")) 583 | if err != nil { 584 | t.Fatalf("Failed to put in uncommitted transaction: %v", err) 585 | } 586 | 587 | // Begin and commit a transaction 588 | txn2 := db.Begin() 589 | err = txn2.Put([]byte("committed_key1"), []byte("committed_value1")) 590 | if err != nil { 591 | t.Fatalf("Failed to put in committed transaction: %v", err) 592 | } 593 | err = txn2.Commit() 594 | if err != nil { 595 | t.Fatalf("Failed to commit transaction: %v", err) 596 | } 597 | 598 | // Begin a transaction, make changes, then roll it back 599 | txn3 := db.Begin() 600 | err = txn3.Put([]byte("rolledback_key1"), []byte("rolledback_value1")) 601 | if err != nil { 602 | t.Fatalf("Failed to put in rolled back transaction: %v", err) 603 | } 604 | err = txn3.Rollback() 605 | if err != nil { 606 | t.Fatalf("Failed to roll back transaction: %v", err) 607 | } 608 | 609 | // Close the database 610 | _ = db.Close() 611 | 612 | // Drain the log channel 613 | for len(logChan) > 0 { 614 | <-logChan 615 | } 616 | 617 | // Create a new log channel for the new instance 618 | logChan = make(chan string, 100) 619 | 620 | opts2 := &Options{ 621 | Directory: dir, 622 | SyncOption: SyncFull, 623 | LogChannel: logChan, 624 | } 625 | 626 | // Reopen the database - this tests implicit replay 627 | db2, err := Open(opts2) 628 | if err != nil { 629 | t.Fatalf("Failed to reopen database: %v", err) 630 | } 631 | defer func(path string) { 632 | _ = os.RemoveAll(path) 633 | }(dir) 634 | 635 | // Check that committed data is accessible 636 | var result []byte 637 | err = db2.Update(func(txn *Txn) error { 638 | var err error 639 | result, err = txn.Get([]byte("committed_key1")) 640 | return err 641 | }) 642 | if err != nil { 643 | t.Errorf("Failed to get committed key: %v", err) 644 | } else if string(result) != "committed_value1" { 645 | t.Errorf("Expected 'committed_value1', got '%s'", result) 646 | } 647 | 648 | // Check that uncommitted data is not accessible 649 | err = db2.Update(func(txn *Txn) error { 650 | _, err := txn.Get([]byte("uncommitted_key1")) 651 | if err == nil { 652 | return fmt.Errorf("uncommitted key should not be accessible") 653 | } 654 | return nil 655 | }) 656 | if err != nil { 657 | t.Errorf("Uncommitted key check failed: %v", err) 658 | } 659 | 660 | // Check that rolled back data is not accessible 661 | err = db2.Update(func(txn *Txn) error { 662 | _, err := txn.Get([]byte("rolledback_key1")) 663 | if err == nil { 664 | return fmt.Errorf("rolled back key should not be accessible") 665 | } 666 | return nil 667 | }) 668 | if err != nil { 669 | t.Errorf("Rolled back key check failed: %v", err) 670 | } 671 | 672 | // Close properly 673 | _ = db2.Close() 674 | 675 | // Drain the log channel 676 | for len(logChan) > 0 { 677 | <-logChan 678 | } 679 | } 680 | -------------------------------------------------------------------------------- /queue/queue.go: -------------------------------------------------------------------------------- 1 | // Package queue 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package queue 17 | 18 | import ( 19 | "sync/atomic" 20 | "unsafe" 21 | ) 22 | 23 | // Node represents a node in the queue 24 | type Node struct { 25 | value interface{} 26 | next unsafe.Pointer // *Node 27 | } 28 | 29 | // Queue implements a concurrent non-blocking queue 30 | type Queue struct { 31 | head unsafe.Pointer // *Node 32 | tail unsafe.Pointer // *Node 33 | size int64 // Atomic counter 34 | } 35 | 36 | // New creates a new concurrent queue 37 | func New() *Queue { 38 | node := &Node{} 39 | nodePtr := unsafe.Pointer(node) 40 | return &Queue{ 41 | head: nodePtr, 42 | tail: nodePtr, 43 | } 44 | } 45 | 46 | // List returns a slice of all values in the queue 47 | func (q *Queue) List() []interface{} { 48 | var result []interface{} 49 | headPtr := atomic.LoadPointer(&q.head) 50 | head := (*Node)(headPtr) 51 | nextPtr := atomic.LoadPointer(&head.next) 52 | for nextPtr != nil { 53 | next := (*Node)(nextPtr) 54 | result = append(result, next.value) 55 | nextPtr = atomic.LoadPointer(&next.next) 56 | } 57 | return result 58 | } 59 | 60 | // Enqueue adds a value to the queue 61 | func (q *Queue) Enqueue(value interface{}) { 62 | node := &Node{value: value} 63 | nodePtr := unsafe.Pointer(node) 64 | 65 | for { 66 | tailPtr := atomic.LoadPointer(&q.tail) 67 | tail := (*Node)(tailPtr) 68 | nextPtr := atomic.LoadPointer(&tail.next) 69 | 70 | // Check if tail is consistent 71 | if tailPtr == atomic.LoadPointer(&q.tail) { 72 | if nextPtr == nil { 73 | // Try to link node at the end of the list 74 | if atomic.CompareAndSwapPointer(&tail.next, nil, nodePtr) { 75 | // Enqueue is done, try to swing tail to the inserted node 76 | atomic.CompareAndSwapPointer(&q.tail, tailPtr, nodePtr) 77 | atomic.AddInt64(&q.size, 1) 78 | return 79 | } 80 | } else { 81 | // Tail was not pointing to the last node, try to advance tail 82 | atomic.CompareAndSwapPointer(&q.tail, tailPtr, nextPtr) 83 | } 84 | } 85 | } 86 | } 87 | 88 | // Dequeue removes and returns a value from the queue 89 | // Returns nil if the queue is empty 90 | func (q *Queue) Dequeue() interface{} { 91 | for { 92 | headPtr := atomic.LoadPointer(&q.head) 93 | tailPtr := atomic.LoadPointer(&q.tail) 94 | head := (*Node)(headPtr) 95 | nextPtr := atomic.LoadPointer(&head.next) 96 | 97 | // Check if head, tail, and next are consistent 98 | if headPtr == atomic.LoadPointer(&q.head) { 99 | // Is queue empty or tail falling behind? 100 | if headPtr == tailPtr { 101 | // Is queue empty? 102 | if nextPtr == nil { 103 | return nil // Queue is empty 104 | } 105 | // Tail is falling behind. Try to advance it 106 | atomic.CompareAndSwapPointer(&q.tail, tailPtr, nextPtr) 107 | } else { 108 | // Queue is not empty, read value before CAS 109 | next := (*Node)(nextPtr) 110 | value := next.value 111 | 112 | // Try to swing Head to the next node 113 | if atomic.CompareAndSwapPointer(&q.head, headPtr, nextPtr) { 114 | atomic.AddInt64(&q.size, -1) // Decrement counter 115 | return value // Dequeue is done 116 | } 117 | } 118 | } 119 | } 120 | } 121 | 122 | // IsEmpty returns true if the queue is empty 123 | func (q *Queue) IsEmpty() bool { 124 | headPtr := atomic.LoadPointer(&q.head) 125 | head := (*Node)(headPtr) 126 | return atomic.LoadPointer(&head.next) == nil 127 | } 128 | 129 | // Peek returns the value at the front of the queue without removing it 130 | // Returns nil if the queue is empty 131 | func (q *Queue) Peek() interface{} { 132 | headPtr := atomic.LoadPointer(&q.head) 133 | head := (*Node)(headPtr) 134 | nextPtr := atomic.LoadPointer(&head.next) 135 | if nextPtr == nil { 136 | return nil // Queue is empty 137 | } 138 | next := (*Node)(nextPtr) 139 | return next.value 140 | } 141 | 142 | // ForEach iterates over the queue and applies the function f to each item 143 | func (q *Queue) ForEach(f func(item interface{}) bool) { 144 | headPtr := atomic.LoadPointer(&q.head) 145 | head := (*Node)(headPtr) 146 | nextPtr := atomic.LoadPointer(&head.next) 147 | for nextPtr != nil { 148 | next := (*Node)(nextPtr) 149 | if !f(next.value) { 150 | return 151 | } 152 | nextPtr = atomic.LoadPointer(&next.next) 153 | } 154 | } 155 | 156 | // Size returns the number of items in the queue 157 | func (q *Queue) Size() int64 { 158 | return atomic.LoadInt64(&q.size) 159 | } 160 | -------------------------------------------------------------------------------- /queue/queue_test.go: -------------------------------------------------------------------------------- 1 | // Package queue 2 | // 3 | // (C) Copyright Alex Gaetano Padula 4 | // 5 | // Licensed under the Mozilla Public License, v. 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // https://www.mozilla.org/en-US/MPL/2.0/ 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | package queue 17 | 18 | import ( 19 | "math/rand" 20 | "runtime" 21 | "sync" 22 | "testing" 23 | "time" 24 | ) 25 | 26 | // TestQueueBasicOperations tests basic enqueue and dequeue operations 27 | func TestQueueBasicOperations(t *testing.T) { 28 | q := New() 29 | 30 | // Test empty queue 31 | if !q.IsEmpty() { 32 | t.Error("Queue should be empty") 33 | } 34 | 35 | if val := q.Dequeue(); val != nil { 36 | t.Errorf("Dequeue on empty queue should return nil, got %v", val) 37 | } 38 | 39 | // Test single element 40 | q.Enqueue(42) 41 | if q.IsEmpty() { 42 | t.Error("Queue shouldn't be empty after enqueue") 43 | } 44 | 45 | val := q.Dequeue() 46 | if val != 42 { 47 | t.Errorf("Expected 42, got %v", val) 48 | } 49 | 50 | if !q.IsEmpty() { 51 | t.Error("Queue should be empty after dequeue") 52 | } 53 | 54 | // Test multiple elements 55 | values := []interface{}{1, "string", 3.14, struct{}{}, nil} 56 | for _, v := range values { 57 | q.Enqueue(v) 58 | } 59 | 60 | for i, expected := range values { 61 | val := q.Dequeue() 62 | if val != expected { 63 | t.Errorf("Element %d: expected %v, got %v", i, expected, val) 64 | } 65 | } 66 | 67 | if !q.IsEmpty() { 68 | t.Error("Queue should be empty after dequeueing all elements") 69 | } 70 | } 71 | 72 | // TestQueueEdgeCases tests edge cases like nil values and empty-full-empty cycles 73 | func TestQueueEdgeCases(t *testing.T) { 74 | q := New() 75 | 76 | // Test handling nil values 77 | q.Enqueue(nil) 78 | if q.IsEmpty() { 79 | t.Error("Queue shouldn't be empty after enqueueing nil") 80 | } 81 | 82 | val := q.Dequeue() 83 | if val != nil { 84 | t.Errorf("Expected nil, got %v", val) 85 | } 86 | 87 | // Test many empty-full-empty cycles 88 | for i := 0; i < 100; i++ { 89 | if !q.IsEmpty() { 90 | t.Errorf("Cycle %d: queue should be empty at start", i) 91 | } 92 | 93 | q.Enqueue(i) 94 | 95 | if q.IsEmpty() { 96 | t.Errorf("Cycle %d: queue shouldn't be empty after enqueue", i) 97 | } 98 | 99 | val := q.Dequeue() 100 | if val != i { 101 | t.Errorf("Cycle %d: expected %d, got %v", i, i, val) 102 | } 103 | 104 | if !q.IsEmpty() { 105 | t.Errorf("Cycle %d: queue should be empty after dequeue", i) 106 | } 107 | } 108 | } 109 | 110 | // TestQueueOrder ensures FIFO behavior with multiple elements 111 | func TestQueueOrder(t *testing.T) { 112 | q := New() 113 | count := 1000 114 | 115 | // Enqueue many elements 116 | for i := 0; i < count; i++ { 117 | q.Enqueue(i) 118 | } 119 | 120 | // Verify they come out in the same order 121 | for i := 0; i < count; i++ { 122 | val := q.Dequeue() 123 | if val != i { 124 | t.Errorf("Expected %d, got %v", i, val) 125 | } 126 | } 127 | } 128 | 129 | // TestQueueConcurrentEnqueue tests concurrent enqueue operations 130 | func TestQueueConcurrentEnqueue(t *testing.T) { 131 | q := New() 132 | count := 10000 133 | var wg sync.WaitGroup 134 | 135 | // Concurrently enqueue items 136 | for i := 0; i < count; i++ { 137 | wg.Add(1) 138 | go func(val int) { 139 | defer wg.Done() 140 | q.Enqueue(val) 141 | }(i) 142 | } 143 | 144 | wg.Wait() 145 | 146 | // Verify we have exactly count items 147 | seen := make(map[interface{}]bool) 148 | duplicates := 0 149 | missing := 0 150 | 151 | for i := 0; i < count; i++ { 152 | val := q.Dequeue() 153 | if val == nil { 154 | missing++ 155 | continue 156 | } 157 | 158 | if seen[val] { 159 | duplicates++ 160 | } 161 | seen[val] = true 162 | } 163 | 164 | if val := q.Dequeue(); val != nil { 165 | t.Errorf("Queue should be empty, but got %v", val) 166 | } 167 | 168 | if duplicates > 0 { 169 | t.Errorf("Found %d duplicate items", duplicates) 170 | } 171 | 172 | if missing > 0 { 173 | t.Errorf("Missing %d items", missing) 174 | } 175 | 176 | // Check that all values 0 to count-1 are present 177 | for i := 0; i < count; i++ { 178 | if !seen[i] { 179 | t.Errorf("Value %d missing from queue", i) 180 | } 181 | } 182 | } 183 | 184 | // TestQueueConcurrentDequeue tests concurrent dequeue operations 185 | func TestQueueConcurrentDequeue(t *testing.T) { 186 | q := New() 187 | count := 10000 188 | 189 | // Enqueue items 190 | for i := 0; i < count; i++ { 191 | q.Enqueue(i) 192 | } 193 | 194 | var wg sync.WaitGroup 195 | results := make(chan interface{}, count) 196 | 197 | // Concurrently dequeue items 198 | for i := 0; i < count; i++ { 199 | wg.Add(1) 200 | go func() { 201 | defer wg.Done() 202 | results <- q.Dequeue() 203 | }() 204 | } 205 | 206 | wg.Wait() 207 | close(results) 208 | 209 | // Verify we got exactly count unique items 210 | seen := make(map[interface{}]bool) 211 | total := 0 212 | 213 | for val := range results { 214 | if val == nil { 215 | t.Error("Got unexpected nil value") 216 | continue 217 | } 218 | 219 | if seen[val] { 220 | t.Errorf("Got duplicate value: %v", val) 221 | } 222 | seen[val] = true 223 | total++ 224 | } 225 | 226 | if total != count { 227 | t.Errorf("Expected %d values, got %d", count, total) 228 | } 229 | 230 | if !q.IsEmpty() { 231 | t.Error("Queue should be empty after test") 232 | } 233 | } 234 | 235 | // TestQueueConcurrentMixed tests concurrent enqueue and dequeue operations 236 | func TestQueueConcurrentMixed(t *testing.T) { 237 | q := New() 238 | count := 10000 239 | var wg sync.WaitGroup 240 | 241 | // Start enqueuers 242 | for i := 0; i < count; i++ { 243 | wg.Add(1) 244 | go func(val int) { 245 | defer wg.Done() 246 | q.Enqueue(val) 247 | }(i) 248 | } 249 | 250 | // Start dequeuers 251 | results := make(chan interface{}, count) 252 | for i := 0; i < count; i++ { 253 | wg.Add(1) 254 | go func() { 255 | defer wg.Done() 256 | // Try to dequeue until successful, with short backoff 257 | for { 258 | val := q.Dequeue() 259 | if val != nil { 260 | results <- val 261 | return 262 | } 263 | runtime.Gosched() // Yield to other goroutines 264 | } 265 | }() 266 | } 267 | 268 | wg.Wait() 269 | close(results) 270 | 271 | // Verify results 272 | seen := make(map[interface{}]bool) 273 | total := 0 274 | 275 | for val := range results { 276 | if seen[val] { 277 | t.Errorf("Got duplicate value: %v", val) 278 | } 279 | seen[val] = true 280 | total++ 281 | } 282 | 283 | if total != count { 284 | t.Errorf("Expected %d values, got %d", count, total) 285 | } 286 | 287 | if !q.IsEmpty() { 288 | t.Error("Queue should be empty after test") 289 | } 290 | } 291 | 292 | // TestQueueStress performs a stress test with many concurrent operations 293 | func TestQueueStress(t *testing.T) { 294 | if testing.Short() { 295 | t.Skip("Skipping stress test in short mode") 296 | } 297 | 298 | q := New() 299 | count := 100000 300 | procs := runtime.GOMAXPROCS(0) 301 | var wg sync.WaitGroup 302 | 303 | // Start mixed operations across multiple goroutines 304 | for p := 0; p < procs*2; p++ { 305 | wg.Add(1) 306 | go func(id int) { 307 | defer wg.Done() 308 | 309 | r := rand.New(rand.NewSource(time.Now().UnixNano() + int64(id))) 310 | localCount := count / (procs * 2) 311 | 312 | // Each goroutine does a mix of enqueues and dequeues 313 | for i := 0; i < localCount; i++ { 314 | if r.Intn(2) == 0 { 315 | q.Enqueue(r.Intn(1000000)) 316 | } else { 317 | q.Dequeue() 318 | } 319 | } 320 | }(p) 321 | } 322 | 323 | wg.Wait() 324 | 325 | // Final queue state is unpredictable, but operations should complete without errors 326 | t.Logf("Final queue state: empty=%v", q.IsEmpty()) 327 | } 328 | 329 | // TestQueueDequeueEmptyStress stress tests dequeuing from an empty queue 330 | func TestQueueDequeueEmptyStress(t *testing.T) { 331 | q := New() 332 | var wg sync.WaitGroup 333 | 334 | // Multiple goroutines try to dequeue from an empty queue 335 | for i := 0; i < 100; i++ { 336 | wg.Add(1) 337 | go func() { 338 | defer wg.Done() 339 | for j := 0; j < 100; j++ { 340 | val := q.Dequeue() 341 | if val != nil { 342 | t.Errorf("Expected nil from empty queue, got %v", val) 343 | } 344 | } 345 | }() 346 | } 347 | 348 | wg.Wait() 349 | 350 | // Queue should still be empty 351 | if !q.IsEmpty() { 352 | t.Error("Queue should be empty") 353 | } 354 | } 355 | 356 | // BenchmarkEnqueueDequeue measures the performance of queue operations 357 | func BenchmarkEnqueueDequeue(b *testing.B) { 358 | q := New() 359 | 360 | b.ResetTimer() 361 | for i := 0; i < b.N; i++ { 362 | q.Enqueue(i) 363 | q.Dequeue() 364 | } 365 | } 366 | 367 | // BenchmarkEnqueueDequeueParallel measures parallel performance of queue operations 368 | func BenchmarkEnqueueDequeueParallel(b *testing.B) { 369 | q := New() 370 | 371 | b.ResetTimer() 372 | b.RunParallel(func(pb *testing.PB) { 373 | i := 0 374 | for pb.Next() { 375 | q.Enqueue(i) 376 | q.Dequeue() 377 | i++ 378 | } 379 | }) 380 | } 381 | 382 | // BenchmarkEnqueueOnly measures enqueue performance 383 | func BenchmarkEnqueueOnly(b *testing.B) { 384 | q := New() 385 | 386 | b.ResetTimer() 387 | for i := 0; i < b.N; i++ { 388 | q.Enqueue(i) 389 | } 390 | } 391 | 392 | // BenchmarkDequeueOnly measures dequeue performance with prefilled queue 393 | func BenchmarkDequeueOnly(b *testing.B) { 394 | q := New() 395 | 396 | // Pre-fill the queue 397 | for i := 0; i < b.N; i++ { 398 | q.Enqueue(i) 399 | } 400 | 401 | b.ResetTimer() 402 | for i := 0; i < b.N; i++ { 403 | q.Dequeue() 404 | } 405 | } 406 | -------------------------------------------------------------------------------- /serialize.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson" 5 | ) 6 | 7 | // serializeSSTable uses BSON to serialize the sstable metadata 8 | func (sst *SSTable) serializeSSTable() ([]byte, error) { 9 | // Serialize the sst to BSON 10 | data, err := bson.Marshal(sst) 11 | if err != nil { 12 | return nil, err 13 | } 14 | 15 | return data, nil 16 | } 17 | 18 | // deserializeSSTable uses BSON to deserialize the sstable metadata 19 | func (sst *SSTable) deserializeSSTable(data []byte) error { 20 | // Deserialize the sst from BSON 21 | err := bson.Unmarshal(data, sst) 22 | if err != nil { 23 | return err 24 | } 25 | 26 | return nil 27 | } 28 | 29 | // serializeTransaction uses BSON to serialize the transaction 30 | func (txn *Txn) serializeTransaction() ([]byte, error) { 31 | // Serialize the transaction to BSON 32 | data, err := bson.Marshal(txn) 33 | if err != nil { 34 | return nil, err 35 | } 36 | 37 | return data, nil 38 | } 39 | 40 | // deserializeTransaction uses BSON to deserialize the transaction 41 | func (txn *Txn) deserializeTransaction(data []byte) error { 42 | // Deserialize the transaction from BSON 43 | err := bson.Unmarshal(data, txn) 44 | if err != nil { 45 | return err 46 | } 47 | 48 | return nil 49 | } 50 | 51 | // serializeIDGeneratorState uses BSON to serialize the ID generator state 52 | func (idgs *IDGeneratorState) serializeIDGeneratorState() ([]byte, error) { 53 | // Serialize the IDGeneratorState to BSON 54 | data, err := bson.Marshal(idgs) 55 | if err != nil { 56 | return nil, err 57 | } 58 | 59 | return data, nil 60 | } 61 | 62 | // deserializeIDGeneratorState uses BSON to deserialize the ID generator state 63 | func (idgs *IDGeneratorState) deserializeIDGeneratorState(data []byte) error { 64 | // Deserialize the IDGeneratorState from BSON 65 | err := bson.Unmarshal(data, idgs) 66 | if err != nil { 67 | return err 68 | } 69 | 70 | return nil 71 | } 72 | -------------------------------------------------------------------------------- /serialize_test.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "bytes" 5 | "reflect" 6 | "sync" 7 | "testing" 8 | ) 9 | 10 | // TestSSTableSerialization tests the serialization and deserialization of SSTable 11 | func TestSSTableSerialization(t *testing.T) { 12 | // Create a test SSTable 13 | original := &SSTable{ 14 | Id: 12345, 15 | Min: []byte("aaaaa"), 16 | Max: []byte("zzzzz"), 17 | isMerging: 0, 18 | Size: 98765, 19 | EntryCount: 1000, 20 | Level: 2, 21 | db: nil, // We can't compare functions, so leaving this nil 22 | } 23 | 24 | // Serialize the SSTable 25 | data, err := original.serializeSSTable() 26 | if err != nil { 27 | t.Fatalf("Failed to serialize SSTable: %v", err) 28 | } 29 | 30 | // Deserialize the SSTable 31 | result := &SSTable{} 32 | err = result.deserializeSSTable(data) 33 | if err != nil { 34 | t.Fatalf("Failed to deserialize SSTable: %v", err) 35 | } 36 | 37 | // Compare the original and deserialized SSTable 38 | if original.Id != result.Id { 39 | t.Errorf("Id mismatch: expected %d, got %d", original.Id, result.Id) 40 | } 41 | if !bytes.Equal(original.Min, result.Min) { 42 | t.Errorf("Min mismatch: expected %v, got %v", original.Min, result.Min) 43 | } 44 | if !bytes.Equal(original.Max, result.Max) { 45 | t.Errorf("Max mismatch: expected %v, got %v", original.Max, result.Max) 46 | } 47 | if original.isMerging != result.isMerging { 48 | t.Errorf("isMerging mismatch: expected %d, got %d", original.isMerging, result.isMerging) 49 | } 50 | if original.Size != result.Size { 51 | t.Errorf("Size mismatch: expected %d, got %d", original.Size, result.Size) 52 | } 53 | if original.EntryCount != result.EntryCount { 54 | t.Errorf("EntryCount mismatch: expected %d, got %d", original.EntryCount, result.EntryCount) 55 | } 56 | if original.Level != result.Level { 57 | t.Errorf("Level mismatch: expected %d, got %d", original.Level, result.Level) 58 | } 59 | // We don't compare db field as it's a pointer to DB 60 | } 61 | 62 | // TestTxnSerialization tests the serialization and deserialization of Txn 63 | func TestTxnSerialization(t *testing.T) { 64 | // Create a test transaction 65 | original := &Txn{ 66 | Id: 123, 67 | db: nil, // We can't compare functions, so leaving this nil 68 | ReadSet: map[string]int64{"key1": 100, "key2": 200}, 69 | WriteSet: map[string][]byte{"key3": []byte("value3"), "key4": []byte("value4")}, 70 | DeleteSet: map[string]bool{"key5": true, "key6": false}, 71 | Timestamp: 1621234567, 72 | mutex: sync.Mutex{}, 73 | Committed: true, 74 | } 75 | 76 | // Serialize the transaction 77 | data, err := original.serializeTransaction() 78 | if err != nil { 79 | t.Fatalf("Failed to serialize transaction: %v", err) 80 | } 81 | 82 | // Deserialize the transaction 83 | result := &Txn{} 84 | err = result.deserializeTransaction(data) 85 | if err != nil { 86 | t.Fatalf("Failed to deserialize transaction: %v", err) 87 | } 88 | 89 | // Compare the original and deserialized transaction 90 | if original.Id != result.Id { 91 | t.Errorf("id mismatch: expected %d, got %d", original.Id, result.Id) 92 | } 93 | if !reflect.DeepEqual(original.ReadSet, result.ReadSet) { 94 | t.Errorf("ReadSet mismatch: expected %v, got %v", original.ReadSet, result.ReadSet) 95 | } 96 | 97 | // Compare WriteSet - need to check each byte array 98 | if len(original.WriteSet) != len(result.WriteSet) { 99 | t.Errorf("WriteSet length mismatch: expected %d, got %d", len(original.WriteSet), len(result.WriteSet)) 100 | } else { 101 | for k, v := range original.WriteSet { 102 | if rv, ok := result.WriteSet[k]; !ok { 103 | t.Errorf("WriteSet missing key: %s", k) 104 | } else if !bytes.Equal(v, rv) { 105 | t.Errorf("WriteSet value mismatch for key %s: expected %v, got %v", k, v, rv) 106 | } 107 | } 108 | } 109 | 110 | if !reflect.DeepEqual(original.DeleteSet, result.DeleteSet) { 111 | t.Errorf("DeleteSet mismatch: expected %v, got %v", original.DeleteSet, result.DeleteSet) 112 | } 113 | if original.Timestamp != result.Timestamp { 114 | t.Errorf("Timestamp mismatch: expected %d, got %d", original.Timestamp, result.Timestamp) 115 | } 116 | if original.Committed != result.Committed { 117 | t.Errorf("Committed mismatch: expected %t, got %t", original.Committed, result.Committed) 118 | } 119 | // We don't compare db field as it's a pointer to DB 120 | // We don't compare mutex as it's not easily comparable 121 | } 122 | 123 | // TestSSTableSerializationError tests error handling in SSTable serialization 124 | func TestSSTableSerializationError(t *testing.T) { 125 | // Create an invalid SSTable that would cause serialization to fail 126 | // This is difficult to simulate with gob, but we can test error handling 127 | 128 | // Test deserialize with empty data 129 | sst := &SSTable{} 130 | err := sst.deserializeSSTable([]byte{}) 131 | if err == nil { 132 | t.Errorf("Expected error when deserializing empty data, got nil") 133 | } 134 | } 135 | 136 | // TestTxnSerializationError tests error handling in Txn serialization 137 | func TestTxnSerializationError(t *testing.T) { 138 | // Test deserialize with empty data 139 | txn := &Txn{} 140 | err := txn.deserializeTransaction([]byte{}) 141 | if err == nil { 142 | t.Errorf("Expected error when deserializing empty data, got nil") 143 | } 144 | } 145 | 146 | // Additional test for all three serialization functions with corrupted data 147 | func TestCorruptedDataDeserialization(t *testing.T) { 148 | // Create corrupted data (just some random bytes) 149 | corruptedData := []byte{0x1, 0x2, 0x3, 0x4, 0x5} 150 | 151 | // Test SSTable 152 | sst := &SSTable{} 153 | err := sst.deserializeSSTable(corruptedData) 154 | if err == nil { 155 | t.Errorf("Expected error when deserializing corrupted SSTable data, got nil") 156 | } 157 | 158 | // Test Txn 159 | txn := &Txn{} 160 | err = txn.deserializeTransaction(corruptedData) 161 | if err == nil { 162 | t.Errorf("Expected error when deserializing corrupted Txn data, got nil") 163 | } 164 | 165 | } 166 | -------------------------------------------------------------------------------- /sstable.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "bytes" 5 | "github.com/wildcatdb/wildcat/blockmanager" 6 | "github.com/wildcatdb/wildcat/bloomfilter" 7 | "github.com/wildcatdb/wildcat/tree" 8 | "go.mongodb.org/mongo-driver/bson" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | "os" 11 | "strconv" 12 | ) 13 | 14 | // SSTable represents a sorted string table 15 | type SSTable struct { 16 | Id int64 // SStable ID 17 | Min []byte // The minimum key in the SSTable 18 | Max []byte // The maximum key in the SSTable 19 | Size int64 // The size of the SSTable in bytes 20 | EntryCount int // The number of entries in the SSTable 21 | Level int // The level of the SSTable 22 | BloomFilter *bloomfilter.BloomFilter // Optional bloom filter for fast lookups 23 | Timestamp int64 // Timestamp of latest entry in the SSTable 24 | isMerging int32 // Atomic flag indicating if the SSTable is being merged 25 | db *DB // Reference to the database (not exported) 26 | } 27 | 28 | // KLogEntry represents a key-value entry in the KLog 29 | type KLogEntry struct { 30 | Key []byte // Key of the entry 31 | Timestamp int64 // Timestamp of the entry 32 | ValueBlockID int64 // Block ID of the value 33 | } 34 | 35 | // get retrieves a value from the SSTable using the key and timestamp 36 | func (sst *SSTable) get(key []byte, readTimestamp int64) ([]byte, int64) { 37 | // Get the KLog block manager 38 | klogPath := sst.kLogPath() 39 | var klogBm *blockmanager.BlockManager 40 | var err error 41 | 42 | // Skip range check if Min or Max are empty 43 | // Empty Min/Max indicate either an empty SSTable (which we can skip safely) 44 | // or a corrupted range 45 | if len(sst.Min) > 0 && len(sst.Max) > 0 { 46 | 47 | // Only skip if key is definitely outside the range 48 | if bytes.Compare(key, sst.Min) < 0 || bytes.Compare(key, sst.Max) > 0 { 49 | return nil, 0 // Key not in range 50 | } 51 | } else if sst.EntryCount == 0 { 52 | // If the SSTable is empty (as confirmed by EntryCount), 53 | // we can safely skip it regardless of Min/Max 54 | return nil, 0 55 | } 56 | 57 | // If bloom filters are configured 58 | // we check if the key is in the bloom filter 59 | // if so we continue on if not we skip 60 | if sst.db.opts.BloomFilter { 61 | // Check if the key is in the bloom filter 62 | if !sst.BloomFilter.Contains(key) { 63 | return nil, 0 // Key not in SSTable 64 | } 65 | 66 | } 67 | 68 | if v, ok := sst.db.lru.Get(klogPath); ok { 69 | klogBm = v.(*blockmanager.BlockManager) 70 | } else { 71 | klogBm, err = blockmanager.Open(klogPath, os.O_RDONLY, sst.db.opts.Permission, blockmanager.SyncOption(sst.db.opts.SyncOption)) 72 | if err != nil { 73 | return nil, 0 74 | } 75 | sst.db.lru.Put(klogPath, klogBm, func(key, value interface{}) { 76 | if bm, ok := value.(*blockmanager.BlockManager); ok { 77 | _ = bm.Close() 78 | } 79 | }) 80 | } 81 | 82 | t, err := tree.Open(klogBm, sst.db.opts.SSTableBTreeOrder, sst) 83 | if err != nil { 84 | return nil, 0 85 | } 86 | 87 | val, _, err := t.Get(key) 88 | if err != nil { 89 | return nil, 0 90 | } 91 | 92 | if val == nil { 93 | return nil, 0 94 | } 95 | 96 | var entry *KLogEntry 97 | 98 | if klogEntry, ok := val.(*KLogEntry); ok { 99 | entry = klogEntry 100 | } else if doc, ok := val.(primitive.D); ok { 101 | entry = &KLogEntry{} 102 | 103 | // Extract fields from primitive.D (bson) 104 | for _, elem := range doc { 105 | switch elem.Key { 106 | case "key": 107 | if keyData, ok := elem.Value.(primitive.Binary); ok { 108 | entry.Key = keyData.Data 109 | } 110 | case "timestamp": 111 | if ts, ok := elem.Value.(int64); ok { 112 | entry.Timestamp = ts 113 | } 114 | case "valueblockid": 115 | if blockID, ok := elem.Value.(int64); ok { 116 | entry.ValueBlockID = blockID 117 | } 118 | } 119 | } 120 | } else { 121 | // Unknown type, try to convert via BSON 122 | bsonData, err := bson.Marshal(val) 123 | if err != nil { 124 | return nil, 0 125 | } 126 | 127 | entry = &KLogEntry{} 128 | err = bson.Unmarshal(bsonData, entry) 129 | if err != nil { 130 | return nil, 0 131 | } 132 | } 133 | 134 | // Only return if this version is visible to the read timestamp 135 | if entry.Timestamp <= readTimestamp { 136 | if entry.ValueBlockID == -1 { 137 | return nil, entry.Timestamp // Return nil value but valid timestamp for deletion 138 | } 139 | v := sst.readValueFromVLog(entry.ValueBlockID) 140 | return v, entry.Timestamp 141 | } 142 | 143 | return nil, 0 144 | } 145 | 146 | // readValueFromVLog reads a value from the VLog using the block ID 147 | func (sst *SSTable) readValueFromVLog(valueBlockID int64) []byte { 148 | vlogPath := sst.vLogPath() 149 | var vlogBm *blockmanager.BlockManager 150 | var err error 151 | 152 | if v, ok := sst.db.lru.Get(vlogPath); ok { 153 | vlogBm = v.(*blockmanager.BlockManager) 154 | } else { 155 | vlogBm, err = blockmanager.Open(vlogPath, os.O_RDONLY, sst.db.opts.Permission, blockmanager.SyncOption(sst.db.opts.SyncOption)) 156 | if err != nil { 157 | return nil 158 | } 159 | sst.db.lru.Put(vlogPath, vlogBm, func(key, value interface{}) { 160 | if bm, ok := value.(*blockmanager.BlockManager); ok { 161 | _ = bm.Close() 162 | } 163 | }) 164 | } 165 | 166 | value, _, err := vlogBm.Read(valueBlockID) 167 | if err != nil { 168 | return nil 169 | } 170 | return value 171 | } 172 | 173 | // kLogPath returns the path to the KLog file for this SSTable 174 | func (sst *SSTable) kLogPath() string { 175 | return sst.db.opts.Directory + LevelPrefix + strconv.Itoa(sst.Level) + 176 | string(os.PathSeparator) + SSTablePrefix + strconv.FormatInt(sst.Id, 10) + KLogExtension 177 | } 178 | 179 | // vLogPath returns the path to the VLog file for this SSTable 180 | func (sst *SSTable) vLogPath() string { 181 | return sst.db.opts.Directory + LevelPrefix + strconv.Itoa(sst.Level) + 182 | string(os.PathSeparator) + SSTablePrefix + strconv.FormatInt(sst.Id, 10) + VLogExtension 183 | } 184 | -------------------------------------------------------------------------------- /sstable_test.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "sync" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestSSTable_BasicOperations(t *testing.T) { 14 | dir, err := os.MkdirTemp("", "db_sstable_test") 15 | if err != nil { 16 | t.Fatalf("Failed to create temp directory: %v", err) 17 | } 18 | 19 | // Create a log channel 20 | logChan := make(chan string, 100) 21 | defer func() { 22 | // Drain the log channel 23 | for len(logChan) > 0 { 24 | <-logChan 25 | } 26 | }() 27 | 28 | // Create a test DB 29 | opts := &Options{ 30 | Directory: dir, 31 | SyncOption: SyncFull, // Use full sync for reliability 32 | LogChannel: logChan, 33 | WriteBufferSize: 4 * 1024, // Small buffer to force flushing 34 | } 35 | 36 | db, err := Open(opts) 37 | if err != nil { 38 | t.Fatalf("Failed to open database: %v", err) 39 | } 40 | defer func(path string) { 41 | _ = os.RemoveAll(path) 42 | }(dir) 43 | 44 | // Insert enough data to trigger a flush to SSTable 45 | numEntries := 100 46 | for i := 0; i < numEntries; i++ { 47 | key := fmt.Sprintf("key%d", i) 48 | value := fmt.Sprintf("value%d", i) 49 | 50 | err = db.Update(func(txn *Txn) error { 51 | return txn.Put([]byte(key), []byte(value)) 52 | }) 53 | if err != nil { 54 | t.Fatalf("Failed to insert data: %v", err) 55 | } 56 | } 57 | 58 | // Force a memtable flush by exceeding write buffer size 59 | largeValue := make([]byte, opts.WriteBufferSize) 60 | for i := range largeValue { 61 | largeValue[i] = byte(i % 256) 62 | } 63 | 64 | err = db.Update(func(txn *Txn) error { 65 | return txn.Put([]byte("large_key"), largeValue) 66 | }) 67 | if err != nil { 68 | t.Fatalf("Failed to insert large value: %v", err) 69 | } 70 | 71 | // Give some time for background flushing to complete 72 | time.Sleep(500 * time.Millisecond) 73 | 74 | // Verify that level 1 has at least one SSTable 75 | levels := db.levels.Load() 76 | if levels == nil { 77 | t.Fatalf("Levels not initialized") 78 | } 79 | 80 | level1 := (*levels)[0] // Level 1 is at index 0 81 | sstables := level1.sstables.Load() 82 | 83 | if sstables == nil || len(*sstables) == 0 { 84 | t.Errorf("Expected at least one SSTable in level 1, but found none") 85 | } else { 86 | t.Logf("Found %d SSTables in level 1", len(*sstables)) 87 | } 88 | 89 | // Verify data can still be read (now from SSTables) 90 | for i := 0; i < numEntries; i++ { 91 | key := fmt.Sprintf("key%d", i) 92 | expectedValue := fmt.Sprintf("value%d", i) 93 | 94 | var actualValue []byte 95 | err = db.Update(func(txn *Txn) error { 96 | var err error 97 | actualValue, err = txn.Get([]byte(key)) 98 | return err 99 | }) 100 | 101 | if err != nil { 102 | t.Errorf("Failed to read key %s from SSTable: %v", key, err) 103 | continue 104 | } 105 | 106 | if string(actualValue) != expectedValue { 107 | t.Errorf("For key %s expected value %s, got %s", key, expectedValue, actualValue) 108 | } 109 | } 110 | 111 | // Close the database 112 | _ = db.Close() 113 | 114 | // Check that SSTable files exist on disk 115 | l1Dir := filepath.Join(dir, "l1") 116 | files, err := os.ReadDir(l1Dir) 117 | if err != nil { 118 | t.Fatalf("Failed to read level 1 directory: %v", err) 119 | } 120 | 121 | var klogFound, vlogFound bool 122 | for _, file := range files { 123 | if filepath.Ext(file.Name()) == ".klog" { 124 | klogFound = true 125 | } 126 | if filepath.Ext(file.Name()) == ".vlog" { 127 | vlogFound = true 128 | } 129 | } 130 | 131 | if !klogFound { 132 | t.Errorf("No .klog files found in level 1 directory") 133 | } 134 | if !vlogFound { 135 | t.Errorf("No .vlog files found in level 1 directory") 136 | } 137 | } 138 | 139 | func TestSSTable_ConcurrentAccess(t *testing.T) { 140 | dir, err := os.MkdirTemp("", "db_sstable_concurrent_test") 141 | if err != nil { 142 | t.Fatalf("Failed to create temp directory: %v", err) 143 | } 144 | 145 | // Create a log channel 146 | logChan := make(chan string, 100) 147 | defer func() { 148 | // Drain the log channel 149 | for len(logChan) > 0 { 150 | <-logChan 151 | } 152 | }() 153 | 154 | // Create a test DB 155 | opts := &Options{ 156 | Directory: dir, 157 | SyncOption: SyncFull, 158 | LogChannel: logChan, 159 | WriteBufferSize: 4 * 1024, // Small buffer to force flushing 160 | } 161 | 162 | db, err := Open(opts) 163 | if err != nil { 164 | t.Fatalf("Failed to open database: %v", err) 165 | } 166 | defer func(db *DB) { 167 | _ = db.Close() 168 | }(db) 169 | defer func(path string) { 170 | _ = os.RemoveAll(path) 171 | }(dir) 172 | 173 | // Insert some initial data to ensure we have at least one SSTable 174 | for i := 0; i < 100; i++ { 175 | key := fmt.Sprintf("init_key%d", i) 176 | value := fmt.Sprintf("init_value%d", i) 177 | 178 | err = db.Update(func(txn *Txn) error { 179 | return txn.Put([]byte(key), []byte(value)) 180 | }) 181 | if err != nil { 182 | t.Fatalf("Failed to insert initial data: %v", err) 183 | } 184 | } 185 | 186 | // Force a flush to SSTable 187 | forceManyWrites(t, db, 100) 188 | time.Sleep(500 * time.Millisecond) // Allow background flush to complete 189 | 190 | // Number of concurrent readers and operations per reader 191 | const numReaders = 10 192 | const opsPerReader = 50 193 | const numWriters = 5 194 | const opsPerWriter = 20 195 | 196 | // Create a wait group for synchronization 197 | var wg sync.WaitGroup 198 | wg.Add(numReaders + numWriters) 199 | 200 | // Start concurrent readers 201 | for r := 0; r < numReaders; r++ { 202 | go func(readerID int) { 203 | defer wg.Done() 204 | 205 | for i := 0; i < opsPerReader; i++ { 206 | keyIdx := i % 100 // Use modulo to access existing keys 207 | key := fmt.Sprintf("init_key%d", keyIdx) 208 | expectedPrefix := "init_value" 209 | 210 | var value []byte 211 | err := db.Update(func(txn *Txn) error { 212 | var err error 213 | value, err = txn.Get([]byte(key)) 214 | return err 215 | }) 216 | 217 | if err != nil { 218 | t.Errorf("Reader %d failed to read key %s: %v", readerID, key, err) 219 | } else if !startsWith(value, []byte(expectedPrefix)) { 220 | t.Errorf("Reader %d: unexpected value for key %s: %s", readerID, key, value) 221 | } 222 | 223 | // Small sleep to reduce contention 224 | time.Sleep(time.Millisecond) 225 | } 226 | }(r) 227 | } 228 | 229 | // Start concurrent writers 230 | for w := 0; w < numWriters; w++ { 231 | go func(writerID int) { 232 | defer wg.Done() 233 | 234 | for i := 0; i < opsPerWriter; i++ { 235 | key := fmt.Sprintf("new_key_w%d_i%d", writerID, i) 236 | value := fmt.Sprintf("new_value_w%d_i%d", writerID, i) 237 | 238 | err := db.Update(func(txn *Txn) error { 239 | return txn.Put([]byte(key), []byte(value)) 240 | }) 241 | 242 | if err != nil { 243 | t.Errorf("Writer %d failed to write key %s: %v", writerID, key, err) 244 | } 245 | 246 | // Small sleep to reduce contention 247 | time.Sleep(time.Millisecond) 248 | } 249 | }(w) 250 | } 251 | 252 | // Wait for all operations to complete 253 | wg.Wait() 254 | 255 | // Verify that all written keys can be read 256 | successCount := 0 257 | expectedCount := numWriters * opsPerWriter 258 | 259 | for w := 0; w < numWriters; w++ { 260 | for i := 0; i < opsPerWriter; i++ { 261 | key := fmt.Sprintf("new_key_w%d_i%d", w, i) 262 | expectedValue := fmt.Sprintf("new_value_w%d_i%d", w, i) 263 | 264 | var actualValue []byte 265 | err := db.Update(func(txn *Txn) error { 266 | var err error 267 | actualValue, err = txn.Get([]byte(key)) 268 | return err 269 | }) 270 | 271 | if err == nil && string(actualValue) == expectedValue { 272 | successCount++ 273 | } 274 | } 275 | } 276 | 277 | // We should have a high success rate (but allow for some failures due to concurrency) 278 | if float64(successCount)/float64(expectedCount) < 0.95 { 279 | t.Errorf("Expected at least 95%% successful operations, got %.2f%% (%d/%d)", 280 | float64(successCount)/float64(expectedCount)*100, successCount, expectedCount) 281 | } else { 282 | t.Logf("Successfully verified %.2f%% (%d/%d) of concurrent writes", 283 | float64(successCount)/float64(expectedCount)*100, successCount, expectedCount) 284 | } 285 | 286 | // Verify that SSTables were created 287 | l1Dir := filepath.Join(dir, "l1") 288 | files, err := os.ReadDir(l1Dir) 289 | if err != nil { 290 | t.Fatalf("Failed to read level 1 directory: %v", err) 291 | } 292 | 293 | var klogFound, vlogFound bool 294 | for _, file := range files { 295 | if filepath.Ext(file.Name()) == ".klog" { 296 | klogFound = true 297 | } 298 | if filepath.Ext(file.Name()) == ".vlog" { 299 | vlogFound = true 300 | } 301 | } 302 | 303 | if !klogFound { 304 | t.Errorf("No .klog files found in level 1 directory") 305 | } 306 | if !vlogFound { 307 | t.Errorf("No .vlog files found in level 1 directory") 308 | } 309 | } 310 | 311 | // Helper function to force a flush to SSTable by writing many keys 312 | func forceManyWrites(t *testing.T, db *DB, count int) { 313 | // Write enough data to trigger memtable flush 314 | for i := 0; i < count; i++ { 315 | key := fmt.Sprintf("flush_key%d", i) 316 | value := fmt.Sprintf("flush_value%d", i) 317 | 318 | err := db.Update(func(txn *Txn) error { 319 | return txn.Put([]byte(key), []byte(value)) 320 | }) 321 | if err != nil { 322 | t.Fatalf("Failed to insert data for flushing: %v", err) 323 | } 324 | } 325 | } 326 | 327 | // Helper function to check if a byte slice starts with a prefix 328 | func startsWith(data, prefix []byte) bool { 329 | if len(data) < len(prefix) { 330 | return false 331 | } 332 | for i := 0; i < len(prefix); i++ { 333 | if data[i] != prefix[i] { 334 | return false 335 | } 336 | } 337 | return true 338 | } 339 | 340 | func TestSSTable_MVCCWithMultipleVersions(t *testing.T) { 341 | dir, err := os.MkdirTemp("", "db_sstable_mvcc_multiple_versions_test") 342 | if err != nil { 343 | t.Fatalf("Failed to create temp directory: %v", err) 344 | } 345 | 346 | // Create a log channel with debug logging 347 | logChan := make(chan string, 1000) 348 | go func() { 349 | for msg := range logChan { 350 | t.Log("DB LOG:", msg) 351 | } 352 | }() 353 | 354 | // Create a test DB with a small write buffer to force flushing 355 | opts := &Options{ 356 | Directory: dir, 357 | SyncOption: SyncFull, 358 | LogChannel: logChan, 359 | WriteBufferSize: 512, // Very small buffer to force flushing 360 | } 361 | 362 | db, err := Open(opts) 363 | if err != nil { 364 | t.Fatalf("Failed to open database: %v", err) 365 | } 366 | defer func(db *DB) { 367 | _ = db.Close() 368 | }(db) 369 | defer func(path string) { 370 | _ = os.RemoveAll(path) 371 | }(dir) 372 | 373 | // Create a single key with multiple versions 374 | key := []byte("mvcc_key") 375 | 376 | // Record transaction timestamps for verification 377 | var timestamps []int64 378 | var txns []*Txn 379 | 380 | // Create 5 versions of the same key 381 | for i := 1; i <= 5; i++ { 382 | // Start a transaction and record its timestamp 383 | txn := db.Begin() 384 | timestamps = append(timestamps, txn.Timestamp) 385 | txns = append(txns, txn) 386 | 387 | // Write a new version 388 | value := []byte(fmt.Sprintf("value%d", i)) 389 | err = txn.Put(key, value) 390 | if err != nil { 391 | t.Fatalf("Failed to write version %d: %v", i, err) 392 | } 393 | 394 | // Commit the transaction 395 | err = txn.Commit() 396 | if err != nil { 397 | t.Fatalf("Failed to commit version %d: %v", i, err) 398 | } 399 | 400 | t.Logf("Created version %d with timestamp %d", i, txn.Timestamp) 401 | 402 | // Force a flush to SSTable after each write to ensure versions are in different SSTables 403 | largeValue := make([]byte, opts.WriteBufferSize) 404 | err = db.Update(func(txn *Txn) error { 405 | return txn.Put([]byte(fmt.Sprintf("large_key_%d", i)), largeValue) 406 | }) 407 | if err != nil { 408 | t.Fatalf("Failed to force flush after version %d: %v", i, err) 409 | } 410 | 411 | // Wait for flush to complete 412 | time.Sleep(100 * time.Millisecond) 413 | } 414 | 415 | // Now verify that we can read each version using the corresponding timestamp 416 | for i := 0; i < 5; i++ { 417 | // Create a transaction with the recorded timestamp 418 | readTxn := db.Begin() 419 | // Set the timestamp to match the original write timestamp 420 | readTxn.Timestamp = timestamps[i] 421 | 422 | // Read the value 423 | value, err := readTxn.Get(key) 424 | if err != nil { 425 | t.Fatalf("Failed to read version %d: %v", i+1, err) 426 | } 427 | 428 | expectedValue := fmt.Sprintf("value%d", i+1) 429 | if string(value) != expectedValue { 430 | t.Errorf("Expected version %d to be '%s', got '%s'", i+1, expectedValue, value) 431 | } else { 432 | t.Logf("Successfully read version %d with value '%s'", i+1, value) 433 | } 434 | } 435 | 436 | // Verify that a new transaction sees only the latest version 437 | latestTxn := db.Begin() 438 | latestValue, err := latestTxn.Get(key) 439 | if err != nil { 440 | t.Fatalf("Failed to read latest version: %v", err) 441 | } 442 | 443 | if string(latestValue) != "value5" { 444 | t.Errorf("Expected latest version to be 'value5', got '%s'", latestValue) 445 | } else { 446 | t.Logf("Successfully read latest version with value 'value5'") 447 | } 448 | 449 | // Check that SSTables were created 450 | l1Dir := filepath.Join(dir, "l1") 451 | files, err := os.ReadDir(l1Dir) 452 | if err != nil { 453 | t.Fatalf("Failed to read level 1 directory: %v", err) 454 | } 455 | 456 | var klogCount int 457 | for _, file := range files { 458 | if filepath.Ext(file.Name()) == ".klog" { 459 | klogCount++ 460 | } 461 | } 462 | 463 | if klogCount < 5 { 464 | t.Logf("Expected at least 5 .klog files, found %d", klogCount) 465 | } else { 466 | t.Logf("Found %d .klog files in level 1 directory", klogCount) 467 | } 468 | } 469 | 470 | func TestSSTable_SimpleDeleteWithDelay(t *testing.T) { 471 | dir, err := os.MkdirTemp("", "db_sstable_delete_delay_test") 472 | if err != nil { 473 | t.Fatalf("Failed to create temp directory: %v", err) 474 | } 475 | 476 | // Create a log channel 477 | logChan := make(chan string, 100) 478 | defer func() { 479 | // Drain the log channel 480 | for len(logChan) > 0 { 481 | <-logChan 482 | } 483 | }() 484 | 485 | // Create a test DB 486 | opts := &Options{ 487 | Directory: dir, 488 | SyncOption: SyncFull, 489 | LogChannel: logChan, 490 | } 491 | 492 | db, err := Open(opts) 493 | if err != nil { 494 | t.Fatalf("Failed to open database: %v", err) 495 | } 496 | defer func(db *DB) { 497 | _ = db.Close() 498 | }(db) 499 | defer func(path string) { 500 | _ = os.RemoveAll(path) 501 | }(dir) 502 | 503 | // Insert a key 504 | key := []byte("delay_test_key") 505 | value := []byte("delay_test_value") 506 | 507 | err = db.Update(func(txn *Txn) error { 508 | return txn.Put(key, value) 509 | }) 510 | if err != nil { 511 | t.Fatalf("Failed to insert key: %v", err) 512 | } 513 | 514 | // Verify the key exists 515 | var retrievedValue []byte 516 | err = db.Update(func(txn *Txn) error { 517 | var err error 518 | retrievedValue, err = txn.Get(key) 519 | return err 520 | }) 521 | if err != nil { 522 | t.Fatalf("Failed to get key after insert: %v", err) 523 | } 524 | if !bytes.Equal(retrievedValue, value) { 525 | t.Fatalf("Value mismatch: expected %s, got %s", value, retrievedValue) 526 | } 527 | t.Logf("Key found after insertion: %s", retrievedValue) 528 | 529 | // Delete the key 530 | t.Logf("Deleting key: %s", key) 531 | err = db.Update(func(txn *Txn) error { 532 | t.Logf("Delete transaction ID: %d, Timestamp: %d", txn.Id, txn.Timestamp) 533 | return txn.Delete(key) 534 | }) 535 | if err != nil { 536 | t.Fatalf("Failed to delete key: %v", err) 537 | } 538 | 539 | // Add a small delay to ensure the deletion is fully applied 540 | time.Sleep(100 * time.Millisecond) 541 | 542 | // Try to get the deleted key 543 | err = db.Update(func(txn *Txn) error { 544 | t.Logf("Verification transaction ID: %d, Timestamp: %d", txn.Id, txn.Timestamp) 545 | _, err := txn.Get(key) 546 | if err == nil { 547 | return fmt.Errorf("key should be deleted but is still accessible") 548 | } 549 | return nil 550 | }) 551 | if err != nil { 552 | t.Fatalf("Deletion verification failed: %v", err) 553 | } 554 | t.Logf("Key correctly not found after deletion") 555 | 556 | // Force a flush to ensure deletion is persisted 557 | t.Logf("Forcing flush after deletion") 558 | largeValue := make([]byte, 1024*1024) // 1MB should exceed any buffer size 559 | err = db.Update(func(txn *Txn) error { 560 | return txn.Put([]byte("large_key"), largeValue) 561 | }) 562 | if err != nil { 563 | t.Fatalf("Failed to trigger flush: %v", err) 564 | } 565 | 566 | // Wait for flush to complete 567 | time.Sleep(500 * time.Millisecond) 568 | 569 | // Verify key is still deleted after flush 570 | err = db.Update(func(txn *Txn) error { 571 | _, err := txn.Get(key) 572 | if err == nil { 573 | return fmt.Errorf("key should be deleted but is still accessible after flush") 574 | } 575 | return nil 576 | }) 577 | if err != nil { 578 | t.Fatalf("Post-flush verification failed: %v", err) 579 | } 580 | t.Logf("Key correctly not found after flush") 581 | 582 | // Close and reopen database 583 | err = db.Close() 584 | if err != nil { 585 | t.Fatalf("Failed to close database: %v", err) 586 | } 587 | 588 | // Drain log channel 589 | for len(logChan) > 0 { 590 | <-logChan 591 | } 592 | 593 | opts.LogChannel = nil 594 | db2, err := Open(opts) 595 | if err != nil { 596 | t.Fatalf("Failed to reopen database: %v", err) 597 | } 598 | defer func(db2 *DB) { 599 | _ = db2.Close() 600 | }(db2) 601 | defer func(path string) { 602 | _ = os.RemoveAll(path) 603 | }(dir) 604 | 605 | // Verify key is still deleted after restart 606 | err = db2.Update(func(txn *Txn) error { 607 | _, err := txn.Get(key) 608 | if err == nil { 609 | return fmt.Errorf("key should be deleted but is still accessible after restart") 610 | } 611 | return nil 612 | }) 613 | if err != nil { 614 | t.Fatalf("Post-restart verification failed: %v", err) 615 | } 616 | t.Logf("Key correctly not found after restart") 617 | } 618 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | ) 7 | 8 | // extractIDFromFilename extracts the ID from a given filename. . 9 | func extractIDFromFilename(filename string) int64 { 10 | parts := strings.Split(filename, ".") 11 | // File names in wildcat are . 12 | if len(parts) != 2 { 13 | return 0 14 | } 15 | 16 | ts, err := strconv.ParseInt(parts[0], 10, 64) 17 | if err != nil { 18 | return 0 19 | } 20 | 21 | return ts 22 | } 23 | -------------------------------------------------------------------------------- /utils_test.go: -------------------------------------------------------------------------------- 1 | package wildcat 2 | 3 | import "testing" 4 | 5 | func TestExtractIDFromFilename(t *testing.T) { 6 | tests := []struct { 7 | filename string 8 | expected int64 9 | }{ 10 | {"1234567890.wal", 1234567890}, 11 | {"9876543210.wal", 9876543210}, 12 | {"invalid.wal", 0}, 13 | {"12345.invalid", 12345}, 14 | {"", 0}, 15 | {"12345", 0}, 16 | {".wal", 0}, 17 | } 18 | 19 | for _, tt := range tests { 20 | t.Run(tt.filename, func(t *testing.T) { 21 | result := extractIDFromFilename(tt.filename) 22 | if result != tt.expected { 23 | t.Errorf("extractIDFromFilename(%q) = %d; want %d", tt.filename, result, tt.expected) 24 | } 25 | }) 26 | } 27 | } 28 | --------------------------------------------------------------------------------