├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── go.yml ├── .gitignore ├── LICENSE ├── README.md ├── Taskfile.yml ├── api.go ├── api_test.go ├── bench_test.go ├── bucket.go ├── bucket_test.go ├── buckets.go ├── buckets_test.go ├── cmd ├── main.go └── parser │ └── parser.go ├── docs ├── data_format.png ├── data_format.svg └── forks.svg ├── example_test.go ├── fuzz_test.go ├── go.mod ├── go.sum ├── index ├── index.go ├── index_test.go ├── reader.go ├── reader_test.go ├── writer.go └── writer_test.go ├── item ├── item.go ├── item_test.go └── testutils │ └── testutils.go ├── options.go ├── realworld_test.go ├── run_tests.sh ├── slow_test.go ├── util.go ├── util_test.go └── vlog ├── heap.go ├── iter.go ├── iter_test.go ├── vlog.go └── vlog_test.go /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | permissions: 13 | contents: read 14 | pull-requests: read 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | go: [ '1.21', '1.22' ] 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up Go 25 | uses: actions/setup-go@v5 26 | with: 27 | go-version: ${{ matrix.go }} 28 | 29 | - name: Build 30 | run: go build -v ./... 31 | 32 | - name: Test 33 | run: go test -v ./... 34 | 35 | - name: golangci-lint 36 | uses: golangci/golangci-lint-action@v4 37 | with: 38 | version: v1.54 39 | skip-pkg-cache: true 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | timeq 2 | *.out 3 | *.pprof 4 | *.test 5 | cover 6 | .task/checksum 7 | q1908 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Josh Baker 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ``timeq`` 2 | 3 | [![GoDoc](https://godoc.org/github.com/sahib/timeq?status.svg)](https://godoc.org/github.com/sahib/timeq) 4 | ![Build status](https://github.com/sahib/timeq/actions/workflows/go.yml/badge.svg) 5 | 6 | A file-based priority queue in Go. 7 | 8 | Generally speaking, `timeq` can be used to implement these and more: 9 | 10 | - A streaming platform like [NATS](https://nats.io) or message brokers similar to [Mosquitto](https://mosquitto.org). 11 | - A file-backend job queue with different priorities. 12 | - A telemetry pipeline for IoT devices to buffer offline data. 13 | - Wherever you would use a regular file-based queue. 
14 |
15 | ## Features
16 |
17 | - Clean and well-tested code base based on Go 1.22
18 | - High throughput thanks to batch processing and `mmap()`
19 | - Tiny memory footprint that does not depend on the number of items in the queue.
20 | - Simple interface with classic `Push()` and `Read()` and only a few other functions.
21 | - Sane default settings, with some knobs that can be tuned for your use case.
22 | - Consuming end can be efficiently and easily forked into several consumers.
23 |
24 | This implementation should be generally useful, despite the ``time`` in the
25 | name. However, the initial design had timestamps as priority keys in mind. For
26 | best performance the following assumptions were made:
27 |
28 | - Your OS supports `mmap()` and `mremap()` (i.e. Linux/FreeBSD)
29 | - Seeking in files during reading is cheap (i.e. no HDD)
30 | - The priority key ideally increases without many duplicates (like timestamps, see [FAQ](#FAQ)).
31 | - You push and pop your data in, ideally, big batches.
32 | - The underlying storage has a low risk for write errors or bit flips.
33 | - You trust your data to some random dude's code on the internet (don't we all?).
34 |
35 | If some of those assumptions do not fit your use case and you still managed to make it work,
36 | I would be happy for some feedback or even pull requests to improve the general usability.
37 |
38 | See the [API documentation here](https://godoc.org/github.com/sahib/timeq) for
39 | examples and the actual documentation.
40 |
41 | ## Use cases
42 |
43 | My primary use case was an embedded Linux device running different services that generate
44 | a stream of data that needs to be sent to the cloud. For this, the data was required to be
45 | in ascending order (sorted by time) and also needed to be buffered with tight memory boundaries.
46 |
47 | A previous attempt based on ``sqlite3`` worked reasonably well, but was much slower
48 | than it had to be (partly also due to the heavy cost of ``cgo``). This motivated me to
49 | write this queue implementation.
50 |
51 | ## Usage
52 |
53 | To download the library, just do this in your project:
54 |
55 | ```bash
56 | # Use latest or a specific tag as you like
57 | $ go get github.com/sahib/timeq@latest
58 | ```
59 |
60 | We also ship a rudimentary command-line client that can be used for experiments.
61 | You can install it like this:
62 |
63 | ```bash
64 | $ go install github.com/sahib/timeq/cmd@latest
65 | ```
66 |
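To give a first impression of the API, here is a minimal sketch that pushes a few items and reads them back. The path and payloads are made up for illustration; refer to the API documentation linked above for the full details:

```go
package main

import (
	"fmt"
	"log"

	"github.com/sahib/timeq"
)

func main() {
	// Open() creates the queue directory if it does not exist yet.
	queue, err := timeq.Open("/tmp/timeq-example", timeq.DefaultOptions())
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		if err := queue.Close(); err != nil {
			log.Printf("close failed: %v", err)
		}
	}()

	// Push a small batch. In practice you should push much bigger
	// batches to get the most out of timeq.
	items := make(timeq.Items, 0, 10)
	for i := 0; i < 10; i++ {
		items = append(items, timeq.Item{
			Key:  timeq.Key(i),
			Blob: []byte(fmt.Sprintf("payload-%d", i)),
		})
	}
	if err := queue.Push(items); err != nil {
		log.Fatal(err)
	}

	// Read (and pop) up to 5 items. The batch slice is only valid inside
	// the callback - use Copy() if you need the data afterwards.
	err = queue.Read(5, func(_ timeq.Transaction, batch timeq.Items) (timeq.ReadOp, error) {
		for _, it := range batch {
			fmt.Println(it.Key, string(it.Blob))
		}
		return timeq.ReadOpPop, nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```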
67 | ## Benchmarks
68 |
69 | The [included benchmark](https://github.com/sahib/timeq/blob/main/bench_test.go#L15) pushes 2000 items with a payload of 40 bytes per operation.
70 |
71 | ```
72 | $ make bench
73 | goos: linux
74 | goarch: amd64
75 | pkg: github.com/sahib/timeq
76 | cpu: 12th Gen Intel(R) Core(TM) i7-1270P
77 | BenchmarkPopSyncNone-16    35924   33738 ns/op   240 B/op   5 allocs/op
78 | BenchmarkPopSyncData-16    35286   33938 ns/op   240 B/op   5 allocs/op
79 | BenchmarkPopSyncIndex-16   34030   34003 ns/op   240 B/op   5 allocs/op
80 | BenchmarkPopSyncFull-16    35170   33592 ns/op   240 B/op   5 allocs/op
81 | BenchmarkPushSyncNone-16   20336   56867 ns/op    72 B/op   2 allocs/op
82 | BenchmarkPushSyncData-16   20630   58613 ns/op    72 B/op   2 allocs/op
83 | BenchmarkPushSyncIndex-16  20684   58782 ns/op    72 B/op   2 allocs/op
84 | BenchmarkPushSyncFull-16   19994   59491 ns/op    72 B/op   2 allocs/op
85 | ```
86 |
87 | ## Multi Consumer
88 |
89 | `timeq` supports a `Fork()` operation that splits the consuming end of a queue
90 | in two halves. You can then consume from each of the halves individually,
91 | without modifying the state of the other one. It's even possible to fork a fork
92 | again, resulting in a consumer hierarchy. This is probably best explained by
93 | this diagram:
94 |
95 |
96 |
97 | 1. The initial state of the queue with 8 items in it.
98 | 2. We fork the queue by calling `Fork("foo")`.
99 | 3. We consume 3 items from the fork via `fork.Pop()`.
100 | 4. Pushing new data will go to all existing forks.
101 |
102 | This is implemented efficiently (see below) by just having duplicated indexes.
103 | It opens up some interesting use cases (a short code sketch follows after this list):
104 |
105 | - For load-balancing purposes you could have several workers consuming data from `timeq`, each `Pop()`'ing
106 | and working on different parts of the queue. Sometimes it would be nice to let workers work on the same
107 | set of data (e.g. when they all transform the data in different ways). The latter is easily possible with forks.
108 | - Fork the queue, consume from it until some point as an experiment, and remove the fork afterwards. The original
109 | data is not affected by this.
110 | - Prevent data from getting lost by keeping a "deadletter" fork that keeps track of whatever you want. This way
111 | you can implement something like a `max-age` for the queue's items.
112 |
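Continuing the sketch from the Usage section above, forking and consuming from a fork might look roughly like this (the fork name is arbitrary):

```go
// Fork the consuming end. The original queue keeps its own read state.
fork, err := queue.Fork("experiment")
if err != nil {
	log.Fatal(err)
}

// Reading from the fork does not affect the original queue or other forks.
err = fork.Read(100, func(_ timeq.Transaction, batch timeq.Items) (timeq.ReadOp, error) {
	// ... process batch ...
	return timeq.ReadOpPop, nil
})
if err != nil {
	log.Fatal(err)
}

// Once the fork is no longer needed it can be removed again;
// afterwards the fork handle returns ErrNoSuchFork.
if err := fork.Remove(); err != nil {
	log.Fatal(err)
}
```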
113 | ## Design
114 |
115 | * All data is divided into buckets by a user-defined function (»`BucketSplitConf`«).
116 | * Each bucket is its own priority queue, responsible for a part of the key space.
117 | * A push to a bucket writes the batch of data to a memory-mapped log
118 | file on disk. The location of the batch is stored in an
119 | in-memory index and in an index WAL.
120 | * On pop we select the bucket with the lowest key first and ask the index to give
121 | us the location of the lowest batch. Once done, the index is updated to mark the
122 | items as popped. The data stays intact in the data log.
123 | * Once a bucket was completely drained, it is removed from disk to reclaim space.
124 |
125 | Since the index is quite small (only one entry per batch) we can easily fit it in memory.
126 | On the initial load all bucket indexes are loaded, but no memory is mapped yet.
127 |
128 | ### Limits
129 |
130 | * Each item payload may be at most 64M.
131 | * Each bucket can be at most 2^63 bytes in size.
132 | * Using priority keys close to the integer limits is most certainly a bad idea.
133 | * Once a bucket was created with a specific `BucketSplitConf`, it cannot be changed later.
134 | `timeq` will error out in this case and the queue needs to be migrated.
135 | If this turns out to be a practical issue, we could implement an automated migration path.
136 |
137 | ### Data Layout
138 |
139 | The data is stored on disk in two files per bucket:
140 |
141 | * ``dat.log``: Stores one entry per item of a batch.
142 | * ``idx.log``: Stores the key and location of batches. Can be regenerated from ``dat.log``.
143 |
144 | This graphic shows one entry of each:
145 |
146 | ![Data Layout](docs/data_format.png)
147 |
148 | Each bucket lives in its own directory called `K<key>`.
149 | Example: If you have two buckets, your data looks like this on disk:
150 |
151 | ```
152 | /path/to/db/
153 | ├── split.conf
154 | ├── K00000000000000000001
155 | │   ├── dat.log
156 | │   ├── idx.log
157 | │   └── forkx.idx.log
158 | └── K00000000000000000002
159 |     ├── dat.log
160 |     ├── idx.log
161 |     └── forkx.idx.log
162 | ```
163 |
164 | The actual data is in `dat.log`. This is an append-only log that is
165 | memory-mapped by `timeq`. All files that end with `idx.log` are indexes that
166 | point to the currently reachable parts of `dat.log`. Each entry in `idx.log` is
167 | a batch, so the log will only grow marginally if your batches are big
168 | enough. `forkx.idx.log` (and possibly more files like that) are index forks,
169 | which work the same way as `idx.log`, but track a different state of the respective bucket.
170 |
171 | NOTE: Buckets get cleaned up on open or when completely empty (i.e. all forks
172 | are empty) during consumption. Do not expect that the disk usage automatically
173 | decreases whenever you pop something. It does decrease, but in batches.
174 |
175 | ### Applied Optimizations
176 |
177 | * Data is pushed and popped as big batches and the index only tracks batches.
178 | This greatly lowers the memory usage if you use big batches.
179 | * The API is very friendly towards re-using memory internally. Data is directly
180 | sliced from the memory map and given to the user in the read callback. Almost
181 | no allocations are made during normal operation. If you need the data outside the callback,
182 | you have the option to copy it.
183 | * Division into small, manageable buckets. Only the buckets that are accessed are actually loaded.
184 | * Both `dat.log` and `idx.log` are append-only, requiring no random seeking for best performance.
185 | * ``dat.log`` is memory-mapped and resized using `mremap()` in big batches. The bigger the log, the bigger the pre-allocation.
186 | * Sorting into buckets during `Push()` uses binary search.
187 | * `Shovel()` can move whole bucket directories, if possible.
188 | * In general, the concept of »Mechanical Sympathy« was applied to some extent to make the code cache friendly.
189 |
190 | ## FAQ
191 |
192 | ### Can timeq also be used with non-time-based keys?
193 |
194 | There are no notable places where the key of an item is actually assumed to be a
195 | timestamp, except for the default `BucketSplitConf` (which can be configured). If you
196 | find a good way to sort your data into buckets you should be good to go. Keep
197 | in mind that timestamps were the idea behind the original design, so your
198 | mileage may vary - always benchmark your individual use case. You can modify one
199 | of the existing benchmarks to test your assumptions.
200 |
201 | ### Why should I care about buckets?
202 |
203 | Most importantly: only buckets that are in use get loaded.
204 | This allows a very small footprint, especially if the push input is already roughly sorted.
205 |
206 | There are also some other reasons:
207 |
208 | * If one bucket becomes corrupt for some reason, you lose only the data in this bucket.
209 | * On ``Shovel()`` we can cheaply move buckets if they do not exist in the destination.
210 | * ...and some more optimizations.
211 |
212 | ### How do I choose the right size of my buckets?
213 |
214 | It depends on a few things. Answer the following questions for a worst-case scenario:
215 |
216 | - How much memory do you have at hand?
217 | - How many items would you push to a single bucket?
218 | - How big is each item?
219 | - How many buckets should be open at the same time?
220 |
221 | As `timeq` uses `mmap(2)` internally, only the pages that were accessed are
222 | actually mapped to physical memory. However, when pushing a lot of data, all of it
223 | ends up mapped to physical memory, as all accessed pages of a bucket stay open (which is
224 | good if you Pop immediately after). So you should be fine if this evaluates to true:
225 |
226 | `BytesPerItem * ItemsPerBucketInWorstCase * MaxOpenParallelBuckets < BytesMemoryAvailable - WiggleRoom`.
227 |
228 | You can lower the number of open buckets with `MaxOpenParallelBuckets`.
229 |
230 | Keep in mind that `timeq` is fast and can be memory-efficient if used correctly,
231 | but it's not a magic device. In the future I might introduce a feature that does not
232 | keep the full bucket mapped if it's only being pushed to. The return on investment
233 | for such an optimization would be rather small though.
234 |
235 | ### Can I store more than one value per key?
236 |
237 | Yes, no problem. The index may store more than one batch per key. There is a
238 | slight allocation overhead on ``Queue.Push()`` though. Since ``timeq`` was
239 | mostly optimized for mostly-unique keys (i.e. timestamps) you might see better
240 | performance with fewer duplicates. It should not be very significant though.
241 |
242 | If you want to use priority keys that are in a very narrow range (and thus have many
243 | duplicates) then you can think about spreading the range a bit wider.
244 | For example: You have priority keys from zero to ten for the tasks in your job
245 | queue. Instead of using zero to ten as keys, you can add the job-id to the key
246 | and shift the priority: ``(prio << 32) | jobID``.
247 |
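As a sketch, such a combined key could be computed like this (`makeKey` is a hypothetical helper for illustration, not part of the API):

```go
// makeKey spreads a narrow priority range over a wider key space by
// mixing the job id into the lower bits, keeping keys (mostly) unique.
func makeKey(prio uint8, jobID uint32) timeq.Key {
	return timeq.Key(uint64(prio)<<32 | uint64(jobID))
}
```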
248 | ### How failsafe is ``timeq``?
249 |
250 | I use it on a big fleet of embedded devices in the field at
251 | [GermanBionic](https://germanbionic.com), so it's already quite battle-tested.
252 | Design-wise, damaged index files can be regenerated from the data log.
253 | There's no error correction code applied in the data log and no checksums are
254 | currently written. If you need this, I'm happy if a PR comes in that enables it
255 | optionally.
256 |
257 | For durability, the design is built to survive crashes without data loss (Push,
258 | Read) but, in some cases, it might result in duplicated data (Shovel). My
259 | recommendation is to **design your application logic in a way that allows
260 | duplicate items to be handled gracefully**.
261 |
262 | This assumes a filesystem with full journaling (``data=journal`` for ext4) or
263 | some other filesystem that gives you similar guarantees. We do properly call
264 | `msync()` and `fsync()` in the relevant cases. For now, crash safety has not
265 | been tested extensively though. Help here is welcome.
266 |
267 | The test suite is currently roughly as big as the codebase. The best protection
268 | against bugs is a small code base, so that's not too impressive yet. We're of
269 | course working on improving the test suite, which is a never-ending task.
270 | Additionally we have a bunch of benchmarks and fuzzing tests.
271 |
272 | ### Is `timeq` safely usable from several go-routines?
273 |
274 | Yes. Currently there is no real speed benefit from doing so though,
275 | as the current locking strategy prohibits parallel pushes and reads.
276 | Future releases might improve on this.
277 |
278 | ## License
279 |
280 | Source code is available under the MIT [License](/LICENSE).
281 |
282 | ## Contact
283 |
284 | Chris Pahl [@sahib](https://github.com/sahib)
285 |
286 | ## TODO List
287 |
288 | - [ ] Test crash safety in an automated way.
289 | - [ ] Check for integer overflows.
290 | - [ ] Have locking strategy that allows more parallelism. 291 | -------------------------------------------------------------------------------- /Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | tasks: 4 | default: 5 | deps: [build] 6 | 7 | build: 8 | cmds: 9 | - go build -o timeq ./cmd 10 | sources: 11 | - ./**.go 12 | 13 | lint: 14 | cmds: 15 | - golangci-lint run ./... 16 | 17 | cover: 18 | deps: [test_all] 19 | cmds: 20 | - go tool cover -html cover.out 21 | 22 | sloc: 23 | silent: true 24 | cmds: 25 | - cloc --json $(find -iname '*.go' ! -iname '*_test.go') | jq '.Go.code' | xargs -n1 printf "Actual code:\t%d Lines\n" 26 | - cloc --json $(find -iname '*_test.go') | jq '.Go.code' | xargs -n1 printf "Test cases:\t%d Lines\n" 27 | - cloc --json $(find -iname '*.go') | jq '.Go.code' | xargs -n1 printf "Total code:\t%d Lines\n" 28 | 29 | fuzz: 30 | cmds: 31 | - go test -v -fuzztime 5m -fuzz ./ 32 | 33 | bench: 34 | cmds: 35 | - sudo sh -c 'sync && echo 3 > /proc/sys/vm/drop_caches' 36 | - go test -v -bench=. -run=xxx -benchmem -cpuprofile=cpu.pprof -memprofile=mem.pprof 37 | - go tool pprof -web ./cpu.pprof 38 | - go tool pprof -web ./mem.pprof 39 | 40 | test: 41 | cmds: 42 | - ./run_tests.sh 43 | 44 | test-all: 45 | cmds: 46 | - ./run_tests.sh --slow 47 | -------------------------------------------------------------------------------- /api.go: -------------------------------------------------------------------------------- 1 | // Package timeq is a file-based priority queue in Go. 2 | package timeq 3 | 4 | import ( 5 | "errors" 6 | "fmt" 7 | "unicode" 8 | 9 | "github.com/sahib/timeq/item" 10 | ) 11 | 12 | // Item is a single item that you push or pop from the queue. 13 | type Item = item.Item 14 | 15 | // Items is a list of items. 16 | type Items = item.Items 17 | 18 | // Key is the priority of each item in the queue. 19 | // Lower keys will be popped first. 20 | type Key = item.Key 21 | 22 | // Queue is the high level API to the priority queue. 23 | type Queue struct { 24 | buckets *buckets 25 | } 26 | 27 | // ForkName is the name of a specific fork. 28 | type ForkName string 29 | 30 | // Validate checks if this for has a valid name. 31 | // A fork is valid if its name only consists of alphanumeric and/or dash or underscore characters. 32 | func (name ForkName) Validate() error { 33 | if name == "" { 34 | return errors.New("empty string not allowed as fork name") 35 | } 36 | 37 | for pos, rn := range []rune(name) { 38 | ok := unicode.IsUpper(rn) || unicode.IsLower(rn) || unicode.IsDigit(rn) || rn == '-' || rn == '_' 39 | if !ok { 40 | return fmt.Errorf("invalid fork name at pos %d: %v (allowed: [a-Z0-9_-])", pos, rn) 41 | } 42 | } 43 | 44 | return nil 45 | } 46 | 47 | // Transaction is a handle to the queue during the read callback. 48 | // See TransactionFn for more details. 49 | type Transaction interface { 50 | Push(items Items) error 51 | } 52 | 53 | // TransactionFn is the function passed to the Read() call. 54 | // It will be called zero to multiple times with a number of items 55 | // that was read. You can decide with the return value what to do with this data. 56 | // Returning an error will immediately stop further reading. The current data will 57 | // not be touched and the error is bubbled up. 58 | // 59 | // The `tx` parameter can be used to push data back to the queue. It might be 60 | // extended in future releases. 
61 | type TransactionFn func(tx Transaction, items Items) (ReadOp, error) 62 | 63 | // Open tries to open the priority queue structure in `dir`. 64 | // If `dir` does not exist, then a new, empty priority queue is created. 65 | // The behavior of the queue can be fine-tuned with `opts`. 66 | func Open(dir string, opts Options) (*Queue, error) { 67 | if err := opts.Validate(); err != nil { 68 | return nil, err 69 | } 70 | 71 | bs, err := loadAllBuckets(dir, opts) 72 | if err != nil { 73 | return nil, fmt.Errorf("buckets: %w", err) 74 | } 75 | 76 | if err := bs.ValidateBucketKeys(opts.BucketSplitConf); err != nil { 77 | return nil, err 78 | } 79 | 80 | return &Queue{buckets: bs}, nil 81 | } 82 | 83 | // Push pushes a batch of `items` to the queue. 84 | // It is allowed to call this function during the read callback. 85 | func (q *Queue) Push(items Items) error { 86 | return q.buckets.Push(items, true) 87 | } 88 | 89 | // Read fetches up to `n` items from the queue. It will call the supplied `fn` 90 | // one or several times until either `n` is reached or the queue is empty. If 91 | // the queue is empty before calling Read(), then `fn` is not called. If `n` is 92 | // negative, then as many items as possible are returned until the queue is 93 | // empty. 94 | // 95 | // The `dst` argument can be used to pass a preallocated slice that 96 | // the queue appends to. This can be done to avoid allocations. 97 | // If you don't care you can also simply pass nil. 98 | // 99 | // You should NEVER use the supplied items outside of `fn`, as they 100 | // are directly sliced from a mmap(2). Accessing them outside will 101 | // almost certainly lead to a crash. If you need them outside (e.g. for 102 | // appending to a slice) then you can use the Copy() function of Items. 103 | // 104 | // You can return either ReadOpPop or ReadOpPeek from `fn`. 105 | // 106 | // You may only call Push() inside the read transaction. 107 | // All other operations will DEADLOCK if called! 108 | func (q *Queue) Read(n int, fn TransactionFn) error { 109 | return q.buckets.Read(n, "", fn) 110 | } 111 | 112 | // Delete deletes all items in the range `from` to `to`. 113 | // Both `from` and `to` are including, i.e. keys with this value are deleted. 114 | // The number of deleted items is returned. 115 | func (q *Queue) Delete(from, to Key) (int, error) { 116 | return q.buckets.Delete("", from, to) 117 | } 118 | 119 | // Len returns the number of items in the queue. 120 | // NOTE: This gets more expensive when you have a higher number of buckets, 121 | // so you probably should not call that in a hot loop. 122 | func (q *Queue) Len() int { 123 | return q.buckets.Len("") 124 | } 125 | 126 | // Sync can be called to explicitly sync the queue contents 127 | // to persistent storage, even if you configured SyncNone. 128 | func (q *Queue) Sync() error { 129 | return q.buckets.Sync() 130 | } 131 | 132 | // Clear fully deletes the queue contents. 133 | func (q *Queue) Clear() error { 134 | return q.buckets.Clear() 135 | } 136 | 137 | // Shovel moves items from `src` to `dst`. The `src` queue will be completely drained 138 | // afterwards. For speed reasons this assume that the dst queue uses the same bucket func 139 | // as the source queue. If you cannot guarantee this, you should implement a naive Shovel() 140 | // implementation that just uses Pop/Push. 
141 | // 142 | // This method can be used if you want to change options like the BucketSplitConf or if you 143 | // intend to have more than one queue that are connected by some logic. Examples for the 144 | // latter case would be a "deadletter queue" where you put failed calculations for later 145 | // re-calculations or a queue for unacknowledged items. 146 | func (q *Queue) Shovel(dst *Queue) (int, error) { 147 | return q.buckets.Shovel(dst.buckets, "") 148 | } 149 | 150 | // Fork splits the reading end of the queue in two parts. If Pop() is 151 | // called on the returned Fork (which implements the Consumer interface), 152 | // then other forks and the original queue is not affected. 153 | // 154 | // The process of forking is relatively cheap and adds only minor storage and 155 | // memory cost to the queue as a whole. Performance during pushing and popping 156 | // is almost not affected at all. 157 | func (q *Queue) Fork(name ForkName) (*Fork, error) { 158 | if err := q.buckets.Fork("", name); err != nil { 159 | return nil, err 160 | } 161 | 162 | return &Fork{name: name, q: q}, nil 163 | } 164 | 165 | // Forks returns a list of fork names. The list will be empty if there are no forks yet. 166 | // In other words: The initial queue is not counted as fork. 167 | func (q *Queue) Forks() []ForkName { 168 | return q.buckets.Forks() 169 | } 170 | 171 | // Close should always be called and error checked when you're done 172 | // with using the queue. Close might still flush out some data, depending 173 | // on what sync mode you configured. 174 | func (q *Queue) Close() error { 175 | return q.buckets.Close() 176 | } 177 | 178 | // PopCopy works like a simplified Read() but copies the items and pops them. 179 | // It is less efficient and should not be used if you care for performance. 180 | func PopCopy(c Consumer, n int) (Items, error) { 181 | var items Items 182 | return items, c.Read(n, func(_ Transaction, popped Items) (ReadOp, error) { 183 | items = append(items, popped.Copy()...) 184 | return ReadOpPop, nil 185 | }) 186 | } 187 | 188 | // PeekCopy works like a simplified Read() but copies the items and does not 189 | // remove them. It is less efficient and should not be used if you care for 190 | // performance. 191 | func PeekCopy(c Consumer, n int) (Items, error) { 192 | var items Items 193 | return items, c.Read(n, func(_ Transaction, popped Items) (ReadOp, error) { 194 | items = append(items, popped.Copy()...) 195 | return ReadOpPeek, nil 196 | }) 197 | } 198 | 199 | ///////////// 200 | 201 | // Fork is an implementation of the Consumer interface for a named fork. 202 | // See the Fork() method for more explanation. 203 | type Fork struct { 204 | name ForkName 205 | q *Queue 206 | } 207 | 208 | // Consumer is an interface that both Fork and Queue implement. 209 | // It covers every consumer related API. Please refer to the respective 210 | // Queue methods for details. 211 | type Consumer interface { 212 | Read(n int, fn TransactionFn) error 213 | Delete(from, to Key) (int, error) 214 | Shovel(dst *Queue) (int, error) 215 | Len() int 216 | Fork(name ForkName) (*Fork, error) 217 | } 218 | 219 | // Check that Queue also implements the Consumer interface. 220 | var _ Consumer = &Queue{} 221 | 222 | // Read is like Queue.Read(). 223 | func (f *Fork) Read(n int, fn TransactionFn) error { 224 | if f.q == nil { 225 | return ErrNoSuchFork 226 | } 227 | 228 | return f.q.buckets.Read(n, f.name, fn) 229 | } 230 | 231 | // Len is like Queue.Len(). 
232 | func (f *Fork) Len() int { 233 | if f.q == nil { 234 | return 0 235 | } 236 | 237 | // ignore the error, as it can only happen with bad consumer name. 238 | return f.q.buckets.Len(f.name) 239 | } 240 | 241 | // Delete is like Queue.Delete(). 242 | func (f *Fork) Delete(from, to Key) (int, error) { 243 | if f.q == nil { 244 | return 0, ErrNoSuchFork 245 | } 246 | 247 | return f.q.buckets.Delete(f.name, from, to) 248 | } 249 | 250 | // Remove removes this fork. If the fork is used after this, the API 251 | // will return ErrNoSuchFork in all cases. 252 | func (f *Fork) Remove() error { 253 | if f.q == nil { 254 | return ErrNoSuchFork 255 | } 256 | 257 | q := f.q 258 | f.q = nil // mark self as deleted. 259 | return q.buckets.RemoveFork(f.name) 260 | } 261 | 262 | // Shovel is like Queue.Shovel(). The data of the current fork 263 | // is pushed to the `dst` queue. 264 | func (f *Fork) Shovel(dst *Queue) (int, error) { 265 | if f.q == nil { 266 | return 0, ErrNoSuchFork 267 | } 268 | return f.q.buckets.Shovel(dst.buckets, f.name) 269 | } 270 | 271 | // Fork is like Queue.Fork(), except that the fork happens relative to the 272 | // current state of the consumer and not to the state of the underlying Queue. 273 | func (f *Fork) Fork(name ForkName) (*Fork, error) { 274 | if err := f.q.buckets.Fork(f.name, name); err != nil { 275 | return nil, err 276 | } 277 | 278 | return &Fork{name: name, q: f.q}, nil 279 | } 280 | -------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "encoding/binary" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/sahib/timeq/item" 10 | "github.com/sahib/timeq/item/testutils" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func benchmarkPushPopWithSyncMode(b *testing.B, benchmarkPush bool, syncMode SyncMode) { 15 | dir, err := os.MkdirTemp("", "timeq-buckettest") 16 | require.NoError(b, err) 17 | defer os.RemoveAll(dir) 18 | 19 | opts := DefaultOptions() 20 | opts.SyncMode = syncMode 21 | 22 | queueDir := filepath.Join(dir, item.Key(23).String()) 23 | queue, err := Open(queueDir, opts) 24 | require.NoError(b, err) 25 | 26 | // Add some dummy data: 27 | items := make(item.Items, 2000) 28 | timeoff := 0 29 | 30 | b.ResetTimer() 31 | for run := 0; run < b.N; run++ { 32 | b.StopTimer() 33 | for idx := 0; idx < len(items); idx++ { 34 | // use a realistic size for each message: 35 | var buf [40]byte 36 | for pos := 0; pos < cap(buf); pos += 8 { 37 | binary.BigEndian.PutUint64(buf[pos:], uint64(timeoff+idx)) 38 | } 39 | 40 | items[idx].Key = item.Key(timeoff + idx) 41 | items[idx].Blob = buf[:] 42 | } 43 | 44 | timeoff += len(items) 45 | if benchmarkPush { 46 | b.StartTimer() 47 | } 48 | 49 | require.NoError(b, queue.Push(items)) 50 | if benchmarkPush { 51 | b.StopTimer() 52 | } 53 | 54 | if !benchmarkPush { 55 | b.StartTimer() 56 | } 57 | 58 | // The "-1" is to avoid deleting the bucket over and over. 59 | // We want to benchmark the actual pop and not the deletion 60 | // on empty buckets (to make it comparable to previous bench numbers). 
61 | err = queue.Read(len(items)-1, func(_ Transaction, items Items) (ReadOp, error) { 62 | return ReadOpPop, nil 63 | }) 64 | 65 | if !benchmarkPush { 66 | b.StopTimer() 67 | } 68 | 69 | require.NoError(b, err) 70 | } 71 | 72 | require.NoError(b, queue.Close()) 73 | } 74 | 75 | func BenchmarkPopSyncNone(b *testing.B) { benchmarkPushPopWithSyncMode(b, false, SyncNone) } 76 | func BenchmarkPopSyncData(b *testing.B) { benchmarkPushPopWithSyncMode(b, false, SyncData) } 77 | func BenchmarkPopSyncIndex(b *testing.B) { benchmarkPushPopWithSyncMode(b, false, SyncIndex) } 78 | func BenchmarkPopSyncFull(b *testing.B) { benchmarkPushPopWithSyncMode(b, false, SyncFull) } 79 | func BenchmarkPushSyncNone(b *testing.B) { benchmarkPushPopWithSyncMode(b, true, SyncNone) } 80 | func BenchmarkPushSyncData(b *testing.B) { benchmarkPushPopWithSyncMode(b, true, SyncData) } 81 | func BenchmarkPushSyncIndex(b *testing.B) { benchmarkPushPopWithSyncMode(b, true, SyncIndex) } 82 | func BenchmarkPushSyncFull(b *testing.B) { benchmarkPushPopWithSyncMode(b, true, SyncFull) } 83 | 84 | var globItems Items 85 | 86 | func BenchmarkCopyItems(b *testing.B) { 87 | items := make(Items, 2000) 88 | for idx := 0; idx < len(items); idx++ { 89 | // use a realistic size for each message: 90 | var buf [40]byte 91 | for pos := 0; pos < cap(buf); pos += 8 { 92 | binary.BigEndian.PutUint64(buf[pos:], uint64(idx)) 93 | } 94 | 95 | items[idx].Key = item.Key(idx) 96 | items[idx].Blob = buf[:] 97 | } 98 | 99 | b.Run("copy-naive-with-alloc", func(b *testing.B) { 100 | b.ResetTimer() 101 | for run := 0; run < b.N; run++ { 102 | globItems = items.Copy() 103 | } 104 | }) 105 | 106 | c := make(Items, 2000) 107 | pseudoMmap := make([]byte, 2000*40) 108 | 109 | // Difference to above bench: It does not allocate anything 110 | // during the benchmark. 
111 | b.Run("copy-with-pseudo-mmap", func(b *testing.B) { 112 | b.ResetTimer() 113 | for run := 0; run < b.N; run++ { 114 | // global variable to stop the compiler 115 | // from optimizing the call away: 116 | // globItems = items.Copy() 117 | off := 0 118 | for idx := 0; idx < len(items); idx++ { 119 | 120 | c[idx] = items[idx] 121 | s := pseudoMmap[off : off+40] 122 | copy(s, items[idx].Blob) 123 | c[idx].Blob = s 124 | } 125 | 126 | globItems = c 127 | } 128 | }) 129 | } 130 | 131 | var globalKey Key 132 | 133 | func BenchmarkDefaultBucketSplitConf(b *testing.B) { 134 | b.Run("default", func(b *testing.B) { 135 | globalKey = 23 136 | for run := 0; run < b.N; run++ { 137 | globalKey = DefaultBucketSplitConf.Func(globalKey) 138 | } 139 | }) 140 | 141 | b.Run("baseline", func(b *testing.B) { 142 | globalKey = 23 143 | const div = 9 * 60 * 1e9 144 | for run := 0; run < b.N; run++ { 145 | globalKey = (globalKey / div) * div 146 | } 147 | }) 148 | } 149 | 150 | func BenchmarkShovel(b *testing.B) { 151 | b.StopTimer() 152 | 153 | dir, err := os.MkdirTemp("", "timeq-shovelbench") 154 | require.NoError(b, err) 155 | defer os.RemoveAll(dir) 156 | 157 | srcDir := filepath.Join(dir, "src") 158 | dstDir := filepath.Join(dir, "dst") 159 | srcQueue, err := Open(srcDir, DefaultOptions()) 160 | require.NoError(b, err) 161 | dstQueue, err := Open(dstDir, DefaultOptions()) 162 | require.NoError(b, err) 163 | 164 | for run := 0; run < b.N; run++ { 165 | require.NoError(b, srcQueue.Push(testutils.GenItems(0, 2000, 1))) 166 | b.StartTimer() 167 | _, err := srcQueue.Shovel(dstQueue) 168 | require.NoError(b, err) 169 | b.StopTimer() 170 | require.NoError(b, dstQueue.Clear()) 171 | } 172 | 173 | defer srcQueue.Close() 174 | defer dstQueue.Close() 175 | } 176 | -------------------------------------------------------------------------------- /bucket.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "runtime/debug" 9 | "slices" 10 | 11 | "github.com/otiai10/copy" 12 | "github.com/sahib/timeq/index" 13 | "github.com/sahib/timeq/item" 14 | "github.com/sahib/timeq/vlog" 15 | ) 16 | 17 | const ( 18 | dataLogName = "dat.log" 19 | ) 20 | 21 | // ReadOp defines what timeq should do with the data that was read. 22 | type ReadOp int 23 | 24 | const ( 25 | // ReadOpPeek preserves the read data. It will be available on the next call to Read(). 26 | ReadOpPeek = 0 27 | 28 | // ReadOpPop removes the read data. It will not be available on the next call to Read(). 29 | ReadOpPop = 1 30 | ) 31 | 32 | type bucketReadOpFn func(items Items) (ReadOp, error) 33 | 34 | type bucketIndex struct { 35 | Log *index.Writer 36 | Mem *index.Index 37 | } 38 | 39 | type bucket struct { 40 | dir string 41 | key item.Key 42 | log *vlog.Log 43 | opts Options 44 | indexes map[ForkName]bucketIndex 45 | } 46 | 47 | var ( 48 | ErrNoSuchFork = errors.New("no fork with this name") 49 | ) 50 | 51 | func (b *bucket) idxForFork(fork ForkName) (bucketIndex, error) { 52 | idx, ok := b.indexes[fork] 53 | if !ok { 54 | return idx, ErrNoSuchFork 55 | } 56 | 57 | return idx, nil 58 | } 59 | 60 | func recoverIndexFromLog(opts *Options, log *vlog.Log, idxPath string) (*index.Index, error) { 61 | // We try to re-generate the index from the value log if 62 | // the index is damaged or missing (in case the value log has some entries). 
63 | // 64 | // Since we have all the keys and offsets there too, 65 | // we should be able to recover from that. This will not consider already deleted 66 | // entries of course, as those are marked in the index file, but not in the value log. 67 | // It's better to replay old values twice then to loose values. 68 | 69 | var memErr error 70 | mem, memErr := index.FromVlog(log) 71 | if memErr != nil { 72 | // not much we can do for that case: 73 | return nil, fmt.Errorf("index load failed & could not regenerate: %w", memErr) 74 | } 75 | 76 | if mem.Len() > 0 { 77 | opts.Logger.Printf("index is empty, but log is not (%s)", idxPath) 78 | } 79 | 80 | if err := os.Remove(idxPath); err != nil { 81 | return nil, fmt.Errorf("index failover: could not remove broken index: %w", err) 82 | } 83 | 84 | // We should write the repaired index after repair, so we don't have to do it 85 | // again if this gets interrupted. Also this allows us to just push to the index 86 | // instead of having logic later that writes the not yet written part. 87 | if err := index.WriteIndex(mem, idxPath); err != nil { 88 | return nil, fmt.Errorf("index: write during recover did not work") 89 | } 90 | 91 | if ln := mem.Len(); ln > 0 { 92 | opts.Logger.Printf("recovered index with %d entries", ln) 93 | } 94 | 95 | return mem, nil 96 | } 97 | 98 | func idxPath(dir string, fork ForkName) string { 99 | // If there is no fork we use "idx.log" as file name for backwards compatibility. 100 | var idxName string 101 | if fork == "" { 102 | idxName = "idx.log" 103 | } else { 104 | idxName = string(fork) + ".idx.log" 105 | } 106 | 107 | return filepath.Join(dir, idxName) 108 | } 109 | 110 | func loadIndex(idxPath string, log *vlog.Log, opts Options) (bucketIndex, error) { 111 | mem, err := index.Load(idxPath) 112 | if err != nil || (mem.NEntries() == 0 && !log.IsEmpty()) { 113 | mem, err = recoverIndexFromLog(&opts, log, idxPath) 114 | if err != nil { 115 | return bucketIndex{}, err 116 | } 117 | } 118 | 119 | idxLog, err := index.NewWriter(idxPath, opts.SyncMode&SyncIndex > 0) 120 | if err != nil { 121 | return bucketIndex{}, fmt.Errorf("index writer: %w", err) 122 | } 123 | 124 | return bucketIndex{ 125 | Log: idxLog, 126 | Mem: mem, 127 | }, nil 128 | } 129 | 130 | func openBucket(dir string, forks []ForkName, opts Options) (buck *bucket, outErr error) { 131 | if err := os.MkdirAll(dir, 0700); err != nil { 132 | return nil, err 133 | } 134 | 135 | // Setting this allows us to handle mmap() errors gracefully. 136 | // The typical scenario where those errors happen, are full filesystems. 137 | // This can happen like this: 138 | // // * ftruncate() grows a file beyond the available space without error. 139 | // Since the new "space" are just zeros that do not take any physical 140 | // space this makes sense. 141 | // * Accessing this mapped memory however will cause the filesystem to actually 142 | // try to serve some more pages, which fails as it's full (would also happen on 143 | // hardware failure or similar) 144 | // * This causes a SIGBUS to be send to our process. By default Go crashes the program 145 | // and prints a stack trace. Changing this to a recoverable panic allows us to intervene 146 | // and continue execution with a proper error return. 147 | // 148 | // Other errors like suddenly deleted database files might cause this too. 149 | // The drawback of this approach is that this might cause issues if the calling processes 150 | // also sets this option (but with False!). 
If this turns out to be a problem we have to 151 | // introduce an option to disable this error handling. 152 | debug.SetPanicOnFault(true) 153 | 154 | defer recoverMmapError(&outErr) 155 | 156 | logPath := filepath.Join(dir, dataLogName) 157 | log, err := vlog.Open(logPath, opts.SyncMode&SyncData > 0) 158 | if err != nil { 159 | return nil, fmt.Errorf("open: %w", err) 160 | } 161 | 162 | forks = append(forks, "") 163 | indexes := make(map[ForkName]bucketIndex, len(forks)-1) 164 | 165 | var entries item.Off 166 | for _, fork := range forks { 167 | idxPath := idxPath(dir, fork) 168 | idx, err := loadIndex(idxPath, log, opts) 169 | if err != nil { 170 | return nil, err 171 | } 172 | 173 | indexes[fork] = idx 174 | entries += idx.Mem.NEntries() 175 | } 176 | 177 | key, err := item.KeyFromString(filepath.Base(dir)) 178 | if err != nil { 179 | return nil, err 180 | } 181 | 182 | buck = &bucket{ 183 | dir: dir, 184 | key: item.Key(key), 185 | log: log, 186 | indexes: indexes, 187 | opts: opts, 188 | } 189 | 190 | if buck.AllEmpty() && entries > 0 { 191 | // This means that the buck is empty, but is still occupying space 192 | // (i.e. it contains values that were popped already). Situations where 193 | // this might occur are: a Pop() that was interrupted (e.g. a crash), a 194 | // RemoveFork() that deleted a fork offline, but could not check if 195 | // it's empty or some other weird crash situation. This is not really 196 | // an issue but since we occupy space for no real data we should clean up. 197 | // 198 | // We do this by brute force: Close the bucket, remove the directory and let it 199 | // create again by the logic above. This is an edge case, so it doesn't matter if 200 | // this is perfectly optimized. 201 | if err := buck.Close(); err != nil { 202 | return nil, fmt.Errorf("close for reinit: %w", err) 203 | } 204 | 205 | if err := removeBucketDir(dir, forks); err != nil { 206 | return nil, fmt.Errorf("remove for reinit: %w", err) 207 | } 208 | 209 | return openBucket(dir, forks, opts) 210 | } 211 | 212 | return buck, nil 213 | } 214 | 215 | func (b *bucket) Sync(force bool) error { 216 | err := b.log.Sync(force) 217 | for _, idx := range b.indexes { 218 | err = errors.Join(err, idx.Log.Sync(force)) 219 | } 220 | 221 | return err 222 | } 223 | 224 | func (b *bucket) Trailers(fn func(fork ForkName, trailer index.Trailer)) { 225 | for fork, idx := range b.indexes { 226 | fn(fork, idx.Mem.Trailer()) 227 | } 228 | } 229 | 230 | func (b *bucket) Close() error { 231 | err := b.log.Close() 232 | for _, idx := range b.indexes { 233 | err = errors.Join(err, idx.Log.Close()) 234 | } 235 | 236 | return err 237 | } 238 | 239 | func recoverMmapError(dstErr *error) { 240 | // See comment in Open(). 241 | // NOTE: calling recover() is surprisingly quite expensive. 242 | // Do not call in this loops. 243 | if recErr := recover(); recErr != nil { 244 | *dstErr = fmt.Errorf("panic (check: enough space left / file issues): %v - trace:\n%s", recErr, string(debug.Stack())) 245 | } 246 | } 247 | 248 | // Push expects pre-sorted items! 249 | // If `all` is set, all forks receive the new items. 250 | // If `all` is false, then only the fork with `name` gets the new items. 
251 | func (b *bucket) Push(items item.Items, all bool, name ForkName) (outErr error) { 252 | if len(items) == 0 { 253 | return nil 254 | } 255 | 256 | defer recoverMmapError(&outErr) 257 | 258 | loc, err := b.log.Push(items) 259 | if err != nil { 260 | return fmt.Errorf("push: log: %w", err) 261 | } 262 | 263 | if all { 264 | for name, idx := range b.indexes { 265 | idx.Mem.Set(loc) 266 | if err := idx.Log.Push(loc, idx.Mem.Trailer()); err != nil { 267 | return fmt.Errorf("push: index-log: %s: %w", name, err) 268 | } 269 | } 270 | } else { 271 | // only push to a certain index if requested. 272 | idx, err := b.idxForFork(name) 273 | if err != nil { 274 | return err 275 | } 276 | 277 | idx.Mem.Set(loc) 278 | if err := idx.Log.Push(loc, idx.Mem.Trailer()); err != nil { 279 | return fmt.Errorf("push: index-log: %s: %w", name, err) 280 | } 281 | } 282 | 283 | return nil 284 | } 285 | 286 | func (b *bucket) logAt(loc item.Location) vlog.Iter { 287 | continueOnErr := b.opts.ErrorMode != ErrorModeAbort 288 | return b.log.At(loc, continueOnErr) 289 | } 290 | 291 | // addIter adds a new batchIter to `batchIters` and advances the idxIter. 292 | func (b *bucket) addIter(batchIters *vlog.Iters, idxIter *index.Iter) (bool, error) { 293 | loc := idxIter.Value() 294 | batchIter := b.logAt(loc) 295 | if !batchIter.Next() { 296 | // might be empty or I/O error: 297 | return false, batchIter.Err() 298 | } 299 | 300 | batchIters.Push(batchIter) 301 | return !idxIter.Next(), nil 302 | } 303 | 304 | func (b *bucket) Read(n int, dst *item.Items, fork ForkName, fn bucketReadOpFn) error { 305 | if n <= 0 { 306 | // return nothing. 307 | return nil 308 | } 309 | 310 | idx, err := b.idxForFork(fork) 311 | if err != nil { 312 | return err 313 | } 314 | 315 | if dst == nil { 316 | // just for safety: 317 | v := Items{} 318 | dst = &v 319 | } 320 | 321 | iters, items, _, err := b.peek(n, (*dst)[:0], idx.Mem) 322 | if err != nil { 323 | return err 324 | } 325 | 326 | if cap(*dst) < cap(items) { 327 | // if we appended beyond what we pre-allocated, 328 | // then use the newly pre-allocated slice. 329 | *dst = items 330 | } 331 | 332 | op, err := fn(items) 333 | if err != nil { 334 | return err 335 | } 336 | 337 | switch op { 338 | case ReadOpPop: 339 | if iters != nil { 340 | if err := b.popSync(idx, iters); err != nil { 341 | return err 342 | } 343 | } 344 | case ReadOpPeek: 345 | // nothing to do. 346 | } 347 | 348 | return nil 349 | } 350 | 351 | // peek reads from the bucket, but does not mark the elements as deleted yet. 352 | func (b *bucket) peek(n int, dst item.Items, idx *index.Index) (batchIters *vlog.Iters, outItems item.Items, npopped int, outErr error) { 353 | defer recoverMmapError(&outErr) 354 | 355 | // Fetch the lowest entry of the index: 356 | idxIter := idx.Iter() 357 | if !idxIter.Next() { 358 | // The index is empty. Nothing to pop. 359 | return nil, dst, 0, nil 360 | } 361 | 362 | // initialize with first batch iter: 363 | batchItersSlice := make(vlog.Iters, 0, 1) 364 | batchIters = &batchItersSlice 365 | indexExhausted, err := b.addIter(batchIters, &idxIter) 366 | if err != nil { 367 | return nil, dst, 0, err 368 | } 369 | 370 | if len(*batchIters) == 0 { 371 | // this should not happen normally, but can possibly 372 | // in case of broken index or WAL. 373 | return nil, dst, 0, err 374 | } 375 | 376 | // Choose the lowest item of all iterators here and make sure the next loop 377 | // iteration will yield the next highest key. 
378 | var numAppends int 379 | for numAppends < n && !(*batchIters)[0].Exhausted() { 380 | var currIter = &(*batchIters)[0] 381 | dst = append(dst, currIter.Item()) 382 | numAppends++ 383 | 384 | // advance current batch iter. We will make sure at the 385 | // end of the loop that the currently first one gets sorted 386 | // correctly if it turns out to be out-of-order. 387 | currIter.Next() 388 | currKey := currIter.Item().Key 389 | if err := currIter.Err(); err != nil { 390 | return nil, dst, 0, err 391 | } 392 | 393 | // Check the exhausted state as heap.Fix might change the sorting. 394 | // NOTE: we could do heap.Pop() here to pop the exhausted iters away, 395 | // but we need the exhausted iters too give them to popSync() later. 396 | currIsExhausted := currIter.Exhausted() 397 | 398 | // Repair sorting of the heap as we changed the value of the first iter. 399 | batchIters.Fix(0) 400 | 401 | // index batch entries might be overlapping. We need to check if the 402 | // next entry in the index needs to be taken into account for the next 403 | // iteration. For this we compare the next index entry to the 404 | // supposedly next batch value. 405 | if !indexExhausted { 406 | nextLoc := idxIter.Value() 407 | if currIsExhausted || nextLoc.Key <= currKey { 408 | indexExhausted, err = b.addIter(batchIters, &idxIter) 409 | if err != nil { 410 | return nil, dst, 0, err 411 | } 412 | } 413 | } 414 | } 415 | 416 | return batchIters, dst, numAppends, nil 417 | } 418 | 419 | func (b *bucket) popSync(idx bucketIndex, batchIters *vlog.Iters) error { 420 | if batchIters == nil || len(*batchIters) == 0 { 421 | return nil 422 | } 423 | 424 | // NOTE: In theory we could also use fallocate(FALLOC_FL_ZERO_RANGE) on 425 | // ext4 to "put holes" into the log file where we read batches from to save 426 | // some space early. This would make sense only for very big buckets 427 | // though. We delete the bucket once it was completely exhausted anyways. 428 | 429 | // Now since we've collected all data we need to remember what we consumed. 430 | for _, batchIter := range *batchIters { 431 | if !batchIter.Exhausted() { 432 | currLoc := batchIter.CurrentLocation() 433 | 434 | // some keys were take from it, but not all (or none) 435 | // we need to adjust the index to keep those reachable. 436 | idx.Mem.Set(currLoc) 437 | 438 | if err := idx.Log.Push(currLoc, idx.Mem.Trailer()); err != nil { 439 | return fmt.Errorf("idxlog: append begun: %w", err) 440 | } 441 | } 442 | 443 | // Make sure the previous batch index entry gets deleted: 444 | idx.Mem.Delete(batchIter.FirstKey()) 445 | deadLoc := item.Location{ 446 | Key: batchIter.FirstKey(), 447 | Len: 0, 448 | Off: 0, 449 | } 450 | if err := idx.Log.Push(deadLoc, idx.Mem.Trailer()); err != nil { 451 | return fmt.Errorf("idxlog: append begun: %w", err) 452 | } 453 | } 454 | 455 | return idx.Log.Sync(false) 456 | } 457 | 458 | func (b *bucket) Delete(fork ForkName, from, to item.Key) (ndeleted int, outErr error) { 459 | defer recoverMmapError(&outErr) 460 | 461 | if b.key > to { 462 | // this bucket is safe from the clear. 463 | // NOTE: We can't check `from` here as we don't know 464 | // how big our bucket is. Could be more efficient if we did... 
465 | return 0, nil 466 | } 467 | 468 | if to < from { 469 | return 0, fmt.Errorf("to < from in bucket delete (%d < %d)", to, from) 470 | } 471 | 472 | idx, err := b.idxForFork(fork) 473 | if err != nil { 474 | return 0, err 475 | } 476 | 477 | lenBefore := idx.Mem.Len() 478 | 479 | var pushErr error 480 | var deleteEntries []item.Key 481 | var partialSetEntries []item.Location 482 | 483 | for iter := idx.Mem.Iter(); iter.Next(); { 484 | loc := iter.Value() 485 | if loc.Key > to { 486 | // this index entry may live untouched. 487 | break 488 | } 489 | 490 | // `loc` needs to be at least partly deleted. 491 | // go figure out what part of it. 492 | leftLoc := loc 493 | leftLoc.Len = 0 494 | rightLoc := item.Location{} 495 | 496 | logIter := b.logAt(loc) 497 | for logIter.Next() { 498 | item := logIter.Item() 499 | if item.Key < from { 500 | // key is not affected by the deletion; keep it. 501 | leftLoc.Len++ 502 | } 503 | 504 | if item.Key > to && rightLoc.Len == 0 { 505 | // keys not affected starting here; save position and then 506 | // go remember that part later on. 507 | rightLoc = logIter.CurrentLocation() 508 | break 509 | } 510 | } 511 | 512 | locShrinked := int(loc.Len - leftLoc.Len - rightLoc.Len) 513 | if locShrinked == 0 { 514 | // location was not actually affected; nothing to do. 515 | continue 516 | } 517 | 518 | // always delete the original entry, 519 | // we need to do that outside of the iteration, 520 | // as the iter will break otherwise. 521 | deleteEntries = append(deleteEntries, loc.Key) 522 | ndeleted += locShrinked 523 | 524 | if leftLoc.Len > 0 { 525 | partialSetEntries = append(partialSetEntries, leftLoc) 526 | pushErr = errors.Join( 527 | pushErr, 528 | idx.Log.Push(leftLoc, index.Trailer{ 529 | TotalEntries: lenBefore - loc.Len - leftLoc.Len, 530 | }), 531 | ) 532 | } 533 | 534 | if rightLoc.Len > 0 { 535 | partialSetEntries = append(partialSetEntries, rightLoc) 536 | pushErr = errors.Join( 537 | pushErr, 538 | idx.Log.Push(rightLoc, index.Trailer{ 539 | TotalEntries: lenBefore - item.Off(ndeleted), 540 | }), 541 | ) 542 | } 543 | } 544 | 545 | for _, key := range deleteEntries { 546 | idx.Mem.Delete(key) 547 | pushErr = errors.Join(pushErr, idx.Log.Push(item.Location{ 548 | Key: key, 549 | Len: 0, 550 | Off: 0, 551 | }, index.Trailer{ 552 | TotalEntries: lenBefore - item.Off(ndeleted), 553 | })) 554 | } 555 | 556 | for _, loc := range partialSetEntries { 557 | idx.Mem.Set(loc) 558 | } 559 | 560 | return ndeleted, errors.Join(pushErr, idx.Log.Sync(false)) 561 | } 562 | 563 | func (b *bucket) AllEmpty() bool { 564 | for _, idx := range b.indexes { 565 | if idx.Mem.Len() > 0 { 566 | return false 567 | } 568 | } 569 | 570 | return true 571 | } 572 | 573 | func (b *bucket) Empty(fork ForkName) bool { 574 | idx, err := b.idxForFork(fork) 575 | if err != nil { 576 | return true 577 | } 578 | 579 | return idx.Mem.Len() == 0 580 | } 581 | 582 | func (b *bucket) Key() item.Key { 583 | return b.key 584 | } 585 | 586 | func (b *bucket) Len(fork ForkName) int { 587 | idx, err := b.idxForFork(fork) 588 | if err != nil { 589 | return 0 590 | } 591 | 592 | return int(idx.Mem.Len()) 593 | } 594 | 595 | func (b *bucket) Fork(src, dst ForkName) error { 596 | srcIdx, err := b.idxForFork(src) 597 | if err != nil { 598 | return err 599 | } 600 | 601 | // If we fork to `dst`, then dst should better not exist. 602 | if _, err := b.idxForFork(dst); err == nil { 603 | return nil // fork exists already. 
604 | } 605 | 606 | dstPath := idxPath(b.dir, dst) 607 | if err := index.WriteIndex(srcIdx.Mem, dstPath); err != nil { 608 | return err 609 | } 610 | 611 | dstIdxLog, err := index.NewWriter(dstPath, b.opts.SyncMode&SyncIndex > 0) 612 | if err != nil { 613 | return err 614 | } 615 | 616 | b.indexes[dst] = bucketIndex{ 617 | Log: dstIdxLog, 618 | Mem: srcIdx.Mem.Copy(), 619 | } 620 | return nil 621 | } 622 | 623 | func forkOffline(buckDir string, src, dst ForkName) error { 624 | dstPath := idxPath(buckDir, dst) 625 | if _, err := os.Stat(dstPath); err == nil { 626 | // dst already exists. 627 | return nil 628 | } 629 | 630 | srcPath := idxPath(buckDir, src) 631 | opts := copy.Options{Sync: true} 632 | return copy.Copy(srcPath, dstPath, opts) 633 | } 634 | 635 | func (b *bucket) RemoveFork(fork ForkName) error { 636 | idx, err := b.idxForFork(fork) 637 | if err != nil { 638 | return err 639 | } 640 | 641 | dstPath := idxPath(b.dir, fork) 642 | delete(b.indexes, fork) 643 | return errors.Join( 644 | idx.Log.Close(), 645 | os.Remove(dstPath), 646 | ) 647 | } 648 | 649 | // like RemoveFork() but used when the bucket is not loaded. 650 | func removeForkOffline(buckDir string, fork ForkName) error { 651 | // Quick path: bucket was not loaded, so we can just throw out 652 | // the to-be-removed index file: 653 | return os.Remove(idxPath(buckDir, fork)) 654 | } 655 | 656 | func (b *bucket) Forks() []ForkName { 657 | // NOTE: Why + 1? Because some functions like Open() will 658 | // append the ""-fork as default to the list, so we spare 659 | // one allocation if we add one extra. 660 | forks := make([]ForkName, 0, len(b.indexes)+1) 661 | for fork := range b.indexes { 662 | if fork == "" { 663 | continue 664 | } 665 | 666 | forks = append(forks, fork) 667 | } 668 | 669 | // since it comes from a map we should be nice and sort it. 670 | slices.Sort(forks) 671 | return forks 672 | } 673 | 674 | func filterIsNotExist(err error) error { 675 | if os.IsNotExist(err) { 676 | return nil 677 | } 678 | 679 | return err 680 | } 681 | 682 | func removeBucketDir(dir string, forks []ForkName) error { 683 | // We do this here because os.RemoveAll() is a bit more expensive, 684 | // as it does some extra syscalls and some portability checks that 685 | // we do not really need. Just delete them explicitly. 686 | // 687 | // We also don't care if the files actually existed, as long as they 688 | // are gone after this function call. 
689 | 690 | var err error 691 | for _, fork := range forks { 692 | err = errors.Join( 693 | err, 694 | filterIsNotExist(os.Remove(idxPath(dir, fork))), 695 | ) 696 | } 697 | 698 | return errors.Join( 699 | err, 700 | filterIsNotExist(os.Remove(filepath.Join(dir, "dat.log"))), 701 | filterIsNotExist(os.Remove(filepath.Join(dir, "idx.log"))), 702 | filterIsNotExist(os.Remove(dir)), 703 | ) 704 | } 705 | -------------------------------------------------------------------------------- /bucket_test.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "path/filepath" 7 | "slices" 8 | "testing" 9 | 10 | "github.com/sahib/timeq/index" 11 | "github.com/sahib/timeq/item" 12 | "github.com/sahib/timeq/item/testutils" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func createEmptyBucket(t *testing.T) (*bucket, string) { 17 | dir, err := os.MkdirTemp("", "timeq-buckettest") 18 | require.NoError(t, err) 19 | 20 | bucketDir := filepath.Join(dir, item.Key(23).String()) 21 | bucket, err := openBucket(bucketDir, nil, DefaultOptions()) 22 | require.NoError(t, err) 23 | 24 | return bucket, dir 25 | } 26 | 27 | // convenience function to avoid typing a lot. 28 | func buckPop(buck *bucket, n int, dst Items, fork ForkName) (Items, int, error) { 29 | result := Items{} 30 | var popped int 31 | return result, popped, buck.Read(n, &dst, fork, func(items Items) (ReadOp, error) { 32 | result = append(result, items.Copy()...) 33 | popped += len(items) 34 | return ReadOpPop, nil 35 | }) 36 | } 37 | 38 | func buckPeek(buck *bucket, n int, dst Items, fork ForkName) (Items, int, error) { 39 | result := Items{} 40 | var peeked int 41 | return result, peeked, buck.Read(n, &dst, fork, func(items Items) (ReadOp, error) { 42 | result = append(result, items.Copy()...) 43 | peeked += len(items) 44 | return ReadOpPeek, nil 45 | }) 46 | } 47 | 48 | func buckMove(buck, dstBuck *bucket, n int, dst Items, fork ForkName) (Items, int, error) { 49 | result := Items{} 50 | var moved int 51 | return result, moved, buck.Read(n, &dst, fork, func(items Items) (ReadOp, error) { 52 | result = append(result, items.Copy()...) 
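		// Note: items.Copy() is used in these helpers so the test keeps its
		// own snapshot of the data; the slice handed to the Read() callback
		// may point into a reused read buffer, so it should not be retained
		// beyond the callback without copying.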
53 | moved += len(items) 54 | return ReadOpPop, dstBuck.Push(items, true, fork) 55 | }) 56 | } 57 | 58 | func withEmptyBucket(t *testing.T, fn func(b *bucket)) { 59 | t.Parallel() 60 | 61 | buck, dir := createEmptyBucket(t) 62 | defer os.RemoveAll(dir) 63 | fn(buck) 64 | require.NoError(t, buck.Close()) 65 | } 66 | 67 | func TestBucketOpenEmpty(t *testing.T) { 68 | withEmptyBucket(t, func(buck *bucket) { 69 | require.True(t, buck.Empty("")) 70 | require.Equal(t, 0, buck.Len("")) 71 | }) 72 | } 73 | 74 | func TestBucketPushEmpty(t *testing.T) { 75 | withEmptyBucket(t, func(buck *bucket) { 76 | require.NoError(t, buck.Push(nil, true, "")) 77 | }) 78 | } 79 | 80 | func TestBucketPopZero(t *testing.T) { 81 | withEmptyBucket(t, func(buck *bucket) { 82 | dst := testutils.GenItems(0, 10, 1)[:0] 83 | gotItems, nPopped, err := buckPop(buck, 0, dst, "") 84 | require.NoError(t, err) 85 | require.Equal(t, dst, gotItems) 86 | require.Equal(t, 0, nPopped) 87 | }) 88 | } 89 | 90 | func TestBucketPopEmpty(t *testing.T) { 91 | withEmptyBucket(t, func(buck *bucket) { 92 | dst := testutils.GenItems(0, 10, 1)[:0] 93 | gotItems, nPopped, err := buckPop(buck, 100, dst, "") 94 | require.NoError(t, err) 95 | require.Equal(t, 0, nPopped) 96 | require.Equal(t, dst, gotItems) 97 | }) 98 | } 99 | 100 | func TestBucketPushPop(t *testing.T) { 101 | withEmptyBucket(t, func(buck *bucket) { 102 | expItems := testutils.GenItems(0, 10, 1) 103 | require.NoError(t, buck.Push(expItems, true, "")) 104 | gotItems, nPopped, err := buckPop(buck, len(expItems), nil, "") 105 | require.NoError(t, err) 106 | require.Equal(t, expItems, gotItems) 107 | require.Equal(t, len(expItems), nPopped) 108 | }) 109 | } 110 | 111 | func TestBucketPushPopReverse(t *testing.T) { 112 | withEmptyBucket(t, func(buck *bucket) { 113 | expItems := testutils.GenItems(10, 0, -1) 114 | require.NoError(t, buck.Push(expItems, true, "")) 115 | gotItems, nPopped, err := buckPop(buck, len(expItems), nil, "") 116 | require.NoError(t, err) 117 | require.Equal(t, expItems, gotItems) 118 | require.Equal(t, len(expItems), nPopped) 119 | }) 120 | } 121 | 122 | func TestBucketPushPopSorted(t *testing.T) { 123 | withEmptyBucket(t, func(buck *bucket) { 124 | push1 := testutils.GenItems(0, 10, 1) 125 | push2 := testutils.GenItems(11, 20, 1) 126 | expItems := append(push1, push2...) 
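		// The two batches are deliberately pushed in the "wrong" order:
		// popping afterwards must still return everything sorted by key.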
127 | require.NoError(t, buck.Push(push2, true, "")) 128 | require.NoError(t, buck.Push(push1, true, "")) 129 | gotItems, nPopped, err := buckPop(buck, len(push1)+len(push2), nil, "") 130 | require.NoError(t, err) 131 | require.Equal(t, len(push1)+len(push2), nPopped) 132 | require.Equal(t, expItems, gotItems) 133 | }) 134 | } 135 | 136 | func TestBucketPushPopZip(t *testing.T) { 137 | withEmptyBucket(t, func(buck *bucket) { 138 | push1 := testutils.GenItems(0, 20, 2) 139 | push2 := testutils.GenItems(1, 20, 2) 140 | require.NoError(t, buck.Push(push2, true, "")) 141 | require.NoError(t, buck.Push(push1, true, "")) 142 | gotItems, nPopped, err := buckPop(buck, len(push1)+len(push2), nil, "") 143 | require.NoError(t, err) 144 | 145 | for idx := 0; idx < 20; idx++ { 146 | require.Equal(t, testutils.ItemFromIndex(idx), gotItems[idx]) 147 | } 148 | 149 | require.Equal(t, len(push1)+len(push2), nPopped) 150 | }) 151 | } 152 | 153 | func TestBucketPopSeveral(t *testing.T) { 154 | withEmptyBucket(t, func(buck *bucket) { 155 | expItems := testutils.GenItems(0, 10, 1) 156 | require.NoError(t, buck.Push(expItems, true, "")) 157 | gotItems1, nPopped1, err := buckPop(buck, 5, nil, "") 158 | require.NoError(t, err) 159 | gotItems2, nPopped2, err := buckPop(buck, 5, nil, "") 160 | require.NoError(t, err) 161 | 162 | require.Equal(t, 5, nPopped1) 163 | require.Equal(t, 5, nPopped2) 164 | require.Equal(t, expItems, append(gotItems1, gotItems2...)) 165 | }) 166 | } 167 | 168 | func TestBucketPushPopSeveral(t *testing.T) { 169 | withEmptyBucket(t, func(buck *bucket) { 170 | push1 := testutils.GenItems(0, 20, 2) 171 | push2 := testutils.GenItems(1, 20, 2) 172 | require.NoError(t, buck.Push(push2, true, "")) 173 | require.NoError(t, buck.Push(push1, true, "")) 174 | gotItems1, nPopped1, err := buckPop(buck, 10, nil, "") 175 | require.NoError(t, err) 176 | gotItems2, nPopped2, err := buckPop(buck, 10, nil, "") 177 | require.NoError(t, err) 178 | 179 | require.Equal(t, 10, nPopped1) 180 | require.Equal(t, 10, nPopped2) 181 | 182 | gotItems := append(gotItems1, gotItems2...) 
183 | for idx := 0; idx < 20; idx++ { 184 | require.Equal(t, testutils.ItemFromIndex(idx), gotItems[idx]) 185 | } 186 | }) 187 | } 188 | 189 | func TestBucketPopLarge(t *testing.T) { 190 | withEmptyBucket(t, func(buck *bucket) { 191 | expItems := testutils.GenItems(0, 10, 1) 192 | require.NoError(t, buck.Push(expItems, true, "")) 193 | gotItems, nPopped, err := buckPop(buck, 20, nil, "") 194 | require.NoError(t, err) 195 | require.Equal(t, len(expItems), nPopped) 196 | require.Equal(t, expItems, gotItems) 197 | 198 | gotItems, nPopped, err = buckPop(buck, 20, nil, "") 199 | require.NoError(t, err) 200 | require.Equal(t, 0, nPopped) 201 | require.Len(t, gotItems, 0) 202 | }) 203 | } 204 | 205 | func TestBucketLen(t *testing.T) { 206 | withEmptyBucket(t, func(buck *bucket) { 207 | require.Equal(t, 0, buck.Len("")) 208 | require.True(t, buck.Empty("")) 209 | 210 | expItems := testutils.GenItems(0, 10, 1) 211 | require.NoError(t, buck.Push(expItems, true, "")) 212 | require.Equal(t, 10, buck.Len("")) 213 | require.False(t, buck.Empty("")) 214 | 215 | _, _, err := buckPop(buck, 5, nil, "") 216 | require.NoError(t, err) 217 | require.Equal(t, 5, buck.Len("")) 218 | require.False(t, buck.Empty("")) 219 | 220 | _, _, err = buckPop(buck, 5, nil, "") 221 | require.NoError(t, err) 222 | require.True(t, buck.Empty("")) 223 | require.Equal(t, 0, buck.Len("")) 224 | }) 225 | } 226 | 227 | func TestBucketDelete(t *testing.T) { 228 | withEmptyBucket(t, func(buck *bucket) { 229 | require.Equal(t, 0, buck.Len("")) 230 | require.True(t, buck.Empty("")) 231 | 232 | expItems := testutils.GenItems(0, 100, 1) 233 | require.NoError(t, buck.Push(expItems, true, "")) 234 | require.Equal(t, 100, buck.Len("")) 235 | 236 | deleted, err := buck.Delete("", 0, 50) 237 | require.NoError(t, err) 238 | require.Equal(t, 51, deleted) 239 | require.False(t, buck.Empty("")) 240 | 241 | existing, npeeked, err := buckPeek(buck, 100, nil, "") 242 | require.NoError(t, err) 243 | require.Equal(t, 49, npeeked) 244 | require.Equal(t, expItems[51:], existing) 245 | 246 | deleted, err = buck.Delete("", 0, 100) 247 | require.NoError(t, err) 248 | require.Equal(t, 49, deleted) 249 | require.True(t, buck.Empty("")) 250 | 251 | // to < from 252 | _, err = buck.Delete("", 100, 99) 253 | require.Error(t, err) 254 | }) 255 | } 256 | 257 | func TestBucketDeleteLeftAndRight(t *testing.T) { 258 | tcs := []struct { 259 | Name string 260 | From, To item.Key 261 | }{ 262 | { 263 | Name: "full_inclusive", 264 | From: 0, 265 | To: 100, 266 | }, { 267 | Name: "full_high_to", 268 | From: 0, 269 | To: 1000, 270 | }, { 271 | Name: "full_low_from", 272 | From: -100, 273 | To: 100, 274 | }, { 275 | Name: "full_both", 276 | From: -100, 277 | To: 100, 278 | }, { 279 | Name: "partial_one_item", 280 | From: 50, 281 | To: 50, 282 | }, { 283 | Name: "partial_two_items", 284 | From: 50, 285 | To: 51, 286 | }, { 287 | Name: "leftmost", 288 | From: 0, 289 | To: 0, 290 | }, { 291 | Name: "rightmost", 292 | From: 99, 293 | To: 99, 294 | }, { 295 | Name: "right_only", 296 | From: 0, 297 | To: 10, 298 | }, 299 | } 300 | 301 | for _, tc := range tcs { 302 | tc := tc 303 | t.Run(tc.Name, func(t *testing.T) { 304 | withEmptyBucket(t, func(buck *bucket) { 305 | buck.key = 0 // fake sets it with 23; we need 0 here. 
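			// (createEmptyBucket() opens the bucket under key 23 by default;
			// this table test generates items with keys 0..99, so the bucket
			// key is reset to 0 to match.)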
306 | 307 | require.Equal(t, 0, buck.Len("")) 308 | require.True(t, buck.Empty("")) 309 | 310 | expItems := testutils.GenItems(0, 100, 1) 311 | require.NoError(t, buck.Push(expItems, true, "")) 312 | require.Equal(t, 100, buck.Len("")) 313 | 314 | clampedTo := tc.To 315 | if tc.To > 99 { 316 | clampedTo = 99 317 | } else if tc.To < 0 { 318 | clampedTo = 0 319 | } 320 | 321 | clampedFrom := tc.From 322 | if tc.From < 0 { 323 | clampedFrom = 0 324 | } else if tc.From > 99 { 325 | clampedFrom = 99 326 | } 327 | 328 | ndeletedExp := clampedTo - clampedFrom + 1 329 | 330 | ndeleted, err := buck.Delete("", tc.From, tc.To) 331 | require.NoError(t, err) 332 | require.Equal(t, ndeletedExp, item.Key(ndeleted)) 333 | 334 | got, npeeked, err := buckPeek(buck, 100, item.Items{}, "") 335 | require.Equal(t, 100-ndeleted, npeeked) 336 | require.NoError(t, err) 337 | require.Equal( 338 | t, 339 | append( 340 | expItems[:clampedFrom], 341 | expItems[clampedTo+1:]..., 342 | ), 343 | got, 344 | ) 345 | 346 | if ndeleted == 100 { 347 | require.True(t, buck.Empty("")) 348 | } else { 349 | require.False(t, buck.Empty("")) 350 | } 351 | }) 352 | }) 353 | } 354 | } 355 | 356 | func TestBucketDeleteLowerThanReopen(t *testing.T) { 357 | buck, dir := createEmptyBucket(t) 358 | defer os.RemoveAll(dir) 359 | 360 | require.Equal(t, 0, buck.Len("")) 361 | require.True(t, buck.Empty("")) 362 | 363 | expItems := testutils.GenItems(0, 100, 1) 364 | require.NoError(t, buck.Push(expItems, true, "")) 365 | require.Equal(t, 100, buck.Len("")) 366 | 367 | deleted, err := buck.Delete("", 0, 50) 368 | require.NoError(t, err) 369 | require.Equal(t, 51, deleted) 370 | require.False(t, buck.Empty("")) 371 | 372 | // Re-open the bucket: 373 | require.NoError(t, buck.Close()) 374 | buck, err = openBucket(buck.dir, nil, buck.opts) 375 | require.NoError(t, err) 376 | 377 | // Pop should now see the previous 100: 378 | items, npopped, err := buckPop(buck, 100, nil, "") 379 | require.Equal(t, 49, npopped) 380 | require.Equal(t, expItems[51:], items) 381 | require.NoError(t, err) 382 | require.NoError(t, buck.Close()) 383 | } 384 | 385 | func TestBucketPushDuplicates(t *testing.T) { 386 | withEmptyBucket(t, func(buck *bucket) { 387 | const pushes = 100 388 | expItems := testutils.GenItems(0, 10, 1) 389 | for idx := 0; idx < pushes; idx++ { 390 | require.NoError(t, buck.Push(expItems, true, "")) 391 | require.Equal(t, (idx+1)*len(expItems), buck.Len("")) 392 | } 393 | 394 | buckLen := buck.Len("") 395 | gotItems, popped, err := buckPop(buck, buckLen, nil, "") 396 | require.NoError(t, err) 397 | require.Equal(t, buckLen, popped) 398 | require.Equal(t, buckLen, len(gotItems)) 399 | require.True(t, slices.IsSortedFunc(gotItems, func(i, j item.Item) int { 400 | return int(i.Key - j.Key) 401 | })) 402 | 403 | for key := 0; key < len(expItems); key++ { 404 | for idx := 0; idx < pushes; idx++ { 405 | it := gotItems[key*pushes+idx] 406 | require.Equal(t, item.Key(key), it.Key) 407 | } 408 | } 409 | }) 410 | } 411 | 412 | func TestBucketPeek(t *testing.T) { 413 | withEmptyBucket(t, func(buck *bucket) { 414 | const N = 100 415 | exp := testutils.GenItems(0, N, 1) 416 | require.NoError(t, buck.Push(exp, true, "")) 417 | 418 | // peek should not delete something, so check it's idempotent. 
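		// (ReadOpPeek leaves the queue contents untouched, while ReadOpPop
		// consumes them - see the buckPeek/buckPop helpers at the top of
		// this file.)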
419 | for idx := 0; idx < 2; idx++ { 420 | got, npeeked, err := buckPeek(buck, N, nil, "") 421 | require.NoError(t, err) 422 | require.Equal(t, N, npeeked) 423 | require.Equal(t, exp, got) 424 | } 425 | 426 | // A consequent pop() should yield the same result: 427 | got, npeeked, err := buckPop(buck, N, nil, "") 428 | require.NoError(t, err) 429 | require.Equal(t, N, npeeked) 430 | require.Equal(t, exp, got) 431 | }) 432 | } 433 | 434 | func TestBucketMove(t *testing.T) { 435 | t.Parallel() 436 | 437 | srcBuck, srcDir := createEmptyBucket(t) 438 | dstBuck, dstDir := createEmptyBucket(t) 439 | defer os.RemoveAll(srcDir) 440 | defer os.RemoveAll(dstDir) 441 | 442 | const N = 100 443 | exp := testutils.GenItems(0, N, 1) 444 | require.NoError(t, srcBuck.Push(exp, true, "")) 445 | 446 | // move the first elem: 447 | moved, nshoveled, err := buckMove(srcBuck, dstBuck, 1, nil, "") 448 | require.NoError(t, err) 449 | require.Equal(t, exp[0], moved[0]) 450 | require.Equal(t, 1, nshoveled) 451 | 452 | // move the rest: 453 | moved, nshoveled, err = buckMove(srcBuck, dstBuck, N-1, nil, "") 454 | require.NoError(t, err) 455 | require.Equal(t, exp[1:], moved) 456 | require.Equal(t, N-1, nshoveled) 457 | 458 | require.NoError(t, srcBuck.Close()) 459 | require.NoError(t, dstBuck.Close()) 460 | } 461 | 462 | func TestBucketRegenWith(t *testing.T) { 463 | tcs := []struct { 464 | Name string 465 | DamageFn func(path string) error 466 | IsDamaged bool 467 | }{{ 468 | Name: "removed_index", 469 | DamageFn: os.Remove, 470 | IsDamaged: true, 471 | }, { 472 | Name: "empty_index", 473 | DamageFn: func(path string) error { return os.Truncate(path, 0) }, 474 | IsDamaged: true, 475 | }, { 476 | Name: "bad_permissions", 477 | DamageFn: func(path string) error { return os.Chmod(path, 0300) }, 478 | IsDamaged: true, 479 | }, { 480 | Name: "broken_index", 481 | DamageFn: func(path string) error { return os.Truncate(path, index.LocationSize-1) }, 482 | IsDamaged: true, 483 | }, { 484 | Name: "not_damaged", 485 | DamageFn: func(path string) error { return nil }, 486 | IsDamaged: false, 487 | }} 488 | 489 | for _, tc := range tcs { 490 | t.Run(tc.Name, func(t *testing.T) { 491 | t.Run("noreopen", func(t *testing.T) { 492 | testBucketRegenWith(t, tc.IsDamaged, false, tc.DamageFn) 493 | }) 494 | t.Run("reopen", func(t *testing.T) { 495 | testBucketRegenWith(t, tc.IsDamaged, true, tc.DamageFn) 496 | }) 497 | }) 498 | } 499 | } 500 | 501 | func testBucketRegenWith(t *testing.T, isDamaged bool, reopen bool, damageFn func(path string) error) { 502 | buck, dir := createEmptyBucket(t) 503 | defer os.RemoveAll(dir) 504 | 505 | const N = 100 506 | exp1 := testutils.GenItems(0, N, 2) 507 | exp2 := testutils.GenItems(1, N, 2) 508 | exp := append(exp1, exp2...) 
509 | slices.SortFunc(exp, func(i, j item.Item) int { 510 | return int(i.Key - j.Key) 511 | }) 512 | 513 | require.NoError(t, buck.Push(exp1, true, "")) 514 | require.NoError(t, buck.Push(exp2, true, "")) 515 | require.NoError(t, buck.Close()) 516 | 517 | bucketDir := filepath.Join(dir, buck.Key().String()) 518 | idxPath := filepath.Join(bucketDir, "idx.log") 519 | require.NoError(t, damageFn(idxPath)) 520 | 521 | // Re-opening the bucket should regenerate the index 522 | // from the value log contents: 523 | var err error 524 | var logBuffer bytes.Buffer 525 | opts := DefaultOptions() 526 | opts.Logger = &writerLogger{ 527 | w: &logBuffer, 528 | } 529 | 530 | // This should trigger the reindex: 531 | buck, err = openBucket(bucketDir, nil, opts) 532 | require.NoError(t, err) 533 | 534 | if reopen { 535 | // on reindex we store the index in memory. 536 | // make sure we do not make mistakes during writing. 537 | require.NoError(t, buck.Close()) 538 | buck, err = openBucket(bucketDir, nil, opts) 539 | require.NoError(t, err) 540 | } 541 | 542 | // The idx file should already exist again: 543 | _, err = os.Stat(idxPath) 544 | require.NoError(t, err) 545 | 546 | // Let's check it gets created correctly: 547 | got, npopped, err := buckPop(buck, N, nil, "") 548 | require.NoError(t, err) 549 | require.Equal(t, N, npopped) 550 | require.Equal(t, exp, got) 551 | 552 | if isDamaged { 553 | require.NotEmpty(t, logBuffer.String()) 554 | } else { 555 | require.Empty(t, logBuffer.String()) 556 | } 557 | } 558 | 559 | // Test if openBucket() notices that we're wasting space and cleans up afterwards. 560 | func TestBucketReinitOnEmpty(t *testing.T) { 561 | t.Run("no-close-after-reinit", func(t *testing.T) { 562 | testBucketReinitOnEmpty(t, false) 563 | }) 564 | t.Run("close-after-reinit", func(t *testing.T) { 565 | testBucketReinitOnEmpty(t, true) 566 | }) 567 | } 568 | 569 | func testBucketReinitOnEmpty(t *testing.T, closeAfterReinit bool) { 570 | t.Parallel() 571 | 572 | buck, dir := createEmptyBucket(t) 573 | defer os.RemoveAll(dir) 574 | 575 | exp := testutils.GenItems(0, 100, 1) 576 | require.NoError(t, buck.Push(exp, true, "")) 577 | got, npopped, err := buckPop(buck, 100, nil, "") 578 | require.NoError(t, err) 579 | require.Equal(t, exp, got) 580 | require.Equal(t, 100, npopped) 581 | require.NoError(t, buck.Close()) 582 | 583 | // re-open the same bucket - it's empty, but still has data laying around. 584 | // it should still be operational like before. 
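	// ("Data laying around" means dat.log still contains the already popped
	// entries; as described on TestBucketReinitOnEmpty above, openBucket()
	// is expected to notice the empty index and re-initialize the bucket.)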
585 | bucketDir := filepath.Join(dir, item.Key(23).String()) 586 | newBuck, err := openBucket(bucketDir, nil, DefaultOptions()) 587 | 588 | if closeAfterReinit { 589 | require.NoError(t, err) 590 | require.NoError(t, newBuck.Close()) 591 | newBuck, err = openBucket(bucketDir, nil, DefaultOptions()) 592 | require.NoError(t, err) 593 | } 594 | 595 | require.NoError(t, newBuck.Push(exp, true, "")) 596 | got, npopped, err = buckPop(newBuck, 100, nil, "") 597 | require.NoError(t, err) 598 | require.Equal(t, exp, got) 599 | require.Equal(t, 100, npopped) 600 | 601 | require.NoError(t, newBuck.Close()) 602 | } 603 | 604 | func TestBucketForkNameValidate(t *testing.T) { 605 | require.NoError(t, ForkName("hello-world").Validate()) 606 | require.NoError(t, ForkName("HELLO_WORLD").Validate()) 607 | require.NoError(t, ForkName("0").Validate()) 608 | require.NoError(t, ForkName("fOrK999").Validate()) 609 | require.NoError(t, ForkName("_____").Validate()) 610 | require.NoError(t, ForkName("_-_-_").Validate()) 611 | 612 | require.Error(t, ForkName("").Validate()) 613 | require.Error(t, ForkName("space here").Validate()) 614 | require.Error(t, ForkName("space-at-the-end ").Validate()) 615 | require.Error(t, ForkName("fork/sub").Validate()) 616 | require.Error(t, ForkName("huh?").Validate()) 617 | } 618 | 619 | func TestBucketForkInvalid(t *testing.T) { 620 | withEmptyBucket(t, func(buck *bucket) { 621 | require.Error(t, buck.Fork("not-existing", "fork")) 622 | 623 | // forking twice should not yield an error the second time: 624 | require.NoError(t, buck.Fork("", "fork")) 625 | require.NoError(t, buck.Fork("", "fork")) 626 | }) 627 | } 628 | -------------------------------------------------------------------------------- /buckets.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "os" 9 | "path/filepath" 10 | "slices" 11 | "sync" 12 | 13 | "github.com/google/renameio" 14 | "github.com/sahib/timeq/index" 15 | "github.com/sahib/timeq/item" 16 | "github.com/tidwall/btree" 17 | ) 18 | 19 | const ( 20 | splitConfFile = "split.conf" 21 | ) 22 | 23 | // trailerKey is the key to access a index.Trailer for a certain 24 | // Buckets that are not loaded have some info that is easily accessible without 25 | // loading them fully (i.e. the len). Since the Len can be different for each 26 | // fork we need to keep it for each one separately. 27 | type trailerKey struct { 28 | Key item.Key 29 | fork ForkName 30 | } 31 | 32 | // small wrapper around buckets that calls Push() without locking. 33 | type tx struct { 34 | bs *buckets 35 | } 36 | 37 | func (tx *tx) Push(items item.Items) error { 38 | return tx.bs.Push(items, false) 39 | } 40 | 41 | type buckets struct { 42 | mu sync.Mutex 43 | dir string 44 | tree btree.Map[item.Key, *bucket] 45 | trailers map[trailerKey]index.Trailer 46 | opts Options 47 | forks []ForkName 48 | readBuf Items 49 | } 50 | 51 | func loadAllBuckets(dir string, opts Options) (*buckets, error) { 52 | if err := os.MkdirAll(dir, 0700); err != nil { 53 | return nil, fmt.Errorf("mkdir: %w", err) 54 | } 55 | 56 | ents, err := os.ReadDir(dir) 57 | if err != nil { 58 | return nil, fmt.Errorf("read-dir: %w", err) 59 | } 60 | 61 | // some files like "split.conf" are expected to be there 62 | // so don't be alert. 
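	// Example: a queue directory that only contains "split.conf" and no
	// bucket sub-directories yet must still load fine; the "refusing to
	// create db" check further down only fires when no bucket directory was
	// found but other, unexpected entries are present.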
63 | expectedFiles := 0 64 | 65 | var dirsHandled int 66 | tree := btree.Map[item.Key, *bucket]{} 67 | trailers := make(map[trailerKey]index.Trailer, len(ents)) 68 | for _, ent := range ents { 69 | switch ent.Name() { 70 | case splitConfFile: 71 | expectedFiles++ 72 | } 73 | 74 | if !ent.IsDir() { 75 | continue 76 | } 77 | 78 | buckPath := filepath.Join(dir, ent.Name()) 79 | key, err := item.KeyFromString(filepath.Base(buckPath)) 80 | if err != nil { 81 | if opts.ErrorMode == ErrorModeAbort { 82 | return nil, err 83 | } 84 | 85 | opts.Logger.Printf("failed to parse %s as bucket path\n", buckPath) 86 | continue 87 | } 88 | 89 | dirsHandled++ 90 | 91 | if err := index.ReadTrailers(buckPath, func(fork string, trailer index.Trailer) { 92 | // nil entries indicate buckets that were not loaded yet: 93 | trailers[trailerKey{ 94 | Key: key, 95 | fork: ForkName(fork), 96 | }] = trailer 97 | }); err != nil { 98 | // reading trailers is not too fatal, but applications may break in unexpected 99 | // ways when Len() returns wrong results. 100 | return nil, err 101 | } 102 | 103 | tree.Set(key, nil) 104 | } 105 | 106 | if dirsHandled == 0 && len(ents) > expectedFiles { 107 | return nil, fmt.Errorf("%s is not empty; refusing to create db", dir) 108 | } 109 | 110 | bs := &buckets{ 111 | dir: dir, 112 | tree: tree, 113 | opts: opts, 114 | trailers: trailers, 115 | readBuf: make(Items, 2000), 116 | } 117 | 118 | forks, err := bs.fetchForks() 119 | if err != nil { 120 | return nil, fmt.Errorf("failed to fetch forks: %w", err) 121 | } 122 | 123 | bs.forks = forks 124 | return bs, nil 125 | } 126 | 127 | // ValidateBucketKeys checks if the keys in the buckets correspond to the result 128 | // of the key func. Failure here indicates that the key function changed. No error 129 | // does not guarantee that the key func did not change though (e.g. the identity func 130 | // would produce no error in this check) 131 | func (bs *buckets) ValidateBucketKeys(bucketFn BucketSplitConf) error { 132 | namePath := filepath.Join(bs.dir, splitConfFile) 133 | nameData, err := os.ReadFile(namePath) 134 | if err != nil { 135 | // write the split name so we can figure it out later again. 136 | if err := renameio.WriteFile(namePath, []byte(bucketFn.Name), 0600); err != nil { 137 | // if we couldn't read and write it, then something is very likely wrong. 138 | return err 139 | } 140 | } else { 141 | // split file was valid, go check if it's still the desired split func. 142 | nameData = bytes.TrimSpace(nameData) 143 | if string(nameData) != bucketFn.Name { 144 | return fmt.Errorf( 145 | "%w: split func is currently »%s« but »%s« is configured - migrate?", 146 | ErrChangedSplitFunc, 147 | nameData, 148 | bucketFn.Name, 149 | ) 150 | 151 | } 152 | } 153 | 154 | for iter := bs.tree.Iter(); iter.Next(); { 155 | ik := iter.Key() 156 | bk := bucketFn.Func(ik) 157 | 158 | if ik != bk { 159 | return fmt.Errorf( 160 | "%w: bucket with key %s does not match key func (%d) - did it change?", 161 | ErrChangedSplitFunc, 162 | ik, 163 | bk, 164 | ) 165 | } 166 | } 167 | 168 | return nil 169 | } 170 | 171 | func (bs *buckets) buckPath(key item.Key) string { 172 | return filepath.Join(bs.dir, key.String()) 173 | } 174 | 175 | // forKey returns a bucket for the specified key and creates if not there yet. 
176 | // `key` must be the lowest key that is stored in this You cannot just 177 | // use a key that is somewhere in the 178 | func (bs *buckets) forKey(key item.Key) (*bucket, error) { 179 | buck, _ := bs.tree.Get(key) 180 | if buck != nil { 181 | // fast path: 182 | return buck, nil 183 | } 184 | 185 | // make room for one so we don't jump over the maximum: 186 | if err := bs.closeUnused(bs.opts.MaxParallelOpenBuckets - 1); err != nil { 187 | return nil, err 188 | } 189 | 190 | var err error 191 | buck, err = openBucket(bs.buckPath(key), bs.forks, bs.opts) 192 | if err != nil { 193 | return nil, err 194 | } 195 | 196 | bs.tree.Set(key, buck) 197 | return buck, nil 198 | } 199 | 200 | func (bs *buckets) delete(key item.Key) error { 201 | buck, ok := bs.tree.Get(key) 202 | if !ok { 203 | return fmt.Errorf("no bucket with key %v", key) 204 | } 205 | 206 | for tk := range bs.trailers { 207 | if tk.Key == key { 208 | delete(bs.trailers, tk) 209 | } 210 | } 211 | 212 | var err error 213 | var dir string 214 | if buck != nil { 215 | // make sure to close the bucket, otherwise we will accumulate mmaps, which 216 | // will sooner or later lead to memory allocation issues/errors. 217 | err = buck.Close() 218 | dir = buck.dir // save on allocation of buckPath() 219 | } else { 220 | dir = bs.buckPath(key) 221 | } 222 | 223 | bs.tree.Delete(key) 224 | 225 | return errors.Join(err, removeBucketDir(dir, bs.forks)) 226 | } 227 | 228 | type iterMode int 229 | 230 | const ( 231 | // includeNil goes over all buckets, including those that are nil (not loaded.) 232 | includeNil = iterMode(iota) 233 | 234 | // loadedOnly iterates over all buckets that were loaded already. 235 | loadedOnly 236 | 237 | // load loads all buckets, including those that were not loaded yet. 238 | load 239 | ) 240 | 241 | // errIterStop can be returned in Iter's func when you want to stop 242 | // It does not count as error. 243 | var errIterStop = errors.New("iteration stopped") 244 | 245 | // Iter iterates over all buckets, starting with the lowest. The buckets include 246 | // unloaded depending on `mode`. The error you return in `fn` will be returned 247 | // by Iter() and iteration immediately stops. If you return errIterStop then 248 | // Iter() will return nil and will also stop the iteration. Note that Iter() honors the 249 | // MaxParallelOpenBuckets option, i.e. when the mode is `Load` it will immediately close 250 | // old buckets again before proceeding. 251 | func (bs *buckets) iter(mode iterMode, fn func(key item.Key, b *bucket) error) error { 252 | // NOTE: We cannot directly iterate over the tree here, we need to make a copy 253 | // of they keys, as the btree library does not like if the tree is modified during iteration. 254 | // Modifications can happen in forKey() (which might close unused buckets) or in the user-supplied 255 | // function (notably Pop(), which deletes exhausted buckets). By copying it we make sure to 256 | // iterate over one consistent snapshot. This might need to change if we'd create new buckets 257 | // in fn() - let's hope that this does not happen. 258 | keys := bs.tree.Keys() 259 | for _, key := range keys { 260 | // Fetch from non-copied tree as this is the one that is modified. 261 | buck, ok := bs.tree.Get(key) 262 | if !ok { 263 | // it was deleted already? Skip it. 264 | continue 265 | } 266 | 267 | if buck == nil { 268 | if mode == loadedOnly { 269 | continue 270 | } 271 | 272 | if mode == load { 273 | // load the bucket fresh from disk. 
274 | // NOTE: This might unload other buckets! 275 | var err error 276 | buck, err = bs.forKey(key) 277 | if err != nil { 278 | return err 279 | } 280 | } 281 | } 282 | 283 | if err := fn(key, buck); err != nil { 284 | if err == errIterStop { 285 | err = nil 286 | } 287 | 288 | return err 289 | } 290 | } 291 | 292 | return nil 293 | } 294 | 295 | func (bs *buckets) Sync() error { 296 | var err error 297 | bs.mu.Lock() 298 | defer bs.mu.Unlock() 299 | 300 | _ = bs.iter(loadedOnly, func(_ item.Key, b *bucket) error { 301 | // try to sync as much as possible: 302 | err = errors.Join(err, b.Sync(true)) 303 | return nil 304 | }) 305 | 306 | return err 307 | } 308 | 309 | func (bs *buckets) Clear() error { 310 | bs.mu.Lock() 311 | defer bs.mu.Unlock() 312 | 313 | return bs.clear() 314 | } 315 | 316 | func (bs *buckets) clear() error { 317 | keys := bs.tree.Keys() 318 | for _, key := range keys { 319 | if err := bs.delete(key); err != nil { 320 | return err 321 | } 322 | } 323 | 324 | return nil 325 | } 326 | 327 | func (bs *buckets) Close() error { 328 | bs.mu.Lock() 329 | defer bs.mu.Unlock() 330 | 331 | return bs.iter(loadedOnly, func(_ item.Key, b *bucket) error { 332 | return b.Close() 333 | }) 334 | } 335 | 336 | func (bs *buckets) Len(fork ForkName) int { 337 | var len int 338 | bs.mu.Lock() 339 | defer bs.mu.Unlock() 340 | 341 | _ = bs.iter(includeNil, func(key item.Key, b *bucket) error { 342 | if b == nil { 343 | trailer, ok := bs.trailers[trailerKey{ 344 | Key: key, 345 | fork: fork, 346 | }] 347 | 348 | if !ok { 349 | bs.opts.Logger.Printf("bug: no trailer for %v", key) 350 | return nil 351 | } 352 | 353 | len += int(trailer.TotalEntries) 354 | return nil 355 | } 356 | 357 | len += b.Len(fork) 358 | return nil 359 | }) 360 | 361 | return len 362 | } 363 | 364 | func (bs *buckets) Shovel(dstBs *buckets, fork ForkName) (int, error) { 365 | bs.mu.Lock() 366 | defer bs.mu.Unlock() 367 | 368 | dstBs.mu.Lock() 369 | defer dstBs.mu.Unlock() 370 | 371 | var ntotalcopied int 372 | err := bs.iter(includeNil, func(key item.Key, _ *bucket) error { 373 | if _, ok := dstBs.tree.Get(key); !ok { 374 | // fast path: We can just move the bucket directory. 375 | dstPath := dstBs.buckPath(key) 376 | srcPath := bs.buckPath(key) 377 | dstBs.tree.Set(key, nil) 378 | 379 | if err := index.ReadTrailers(srcPath, func(srcfork string, trailer index.Trailer) { 380 | if fork == ForkName(srcfork) { 381 | ntotalcopied += int(trailer.TotalEntries) 382 | } 383 | 384 | dstBs.trailers[trailerKey{ 385 | Key: key, 386 | fork: ForkName(srcfork), 387 | }] = trailer 388 | }); err != nil { 389 | return err 390 | } 391 | 392 | return moveFileOrDir(srcPath, dstPath) 393 | } 394 | 395 | // In this case we have to copy the items more intelligently, 396 | // since we have to append it to the destination 397 | 398 | srcBuck, err := bs.forKey(key) 399 | if err != nil { 400 | return err 401 | } 402 | 403 | // NOTE: This assumes that the destination has the same bucket func. 
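		// To recap the two code paths of Shovel(): if the destination does
		// not know this bucket key yet, the whole bucket directory is moved
		// on disk (fast path above). Only when both sides already have the
		// bucket do we fall back to reading every item from the source and
		// pushing it into the destination (slow path below).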
404 | dstBuck, err := dstBs.forKey(key) 405 | if err != nil { 406 | return err 407 | } 408 | 409 | return srcBuck.Read(math.MaxInt, &bs.readBuf, fork, func(items item.Items) (ReadOp, error) { 410 | if err := dstBuck.Push(items, true, fork); err != nil { 411 | return ReadOpPeek, err 412 | } 413 | 414 | ntotalcopied += len(items) 415 | return ReadOpPop, nil 416 | }) 417 | }) 418 | 419 | if err != nil { 420 | return ntotalcopied, err 421 | } 422 | 423 | if err := bs.clear(); err != nil { 424 | return ntotalcopied, err 425 | } 426 | 427 | return ntotalcopied, err 428 | } 429 | 430 | func (bs *buckets) nloaded() int { 431 | var nloaded int 432 | bs.tree.Scan(func(_ item.Key, buck *bucket) bool { 433 | if buck != nil { 434 | nloaded++ 435 | } 436 | return true 437 | }) 438 | 439 | return nloaded 440 | } 441 | 442 | // closeUnused closes as many buckets as needed to reach `maxBucks` total loaded buckets. 443 | // The closed buckets are marked as nil and can be loaded again afterwards. 444 | // If `maxBucks` is negative, this is a no-op. 445 | func (bs *buckets) closeUnused(maxBucks int) error { 446 | if maxBucks < 0 { 447 | // This disables this feature. You likely do not want that. 448 | return nil 449 | } 450 | 451 | // Fetch the number of loaded buckets. 452 | // We could optimize that by having another count for that, 453 | // but it should be cheap enough and this way we have only one 454 | // source of truth. 455 | nloaded := bs.nloaded() 456 | if nloaded <= maxBucks { 457 | // nothing to do, this should be the normal case. 458 | return nil 459 | } 460 | 461 | var closeErrs error 462 | 463 | // This logic here produces a sequence like this: 464 | // pivot=4: 4+0, 4-1, 4+1, 4-2, 4+2, 4-3, 4+3, 4-4, 4+4, ... 465 | // 466 | // In other words, it alternates around the middle of the buckets and 467 | // closes buckets that are more in the middle of the queue. This should be 468 | // a reasonable heuristic for a typical queue system where you pop from end 469 | // and push to the other one, but seldomly access buckets in the middle 470 | // range If you're priorities are very random, this will be rather random 471 | // too though. 472 | nClosed, nClosable := 0, nloaded-maxBucks 473 | pivotIdx := bs.tree.Len() / 2 474 | for idx := 0; idx < bs.tree.Len() && nClosed < nClosable; idx++ { 475 | realIdx := pivotIdx - idx/2 - 1 476 | if idx%2 == 0 { 477 | realIdx = pivotIdx + idx/2 478 | } 479 | 480 | key, buck, ok := bs.tree.GetAt(realIdx) 481 | if !ok { 482 | // should not happen, but better be safe. 483 | continue 484 | } 485 | 486 | if buck == nil { 487 | // already closed. 488 | continue 489 | } 490 | 491 | // We need to store the trailers of each fork, so we know how to 492 | // calculcate the length of the queue without having to load everything. 493 | buck.Trailers(func(fork ForkName, trailer index.Trailer) { 494 | bs.trailers[trailerKey{ 495 | Key: key, 496 | fork: fork, 497 | }] = trailer 498 | }) 499 | 500 | if err := buck.Close(); err != nil { 501 | switch bs.opts.ErrorMode { 502 | case ErrorModeAbort: 503 | closeErrs = errors.Join(closeErrs, err) 504 | case ErrorModeContinue: 505 | bs.opts.Logger.Printf("failed to reap bucket %s", key) 506 | } 507 | } 508 | 509 | bs.tree.Set(key, nil) 510 | // bs.trailers[key] = trailer 511 | nClosed++ 512 | } 513 | 514 | return closeErrs 515 | } 516 | 517 | // binsplit returns the first index of `items` that would 518 | // not go to the bucket `comp`. There are two assumptions: 519 | // 520 | // * "items" is not empty. 
521 | // * "comp" exists for at least one fn(item.Key) 522 | // * The first key in `items` must be fn(key) == comp 523 | // 524 | // If assumptions are not fulfilled you will get bogus results. 525 | func binsplit(items item.Items, comp item.Key, fn func(item.Key) item.Key) int { 526 | l := len(items) 527 | if l == 0 { 528 | return 0 529 | } 530 | if l == 1 { 531 | return 1 532 | } 533 | 534 | pivotIdx := l / 2 535 | pivotKey := fn(items[pivotIdx].Key) 536 | if pivotKey != comp { 537 | // search left: 538 | return binsplit(items[:pivotIdx], comp, fn) 539 | } 540 | 541 | // search right: 542 | return pivotIdx + binsplit(items[pivotIdx:], comp, fn) 543 | } 544 | 545 | func (bs *buckets) Push(items item.Items, locked bool) error { 546 | if len(items) == 0 { 547 | return nil 548 | } 549 | 550 | slices.SortFunc(items, func(i, j item.Item) int { 551 | return int(i.Key - j.Key) 552 | }) 553 | 554 | if locked { 555 | bs.mu.Lock() 556 | defer bs.mu.Unlock() 557 | } 558 | 559 | return bs.pushSorted(items) 560 | } 561 | 562 | // Sort items into the respective buckets: 563 | func (bs *buckets) pushSorted(items item.Items) error { 564 | for len(items) > 0 { 565 | keyMod := bs.opts.BucketSplitConf.Func(items[0].Key) 566 | nextIdx := binsplit(items, keyMod, bs.opts.BucketSplitConf.Func) 567 | buck, err := bs.forKey(keyMod) 568 | if err != nil { 569 | if bs.opts.ErrorMode == ErrorModeAbort { 570 | return fmt.Errorf("bucket: for-key: %w", err) 571 | } 572 | 573 | bs.opts.Logger.Printf("failed to push: %v", err) 574 | } else { 575 | if err := buck.Push(items[:nextIdx], true, ""); err != nil { 576 | if bs.opts.ErrorMode == ErrorModeAbort { 577 | return fmt.Errorf("bucket: push: %w", err) 578 | } 579 | 580 | bs.opts.Logger.Printf("failed to push: %v", err) 581 | } 582 | } 583 | 584 | items = items[nextIdx:] 585 | } 586 | 587 | return nil 588 | } 589 | 590 | func (bs *buckets) Read(n int, fork ForkName, fn TransactionFn) error { 591 | if n < 0 { 592 | // use max value to select all. 593 | n = int(^uint(0) >> 1) 594 | } 595 | 596 | bs.mu.Lock() 597 | defer bs.mu.Unlock() 598 | 599 | var count = n 600 | return bs.iter(load, func(key item.Key, b *bucket) error { 601 | lenBefore := b.Len(fork) 602 | 603 | // wrap the bucket call into something that knows about 604 | // transactions - bucket itself does not care about that. 605 | wrappedFn := func(items Items) (ReadOp, error) { 606 | return fn(&tx{bs}, items) 607 | } 608 | 609 | if err := b.Read(count, &bs.readBuf, fork, wrappedFn); err != nil { 610 | if bs.opts.ErrorMode == ErrorModeAbort { 611 | return err 612 | } 613 | 614 | // try with the next bucket in the hope that it works: 615 | bs.opts.Logger.Printf("failed to pop: %v", err) 616 | return nil 617 | } 618 | 619 | if b.AllEmpty() { 620 | if err := bs.delete(key); err != nil { 621 | return fmt.Errorf("failed to delete bucket: %w", err) 622 | } 623 | } 624 | 625 | lenAfter := b.Len(fork) 626 | 627 | count -= (lenBefore - lenAfter) 628 | if count <= 0 { 629 | return errIterStop 630 | } 631 | 632 | return nil 633 | }) 634 | } 635 | 636 | func (bs *buckets) Delete(fork ForkName, from, to item.Key) (int, error) { 637 | var numDeleted int 638 | var deletableBucks []item.Key 639 | 640 | if to < from { 641 | return 0, fmt.Errorf("delete: `to` must be >= `from`") 642 | } 643 | 644 | bs.mu.Lock() 645 | defer bs.mu.Unlock() 646 | 647 | // use the bucket func to figure out which buckets the range limits would be in. 648 | // those buckets might not really exist though. 
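	// A worked example (the concrete split func is configurable, this just
	// assumes one): with BucketSplitConf.Func(key) = key - key%10, a call to
	// Delete(fork, 13, 27) yields fromBuckKey = 10 and toBuckKey = 20. The
	// loop below then visits every existing bucket key in [10, 20]; bucket
	// keys that were never created are simply absent from the tree and are
	// skipped.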
649 | toBuckKey := bs.opts.BucketSplitConf.Func(to) 650 | fromBuckKey := bs.opts.BucketSplitConf.Func(from) 651 | 652 | iter := bs.tree.Iter() 653 | if !iter.Seek(fromBuckKey) { 654 | // all buckets that we have are > to. Nothing to delete. 655 | return 0, nil 656 | } 657 | 658 | // Seek() already sets the iter.Value(), so iteration becomes a bit awkward. 659 | for { 660 | buckKey := iter.Key() 661 | if buckKey > toBuckKey { 662 | // too far, stop it. 663 | break 664 | } 665 | 666 | buck, err := bs.forKey(buckKey) 667 | if err != nil { 668 | if bs.opts.ErrorMode == ErrorModeAbort { 669 | return numDeleted, err 670 | } 671 | 672 | // try with the next bucket in the hope that it works: 673 | bs.opts.Logger.Printf( 674 | "failed to open %v for deletion delete : %v", 675 | buckKey, 676 | err, 677 | ) 678 | } else { 679 | numDeletedOfBucket, err := buck.Delete(fork, from, to) 680 | if err != nil { 681 | if bs.opts.ErrorMode == ErrorModeAbort { 682 | return numDeleted, err 683 | } 684 | 685 | // try with the next bucket in the hope that it works: 686 | bs.opts.Logger.Printf("failed to delete : %v", err) 687 | } else { 688 | numDeleted += numDeletedOfBucket 689 | if buck.AllEmpty() { 690 | deletableBucks = append(deletableBucks, buckKey) 691 | } 692 | } 693 | } 694 | 695 | if !iter.Next() { 696 | break 697 | } 698 | } 699 | 700 | for _, bucketKey := range deletableBucks { 701 | if err := bs.delete(bucketKey); err != nil { 702 | return numDeleted, fmt.Errorf("bucket delete: %w", err) 703 | } 704 | } 705 | 706 | return numDeleted, nil 707 | } 708 | 709 | func (bs *buckets) Fork(src, dst ForkName) error { 710 | if err := dst.Validate(); err != nil { 711 | return err 712 | } 713 | 714 | if slices.Contains(bs.forks, dst) { 715 | // if no bucket is currently loaded, we still need to check for dupes. 716 | return nil 717 | } 718 | 719 | err := bs.iter(includeNil, func(key item.Key, buck *bucket) error { 720 | if buck != nil { 721 | return buck.Fork(src, dst) 722 | } 723 | 724 | buckDir := filepath.Join(bs.dir, key.String()) 725 | return forkOffline(buckDir, src, dst) 726 | }) 727 | 728 | if err != nil { 729 | return err 730 | } 731 | 732 | bs.forks = append(bs.forks, dst) 733 | return nil 734 | } 735 | 736 | func (bs *buckets) RemoveFork(fork ForkName) error { 737 | bs.mu.Lock() 738 | defer bs.mu.Unlock() 739 | 740 | if err := fork.Validate(); err != nil { 741 | return err 742 | } 743 | 744 | // Remove fork from fork list to avoid creating it again: 745 | bs.forks = slices.DeleteFunc(bs.forks, func(candidate ForkName) bool { 746 | return fork == candidate 747 | }) 748 | 749 | return bs.iter(includeNil, func(key item.Key, buck *bucket) error { 750 | if buck != nil { 751 | if err := buck.RemoveFork(fork); err != nil { 752 | return err 753 | } 754 | 755 | // might be empty after deletion, so we can get rid of the 756 | if !buck.AllEmpty() { 757 | return nil 758 | } 759 | 760 | return bs.delete(key) 761 | } 762 | 763 | // NOTE: In contrast to the "loaded bucket" case above we do not check if the bucket is 764 | // considered AllEmpty() after the fork deletion. We defer that to the next Open() of this 765 | // bucket to avoid having to load all buckets here. We can have a clean up logic in Open() 766 | // that re-initializes the bucket freshly when the index Len() is zero (and no recover needed). 
767 | buckDir := filepath.Join(bs.dir, key.String()) 768 | return removeForkOffline(buckDir, fork) 769 | }) 770 | } 771 | 772 | func (bs *buckets) Forks() []ForkName { 773 | bs.mu.Lock() 774 | defer bs.mu.Unlock() 775 | 776 | return bs.forks 777 | } 778 | 779 | // fetchForks actually checks the disk to find the current forks. 780 | func (bs *buckets) fetchForks() ([]ForkName, error) { 781 | var buck *bucket 782 | for iter := bs.tree.Iter(); iter.Next(); { 783 | buck = iter.Value() 784 | if buck == nil { 785 | continue 786 | } 787 | } 788 | 789 | if buck == nil { 790 | // if no bucket was loaded yet, above for will not find any. 791 | // no luck, we gonna need to load one for this operation. 792 | iter := bs.tree.Iter() 793 | if iter.First() { 794 | var err error 795 | buck, err = bs.forKey(iter.Key()) 796 | if err != nil { 797 | return nil, err 798 | } 799 | } 800 | } 801 | 802 | if buck == nil { 803 | // still nil? The tree is probably empty. 804 | return []ForkName{}, nil 805 | } 806 | 807 | return buck.Forks(), nil 808 | } 809 | -------------------------------------------------------------------------------- /buckets_test.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/sahib/timeq/item" 10 | "github.com/sahib/timeq/item/testutils" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func writeDummyBucket(t *testing.T, dir string, key item.Key, items item.Items) { 15 | bucketDir := filepath.Join(dir, key.String()) 16 | require.NoError(t, os.MkdirAll(bucketDir, 0700)) 17 | 18 | buck, err := openBucket(bucketDir, nil, DefaultOptions()) 19 | require.NoError(t, err) 20 | 21 | require.NoError(t, buck.Push(items, true, "")) 22 | require.NoError(t, buck.Sync(true)) 23 | require.NoError(t, buck.Close()) 24 | } 25 | 26 | func TestBucketsOpenEmpty(t *testing.T) { 27 | t.Parallel() 28 | 29 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 30 | require.NoError(t, err) 31 | defer os.RemoveAll(dir) 32 | 33 | opts := DefaultOptions() 34 | opts.MaxParallelOpenBuckets = 1 35 | bs, err := loadAllBuckets(dir, opts) 36 | require.NoError(t, err) 37 | require.Equal(t, 0, bs.Len("")) 38 | require.NoError(t, bs.Sync()) 39 | require.NoError(t, bs.Close()) 40 | } 41 | 42 | func TestBucketsClearEmpty(t *testing.T) { 43 | t.Parallel() 44 | 45 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 46 | require.NoError(t, err) 47 | defer os.RemoveAll(dir) 48 | 49 | opts := DefaultOptions() 50 | opts.MaxParallelOpenBuckets = 1 51 | bs, err := loadAllBuckets(dir, opts) 52 | require.NoError(t, err) 53 | require.NoError(t, bs.Clear()) 54 | require.NoError(t, bs.Close()) 55 | } 56 | 57 | func TestBucketsIter(t *testing.T) { 58 | t.Parallel() 59 | 60 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 61 | require.NoError(t, err) 62 | defer os.RemoveAll(dir) 63 | 64 | expected := []item.Key{10, 20, 40, 80} 65 | for _, key := range expected { 66 | writeDummyBucket( 67 | t, 68 | dir, 69 | key, 70 | testutils.GenItems(int(key), int(key)+10, 1), 71 | ) 72 | } 73 | 74 | opts := DefaultOptions() 75 | opts.MaxParallelOpenBuckets = 1 76 | bs, err := loadAllBuckets(dir, opts) 77 | require.NoError(t, err) 78 | 79 | // load bucket 80 early to check if iter can handle 80 | // already loaded buckets too. 
81 | _, err = bs.forKey(80) 82 | require.NoError(t, err) 83 | 84 | got := []item.Key{} 85 | require.NoError(t, bs.iter(load, func(key item.Key, b *bucket) error { 86 | got = append(got, b.Key()) 87 | require.Equal(t, key, b.Key()) 88 | return nil 89 | })) 90 | 91 | require.Equal(t, expected, got) 92 | require.Equal(t, 40, bs.Len("")) 93 | } 94 | 95 | func TestBucketsForKey(t *testing.T) { 96 | t.Parallel() 97 | 98 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 99 | require.NoError(t, err) 100 | defer os.RemoveAll(dir) 101 | 102 | writeDummyBucket(t, dir, 33, testutils.GenItems(0, 10, 1)) 103 | 104 | opts := DefaultOptions() 105 | opts.MaxParallelOpenBuckets = 1 106 | bs, err := loadAllBuckets(dir, opts) 107 | require.NoError(t, err) 108 | 109 | // open freshly: 110 | b1, err := bs.forKey(32) 111 | require.NoError(t, err) 112 | require.Equal(t, item.Key(32), b1.Key()) 113 | 114 | // open again, must be the same memory: 115 | b2, err := bs.forKey(32) 116 | require.NoError(t, err) 117 | require.Equal(t, item.Key(32), b2.Key()) 118 | require.Equal(t, b1, b2) 119 | 120 | // existing bucket should load fine: 121 | b3, err := bs.forKey(33) 122 | require.NoError(t, err) 123 | require.Equal(t, item.Key(33), b3.Key()) 124 | require.Equal(t, 10, b3.Len("")) 125 | 126 | require.NoError(t, bs.Clear()) 127 | 128 | // open again, must be the same memory: 129 | 130 | b3c, err := bs.forKey(33) 131 | require.NoError(t, err) 132 | require.Equal(t, item.Key(33), b3c.Key()) 133 | require.Equal(t, 0, b3c.Len("")) 134 | require.NoError(t, bs.Close()) 135 | } 136 | 137 | func TestBucketsValidateFunc(t *testing.T) { 138 | t.Parallel() 139 | 140 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 141 | require.NoError(t, err) 142 | defer os.RemoveAll(dir) 143 | 144 | writeDummyBucket(t, dir, 30, testutils.GenItems(30, 40, 1)) 145 | writeDummyBucket(t, dir, 50, testutils.GenItems(50, 60, 1)) 146 | 147 | opts := DefaultOptions() 148 | opts.MaxParallelOpenBuckets = 1 149 | bs, err := loadAllBuckets(dir, opts) 150 | require.NoError(t, err) 151 | 152 | require.NoError(t, bs.ValidateBucketKeys(BucketSplitConf{Name: "blub", Func: func(key item.Key) item.Key { 153 | // id-func has to pass always. 154 | return key 155 | }})) 156 | 157 | require.NoError(t, bs.ValidateBucketKeys(BucketSplitConf{Name: "blub", Func: func(key item.Key) item.Key { 158 | // 30 -> 30 and 50 -> 50 159 | return (key * 10) / 10 160 | }})) 161 | 162 | require.Error(t, bs.ValidateBucketKeys(BucketSplitConf{Name: "foo", Func: func(key item.Key) item.Key { 163 | // different name! 164 | return key 165 | }})) 166 | 167 | require.Error(t, bs.ValidateBucketKeys(BucketSplitConf{Name: "blub", Func: func(key item.Key) item.Key { 168 | return (key / 3) * 3 169 | }})) 170 | 171 | require.NoError(t, bs.Close()) 172 | } 173 | 174 | func TestBucketsDelete(t *testing.T) { 175 | t.Parallel() 176 | 177 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 178 | require.NoError(t, err) 179 | defer os.RemoveAll(dir) 180 | 181 | opts := DefaultOptions() 182 | opts.MaxParallelOpenBuckets = 1 183 | bs, err := loadAllBuckets(dir, opts) 184 | require.NoError(t, err) 185 | 186 | // Delete non-existing yet. 
187 | require.Error(t, bs.delete(50)) 188 | 189 | // Create bucket and delete again: 190 | _, err = bs.forKey(50) 191 | require.NoError(t, err) 192 | require.NoError(t, bs.delete(50)) 193 | require.Error(t, bs.delete(50)) 194 | } 195 | 196 | func TestBucketsNotEmptyDir(t *testing.T) { 197 | t.Parallel() 198 | 199 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 200 | require.NoError(t, err) 201 | defer os.RemoveAll(dir) 202 | 203 | writeDummyBucket(t, dir, 33, testutils.GenItems(0, 10, 1)) 204 | 205 | subDir := filepath.Join(dir, "sub") 206 | require.NoError(t, os.MkdirAll(subDir, 0700)) 207 | require.NoError(t, os.WriteFile(filepath.Join(subDir, "file"), []byte("Hello World!"), 0700)) 208 | 209 | // Loading such a dir should error out as it seems that we try to open a directory with other things 210 | // in it that are not buckets at all. The caller can prepare this by having a os.Remove() of the contents, 211 | // but we should not do this automatically. 212 | opts := DefaultOptions() 213 | opts.MaxParallelOpenBuckets = 1 214 | _, err = loadAllBuckets(dir, opts) 215 | require.Error(t, err) 216 | } 217 | 218 | func TestAPIBinsplit(t *testing.T) { 219 | t.Parallel() 220 | 221 | idFunc := func(k item.Key) item.Key { return k } 222 | 223 | items := item.Items{ 224 | item.Item{Key: 0}, 225 | item.Item{Key: 0}, 226 | item.Item{Key: 0}, 227 | item.Item{Key: 1}, 228 | item.Item{Key: 1}, 229 | item.Item{Key: 1}, 230 | } 231 | 232 | require.Equal(t, 3, binsplit(items, 0, idFunc)) 233 | require.Equal(t, 6, binsplit(items, 1, idFunc)) 234 | require.Equal(t, 0, binsplit(item.Items{}, 0, idFunc)) 235 | } 236 | 237 | func TestAPIBinsplitSeq(t *testing.T) { 238 | t.Parallel() 239 | 240 | idFunc := func(k item.Key) item.Key { return k } 241 | items := testutils.GenItems(0, 10, 1) 242 | for idx := 0; idx < len(items); idx++ { 243 | require.Equal(t, 1, binsplit(items[idx:], item.Key(idx), idFunc)) 244 | } 245 | } 246 | 247 | func TestBucketsForkMultipleBuckets(t *testing.T) { 248 | t.Parallel() 249 | 250 | dir, err := os.MkdirTemp("", "timeq-bucketstest") 251 | require.NoError(t, err) 252 | defer os.RemoveAll(dir) 253 | 254 | writeDummyBucket(t, dir, 10, testutils.GenItems(10, 20, 1)) 255 | writeDummyBucket(t, dir, 20, testutils.GenItems(20, 30, 1)) 256 | writeDummyBucket(t, dir, 30, testutils.GenItems(30, 40, 1)) 257 | 258 | opts := DefaultOptions() 259 | opts.MaxParallelOpenBuckets = 1 260 | bs, err := loadAllBuckets(dir, opts) 261 | require.NoError(t, err) 262 | 263 | require.Empty(t, bs.Forks()) 264 | 265 | var forkNames []ForkName 266 | for idx := 0; idx < 10; idx++ { 267 | forkName := ForkName(fmt.Sprintf("fork%d", idx)) 268 | require.NoError(t, bs.Fork("", forkName)) 269 | forkNames = append(forkNames, forkName) 270 | } 271 | 272 | require.Equal(t, forkNames, bs.Forks()) 273 | 274 | for _, forkName := range forkNames { 275 | require.NoError(t, bs.RemoveFork(forkName)) 276 | } 277 | 278 | require.Empty(t, bs.Forks()) 279 | require.NoError(t, bs.Close()) 280 | } 281 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/sahib/timeq/cmd/parser" 8 | ) 9 | 10 | func main() { 11 | if err := parser.Run(os.Args); err != nil { 12 | fmt.Fprintf(os.Stderr, "timeq: %v\n", err) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /cmd/parser/parser.go: 
-------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "math" 7 | "os" 8 | "strconv" 9 | "strings" 10 | "time" 11 | 12 | "github.com/sahib/timeq" 13 | "github.com/sahib/timeq/item" 14 | "github.com/sahib/timeq/vlog" 15 | "github.com/urfave/cli" 16 | ) 17 | 18 | func optionsFromCtx(ctx *cli.Context) (timeq.Options, error) { 19 | opts := timeq.DefaultOptions() 20 | 21 | switch mode := ctx.GlobalString("sync-mode"); mode { 22 | case "full": 23 | opts.SyncMode = timeq.SyncFull 24 | case "data": 25 | opts.SyncMode = timeq.SyncData 26 | case "index": 27 | opts.SyncMode = timeq.SyncIndex 28 | case "none": 29 | opts.SyncMode = timeq.SyncNone 30 | default: 31 | return opts, fmt.Errorf("invalid sync mode: %s", mode) 32 | } 33 | 34 | bucketSize := ctx.GlobalDuration("bucket-size") 35 | if bucketSize <= 0 { 36 | return opts, fmt.Errorf("invalid bucket size: %v", bucketSize) 37 | } 38 | 39 | opts.BucketSplitConf = timeq.BucketSplitConf{ 40 | Name: fmt.Sprintf("cmd:%d", bucketSize), 41 | Func: func(key item.Key) item.Key { 42 | return key / item.Key(bucketSize) 43 | }, 44 | } 45 | 46 | return opts, nil 47 | } 48 | 49 | func withQueue(fn func(ctx *cli.Context, q *timeq.Queue) error) cli.ActionFunc { 50 | return func(ctx *cli.Context) error { 51 | dir := ctx.GlobalString("dir") 52 | 53 | opts, err := optionsFromCtx(ctx) 54 | if err != nil { 55 | return fmt.Errorf("options: %w", err) 56 | } 57 | 58 | queue, err := timeq.Open(dir, opts) 59 | if err != nil { 60 | return fmt.Errorf("open: %w", err) 61 | } 62 | 63 | if err := fn(ctx, queue); err != nil { 64 | queue.Close() 65 | return err 66 | } 67 | 68 | if err := queue.Close(); err != nil { 69 | return fmt.Errorf("close: %w", err) 70 | } 71 | 72 | return nil 73 | } 74 | } 75 | 76 | // Run runs the timeq command line on `args` (args[0] should be os.Args[0]) 77 | func Run(args []string) error { 78 | app := cli.NewApp() 79 | app.Name = "timeq" 80 | app.Usage = "A persistent, time-based priority queue" 81 | app.Description = "This is a toy frontend to timeq. It's hellish inefficient, but nice to test behavior." 
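	// A few example invocations (illustrative values; assuming the binary is
	// built as "timeq" and --dir / TIMEQ_DIR points at the queue directory):
	//
	//	timeq push 10:hello 20:world   # push two key/value pairs
	//	timeq pop -n 2                 # pop (and print) two items
	//	timeq len                      # print the number of queued items
	//	timeq fork create -n mirror    # create a fork named "mirror"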
82 | app.Version = "0.0.1" 83 | 84 | cwd, err := os.Getwd() 85 | if err != nil { 86 | return err 87 | } 88 | 89 | app.Flags = []cli.Flag{ 90 | cli.StringFlag{ 91 | Name: "dir", 92 | Usage: "Path to storage directory (defaults to curent working dir)", 93 | EnvVar: "TIMEQ_DIR", 94 | Value: cwd, 95 | }, 96 | cli.StringFlag{ 97 | Name: "sync-mode", 98 | Usage: "What sync mode to use ('none', 'full', 'data', 'index')", 99 | EnvVar: "TIMEQ_SYNC_MODE", 100 | Value: "full", 101 | }, 102 | cli.DurationFlag{ 103 | Name: "bucket-size", 104 | Usage: "The size of each bucket as time duration", 105 | EnvVar: "TIMEQ_BUCKET_SIZE", 106 | Value: 30 * time.Minute, 107 | }, 108 | } 109 | 110 | app.Commands = []cli.Command{ 111 | { 112 | Name: "pop", 113 | Usage: "Get one or several keys", 114 | Action: withQueue(handlePop), 115 | Flags: []cli.Flag{ 116 | cli.IntFlag{ 117 | Name: "n,number", 118 | Usage: "Number of items to pop", 119 | Value: 1, 120 | }, 121 | }, 122 | }, { 123 | Name: "peek", 124 | Usage: "Get one or several keys, but do not remove them.", 125 | Action: withQueue(handlePeek), 126 | Flags: []cli.Flag{ 127 | cli.IntFlag{ 128 | Name: "n,number", 129 | Usage: "Number of items to pop", 130 | Value: 1, 131 | }, 132 | }, 133 | }, { 134 | Name: "push", 135 | Usage: "Set one or a several key-value pairs", 136 | Action: withQueue(handlePush), 137 | }, { 138 | Name: "len", 139 | Aliases: []string{"l"}, 140 | Usage: "Print the number of items in the queue", 141 | Action: withQueue(handleLen), 142 | }, { 143 | Name: "clear", 144 | Aliases: []string{"c"}, 145 | Usage: "Clear the queue until a certain point", 146 | Action: withQueue(handleClear), 147 | Flags: []cli.Flag{ 148 | cli.Int64Flag{ 149 | Name: "f,from", 150 | Usage: "Lowest key to delete key to delete (including)", 151 | }, 152 | cli.Int64Flag{ 153 | Name: "t,to", 154 | Usage: "Highest key key to delete (including)", 155 | }, 156 | }, 157 | }, { 158 | Name: "shovel", 159 | Usage: "Move the data to another queue", 160 | Action: withQueue(handleShovel), 161 | Flags: []cli.Flag{ 162 | cli.IntFlag{ 163 | Name: "d,dest", 164 | Usage: "Directory of the destination queue", 165 | Required: true, 166 | }, 167 | }, 168 | }, { 169 | Name: "fork", 170 | Usage: "Utilities for forks", 171 | Subcommands: []cli.Command{ 172 | { 173 | Name: "list", 174 | Usage: "List all forks", 175 | Action: withQueue(handleForkList), 176 | }, { 177 | Name: "create", 178 | Usage: "Create a named fork", 179 | Action: withQueue(handleForkCreate), 180 | Flags: []cli.Flag{ 181 | cli.StringFlag{ 182 | Name: "n,name", 183 | Usage: "Name of the fork", 184 | Required: true, 185 | }, 186 | }, 187 | }, { 188 | Name: "remove", 189 | Usage: "Remove a specific fork", 190 | Action: withQueue(handleForkRemove), 191 | Flags: []cli.Flag{ 192 | cli.StringFlag{ 193 | Name: "n,name", 194 | Usage: "Name of the fork", 195 | Required: true, 196 | }, 197 | }, 198 | }, 199 | }, 200 | }, { 201 | Name: "log", 202 | Usage: "Utilities for checking value logs", 203 | Subcommands: []cli.Command{ 204 | { 205 | Name: "dump", 206 | Usage: "Print all values in the log", 207 | Action: handleLogDump, 208 | Flags: []cli.Flag{ 209 | cli.StringFlag{ 210 | Name: "p,path", 211 | Usage: "Where the value log is", 212 | Required: true, 213 | }, 214 | }, 215 | }, 216 | }, 217 | }, 218 | } 219 | 220 | return app.Run(args) 221 | } 222 | 223 | func handlePush(ctx *cli.Context, q *timeq.Queue) error { 224 | args := ctx.Args() 225 | items := make([]timeq.Item, 0, len(args)) 226 | 227 | for _, arg := range args { 228 | split := 
strings.SplitN(arg, ":", 2) 229 | if len(split) < 2 { 230 | return fmt.Errorf("invalid tuple: %v", arg) 231 | } 232 | 233 | key, err := strconv.ParseInt(split[0], 10, 64) 234 | if err != nil { 235 | return err 236 | } 237 | 238 | items = append(items, timeq.Item{ 239 | Key: timeq.Key(key), 240 | Blob: []byte(split[1]), 241 | }) 242 | } 243 | 244 | return q.Push(items) 245 | } 246 | 247 | func handlePop(ctx *cli.Context, q *timeq.Queue) error { 248 | return handlePopOrPeek(ctx, q, timeq.ReadOpPop) 249 | } 250 | 251 | func handlePeek(ctx *cli.Context, q *timeq.Queue) error { 252 | return handlePopOrPeek(ctx, q, timeq.ReadOpPeek) 253 | } 254 | 255 | func handlePopOrPeek(ctx *cli.Context, q *timeq.Queue, op timeq.ReadOp) error { 256 | n := ctx.Int("number") 257 | err := q.Read(n, func(_ timeq.Transaction, items timeq.Items) (timeq.ReadOp, error) { 258 | for _, item := range items { 259 | fmt.Println(item) 260 | } 261 | 262 | return op, nil 263 | }) 264 | 265 | if err != nil { 266 | return err 267 | } 268 | 269 | return nil 270 | } 271 | 272 | func handleLen(_ *cli.Context, q *timeq.Queue) error { 273 | fmt.Println(q.Len()) 274 | return nil 275 | } 276 | 277 | func handleClear(ctx *cli.Context, q *timeq.Queue) error { 278 | if !ctx.IsSet("to") && !ctx.IsSet("from") { 279 | size := q.Len() 280 | if err := q.Clear(); err != nil { 281 | return err 282 | } 283 | 284 | fmt.Printf("deleted all %v items\n", size) 285 | return nil 286 | } 287 | 288 | from := ctx.Int64("from") 289 | if !ctx.IsSet("from") { 290 | from = math.MinInt64 291 | } 292 | 293 | to := ctx.Int64("to") 294 | if !ctx.IsSet("to") { 295 | to = math.MaxInt64 296 | } 297 | 298 | deleted, err := q.Delete(timeq.Key(from), timeq.Key(to)) 299 | if err != nil { 300 | return err 301 | } 302 | 303 | fmt.Printf("deleted %v items\n", deleted) 304 | return nil 305 | } 306 | 307 | func handleShovel(ctx *cli.Context, srcQueue *timeq.Queue) error { 308 | dstDir := ctx.String("dest") 309 | 310 | dstOpts, err := optionsFromCtx(ctx) 311 | if err != nil { 312 | return err 313 | } 314 | 315 | dstQueue, err := timeq.Open(dstDir, dstOpts) 316 | if err != nil { 317 | return err 318 | } 319 | 320 | nShoveled, err := srcQueue.Shovel(dstQueue) 321 | if err != nil { 322 | return errors.Join(err, dstQueue.Close()) 323 | } 324 | 325 | fmt.Printf("moved %d items\n", nShoveled) 326 | return dstQueue.Close() 327 | } 328 | 329 | func handleLogDump(ctx *cli.Context) error { 330 | log, err := vlog.Open(ctx.String("path"), true) 331 | if err != nil { 332 | return err 333 | } 334 | 335 | var loc = item.Location{Len: 1e9} 336 | for iter := log.At(loc, true); iter.Next(); { 337 | it := iter.Item() 338 | fmt.Printf("%v:%s\n", it.Key, it.Blob) 339 | } 340 | 341 | return log.Close() 342 | } 343 | 344 | func handleForkCreate(ctx *cli.Context, q *timeq.Queue) error { 345 | name := ctx.String("name") 346 | _, err := q.Fork(timeq.ForkName(name)) 347 | return err 348 | } 349 | 350 | func handleForkList(_ *cli.Context, q *timeq.Queue) error { 351 | for _, fork := range q.Forks() { 352 | fmt.Println(fork) 353 | } 354 | 355 | return nil 356 | } 357 | 358 | func handleForkRemove(ctx *cli.Context, q *timeq.Queue) error { 359 | name := ctx.String("name") 360 | fork, err := q.Fork(timeq.ForkName(name)) 361 | if err != nil { 362 | return err 363 | } 364 | 365 | return fork.Remove() 366 | } 367 | -------------------------------------------------------------------------------- /docs/data_format.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sahib/timeq/37c44152cea3302f40573864d9a7c4a6ffd4f08b/docs/data_format.png -------------------------------------------------------------------------------- /docs/data_format.svg: -------------------------------------------------------------------------------- [data format diagram: dat.log entries are laid out as Size, Key, Payload ($Size), FF FF; idx.log entries are 24 byte each, laid out as Key, Off, Len, NEntries] -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "reflect" 7 | ) 8 | 9 | func ExampleQueue() { 10 | // Error handling stripped for brevity: 11 | dir, _ := os.MkdirTemp("", "timeq-example") 12 | defer os.RemoveAll(dir) 13 | 14 | // Open the queue. If it does not exist, it gets created: 15 | queue, _ := Open(dir, DefaultOptions()) 16 | 17 | // Push some items to it: 18 | pushItems := make(Items, 0, 10) 19 | for idx := 0; idx < 10; idx++ { 20 | pushItems = append(pushItems, Item{ 21 | Key: Key(idx), 22 | Blob: []byte(fmt.Sprintf("key_%d", idx)), 23 | }) 24 | } 25 | 26 | _ = queue.Push(pushItems) 27 | 28 | // Retrieve the same items again: 29 | _ = queue.Read(10, func(_ Transaction, popItems Items) (ReadOp, error) { 30 | // Just for example purposes, check if they match: 31 | if reflect.DeepEqual(pushItems, popItems) { 32 | fmt.Println("They match! :)") 33 | } else { 34 | fmt.Println("They do not match! :(") 35 | } 36 | 37 | return ReadOpPop, nil 38 | }) 39 | 40 | // Output: They match! :) 41 | } 42 | 43 | func ExampleQueue_Fork() { 44 | // Error handling stripped for brevity: 45 | dir, _ := os.MkdirTemp("", "timeq-example") 46 | defer os.RemoveAll(dir) 47 | 48 | // Open the queue. If it does not exist, it gets created: 49 | queue, _ := Open(dir, DefaultOptions()) 50 | 51 | // Fork the consuming end in half: 52 | fork, _ := queue.Fork("fork") 53 | 54 | // Push some items to it - they are added to both the regular queue 55 | // as well as to the fork we just created. 56 | _ = queue.Push(Items{ 57 | Item{ 58 | Key: 123, 59 | Blob: []byte("some data"), 60 | }, 61 | }) 62 | 63 | // Check the main queue contents: 64 | _ = queue.Read(1, func(_ Transaction, items Items) (ReadOp, error) { 65 | fmt.Println(string(items[0].Blob)) 66 | return ReadOpPop, nil 67 | }) 68 | 69 | // The same data should be available in the fork, 70 | // as it was not popped by the read above. 71 | _ = fork.Read(1, func(_ Transaction, items Items) (ReadOp, error) { 72 | fmt.Println(string(items[0].Blob)) 73 | return ReadOpPop, nil 74 | }) 75 | 76 | // Output: 77 | // some data 78 | // some data 79 | } 80 | 81 | func ExampleTransaction() { 82 | // Error handling stripped for brevity: 83 | dir, _ := os.MkdirTemp("", "timeq-example") 84 | defer os.RemoveAll(dir) 85 | 86 | // Open the queue.
If it does not exist, it gets created: 87 | queue, _ := Open(dir, DefaultOptions()) 88 | 89 | _ = queue.Push(Items{ 90 | Item{ 91 | Key: 123, 92 | Blob: []byte("some data"), 93 | }, 94 | Item{ 95 | Key: 456, 96 | Blob: []byte("other data"), 97 | }, 98 | }) 99 | 100 | _ = queue.Read(1, func(tx Transaction, items Items) (ReadOp, error) { 101 | // Push half of the data back to the queue. 102 | // You can use that to "unread" parts of what you read. 103 | return ReadOpPop, tx.Push(items[1:]) 104 | }) 105 | 106 | fmt.Println(queue.Len()) 107 | 108 | // Output: 109 | // 1 110 | } 111 | 112 | func ExamplePopCopy() { 113 | // Error handling stripped for brevity: 114 | dir, _ := os.MkdirTemp("", "timeq-example") 115 | defer os.RemoveAll(dir) 116 | 117 | // Open the queue. If it does not exist, it gets created: 118 | queue, _ := Open(dir, DefaultOptions()) 119 | 120 | items := make(Items, 0, 5) 121 | for idx := 0; idx < 10; idx++ { 122 | items = append(items, Item{ 123 | Key: Key(idx), 124 | Blob: []byte(fmt.Sprintf("%d", idx)), 125 | }) 126 | } 127 | 128 | _ = queue.Push(items) 129 | got, _ := PopCopy(queue, 5) 130 | for _, item := range got { 131 | fmt.Println(item.Key) 132 | } 133 | 134 | // Output: 135 | // K00000000000000000000 136 | // K00000000000000000001 137 | // K00000000000000000002 138 | // K00000000000000000003 139 | // K00000000000000000004 140 | } 141 | -------------------------------------------------------------------------------- /fuzz_test.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "os" 5 | "slices" 6 | "testing" 7 | 8 | "github.com/sahib/timeq/item" 9 | "github.com/sahib/timeq/item/testutils" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | // Fuzz ideas: 14 | // - different pop sizes. 15 | // - different number of re-opens in the middle. 16 | 17 | func FuzzPushPop(f *testing.F) { 18 | f.Add(0, 10, 1, 2) 19 | f.Fuzz(func(t *testing.T, start, stop, step, reps int) { 20 | items := Items(testutils.GenItems(start, stop, step)) 21 | if len(items) == 0 || reps <= 0 { 22 | // bogus seed input 23 | return 24 | } 25 | 26 | dir, err := os.MkdirTemp("", "timeq-fuzz") 27 | require.NoError(t, err) 28 | defer os.RemoveAll(dir) 29 | 30 | queue, err := Open(dir, DefaultOptions()) 31 | require.NoError(t, err) 32 | 33 | exp := Items{} 34 | for rep := 0; rep < reps; rep++ { 35 | require.NoError(t, queue.Push(items)) 36 | exp = append(exp, items...) 
37 | } 38 | 39 | slices.SortFunc(exp, func(i, j item.Item) int { 40 | return int(i.Key - j.Key) 41 | }) 42 | 43 | require.NoError(t, queue.Read(reps*len(items), func(_ Transaction, got Items) (ReadOp, error) { 44 | require.Equal(t, exp, got) 45 | return ReadOpPop, nil 46 | })) 47 | 48 | require.NoError(t, queue.Close()) 49 | }) 50 | } 51 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/sahib/timeq 2 | 3 | go 1.21.0 4 | 5 | require ( 6 | github.com/otiai10/copy v1.14.0 7 | github.com/stretchr/testify v1.8.4 8 | github.com/tidwall/btree v1.7.0 9 | github.com/urfave/cli v1.22.14 10 | golang.org/x/sys v0.12.0 11 | ) 12 | 13 | require ( 14 | github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect 15 | github.com/davecgh/go-spew v1.1.1 // indirect 16 | github.com/google/renameio v1.0.1 // indirect 17 | github.com/pmezard/go-difflib v1.0.0 // indirect 18 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 19 | golang.org/x/sync v0.3.0 // indirect 20 | gopkg.in/yaml.v3 v3.0.1 // indirect 21 | ) 22 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= 2 | github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= 3 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 6 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/google/renameio v1.0.1 h1:Lh/jXZmvZxb0BBeSY5VKEfidcbcbenKjZFzM/q0fSeU= 8 | github.com/google/renameio v1.0.1/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk= 9 | github.com/otiai10/copy v1.12.0 h1:cLMgSQnXBs1eehF0Wy/FAGsgDTDmAqFR7rQylBb1nDY= 10 | github.com/otiai10/copy v1.12.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= 11 | github.com/otiai10/copy v1.14.0 h1:dCI/t1iTdYGtkvCuBG2BgR6KZa83PTclw4U5n2wAllU= 12 | github.com/otiai10/copy v1.14.0/go.mod h1:ECfuL02W+/FkTWZWgQqXPWZgW9oeKCSQ5qVfSc4qc4w= 13 | github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= 14 | github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= 15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 17 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 18 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 19 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 20 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 21 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 22 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 23 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 24 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 25 | 
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 26 | github.com/tidwall/btree v1.6.0 h1:LDZfKfQIBHGHWSwckhXI0RPSXzlo+KYdjK7FWSqOzzg= 27 | github.com/tidwall/btree v1.6.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= 28 | github.com/tidwall/btree v1.7.0 h1:L1fkJH/AuEh5zBnnBbmTwQ5Lt+bRJ5A8EWecslvo9iI= 29 | github.com/tidwall/btree v1.7.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= 30 | github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= 31 | github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= 32 | golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= 33 | golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= 34 | golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= 35 | golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 36 | golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= 37 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 38 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 39 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 40 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 41 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 42 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 43 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 44 | -------------------------------------------------------------------------------- /index/index.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/sahib/timeq/item" 7 | "github.com/sahib/timeq/vlog" 8 | "github.com/tidwall/btree" 9 | ) 10 | 11 | // Index is an in-memory representation of the batch index as b-tree structure. 12 | type Index struct { 13 | m btree.Map[item.Key, []item.Location] 14 | len item.Off 15 | nentries item.Off 16 | } 17 | 18 | // FromVlog produces an index from the data in the value log. It's 19 | // main use is to re-generate the index in case the index file is 20 | // damaged or broken in some way. The resulting index is likely not 21 | // the same as before, but will include items that were popped already. 22 | func FromVlog(log *vlog.Log) (*Index, error) { 23 | // we're cheating a little here by trusting the iterator 24 | // to go not over the end, even if the Len is bogus. 25 | iter := log.At(item.Location{ 26 | Off: 0, 27 | Len: ^item.Off(0), 28 | }, true) 29 | 30 | index := &Index{} 31 | 32 | var prevLoc item.Location 33 | var lastLoc item.Location 34 | var isInitialItem = true 35 | 36 | // Go over the data and try to find runs of data that are sorted in 37 | // ascending order. Each deviant item is the start of a new run. 
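// For illustration (this mirrors the "consecutive" case in index_test.go):
// if the vlog holds the key sequence 15..19 followed by 0..9, e.g. from two
// Push() calls in that order, the loop below yields two index entries: a
// batch starting at Key 15 with Len 5, and a batch starting at Key 0 with
// Len 10, because key 0 breaks the ascending run.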
38 | for iter.Next() { 39 | it := iter.Item() 40 | if prevLoc.Key > it.Key { 41 | index.Set(lastLoc) 42 | lastLoc.Off = prevLoc.Off 43 | lastLoc.Key = it.Key 44 | lastLoc.Len = 0 45 | } 46 | 47 | lastLoc.Len++ 48 | if isInitialItem { 49 | lastLoc.Key = it.Key 50 | isInitialItem = false 51 | } 52 | 53 | prevLoc.Off += item.HeaderSize + item.Off(len(it.Blob)) + item.TrailerSize 54 | prevLoc.Key = it.Key 55 | } 56 | 57 | if err := iter.Err(); err != nil { 58 | return nil, err 59 | } 60 | 61 | // also pick up last run in the data: 62 | if lastLoc.Len > 0 { 63 | index.Set(lastLoc) 64 | } 65 | 66 | return index, nil 67 | } 68 | 69 | func Load(path string) (*Index, error) { 70 | flags := os.O_CREATE | os.O_RDONLY 71 | fd, err := os.OpenFile(path, flags, 0600) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | defer fd.Close() 77 | 78 | rdr := NewReader(fd) 79 | 80 | var index Index 81 | var loc item.Location 82 | for rdr.Next(&loc) { 83 | if loc.Len == 0 { 84 | // len=0 means that the specific batch was fully consumed. 85 | // delete any previously read values. 86 | index.Delete(loc.Key) 87 | } else { 88 | index.Set(loc) 89 | } 90 | } 91 | 92 | return &index, rdr.Err() 93 | } 94 | 95 | func (i *Index) Set(loc item.Location) (item.Location, int) { 96 | oldLocs, _ := i.m.Get(loc.Key) 97 | i.m.Set(loc.Key, append(oldLocs, loc)) 98 | i.len += loc.Len 99 | i.nentries += loc.Len 100 | return loc, 0 101 | } 102 | 103 | func (i *Index) Delete(key item.Key) (loc item.Location) { 104 | oldLocs, ok := i.m.Get(key) 105 | if !ok { 106 | return 107 | } 108 | 109 | i.len -= oldLocs[0].Len 110 | i.nentries += oldLocs[0].Len 111 | if len(oldLocs) > 1 { 112 | // delete one of the keys: 113 | i.m.Set(key, oldLocs[1:]) 114 | return oldLocs[0] 115 | } 116 | 117 | i.m.Delete(key) 118 | return oldLocs[0] 119 | } 120 | 121 | // Len returns the number of items in the WAL. 122 | // (Not the number of locations or batches!) 123 | func (i *Index) Len() item.Off { 124 | return i.len 125 | } 126 | 127 | // NEntries returns the number of entries in the 128 | // index. This is not the same Len() as a deleted 129 | // item is also inserted into the index. 130 | func (i *Index) NEntries() item.Off { 131 | return i.nentries 132 | } 133 | 134 | func (i *Index) Trailer() Trailer { 135 | return Trailer{ 136 | TotalEntries: i.len, 137 | } 138 | } 139 | 140 | func (i *Index) Copy() *Index { 141 | return &Index{ 142 | m: *i.m.Copy(), 143 | len: i.len, 144 | nentries: i.nentries, 145 | } 146 | } 147 | 148 | //////////// 149 | 150 | type Iter struct { 151 | iter btree.MapIter[item.Key, []item.Location] 152 | curr []item.Location 153 | } 154 | 155 | func (i *Iter) Next() bool { 156 | if len(i.curr) > 1 { 157 | i.curr = i.curr[1:] 158 | return true 159 | } 160 | 161 | if i.iter.Next() { 162 | i.curr = i.iter.Value() 163 | return true 164 | } 165 | 166 | return false 167 | } 168 | 169 | func (i *Iter) Value() item.Location { 170 | if len(i.curr) == 0 { 171 | // this should not happen in case of correct api usage. 
172 | // just a guard if someone calls Value() without Next() 173 | return item.Location{} 174 | 175 | } 176 | return i.curr[0] 177 | } 178 | 179 | func (i *Index) Iter() Iter { 180 | return Iter{iter: i.m.Iter()} 181 | } 182 | -------------------------------------------------------------------------------- /index/index_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/sahib/timeq/item" 9 | "github.com/sahib/timeq/item/testutils" 10 | "github.com/sahib/timeq/vlog" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestIndexLoad(t *testing.T) { 15 | t.Parallel() 16 | 17 | tmpDir, err := os.MkdirTemp("", "timeq-indextest") 18 | require.NoError(t, err) 19 | defer os.RemoveAll(tmpDir) 20 | 21 | indexPath := filepath.Join(tmpDir, "index") 22 | w, err := NewWriter(indexPath, true) 23 | require.NoError(t, err) 24 | 25 | var lenCount item.Off 26 | for idx := 0; idx < 10; idx++ { 27 | require.NoError(t, w.Push(item.Location{ 28 | Key: item.Key(idx), 29 | Off: item.Off(idx), 30 | Len: item.Off(idx), 31 | }, Trailer{})) 32 | lenCount += item.Off(idx) 33 | } 34 | 35 | require.NoError(t, w.Close()) 36 | 37 | index, err := Load(indexPath) 38 | require.NoError(t, err) 39 | 40 | // if length=0 then Load() considers the entry 41 | // as "delete previous items with this key". 42 | var count = 1 43 | for iter := index.Iter(); iter.Next(); { 44 | require.Equal(t, item.Location{ 45 | Key: item.Key(count), 46 | Off: item.Off(count), 47 | Len: item.Off(count), 48 | }, iter.Value()) 49 | count++ 50 | } 51 | 52 | require.Equal(t, lenCount, index.Len()) 53 | } 54 | 55 | func TestIndexSet(t *testing.T) { 56 | t.Parallel() 57 | 58 | index := Index{} 59 | 60 | oldLoc := item.Location{Key: 23} 61 | newLoc, skew := index.Set(oldLoc) 62 | require.Equal(t, oldLoc, newLoc) 63 | require.Equal(t, 0, skew) 64 | } 65 | 66 | func testIndexFromVlog(t *testing.T, pushes []item.Items, expLocs [][]item.Location) { 67 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 68 | require.NoError(t, err) 69 | defer os.RemoveAll(tmpDir) 70 | 71 | log, err := vlog.Open(filepath.Join(tmpDir, "log"), true) 72 | require.NoError(t, err) 73 | for _, push := range pushes { 74 | _, err = log.Push(push) 75 | require.NoError(t, err) 76 | } 77 | 78 | index, err := FromVlog(log) 79 | require.NoError(t, err) 80 | 81 | gotLocs := index.m.Values() 82 | gotKeys := index.m.Keys() 83 | 84 | expKeys := []item.Key{} 85 | for _, expSlice := range expLocs { 86 | expKeys = append(expKeys, expSlice[0].Key) 87 | } 88 | 89 | require.Equal(t, expLocs, gotLocs) 90 | require.Equal(t, expKeys, gotKeys) 91 | } 92 | 93 | func TestIndexFromVlog(t *testing.T) { 94 | tcs := []struct { 95 | Name string 96 | Pushes []item.Items 97 | ExpLocs [][]item.Location 98 | }{ 99 | { 100 | Name: "consecutive", 101 | Pushes: []item.Items{ 102 | testutils.GenItems(15, 20, 1), 103 | testutils.GenItems(0, 10, 1), 104 | }, 105 | ExpLocs: [][]item.Location{{{ 106 | Key: 0, 107 | Off: testutils.GenItems(15, 20, 1).StorageSize(), 108 | Len: 10, 109 | }}, {{ 110 | Key: 15, 111 | Off: 0, 112 | Len: 5, 113 | }, 114 | }}, 115 | }, { 116 | Name: "strided", 117 | Pushes: []item.Items{ 118 | testutils.GenItems(0, 10, 2), 119 | testutils.GenItems(1, 10, 2), 120 | }, 121 | ExpLocs: [][]item.Location{{{ 122 | Key: 0, 123 | Off: 0, 124 | Len: 5, 125 | }}, {{ 126 | Key: 1, 127 | Off: testutils.GenItems(1, 10, 2).StorageSize(), 128 | Len: 5, 129 | }, 130 | 
}}, 131 | }, { 132 | Name: "gap", 133 | Pushes: []item.Items{ 134 | testutils.GenItems(300, 400, 2), 135 | testutils.GenItems(100, 200, 1), 136 | }, 137 | ExpLocs: [][]item.Location{{{ 138 | Key: 100, 139 | Off: testutils.GenItems(300, 400, 2).StorageSize(), 140 | Len: 100, 141 | }}, {{ 142 | Key: 300, 143 | Off: 0, 144 | Len: 50, 145 | }, 146 | }}, 147 | }, 148 | } 149 | 150 | for _, tc := range tcs { 151 | t.Run(tc.Name, func(t *testing.T) { 152 | tc := tc 153 | t.Parallel() 154 | testIndexFromVlog( 155 | t, 156 | tc.Pushes, 157 | tc.ExpLocs, 158 | ) 159 | }) 160 | } 161 | } 162 | 163 | func TestIndexDuplicateSet(t *testing.T) { 164 | t.Parallel() 165 | 166 | index := &Index{} 167 | loc1 := item.Location{ 168 | Key: 10, 169 | Off: 23, 170 | Len: 5, 171 | } 172 | loc2 := item.Location{ 173 | Key: 10, 174 | Off: 42, 175 | Len: 19, 176 | } 177 | index.Set(loc1) 178 | index.Set(loc2) 179 | require.Equal(t, item.Off(24), index.Len()) 180 | 181 | var count int 182 | for iter := index.Iter(); iter.Next(); count++ { 183 | loc := iter.Value() 184 | if count == 0 { 185 | require.Equal(t, loc1, loc) 186 | } 187 | 188 | if count == 1 { 189 | require.Equal(t, loc2, loc) 190 | } 191 | } 192 | require.Equal(t, 2, count) 193 | 194 | // check if deletion of one item 195 | // let's the other one survive: 196 | index.Delete(10) 197 | iter := index.Iter() 198 | require.True(t, iter.Next()) 199 | require.Equal(t, loc2, iter.Value()) 200 | require.False(t, iter.Next()) 201 | require.Equal(t, item.Off(19), index.Len()) 202 | 203 | // check if deleting all of them works nicely: 204 | index.Delete(10) 205 | iter = index.Iter() 206 | require.False(t, iter.Next()) 207 | require.Equal(t, item.Off(0), index.Len()) 208 | } 209 | 210 | func TestIndexNoCrashOnBadAPIUsage(t *testing.T) { 211 | t.Parallel() 212 | index := &Index{} 213 | iter := index.Iter() 214 | iter.Value() // this should not crash 215 | } 216 | -------------------------------------------------------------------------------- /index/reader.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bufio" 5 | "encoding/binary" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | "github.com/sahib/timeq/item" 12 | ) 13 | 14 | const TrailerSize = 4 15 | 16 | // LocationSize is the physical storage of a single item 17 | // (8 for the key, 8 for the wal offset, 4 for the len) 18 | const LocationSize = 8 + 8 + 4 + TrailerSize 19 | 20 | type Trailer struct { 21 | TotalEntries item.Off 22 | } 23 | 24 | // Reader gives access to a single index on disk 25 | type Reader struct { 26 | r io.Reader 27 | err error 28 | locBuf [LocationSize]byte 29 | } 30 | 31 | func NewReader(r io.Reader) *Reader { 32 | return &Reader{ 33 | // Reduce number of syscalls needed: 34 | r: bufio.NewReaderSize(r, 4*1024), 35 | } 36 | } 37 | 38 | func (fi *Reader) Next(loc *item.Location) bool { 39 | if _, err := io.ReadFull(fi.r, fi.locBuf[:]); err != nil { 40 | if err != io.EOF { 41 | fi.err = err 42 | } 43 | 44 | return false 45 | } 46 | 47 | loc.Key = item.Key(binary.BigEndian.Uint64(fi.locBuf[:8])) 48 | loc.Off = item.Off(binary.BigEndian.Uint64(fi.locBuf[8:])) 49 | loc.Len = item.Off(binary.BigEndian.Uint32(fi.locBuf[16:])) 50 | // NOTE: trailer with size / len is ignored here. 
See ReadTrailer() 51 | return true 52 | } 53 | 54 | func (fi *Reader) Err() error { 55 | return fi.err 56 | } 57 | 58 | func ReadTrailers(dir string, fn func(consumerName string, trailer Trailer)) error { 59 | ents, err := os.ReadDir(dir) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | for _, ent := range ents { 65 | name := ent.Name() 66 | if !strings.HasSuffix(name, "idx.log") { 67 | continue 68 | } 69 | 70 | path := filepath.Join(dir, name) 71 | trailer, err := ReadTrailer(path) 72 | if err != nil { 73 | return err 74 | } 75 | 76 | consumerName := strings.TrimSuffix(name, "idx.log") 77 | consumerName = strings.TrimSuffix(consumerName, ".") 78 | fn(consumerName, trailer) 79 | } 80 | 81 | return nil 82 | } 83 | 84 | // ReadTrailer reads the trailer of the index log. 85 | // It contains the number of entries in the index. 86 | func ReadTrailer(path string) (Trailer, error) { 87 | fd, err := os.Open(path) 88 | if err != nil { 89 | return Trailer{}, err 90 | } 91 | defer fd.Close() 92 | 93 | info, err := fd.Stat() 94 | if err != nil { 95 | return Trailer{}, err 96 | } 97 | 98 | if info.Size() < LocationSize { 99 | return Trailer{TotalEntries: 0}, nil 100 | } 101 | 102 | if _, err := fd.Seek(-TrailerSize, io.SeekEnd); err != nil { 103 | return Trailer{}, err 104 | } 105 | 106 | buf := make([]byte, TrailerSize) 107 | if _, err := io.ReadFull(fd, buf); err != nil { 108 | return Trailer{}, nil 109 | } 110 | 111 | totalEntries := item.Off(binary.BigEndian.Uint32(buf)) 112 | return Trailer{TotalEntries: totalEntries}, nil 113 | } 114 | -------------------------------------------------------------------------------- /index/reader_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/sahib/timeq/item" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestIndexReadTrailer(t *testing.T) { 13 | t.Parallel() 14 | 15 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 16 | require.NoError(t, err) 17 | defer os.RemoveAll(tmpDir) 18 | 19 | idxPath := filepath.Join(tmpDir, "idx.log") 20 | idxWriter, err := NewWriter(idxPath, true) 21 | require.NoError(t, err) 22 | 23 | for idx := item.Off(0); idx <= 123; idx++ { 24 | require.NoError(t, idxWriter.Push(item.Location{ 25 | Key: item.Key(idx), 26 | Off: idx, 27 | Len: idx, 28 | }, Trailer{ 29 | TotalEntries: idx, 30 | })) 31 | } 32 | 33 | require.NoError(t, idxWriter.Close()) 34 | 35 | trailer, err := ReadTrailer(idxPath) 36 | require.NoError(t, err) 37 | require.Equal(t, item.Off(123), trailer.TotalEntries) 38 | } 39 | -------------------------------------------------------------------------------- /index/writer.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "os" 8 | 9 | "github.com/sahib/timeq/item" 10 | ) 11 | 12 | type Writer struct { 13 | fd *os.File 14 | locBuf [LocationSize]byte 15 | sync bool 16 | } 17 | 18 | func NewWriter(path string, sync bool) (*Writer, error) { 19 | flags := os.O_APPEND | os.O_CREATE | os.O_WRONLY 20 | fd, err := os.OpenFile(path, flags, 0600) 21 | if err != nil { 22 | return nil, err 23 | } 24 | 25 | return &Writer{ 26 | fd: fd, 27 | sync: sync, 28 | }, nil 29 | } 30 | 31 | func (w *Writer) Push(loc item.Location, trailer Trailer) error { 32 | binary.BigEndian.PutUint64(w.locBuf[0:], uint64(loc.Key)) 33 | binary.BigEndian.PutUint64(w.locBuf[8:], 
uint64(loc.Off)) 34 | binary.BigEndian.PutUint32(w.locBuf[16:], uint32(loc.Len)) 35 | binary.BigEndian.PutUint32(w.locBuf[20:], uint32(trailer.TotalEntries)) 36 | _, err := w.fd.Write(w.locBuf[:]) 37 | return err 38 | } 39 | 40 | func (w *Writer) Close() error { 41 | syncErr := w.fd.Sync() 42 | closeErr := w.fd.Close() 43 | return errors.Join(syncErr, closeErr) 44 | } 45 | 46 | func (w *Writer) Sync(force bool) error { 47 | if !w.sync && !force { 48 | return nil 49 | } 50 | 51 | return w.fd.Sync() 52 | } 53 | 54 | // WriteIndex is a convenience function to write the contents 55 | // of `idx` to `path`. 56 | func WriteIndex(idx *Index, path string) error { 57 | iter := idx.Iter() 58 | writer, err := NewWriter(path, true) 59 | if err != nil { 60 | return err 61 | } 62 | 63 | var totalEntries item.Off 64 | for iter.Next() { 65 | loc := iter.Value() 66 | if err := writer.Push(loc, Trailer{TotalEntries: totalEntries}); err != nil { 67 | return fmt.Errorf("push: %w", err) 68 | } 69 | 70 | totalEntries++ 71 | } 72 | 73 | return nil 74 | } 75 | -------------------------------------------------------------------------------- /index/writer_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestIndexWriterEmpty(t *testing.T) { 12 | t.Parallel() 13 | 14 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 15 | require.NoError(t, err) 16 | defer os.RemoveAll(tmpDir) 17 | 18 | idxPath := filepath.Join(tmpDir, "idx.log") 19 | idxWriter, err := NewWriter(idxPath, false) 20 | require.NoError(t, err) 21 | require.NoError(t, idxWriter.Sync(false)) 22 | require.NoError(t, idxWriter.Close()) 23 | } 24 | -------------------------------------------------------------------------------- /item/item.go: -------------------------------------------------------------------------------- 1 | package item 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | "strconv" 7 | "strings" 8 | ) 9 | 10 | const ( 11 | HeaderSize = 12 12 | TrailerSize = 2 13 | ) 14 | 15 | // Key is a priority key in the queue. It has to be unique 16 | // to avoid overwriting other entries. This was written with 17 | // unix nanosecond epoch stamps in mind. 18 | type Key int64 19 | 20 | // KeyFromString is the reverse of String() 21 | func KeyFromString(s string) (Key, error) { 22 | s = strings.TrimPrefix(s, "K") 23 | key, err := strconv.ParseInt(filepath.Base(s), 10, 64) 24 | if err != nil { 25 | return 0, err 26 | } 27 | 28 | return Key(key), nil 29 | } 30 | 31 | func (k Key) String() string { 32 | // keys are int64, so we need to pad with log10(2**63) at least 33 | // to be sure that buckets are sorted on filesystem. 34 | return fmt.Sprintf("K%020d", int64(k)) 35 | } 36 | 37 | type Off uint64 38 | 39 | type Item struct { 40 | Key Key 41 | Blob []byte 42 | } 43 | 44 | func (i Item) String() string { 45 | return fmt.Sprintf("%s:%s", i.Key, i.Blob) 46 | } 47 | 48 | func (i Item) StorageSize() Off { 49 | return HeaderSize + Off(len(i.Blob)) + TrailerSize 50 | } 51 | 52 | func (i *Item) Copy() Item { 53 | blob := make([]byte, len(i.Blob)) 54 | copy(blob, i.Blob) 55 | return Item{ 56 | Key: i.Key, 57 | Blob: blob, 58 | } 59 | } 60 | 61 | // Location references the location of a batch in dat.log 62 | type Location struct { 63 | // Key is the priority key of the first item in the batch 64 | Key Key 65 | 66 | // Off is the offset in bytes to the start of the batch in the vlog. 
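// On disk, each Location is stored as a fixed 24 byte index entry:
// an 8 byte Key, an 8 byte Off, a 4 byte Len and a 4 byte trailer
// (see index.LocationSize and index/writer.go).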
67 | Off Off 68 | 69 | // Len is the number of items in this batch. 70 | // A zero len has a special meaning: this batch was deleted. 71 | Len Off 72 | } 73 | 74 | func (l Location) String() string { 75 | return fmt.Sprintf("[key=%s, off=%d, len=%d]", l.Key, l.Off, l.Len) 76 | } 77 | 78 | // Items is a list of items. 79 | type Items []Item 80 | 81 | func (items Items) Copy() Items { 82 | // This Copy() is allocation optimized, i.e. it first goes through the data 83 | // and decides how much memory is required. Then that memory is allocated once 84 | // instead of many times. It's about 50% faster than the straightforward way. 85 | 86 | var bufSize int 87 | for idx := 0; idx < len(items); idx++ { 88 | bufSize += len(items[idx].Blob) 89 | } 90 | 91 | itemsCopy := make(Items, len(items)) 92 | copyBuf := make([]byte, bufSize) 93 | for idx := 0; idx < len(items); idx++ { 94 | blobCopy := copyBuf[:len(items[idx].Blob)] 95 | copy(blobCopy, items[idx].Blob) 96 | itemsCopy[idx] = Item{ 97 | Key: items[idx].Key, 98 | Blob: blobCopy, 99 | } 100 | 101 | copyBuf = copyBuf[len(blobCopy):] 102 | } 103 | 104 | return itemsCopy 105 | } 106 | 107 | func (items Items) StorageSize() Off { 108 | sum := Off(len(items)) * (HeaderSize + TrailerSize) 109 | for idx := 0; idx < len(items); idx++ { 110 | sum += Off(len(items[idx].Blob)) 111 | } 112 | return sum 113 | } 114 | -------------------------------------------------------------------------------- /item/item_test.go: -------------------------------------------------------------------------------- 1 | package item 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestItemCopy(t *testing.T) { 11 | blobOrig := []byte("hello") 12 | itemOrig := Item{ 13 | Key: 23, 14 | Blob: blobOrig, 15 | } 16 | 17 | itemCopy := itemOrig.Copy() 18 | 19 | // check that it was indeed copied: 20 | require.Equal(t, itemOrig, itemCopy) 21 | require.True(t, unsafe.SliceData(blobOrig) != unsafe.SliceData(itemCopy.Blob)) 22 | } 23 | 24 | func TestKeyFromString(t *testing.T) { 25 | ff, err := KeyFromString("99") 26 | require.NoError(t, err) 27 | require.Equal(t, Key(99), ff) 28 | 29 | ff, err = KeyFromString("K0000099") 30 | require.NoError(t, err) 31 | require.Equal(t, Key(99), ff) 32 | 33 | _, err = KeyFromString("ZZ") 34 | require.Error(t, err) 35 | } 36 | 37 | func TestItemsCopy(t *testing.T) { 38 | items := Items{ 39 | Item{ 40 | Key: 17, 41 | Blob: []byte("blob"), 42 | }, 43 | Item{ 44 | Key: 23, 45 | Blob: []byte(""), 46 | }, 47 | Item{ 48 | Key: 42, 49 | Blob: []byte(""), 50 | }, 51 | } 52 | copied := items.Copy() 53 | require.Equal(t, items, copied) 54 | require.True(t, unsafe.SliceData(items) != unsafe.SliceData(copied)) 55 | } 56 | 57 | func TestItemStorageSize(t *testing.T) { 58 | var items Items 59 | require.Zero(t, items.StorageSize()) 60 | 61 | items = Items{ 62 | Item{ 63 | Key: 23, 64 | Blob: make([]byte, 10), 65 | }, 66 | Item{ 67 | Key: 42, 68 | Blob: make([]byte, 10), 69 | }, 70 | } 71 | 72 | require.Equal(t, Off(48), items.StorageSize()) 73 | } 74 | -------------------------------------------------------------------------------- /item/testutils/testutils.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "path/filepath" 8 | "runtime" 9 | "syscall" 10 | "testing" 11 | 12 | "github.com/sahib/timeq/item" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func ItemFromIndex(idx int) item.Item { 17 
| return item.Item{ 18 | Key: item.Key(idx), 19 | Blob: []byte(fmt.Sprintf("%d", idx)), 20 | } 21 | } 22 | 23 | func GenItems(start, stop, step int) item.Items { 24 | if step == 0 { 25 | return nil 26 | } 27 | 28 | var its item.Items 29 | for idx := start; ; idx += step { 30 | if step > 0 && idx >= stop { 31 | break 32 | } 33 | 34 | if step < 0 && idx <= stop { 35 | break 36 | } 37 | 38 | its = append(its, ItemFromIndex(idx)) 39 | } 40 | 41 | return its 42 | } 43 | 44 | // WithTempMount calls `fn` with the path to directory that contains an empty 45 | // ext4 filesystem that will be unmounted once the test finished. 46 | func WithTempMount(t *testing.T, fn func(mountDir string)) { 47 | if runtime.GOOS != "linux" { 48 | t.Skipf("this test uses ext4 and other linux specific tools") 49 | } 50 | 51 | if os.Geteuid() != 0 { 52 | t.Skipf("this test needs to be run with root permissions to work") 53 | } 54 | 55 | dir, err := os.MkdirTemp("", "mount-test") 56 | require.NoError(t, err) 57 | defer os.RemoveAll(dir) 58 | 59 | // Create a file big enough to hold a small filesystem: 60 | loopPath := filepath.Join(dir, "loop") 61 | fd, err := os.Create(loopPath) 62 | require.NoError(t, err) 63 | require.NoError(t, fd.Truncate(1*1024*1024)) 64 | require.NoError(t, fd.Close()) 65 | 66 | // Create a filesystem in the loop file: 67 | ext4Out, err := exec.Command("mkfs.ext4", loopPath).Output() 68 | require.NoError(t, err, string(ext4Out)) 69 | 70 | // Mount the ext4 fs to a newly created directory: 71 | mountDir := filepath.Join(dir, "mount") 72 | require.NoError(t, os.MkdirAll(mountDir, 0600)) 73 | mountOut, err := exec.Command("mount", loopPath, mountDir).Output() 74 | require.NoError(t, err, string(mountOut)) 75 | 76 | defer func() { 77 | require.NoError(t, syscall.Unmount(mountDir, 0)) 78 | }() 79 | 80 | fn(mountDir) 81 | } 82 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "os" 8 | 9 | "github.com/sahib/timeq/item" 10 | ) 11 | 12 | type SyncMode int 13 | 14 | func (sm SyncMode) IsValid() bool { 15 | return sm >= 0 && sm <= SyncFull 16 | } 17 | 18 | // The available option are inspired by SQLite: 19 | // https://www.sqlite.org/pragma.html#pragma_synchronous 20 | const ( 21 | // SyncNone does not sync on normal operation (only on close) 22 | SyncNone = SyncMode(0) 23 | // SyncData only synchronizes the data log 24 | SyncData = SyncMode(1 << iota) 25 | // SyncIndex only synchronizes the index log (does not make sense alone) 26 | SyncIndex 27 | // SyncFull syncs both the data and index log 28 | SyncFull = SyncData | SyncIndex 29 | ) 30 | 31 | // Logger is a small interface to redirect logs to. 32 | // The default logger outputs to stderr. 33 | type Logger interface { 34 | Printf(fmt string, args ...any) 35 | } 36 | 37 | type writerLogger struct { 38 | w io.Writer 39 | } 40 | 41 | func (fl *writerLogger) Printf(fmtStr string, args ...any) { 42 | fmt.Fprintf(fl.w, "[timeq] "+fmtStr+"\n", args...) 43 | } 44 | 45 | type ErrorMode int 46 | 47 | func (em ErrorMode) IsValid() bool { 48 | return em < errorModeMax && em >= 0 49 | } 50 | 51 | const ( 52 | // ErrorModeAbort will immediately abort the current 53 | // operation if an error is encountered that might lead to data loss. 
54 | ErrorModeAbort = ErrorMode(iota) 55 | 56 | // ErrorModeContinue tries to progress further in case of errors 57 | // by jumping over a faulty bucket or entry in a 58 | // If the error was recoverable, none is returned, but the 59 | // Logger in the Options will be called (if set) to log the error. 60 | ErrorModeContinue 61 | 62 | errorModeMax 63 | ) 64 | 65 | func WriterLogger(w io.Writer) Logger { 66 | return &writerLogger{w: w} 67 | } 68 | 69 | // DefaultLogger produces a logger that writes to stderr. 70 | func DefaultLogger() Logger { 71 | return &writerLogger{w: os.Stderr} 72 | } 73 | 74 | // NullLogger produces a logger that discards all messages. 75 | func NullLogger() Logger { 76 | return &writerLogger{w: io.Discard} 77 | } 78 | 79 | var ( 80 | // ErrChangedSplitFunc is returned when the configured split func 81 | // in options does not fit to the state on disk. 82 | ErrChangedSplitFunc = errors.New("split func changed") 83 | ) 84 | 85 | // BucketSplitConf defines what keys are sorted in what bucket. 86 | // See Options.BucketSplitConf for more info. 87 | type BucketSplitConf struct { 88 | // Func is the function that does the splitting. 89 | Func func(item.Key) item.Key 90 | 91 | // Name is used as identifier to figure out 92 | // when the disk split func changed. 93 | Name string 94 | } 95 | 96 | // Options gives you some knobs to configure the queue. 97 | // Read the individual options carefully, as some of them 98 | // can only be set on the first call to Open() 99 | type Options struct { 100 | // SyncMode controls how often we sync data to the disk. The more data we sync 101 | // the more durable is the queue at the cost of throughput. 102 | // Default is the safe SyncFull. Think twice before lowering this. 103 | SyncMode SyncMode 104 | 105 | // Logger is used to output some non-critical warnigns or errors that could 106 | // have been recovered. By default we print to stderr. 107 | // Only warnings or errors are logged, no debug or informal messages. 108 | Logger Logger 109 | 110 | // ErrorMode defines how non-critical errors are handled. 111 | // See the individual enum values for more info. 112 | ErrorMode ErrorMode 113 | 114 | // BucketSplitConf defines what key goes to what bucket. 115 | // The provided function should clamp the key value to 116 | // a common value. Each same value that was returned goes 117 | // into the same The returned value should be also 118 | // the minimum key of the 119 | // 120 | // Example: '(key / 10) * 10' would produce buckets with 10 items. 121 | // 122 | // What bucket size to choose? Please refer to the FAQ in the README. 123 | // 124 | // NOTE: This may not be changed after you opened a queue with it! 125 | // Only way to change is to create a new queue and shovel the 126 | // old data into it. 127 | BucketSplitConf BucketSplitConf 128 | 129 | // MaxParallelOpenBuckets limits the number of buckets that can be opened 130 | // in parallel. Normally, operations like Push() will create more and more 131 | // buckets with time and old buckets do not get closed automatically, as 132 | // we don't know when they get accessed again. If there are more buckets 133 | // open than this number they get closed and will be re-opened if accessed 134 | // again. If this happens frequently, this comes with a performance penalty. 135 | // If you tend to access your data with rather random keys, you might want 136 | // to increase this number, depending on how much resources you have. 
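//
// For a rough sense of scale (assuming nanosecond keys and the default
// ShiftBucketSplitConf(37), i.e. buckets spanning roughly two minutes):
// a consumer that reads strictly in key order only touches one or two
// buckets at a time, whereas access patterns that jump across a full day
// of data can hit several hundred distinct buckets and will re-open them
// constantly if this limit is low.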
137 | // 138 | // If this number is <= 0, then this feature is disabled, which is not 139 | // recommended. 140 | MaxParallelOpenBuckets int 141 | } 142 | 143 | // DefaultOptions give you a set of options that are good to enough to try some 144 | // experiments. Your mileage can vary a lot with different settings, so make 145 | // sure to do some benchmarking! 146 | func DefaultOptions() Options { 147 | return Options{ 148 | SyncMode: SyncFull, 149 | ErrorMode: ErrorModeAbort, 150 | Logger: DefaultLogger(), 151 | BucketSplitConf: DefaultBucketSplitConf, 152 | MaxParallelOpenBuckets: 4, 153 | } 154 | } 155 | 156 | // DefaultBucketSplitConf assumes that `key` is a nanosecond unix timestamps 157 | // and divides data (roughly) in 2m minute buckets. 158 | var DefaultBucketSplitConf = ShiftBucketSplitConf(37) 159 | 160 | // ShiftBucketSplitConf creates a fast BucketSplitConf that divides data into buckets 161 | // by masking `shift` less significant bits of the key. With a shift 162 | // of 37 you roughly get 2m buckets (if your key input are nanosecond-timestamps). 163 | // If you want to calculate the size of a shift, use this formula: 164 | // (2 ** shift) / (1e9 / 60) = minutes 165 | func ShiftBucketSplitConf(shift int) BucketSplitConf { 166 | timeMask := ^item.Key(0) << shift 167 | return BucketSplitConf{ 168 | Name: fmt.Sprintf("shift:%d", shift), 169 | Func: func(key item.Key) item.Key { 170 | return key & timeMask 171 | }, 172 | } 173 | } 174 | 175 | // FixedSizeBucketSplitConf returns a BucketSplitConf that divides buckets into 176 | // equal sized buckets with `n` entries. This can also be used to create 177 | // time-based keys, if you use nanosecond based keys and pass time.Minute 178 | // to create a buckets with a size of one minute. 179 | func FixedSizeBucketSplitConf(n uint64) BucketSplitConf { 180 | if n == 0 { 181 | // avoid zero division. 182 | n = 1 183 | } 184 | 185 | return BucketSplitConf{ 186 | Name: fmt.Sprintf("fixed:%d", n), 187 | Func: func(key item.Key) item.Key { 188 | return (key / item.Key(n)) * item.Key(n) 189 | }, 190 | } 191 | } 192 | 193 | func (o *Options) Validate() error { 194 | if o.Logger == nil { 195 | // this allows us to leave out quite some null checks when 196 | // using the logger option, even when it's not set. 197 | o.Logger = NullLogger() 198 | } 199 | 200 | if !o.SyncMode.IsValid() { 201 | return errors.New("invalid sync mode") 202 | } 203 | 204 | if !o.ErrorMode.IsValid() { 205 | return errors.New("invalid error mode") 206 | } 207 | 208 | if o.BucketSplitConf.Func == nil { 209 | return errors.New("bucket func is not allowed to be empty") 210 | } 211 | 212 | if o.MaxParallelOpenBuckets == 0 { 213 | // For the outside, that's the same thing, but closeUnused() internally 214 | // actually knows how to keep the number of buckets to zero, so be clear 215 | // that the user wants to disable this feature. 216 | o.MaxParallelOpenBuckets = -1 217 | } 218 | 219 | return nil 220 | } 221 | -------------------------------------------------------------------------------- /realworld_test.go: -------------------------------------------------------------------------------- 1 | //go:build slow 2 | // +build slow 3 | 4 | package timeq 5 | 6 | import ( 7 | "fmt" 8 | "math/rand" 9 | "os" 10 | "path/filepath" 11 | "testing" 12 | "time" 13 | 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | const ( 18 | // We're going to use nanosecond epoch timestamps: 19 | keyOff = 1703242301745157676 20 | 21 | // Points usually come in "bursts", i.e. 
10 point with timestamps 22 | // very close together and each burst being roughly 10ms from each other. 23 | burstSize = 10 24 | ) 25 | 26 | func push(t *testing.T, rng *rand.Rand, q *Queue, batchIdx int64) { 27 | // Each batch should have a more or less random offset from the keyOff: 28 | batchOff := batchIdx * int64(2*time.Second+time.Duration(rand.Int63n(int64(500*time.Millisecond)))-500*time.Millisecond) 29 | 30 | // Length of each batch is also more or less random, but most of the time constant: 31 | batchLenOptions := []int64{ 32 | 2000, 33 | 2000, 34 | 2000, 35 | 2000, 36 | 2000, 37 | 2000, 38 | 1, 39 | 32, 40 | 768, 41 | 1024, 42 | 100, 43 | } 44 | 45 | batchLen := batchLenOptions[rng.Intn(len(batchLenOptions))] 46 | 47 | var batch Items 48 | 49 | for idx := int64(0); idx < batchLen; idx++ { 50 | burstOff := (idx / burstSize) * int64(10*time.Millisecond) 51 | key := keyOff + batchOff + burstOff + int64(idx%burstSize) 52 | 53 | blobSize := rng.Intn(100) + 1 54 | blob := make([]byte, blobSize) 55 | 56 | _, err := rng.Read(blob) 57 | require.NoError(t, err) 58 | 59 | item := Item{ 60 | Key: Key(key), 61 | Blob: blob, 62 | } 63 | 64 | batch = append(batch, item) 65 | } 66 | 67 | require.NoError(t, q.Push(batch)) 68 | } 69 | 70 | func shovel(t *testing.T, waiting, unacked *Queue) { 71 | unackedLenBefore := unacked.Len() 72 | waitingLenBefore := waiting.Len() 73 | 74 | nshoveled, err := unacked.Shovel(waiting) 75 | require.NoError(t, err) 76 | 77 | unackedLenAfter := unacked.Len() 78 | waitingLenAfter := waiting.Len() 79 | 80 | require.Equal(t, 0, unackedLenAfter) 81 | require.Equal(t, unackedLenBefore+waitingLenBefore, waitingLenAfter) 82 | require.Equal(t, waitingLenAfter-waitingLenBefore, nshoveled) 83 | } 84 | 85 | func move(t *testing.T, waiting, unacked *Queue) { 86 | var lastKey Key 87 | var count int 88 | 89 | queueLenBefore := waiting.Len() 90 | const popSize = 2000 91 | require.NoError(t, waiting.Read(popSize, func(_ Transaction, items Items) (ReadOp, error) { 92 | count += len(items) 93 | 94 | for idx, item := range items { 95 | if lastKey != 0 && item.Key < lastKey { 96 | diff := time.Duration(lastKey - item.Key) 97 | require.Fail(t, fmt.Sprintf( 98 | "item %d has lower key (%v) than the item before (%v) - diff: %v", 99 | idx, 100 | item.Key, 101 | lastKey, 102 | diff, 103 | )) 104 | } 105 | 106 | lastKey = item.Key 107 | } 108 | 109 | return ReadOpPop, unacked.Push(items) 110 | })) 111 | 112 | expect := popSize 113 | if popSize > queueLenBefore { 114 | expect = queueLenBefore 115 | } 116 | 117 | require.Equal(t, expect, count) 118 | } 119 | 120 | func ack(t *testing.T, rng *rand.Rand, waiting, unacked *Queue) { 121 | // Each batch should have a more or less random offset from the keyOff: 122 | deleteOff := Key(rng.Int63n(int64(time.Minute)) - int64(15*time.Second)) 123 | 124 | var waitingOff Key 125 | require.NoError(t, waiting.Read(1, func(_ Transaction, items Items) (ReadOp, error) { 126 | waitingOff = items[0].Key 127 | return ReadOpPeek, nil 128 | })) 129 | 130 | var unackedOff Key 131 | require.NoError(t, unacked.Read(1, func(_ Transaction, items Items) (ReadOp, error) { 132 | unackedOff = items[0].Key 133 | return ReadOpPeek, nil 134 | })) 135 | 136 | var err error 137 | 138 | _, err = waiting.Delete(-1, waitingOff+deleteOff) 139 | require.NoError(t, err) 140 | 141 | _, err = unacked.Delete(-1, unackedOff+deleteOff) 142 | require.NoError(t, err) 143 | } 144 | 145 | func TestRealWorldAckQueue(t *testing.T) { 146 | // Still create test dir to make sure it does not error 
out because of that: 147 | dir, err := os.MkdirTemp("", "timeq-realworldtest") 148 | require.NoError(t, err) 149 | defer os.RemoveAll(dir) 150 | 151 | rng := rand.New(rand.NewSource(0)) 152 | 153 | opts := DefaultOptions() 154 | opts.BucketSplitConf = ShiftBucketSplitConf(30) 155 | opts.MaxParallelOpenBuckets = 4 156 | 157 | waitingDir := filepath.Join(dir, "waiting") 158 | unackedDir := filepath.Join(dir, "unacked") 159 | 160 | waitingQueue, err := Open(waitingDir, opts) 161 | require.NoError(t, err) 162 | 163 | unackedQueue, err := Open(unackedDir, opts) 164 | require.NoError(t, err) 165 | 166 | // Plan: 167 | // 168 | // - Push several times (varying sizes with real time, mostly increasing order) 169 | // - Move waiting -> unacked (verify results) 170 | // - Push again. 171 | // - Shovel from waiting to unacked 172 | // - Push. 173 | // - Another move round. 174 | // - Push. 175 | // - Delete some older message. 176 | // - Pop the rest. 177 | // 178 | // Basically the full lifecycle of an ack queue. 179 | 180 | for run := 0; run < 10; run++ { 181 | var batchIdx int64 182 | 183 | for idx := 0; idx < 10; idx++ { 184 | push(t, rng, waitingQueue, batchIdx) 185 | batchIdx++ 186 | } 187 | 188 | for idx := 0; idx < 5; idx++ { 189 | move(t, waitingQueue, unackedQueue) 190 | } 191 | 192 | for idx := 0; idx < 10; idx++ { 193 | push(t, rng, waitingQueue, batchIdx) 194 | batchIdx++ 195 | } 196 | 197 | shovel(t, waitingQueue, unackedQueue) 198 | 199 | for idx := 0; idx < 10; idx++ { 200 | push(t, rng, waitingQueue, batchIdx) 201 | batchIdx++ 202 | } 203 | 204 | for idx := 0; idx < 5; idx++ { 205 | move(t, waitingQueue, unackedQueue) 206 | } 207 | 208 | ack(t, rng, waitingQueue, unackedQueue) 209 | 210 | for idx := 0; idx < 100; idx++ { 211 | move(t, waitingQueue, unackedQueue) 212 | } 213 | 214 | if run == 5 { 215 | // Re-open in between to make it a bit harder: 216 | require.NoError(t, waitingQueue.Close()) 217 | require.NoError(t, unackedQueue.Close()) 218 | 219 | waitingQueue, err = Open(waitingDir, opts) 220 | require.NoError(t, err) 221 | 222 | unackedQueue, err = Open(unackedDir, opts) 223 | require.NoError(t, err) 224 | } 225 | } 226 | 227 | require.NoError(t, waitingQueue.Close()) 228 | require.NoError(t, unackedQueue.Close()) 229 | } 230 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | 6 | usage() { 7 | echo "$0 [--help|--slow]" 8 | echo 9 | echo "Run testsuite." 10 | echo 11 | echo "--slow Run also tests tagged as slow." 12 | echo 13 | exit 0 14 | } 15 | 16 | TAG_ARGUMENT= 17 | 18 | while [[ $# -gt 0 ]]; do 19 | case $1 in 20 | --slow) 21 | TAG_ARGUMENT="--tags=slow" 22 | shift # past argument 23 | ;; 24 | -*) 25 | echo "Unknown option $1" 26 | exit 1 27 | ;; 28 | *) 29 | usage 30 | ;; 31 | esac 32 | done 33 | 34 | # For the coverage we need to figure out a list of all packages 35 | # (this enables coverage of tests that test code in other packages) 36 | ALL_PACKAGES=$( \ 37 | find -iname '*.go' -exec dirname {} \; | \ 38 | tr -d '.' | \ 39 | sort | \ 40 | uniq | \ 41 | xargs -n1 printf 'github.com/sahib/timeq%s\n' | \ 42 | paste -sd ',' \ 43 | ) 44 | 45 | gotestsum -- \ 46 | ./... 
"${TAG_ARGUMENT}" \ 47 | -race \ 48 | -coverprofile=cover.out \ 49 | -covermode=atomic \ 50 | -coverpkg "${ALL_PACKAGES}" 51 | -------------------------------------------------------------------------------- /slow_test.go: -------------------------------------------------------------------------------- 1 | //go:build slow 2 | // +build slow 3 | 4 | package timeq 5 | 6 | import ( 7 | "io/fs" 8 | "os" 9 | "path/filepath" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func getSizeOfDir(t *testing.T, root string) (size int64) { 16 | require.NoError(t, filepath.Walk(root, func(_ string, info fs.FileInfo, err error) error { 17 | if err != nil { 18 | return err 19 | } 20 | 21 | if !info.Mode().IsRegular() { 22 | return nil 23 | } 24 | 25 | size += info.Size() 26 | return nil 27 | })) 28 | return 29 | } 30 | 31 | // Check that we can create value logs over 4G in size. 32 | func TestAPI4GLog(t *testing.T) { 33 | dir, err := os.MkdirTemp("", "timeq-apitest") 34 | require.NoError(t, err) 35 | defer os.RemoveAll(dir) 36 | 37 | opts := DefaultOptions() 38 | opts.BucketSplitConf = ShiftBucketSplitConf(5 * 1024 * 1024 * 1024) 39 | queue, err := Open(dir, opts) 40 | require.NoError(t, err) 41 | 42 | const N = 1000 43 | 44 | var items Items 45 | for idx := 0; idx < N; idx++ { 46 | items = append(items, Item{ 47 | Key: Key(idx), 48 | Blob: make([]byte, 16*1024), 49 | }) 50 | } 51 | 52 | const FourGB = 4 * 1024 * 1024 * 1024 53 | var expected int 54 | for getSizeOfDir(t, dir) <= FourGB+(1*1024*1024) { 55 | require.NoError(t, queue.Push(items)) 56 | expected += len(items) 57 | } 58 | 59 | var got int 60 | for queue.Len() > 0 { 61 | require.NoError(t, queue.Read(N, func(_ Transaction, items Items) (ReadOp, error) { 62 | got += len(items) 63 | return ReadOpPop, nil 64 | })) 65 | } 66 | 67 | require.Equal(t, got, expected) 68 | require.NoError(t, queue.Close()) 69 | } 70 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "errors" 5 | "os" 6 | "syscall" 7 | 8 | "github.com/otiai10/copy" 9 | ) 10 | 11 | func moveFileOrDir(src, dst string) error { 12 | if err := os.Rename(src, dst); !errors.Is(err, syscall.EXDEV) { 13 | // NOTE: this includes err==nil 14 | return err 15 | } 16 | 17 | // copying directories has many edge cases, so rely on a library 18 | // for that (and avoid writing too much tests for that) 19 | opts := copy.Options{Sync: true} 20 | if err := copy.Copy(src, dst, opts); err != nil { 21 | return err 22 | } 23 | 24 | return os.RemoveAll(src) 25 | } 26 | -------------------------------------------------------------------------------- /util_test.go: -------------------------------------------------------------------------------- 1 | package timeq 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/sahib/timeq/item/testutils" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestMoveFileIfRenamable(t *testing.T) { 13 | t.Parallel() 14 | 15 | dir, err := os.MkdirTemp("", "timeq-copyfile") 16 | require.NoError(t, err) 17 | defer os.RemoveAll(dir) 18 | 19 | expData := []byte("Hello") 20 | 21 | aPath := filepath.Join(dir, "a") 22 | bPath := filepath.Join(dir, "b") 23 | require.NoError(t, os.WriteFile(aPath, expData, 0600)) 24 | require.NoError(t, moveFileOrDir(aPath, bPath)) 25 | 26 | gotData, err := os.ReadFile(bPath) 27 | require.NoError(t, err) 28 | require.Equal(t, 
expData, gotData) 29 | 30 | _, err = os.Stat(aPath) 31 | require.True(t, os.IsNotExist(err)) 32 | } 33 | 34 | func TestMoveIfNotRenamable(t *testing.T) { 35 | t.Parallel() 36 | 37 | testutils.WithTempMount(t, func(ext4Dir string) { 38 | tmpDir, err := os.MkdirTemp("", "timeq-copyfile") 39 | require.NoError(t, err) 40 | defer os.RemoveAll(tmpDir) 41 | 42 | aData := []byte("Hello") 43 | bData := []byte("World") 44 | aPath := filepath.Join(tmpDir, "a") 45 | bPath := filepath.Join(tmpDir, "b") 46 | require.NoError(t, os.WriteFile(aPath, aData, 0600)) 47 | require.NoError(t, os.WriteFile(bPath, bData, 0600)) 48 | 49 | dstDir := filepath.Join(ext4Dir, "dst") 50 | require.NoError(t, moveFileOrDir(tmpDir, dstDir)) 51 | 52 | entries, err := os.ReadDir(dstDir) 53 | require.NoError(t, err) 54 | require.Len(t, entries, 2) 55 | 56 | expAData, err := os.ReadFile(filepath.Join(dstDir, "a")) 57 | require.NoError(t, err) 58 | 59 | expBData, err := os.ReadFile(filepath.Join(dstDir, "b")) 60 | require.NoError(t, err) 61 | 62 | require.Equal(t, aData, expAData) 63 | require.Equal(t, bData, expBData) 64 | 65 | _, err = os.Stat(aPath) 66 | require.True(t, os.IsNotExist(err)) 67 | _, err = os.Stat(bPath) 68 | require.True(t, os.IsNotExist(err)) 69 | }) 70 | } 71 | 72 | func TestMoveErrorIfRenamable(t *testing.T) { 73 | t.Parallel() 74 | 75 | dir, err := os.MkdirTemp("", "timeq-copyfile") 76 | require.NoError(t, err) 77 | defer os.RemoveAll(dir) 78 | 79 | expData := []byte("Hello") 80 | aPath := filepath.Join(dir, "a") 81 | require.NoError(t, os.WriteFile(aPath, expData, 0600)) 82 | 83 | badDir := filepath.Join(dir, "bad") 84 | require.NoError(t, os.MkdirAll(badDir, 0400)) 85 | require.Error(t, moveFileOrDir(aPath, badDir)) 86 | 87 | // Nothing should have been deleted: 88 | _, err = os.Stat(aPath) 89 | require.NoError(t, err) 90 | } 91 | 92 | func TestMoveErrorIfNotRenamable(t *testing.T) { 93 | t.Parallel() 94 | 95 | testutils.WithTempMount(t, func(ext4Dir string) { 96 | tmpDir, err := os.MkdirTemp("", "timeq-copyfile") 97 | require.NoError(t, err) 98 | defer os.RemoveAll(tmpDir) 99 | 100 | aData := []byte("Hello") 101 | bData := []byte("World") 102 | aPath := filepath.Join(tmpDir, "a") 103 | bPath := filepath.Join(tmpDir, "b") 104 | require.NoError(t, os.WriteFile(aPath, aData, 0600)) 105 | require.NoError(t, os.WriteFile(bPath, bData, 0600)) 106 | 107 | // Create dst dir as only-readable, not even cd-able dir: 108 | dstDir := filepath.Join(ext4Dir, "dst") 109 | require.NoError(t, os.MkdirAll(dstDir, 0400)) 110 | 111 | require.Error(t, moveFileOrDir(tmpDir, dstDir)) 112 | 113 | _, err = os.Stat(aPath) 114 | require.NoError(t, err) 115 | _, err = os.Stat(bPath) 116 | require.NoError(t, err) 117 | }) 118 | } 119 | -------------------------------------------------------------------------------- /vlog/heap.go: -------------------------------------------------------------------------------- 1 | package vlog 2 | 3 | type Iters []Iter 4 | 5 | // NOTE: This is more or less a copy of container/heap in Go's standard 6 | // library. It was modified to directly use the Iter struct instead of relying 7 | // on heap.Interface. This allows the compiler to inline a lot more and to skip 8 | // the expensive Len() check on every Fix() call (which is only expensive 9 | // because it's behind an interface). 10 | // 11 | // This seemingly desperate measure gives us about 10% better performance for 12 | // Pop() heavy workloads! 
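// Typical usage (a sketch inferred from the methods below, which notably omit Pop): the iterator
// with the smallest key sits at index 0; consume is[0].Item(), advance it with Next(), then call
// Fix(0) to restore the heap order instead of popping and re-pushing.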
13 | 14 | func (is *Iters) Push(i Iter) { 15 | *is = append(*is, i) 16 | is.up(is.Len() - 1) 17 | } 18 | 19 | func (is *Iters) Len() int { 20 | return len(*is) 21 | } 22 | 23 | func (is *Iters) Fix(i int) { 24 | if !is.down(i, is.Len()) { 25 | is.up(i) 26 | } 27 | } 28 | 29 | func (is Iters) swap(i, j int) { 30 | is[i], is[j] = is[j], is[i] 31 | } 32 | 33 | func (is Iters) less(i, j int) bool { 34 | if is[i].exhausted != is[j].exhausted { 35 | // sort exhausted iters to the back 36 | return !is[i].exhausted 37 | } 38 | 39 | return is[i].item.Key < is[j].item.Key 40 | } 41 | 42 | func (is *Iters) up(j int) { 43 | for { 44 | i := (j - 1) / 2 // parent 45 | if i == j || !is.less(j, i) { 46 | break 47 | } 48 | 49 | is.swap(i, j) 50 | j = i 51 | } 52 | } 53 | 54 | func (is *Iters) down(i0, n int) bool { 55 | i := i0 56 | for { 57 | j1 := 2*i + 1 58 | if j1 >= n || j1 < 0 { // j1 < 0 after int overflow 59 | break 60 | } 61 | j := j1 // left child 62 | if j2 := j1 + 1; j2 < n && is.less(j2, j1) { 63 | j = j2 // = 2*i + 2 // right child 64 | } 65 | if !is.less(j, i) { 66 | break 67 | } 68 | is.swap(i, j) 69 | i = j 70 | } 71 | 72 | return i > i0 73 | } 74 | -------------------------------------------------------------------------------- /vlog/iter.go: -------------------------------------------------------------------------------- 1 | package vlog 2 | 3 | import ( 4 | "github.com/sahib/timeq/item" 5 | ) 6 | 7 | // NOTE: There is quite some performance potential hidden here, 8 | // if we manage to fit Iter in a single cache line: 9 | // 10 | // Possible ideas to get down from 104 to 64: 11 | // 12 | // - Use only one len field. -> -8 13 | // - Always pass Item out on Next() as out param. -> -32 14 | // -> Not possible, because the item might not be consumed directly 15 | // as we might realize that another iter has more priority. 16 | // - Do not use exhausted, set len to 0. 17 | // -> Does not work, as currLen is zero before last call to Next() 18 | // - continueOnErr can be part of Log. -8 (if exhausted goes away too) 19 | // - error could be returned on Next() directly. 20 | type Iter struct { 21 | firstKey item.Key 22 | currOff, prevOff item.Off 23 | item item.Item 24 | log *Log 25 | err error 26 | currLen, prevLen item.Off 27 | exhausted bool 28 | continueOnErr bool 29 | } 30 | 31 | func (li *Iter) Next() bool { 32 | if li.currLen == 0 || li.exhausted { 33 | li.exhausted = true 34 | return false 35 | } 36 | 37 | if len(li.log.mmap) > 0 && li.currOff >= item.Off(li.log.size) { 38 | // stop iterating when end of log reached. 39 | li.exhausted = true 40 | return false 41 | } 42 | 43 | for { 44 | if err := li.log.readItemAt(li.currOff, &li.item); err != nil { 45 | if !li.continueOnErr { 46 | li.err = err 47 | li.exhausted = true 48 | return false 49 | } 50 | 51 | li.currOff = li.log.findNextItem(li.currOff) 52 | if li.currOff >= item.Off(li.log.size) { 53 | li.exhausted = true 54 | return false 55 | } 56 | 57 | continue 58 | } 59 | 60 | break 61 | } 62 | 63 | li.prevOff = li.currOff 64 | li.prevLen = li.currLen 65 | 66 | // advance iter to next position: 67 | li.currOff += item.Off(li.item.StorageSize()) 68 | li.currLen-- 69 | 70 | return true 71 | } 72 | 73 | func (li *Iter) Exhausted() bool { 74 | return li.exhausted 75 | } 76 | 77 | // Key returns the key this iterator was created with 78 | // This is not the current key of the item! 79 | func (li *Iter) FirstKey() item.Key { 80 | return li.firstKey 81 | } 82 | 83 | // Item returns the current item. 
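// The returned Blob aliases the underlying memory map (see the note in readItemAt), so copy it
// via item.Copy() if it has to outlive the log.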
84 | // It is not valid before Next() has been called. 85 | func (li *Iter) Item() item.Item { 86 | return li.item 87 | } 88 | 89 | // CurrentLocation returns the location of the current entry. 90 | // It is not valid before Next() has been called. 91 | func (li *Iter) CurrentLocation() item.Location { 92 | return item.Location{ 93 | Key: li.item.Key, 94 | Off: li.prevOff, 95 | Len: li.prevLen, 96 | } 97 | } 98 | 99 | func (li *Iter) Err() error { 100 | return li.err 101 | } 102 | -------------------------------------------------------------------------------- /vlog/iter_test.go: -------------------------------------------------------------------------------- 1 | package vlog 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/sahib/timeq/item" 10 | "github.com/sahib/timeq/item/testutils" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestIter(t *testing.T) { 15 | t.Parallel() 16 | 17 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 18 | require.NoError(t, err) 19 | defer os.RemoveAll(tmpDir) 20 | 21 | // Push a few items before: 22 | log, err := Open(filepath.Join(tmpDir, "log"), true) 23 | require.NoError(t, err) 24 | _, err = log.Push(testutils.GenItems(0, 10, 1)) 25 | require.NoError(t, err) 26 | 27 | loc, err := log.Push(testutils.GenItems(10, 20, 1)) 28 | require.NoError(t, err) 29 | 30 | firstBatchOff := (item.HeaderSize+item.TrailerSize)*10 + 10 31 | require.Equal(t, loc, item.Location{ 32 | Key: 10, 33 | Off: item.Off(firstBatchOff), 34 | Len: 10, 35 | }) 36 | 37 | var count int 38 | iter := log.At(loc, true) 39 | for iter.Next() { 40 | it := iter.Item() 41 | require.Equal(t, item.Item{ 42 | Key: item.Key(count + 10), 43 | Blob: []byte(fmt.Sprintf("%d", count+10)), 44 | }, it) 45 | 46 | // current location is sitting on the next entry already. 47 | currLoc := iter.CurrentLocation() 48 | require.Equal(t, item.Location{ 49 | Key: item.Key(count + 10), 50 | Off: item.Off(firstBatchOff + count*(item.HeaderSize+2+item.TrailerSize)), 51 | Len: item.Off(10 - count), 52 | }, currLoc) 53 | count++ 54 | } 55 | 56 | require.Equal(t, 10, count) 57 | require.NoError(t, iter.Err()) 58 | require.NoError(t, log.Close()) 59 | } 60 | 61 | func TestIterEmpty(t *testing.T) { 62 | t.Parallel() 63 | 64 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 65 | require.NoError(t, err) 66 | defer os.RemoveAll(tmpDir) 67 | 68 | log, err := Open(filepath.Join(tmpDir, "log"), true) 69 | require.NoError(t, err) 70 | iter := log.At(item.Location{}, true) 71 | 72 | require.False(t, iter.Next()) 73 | require.NoError(t, iter.Err()) 74 | require.NoError(t, log.Close()) 75 | } 76 | 77 | func TestIterInvalidLocation(t *testing.T) { 78 | t.Parallel() 79 | 80 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 81 | require.NoError(t, err) 82 | defer os.RemoveAll(tmpDir) 83 | 84 | log, err := Open(filepath.Join(tmpDir, "log"), true) 85 | require.NoError(t, err) 86 | iter := log.At(item.Location{ 87 | Off: 0x2A, 88 | Len: 1000, 89 | }, true) 90 | 91 | require.False(t, iter.Next()) 92 | require.True(t, iter.Exhausted()) 93 | require.NoError(t, iter.Err()) 94 | require.NoError(t, log.Close()) 95 | } 96 | 97 | func TestIterBrokenStream(t *testing.T) { 98 | t.Parallel() 99 | 100 | for _, continueOnErr := range []bool{false, true} { 101 | for idx := 0; idx < 4; idx++ { 102 | t.Run(fmt.Sprintf("%d-continue-%v", idx, continueOnErr), func(t *testing.T) { 103 | // depending on which index of the size field 104 | // is overwritten we test for different errors. 
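// (A corrupted size field either trips readItemAt's max-allocation guard or its
// payload bounds check, so both error paths get exercised.)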
105 | testIterBrokenStream(t, idx, continueOnErr) 106 | }) 107 | } 108 | } 109 | } 110 | 111 | func testIterBrokenStream(t *testing.T, overwriteIndex int, continueOnErr bool) { 112 | t.Parallel() 113 | 114 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 115 | require.NoError(t, err) 116 | defer os.RemoveAll(tmpDir) 117 | 118 | log, err := Open(filepath.Join(tmpDir, "log"), true) 119 | require.NoError(t, err) 120 | 121 | item1 := item.Item{Key: 23, Blob: []byte("blob1")} 122 | item2 := item.Item{Key: 42, Blob: []byte("blob2")} 123 | 124 | loc, err := log.Push(item.Items{item1, item2}) 125 | require.NoError(t, err) 126 | 127 | // Modify the size field to make bigger than log.size 128 | log.mmap[overwriteIndex] = 0xFF 129 | 130 | // The iterator should be able to figure out the next 131 | // value at least: 132 | iter := log.At(loc, continueOnErr) 133 | if continueOnErr { 134 | require.True(t, iter.Next()) 135 | it := iter.Item() 136 | require.Equal(t, item.Key(42), it.Key) 137 | require.Equal(t, item2.Blob, it.Blob) 138 | } 139 | require.False(t, iter.Next()) 140 | } 141 | 142 | func TestIterHeap(t *testing.T) { 143 | iters := Iters{} 144 | itersHeap := &iters 145 | require.Equal(t, 0, itersHeap.Len()) 146 | 147 | itersHeap.Push(Iter{ 148 | exhausted: true, 149 | item: item.Item{Key: 100}, 150 | }) 151 | itersHeap.Push(Iter{ 152 | exhausted: false, 153 | item: item.Item{Key: 50}, 154 | }) 155 | itersHeap.Push(Iter{ 156 | exhausted: false, 157 | item: item.Item{Key: 0}, 158 | }) 159 | 160 | it1 := iters[0] // min must be at front. 161 | it2 := iters[2] // heap condition says it should be second. 162 | it3 := iters[1] // third one. 163 | 164 | require.False(t, it1.Exhausted()) 165 | require.False(t, it2.Exhausted()) 166 | require.True(t, it3.Exhausted()) 167 | 168 | require.Equal(t, item.Key(0), it1.CurrentLocation().Key) 169 | require.Equal(t, item.Key(50), it2.CurrentLocation().Key) 170 | require.Equal(t, item.Key(100), it3.CurrentLocation().Key) 171 | } 172 | -------------------------------------------------------------------------------- /vlog/vlog.go: -------------------------------------------------------------------------------- 1 | package vlog 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "os" 8 | 9 | "github.com/sahib/timeq/item" 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | type Log struct { 14 | path string 15 | fd *os.File 16 | mmap []byte 17 | size int64 18 | syncOnWrite bool 19 | isEmpty bool 20 | } 21 | 22 | var PageSize int64 = 4096 23 | 24 | func init() { 25 | PageSize = int64(os.Getpagesize()) 26 | } 27 | 28 | func nextSize(size int64) int64 { 29 | if size < 0 { 30 | return 0 31 | } 32 | 33 | currPages := size / PageSize 34 | 35 | // decide on how much to increase: 36 | var shift int 37 | const mb int64 = 1024 * 1024 38 | switch { 39 | case size >= (100 * mb): 40 | // 128 pages per block: 41 | shift = 7 42 | case size >= (10 * mb): 43 | // 64 pages per block 44 | shift = 6 45 | case size >= (1 * mb): 46 | // 32 pages per block 47 | shift = 5 48 | case size >= 200*1024: 49 | // 16 pages per block 50 | shift = 4 51 | default: 52 | // 8 pages per block 53 | shift = 3 54 | } 55 | 56 | // use shift to round to next page alignment: 57 | nextSize := (((currPages >> shift) + 1) << shift) * PageSize 58 | return nextSize 59 | } 60 | 61 | func Open(path string, syncOnWrite bool) (*Log, error) { 62 | l := &Log{ 63 | path: path, 64 | syncOnWrite: syncOnWrite, 65 | } 66 | 67 | flags := os.O_APPEND | os.O_CREATE | os.O_RDWR 68 | fd, err := os.OpenFile(path, flags, 
0600) 69 | if err != nil { 70 | return nil, fmt.Errorf("log: open: %w", err) 71 | } 72 | 73 | info, err := fd.Stat() 74 | if err != nil { 75 | fd.Close() 76 | return nil, fmt.Errorf("log: stat: %w", err) 77 | } 78 | 79 | mmapSize := info.Size() 80 | if mmapSize == 0 { 81 | mmapSize = nextSize(0) 82 | if err := fd.Truncate(mmapSize); err != nil { 83 | return nil, fmt.Errorf("truncate: %w", err) 84 | } 85 | 86 | l.isEmpty = true 87 | } 88 | 89 | mmap, err := unix.Mmap( 90 | int(fd.Fd()), 91 | 0, 92 | int(mmapSize), 93 | unix.PROT_READ|unix.PROT_WRITE, 94 | unix.MAP_SHARED_VALIDATE, 95 | ) 96 | 97 | if err != nil { 98 | fd.Close() 99 | return nil, fmt.Errorf("log: mmap: %w", err) 100 | } 101 | 102 | // give OS a hint that we will likely need that memory soon: 103 | _ = unix.Madvise(mmap, unix.MADV_WILLNEED) 104 | 105 | l.size = info.Size() 106 | l.fd = fd 107 | l.mmap = mmap 108 | 109 | // read the initial size. We can't use the file size as we 110 | // pre-allocated the file to a certain length and we don't 111 | // know much of it was used. If we would use info.Size() here 112 | // we would waste some space since new pushes are written beyond 113 | // the truncated area. Just shrink to the last written data. 114 | l.size = l.shrink() 115 | return l, nil 116 | } 117 | 118 | func (l *Log) shrink() int64 { 119 | // we take advantage of the end marker appended to each 120 | // log entry. Since ftruncate() will always pad with zeroes 121 | // it's easy for us to find the beginning of the file. 122 | idx := l.size - 1 123 | for ; idx >= 0 && l.mmap[idx] == 0; idx-- { 124 | } 125 | 126 | return idx + 1 127 | } 128 | 129 | func (l *Log) writeItem(it item.Item) { 130 | off := l.size 131 | binary.BigEndian.PutUint32(l.mmap[off:], uint32(len(it.Blob))) 132 | off += 4 133 | binary.BigEndian.PutUint64(l.mmap[off:], uint64(it.Key)) 134 | off += 8 135 | off += int64(copy(l.mmap[off:], it.Blob)) 136 | 137 | // add trailer mark: 138 | l.mmap[off] = 0xFF 139 | l.mmap[off+1] = 0xFF 140 | l.size = off + item.TrailerSize 141 | } 142 | 143 | func (l *Log) Push(items item.Items) (loc item.Location, err error) { 144 | addSize := items.StorageSize() 145 | 146 | loc = item.Location{ 147 | Key: items[0].Key, 148 | Off: item.Off(l.size), 149 | Len: item.Off(len(items)), 150 | } 151 | 152 | nextMmapSize := nextSize(l.size + int64(addSize)) 153 | if nextMmapSize > int64(len(l.mmap)) { 154 | // currently mmapped region does not suffice, 155 | // allocate more space for it. 
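// Grow the backing file first so the new pages are valid to touch (accessing an
// mmap beyond EOF would raise SIGBUS), then enlarge the mapping itself.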
156 | if err = l.fd.Truncate(nextMmapSize); err != nil { 157 | err = fmt.Errorf("truncate: %w", err) 158 | return 159 | } 160 | 161 | // If we're unlucky we gonna have to move it: 162 | l.mmap, err = unix.Mremap( 163 | l.mmap, 164 | int(nextMmapSize), 165 | unix.MREMAP_MAYMOVE, 166 | ) 167 | if err != nil { 168 | err = fmt.Errorf("remap: %w", err) 169 | return 170 | } 171 | } 172 | 173 | // copy the items to the file map: 174 | for i := 0; i < len(items); i++ { 175 | l.writeItem(items[i]) 176 | } 177 | 178 | if err = l.Sync(false); err != nil { 179 | err = fmt.Errorf("sync: %w", err) 180 | return 181 | } 182 | 183 | if l.isEmpty && len(items) > 0 { 184 | l.isEmpty = false 185 | } 186 | 187 | return loc, nil 188 | } 189 | 190 | func (l *Log) At(loc item.Location, continueOnErr bool) Iter { 191 | return Iter{ 192 | firstKey: loc.Key, 193 | currOff: loc.Off, 194 | currLen: loc.Len, 195 | log: l, 196 | continueOnErr: continueOnErr, 197 | } 198 | } 199 | 200 | func (l *Log) findNextItem(off item.Off) item.Off { 201 | offSize := item.Off(l.size) 202 | 203 | for idx := off + 1; idx < offSize-1; idx++ { 204 | if l.mmap[idx] == 0xFF && l.mmap[idx+1] == 0xFF { 205 | // we found a marker. 206 | nextItemIdx := idx + 2 207 | if nextItemIdx >= offSize { 208 | return offSize 209 | } 210 | 211 | return nextItemIdx 212 | } 213 | } 214 | 215 | return offSize 216 | } 217 | 218 | func (l *Log) readItemAt(off item.Off, it *item.Item) (err error) { 219 | if int64(off)+item.HeaderSize >= l.size { 220 | return nil 221 | } 222 | 223 | // parse header: 224 | siz := binary.BigEndian.Uint32(l.mmap[off+0:]) 225 | key := binary.BigEndian.Uint64(l.mmap[off+4:]) 226 | 227 | if siz > 64*1024*1024 { 228 | // fail-safe if the size field is corrupt: 229 | return fmt.Errorf("log: allocation too big for one value: %d", siz) 230 | } 231 | 232 | if int64(off)+item.HeaderSize+int64(siz)+item.TrailerSize > l.size { 233 | return fmt.Errorf( 234 | "log: bad offset: %d+%d >= %d (payload too big)", 235 | off, 236 | siz, 237 | l.size, 238 | ) 239 | } 240 | 241 | // NOTE: We directly slice the memory map here. This means that the caller 242 | // has to copy the slice if he wants to save it somewhere as we might 243 | // overwrite, unmap or resize the underlying memory at a later point. 244 | // Caller can use item.Copy() or items.Copy() to obtain a copy. 245 | blobOff := off + item.HeaderSize 246 | trailerOff := blobOff + item.Off(siz) 247 | 248 | // check that the trailer was correctly written. 
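// writeItem appends two 0xFF marker bytes after every blob; a truncated or torn
// write leaves the pre-allocated zeroes here instead.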
249 | // (not a checksum, but could be made to one in future versions) 250 | if l.mmap[trailerOff] != 0xFF && l.mmap[trailerOff+1] != 0xFF { 251 | return fmt.Errorf("log: %s: missing trailer: %d", l.path, off) 252 | } 253 | 254 | *it = item.Item{ 255 | Key: item.Key(key), 256 | Blob: l.mmap[blobOff:trailerOff], 257 | } 258 | 259 | return nil 260 | } 261 | 262 | func (l *Log) Sync(force bool) error { 263 | if !l.syncOnWrite && !force { 264 | return nil 265 | } 266 | 267 | return unix.Msync(l.mmap, unix.MS_SYNC) 268 | } 269 | 270 | func (l *Log) Close() error { 271 | syncErr := unix.Msync(l.mmap, unix.MS_SYNC) 272 | unmapErr := unix.Munmap(l.mmap) 273 | closeErr := l.fd.Close() 274 | return errors.Join(syncErr, unmapErr, closeErr) 275 | } 276 | 277 | func (l *Log) IsEmpty() bool { 278 | return l.isEmpty 279 | } 280 | -------------------------------------------------------------------------------- /vlog/vlog_test.go: -------------------------------------------------------------------------------- 1 | package vlog 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/sahib/timeq/item" 9 | "github.com/sahib/timeq/item/testutils" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestLogOpenUnaligned(t *testing.T) { 14 | t.Parallel() 15 | 16 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 17 | require.NoError(t, err) 18 | defer os.RemoveAll(tmpDir) 19 | 20 | fakeBlob := []byte{ 21 | 0x0, 0x0, 0x0, 0x1, // size=1 22 | 0x0, 0x0, 0x0, 0x0, 23 | 0x0, 0x0, 0x0, 0xF, // key=15 24 | 0xFF, 25 | } 26 | 27 | logPath := filepath.Join(tmpDir, "log") 28 | require.NoError(t, os.WriteFile(logPath, fakeBlob, 0600)) 29 | 30 | log, err := Open(logPath, true) 31 | require.NoError(t, err) 32 | require.NoError(t, log.Close()) 33 | } 34 | 35 | func TestLogOpenEmpty(t *testing.T) { 36 | t.Parallel() 37 | 38 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 39 | require.NoError(t, err) 40 | defer os.RemoveAll(tmpDir) 41 | 42 | log, err := Open(filepath.Join(tmpDir, "log"), true) 43 | require.NoError(t, err) 44 | require.NoError(t, log.Close()) 45 | } 46 | 47 | func TestLogOpenPushRead(t *testing.T) { 48 | t.Parallel() 49 | 50 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 51 | require.NoError(t, err) 52 | defer os.RemoveAll(tmpDir) 53 | 54 | log, err := Open(filepath.Join(tmpDir, "log"), true) 55 | require.NoError(t, err) 56 | loc, err := log.Push(testutils.GenItems(1, 2, 1)) 57 | require.NoError(t, err) 58 | require.Equal(t, loc, item.Location{ 59 | Key: 1, 60 | Off: 0, 61 | Len: 1, 62 | }) 63 | 64 | var it item.Item 65 | require.NoError(t, log.readItemAt(loc.Off, &it)) 66 | require.Equal(t, item.Item{ 67 | Key: 1, 68 | Blob: []byte("1"), 69 | }, it) 70 | 71 | require.NoError(t, log.Close()) 72 | } 73 | 74 | func TestLogShrink(t *testing.T) { 75 | t.Parallel() 76 | 77 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 78 | require.NoError(t, err) 79 | defer os.RemoveAll(tmpDir) 80 | 81 | log, err := Open(filepath.Join(tmpDir, "log"), true) 82 | require.NoError(t, err) 83 | firstLoc, err := log.Push(testutils.GenItems(1, 2, 1)) 84 | require.NoError(t, err) 85 | require.NoError(t, log.Close()) 86 | 87 | // re-open: 88 | log, err = Open(filepath.Join(tmpDir, "log"), true) 89 | require.NoError(t, err) 90 | 91 | sndLoc, err := log.Push(testutils.GenItems(2, 3, 1)) 92 | require.NoError(t, err) 93 | 94 | iter := log.At(firstLoc, true) 95 | require.True(t, iter.Next()) 96 | require.Equal(t, item.Item{ 97 | Key: 1, 98 | Blob: []byte("1"), 99 | }, iter.Item()) 100 | 
require.False(t, iter.Next()) 101 | 102 | iter = log.At(sndLoc, true) 103 | require.True(t, iter.Next()) 104 | require.Equal(t, item.Item{ 105 | Key: 2, 106 | Blob: []byte("2"), 107 | }, iter.Item()) 108 | require.False(t, iter.Next()) 109 | 110 | require.NoError(t, iter.Err()) 111 | require.NoError(t, log.Close()) 112 | } 113 | 114 | func TestLogOpenNonExisting(t *testing.T) { 115 | _, err := Open("/nope", true) 116 | require.Error(t, err) 117 | } 118 | 119 | func TestLogNextSize(t *testing.T) { 120 | var kb int64 = 1024 121 | var mb = 1024 * kb 122 | require.Equal(t, int64(0), nextSize(-1)) 123 | require.Equal(t, 8*PageSize, nextSize(0)) 124 | require.Equal(t, 8*PageSize, nextSize(1)) 125 | 126 | require.Equal(t, 14*PageSize, nextSize(200*kb)-200*kb) 127 | require.Equal(t, 32*PageSize, nextSize(1*mb)-1*mb) 128 | require.Equal(t, 64*PageSize, nextSize(10*mb)-10*mb) 129 | require.Equal(t, 128*PageSize, nextSize(100*mb)-100*mb) 130 | } 131 | 132 | func TestLogRemap(t *testing.T) { 133 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 134 | require.NoError(t, err) 135 | defer os.RemoveAll(tmpDir) 136 | 137 | logPath := filepath.Join(tmpDir, "log") 138 | log, err := Open(logPath, true) 139 | require.NoError(t, err) 140 | 141 | infoBefore, err := os.Stat(logPath) 142 | require.NoError(t, err) 143 | require.Equal(t, int64(0), infoBefore.Size()%PageSize) 144 | 145 | // that's enough to trigger the grow quite a few times: 146 | for idx := 0; idx < 100; idx++ { 147 | _, err := log.Push(testutils.GenItems(0, 200, 1)) 148 | require.NoError(t, err) 149 | } 150 | 151 | infoAfter, err := os.Stat(logPath) 152 | require.NoError(t, err) 153 | require.True(t, infoAfter.Size() > infoBefore.Size()) 154 | require.Equal(t, int64(0), infoAfter.Size()%PageSize) 155 | 156 | require.NoError(t, log.Close()) 157 | } 158 | 159 | func TestLogFindNextItem(t *testing.T) { 160 | l := &Log{ 161 | mmap: make([]byte, 200), 162 | } 163 | 164 | item1 := item.Item{Key: 23, Blob: []byte("blob1")} 165 | item2 := item.Item{Key: 42, Blob: []byte("blob2")} 166 | 167 | l.writeItem(item1) 168 | l.writeItem(item2) 169 | 170 | expOffset1 := item1.StorageSize() 171 | expOffset2 := expOffset1 + item2.StorageSize() 172 | nextItemOff := l.findNextItem(0) 173 | require.Equal(t, expOffset1, nextItemOff) 174 | require.Equal(t, uint8(0xFF), l.mmap[nextItemOff-1]) 175 | require.Equal(t, uint8(0xFF), l.mmap[nextItemOff-2]) 176 | 177 | nextNextItemOff := l.findNextItem(nextItemOff) 178 | require.Equal(t, expOffset2, nextNextItemOff) 179 | require.Equal(t, uint8(0xFF), l.mmap[nextNextItemOff-1]) 180 | require.Equal(t, uint8(0xFF), l.mmap[nextNextItemOff-2]) 181 | 182 | // should not progress further (i.e.) 
beyond size: 183 | require.Equal(t, item.Off(l.size), nextNextItemOff) 184 | require.Equal(t, nextNextItemOff, l.findNextItem(nextNextItemOff)) 185 | } 186 | 187 | func TestLogEmpty(t *testing.T) { 188 | tmpDir, err := os.MkdirTemp("", "timeq-vlogtest") 189 | require.NoError(t, err) 190 | defer os.RemoveAll(tmpDir) 191 | 192 | logPath := filepath.Join(tmpDir, "log") 193 | log, err := Open(logPath, true) 194 | require.NoError(t, err) 195 | 196 | require.True(t, log.IsEmpty()) 197 | _, err = log.Push(testutils.GenItems(0, 10, 1)) 198 | require.NoError(t, err) 199 | require.False(t, log.IsEmpty()) 200 | require.NoError(t, log.Close()) 201 | 202 | log, err = Open(logPath, true) 203 | require.NoError(t, err) 204 | require.False(t, log.IsEmpty()) 205 | require.NoError(t, log.Close()) 206 | } 207 | --------------------------------------------------------------------------------