├── .github └── workflows │ ├── gh-release.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── Makefile ├── NOTICE.txt ├── README.md ├── RELEASE ├── bstream.go ├── bstream_test.go ├── codecov.yml ├── disk_partition.go ├── disk_partition_test.go ├── disk_wal.go ├── disk_wal_test.go ├── doc.go ├── encoding.go ├── encoding_test.go ├── fake_encoder.go ├── fake_partition.go ├── go.mod ├── go.sum ├── internal ├── cgroup │ ├── cpu.go │ ├── cpu_test.go │ ├── mem.go │ ├── mem_test.go │ ├── testdata │ │ ├── cgroup │ │ │ ├── cpu.cfs_period_us │ │ │ ├── cpu.cfs_quota_us │ │ │ ├── memory.limit_in_bytes │ │ │ └── memory.stat │ │ ├── docker │ │ │ └── 74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db │ │ │ │ ├── cpu.cfs_period_us │ │ │ │ ├── cpu.cfs_quota_us │ │ │ │ ├── memory.limit_in_bytes │ │ │ │ └── memory.stat │ │ └── self │ │ │ └── cgroup │ ├── util.go │ └── util_test.go ├── encoding │ └── int.go ├── syscall │ ├── mmap.go │ ├── mmap_386.go │ ├── mmap_amd64.go │ ├── mmap_arm.go │ ├── mmap_unix.go │ └── mmap_windows.go └── timerpool │ ├── timerpool.go │ └── timerpool_test.go ├── label.go ├── label_test.go ├── logger.go ├── memory_partition.go ├── memory_partition_test.go ├── partition.go ├── partition_list.go ├── partition_list_test.go ├── storage.go ├── storage_benchmark_test.go ├── storage_examples_test.go ├── storage_test.go ├── testdata └── meta.json └── wal.go /.github/workflows/gh-release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | paths: 6 | - 'RELEASE' 7 | pull_request: 8 | types: [opened, synchronize] 9 | branches: 10 | - main 11 | paths: 12 | - 'RELEASE' 13 | 14 | jobs: 15 | gh-release: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v1 19 | - uses: pipe-cd/actions-gh-release@v2.6.0 20 | with: 21 | release_file: 'RELEASE' 22 | token: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- 
/.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths: 7 | - '**.go' 8 | pull_request: 9 | branches: 10 | - main 11 | paths: 12 | - '**.go' 13 | jobs: 14 | test: 15 | strategy: 16 | matrix: 17 | platform: [ubuntu-latest, macos-latest] 18 | runs-on: ${{ matrix.platform }} 19 | steps: 20 | - name: Install Go 21 | uses: actions/setup-go@v2 22 | with: 23 | go-version: '1.20' 24 | - name: Checkout code 25 | uses: actions/checkout@v2 26 | - name: Run tests 27 | run: make test 28 | - name: Upload coverage to Codecov 29 | uses: codecov/codecov-action@v2 30 | with: 31 | token: ${{ secrets.CODECOV_TOKEN }} 32 | file: ./coverage.txt 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Output about testing 9 | *.test 10 | *.out 11 | /coverage.txt 12 | /pprof 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | # Editor 18 | /.idea 19 | 20 | .DS_Store 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | go test -race -v -coverpkg=./... -covermode=atomic -coverprofile=coverage.txt ./... 3 | 4 | test-bench: 5 | go test -benchtime=4s -benchmem -bench=. -cpuprofile=pprof/cpu.out -memprofile=pprof/mem.out . 
6 | 7 | pprof-mem: 8 | go tool pprof pprof/mem.out 9 | 10 | pprof-cpu: 11 | go tool pprof pprof/cpu.out 12 | 13 | dep: 14 | go mod tidy 15 | 16 | godoc: 17 | godoc -http=:6060 18 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | tstorage 2 | =============== 3 | 4 | This product contains a modified part of VictoriaMetrics, distributed by VictoriaMetrics, Inc: 5 | 6 | * License: https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE (Apache License v2.0) 7 | * Homepage: https://github.com/VictoriaMetrics/VictoriaMetrics 8 | 9 | This product contains a modified part of catena, distributed by Cistern: 10 | 11 | * License: https://github.com/Cistern/catena/blob/master/LICENSE 12 | * Homepage: https://github.com/Cistern/catena 13 | 14 | This product contains a modified part of go-tsz, distributed by dgryski: 15 | 16 | * License: https://github.com/dgryski/go-tsz/blob/master/LICENSE 17 | * Homepage: https://github.com/dgryski/go-tsz 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tstorage [![Go Reference](https://pkg.go.dev/badge/mod/github.com/nakabonne/tstorage.svg)](https://pkg.go.dev/mod/github.com/nakabonne/tstorage) 2 | 3 | `tstorage` is a lightweight local on-disk storage engine for time-series data with a straightforward API. 4 | Especially ingestion is massively optimized as it provides goroutine safe capabilities of write into and read from TSDB that partitions data points by time. 5 | 6 | ## Motivation 7 | I'm working on a couple of tools that handle a tremendous amount of time-series data, such as [Ali](https://github.com/nakabonne/ali) and [Gosivy](https://github.com/nakabonne/gosivy). 
8 | Especially Ali, I had been facing a problem of increasing heap consumption over time as it's a load testing tool that aims to perform real-time analysis. 9 | I little poked around a fast TSDB library that offers simple APIs but eventually nothing works as well as I'd like, that's why I settled on writing this package myself. 10 | 11 | To see how much `tstorage` has helped improve Ali's performance, see the release notes [here](https://github.com/nakabonne/ali/releases/tag/v0.7.0). 12 | 13 | ## Usage 14 | Currently, `tstorage` requires Go version 1.16 or greater 15 | 16 | By default, `tstorage.Storage` works as an in-memory database. 17 | The below example illustrates how to insert a row into the memory and immediately select it. 18 | 19 | ```go 20 | package main 21 | 22 | import ( 23 | "fmt" 24 | 25 | "github.com/nakabonne/tstorage" 26 | ) 27 | 28 | func main() { 29 | storage, _ := tstorage.NewStorage( 30 | tstorage.WithTimestampPrecision(tstorage.Seconds), 31 | ) 32 | defer storage.Close() 33 | 34 | _ = storage.InsertRows([]tstorage.Row{ 35 | { 36 | Metric: "metric1", 37 | DataPoint: tstorage.DataPoint{Timestamp: 1600000000, Value: 0.1}, 38 | }, 39 | }) 40 | points, _ := storage.Select("metric1", nil, 1600000000, 1600000001) 41 | for _, p := range points { 42 | fmt.Printf("timestamp: %v, value: %v\n", p.Timestamp, p.Value) 43 | // => timestamp: 1600000000, value: 0.1 44 | } 45 | } 46 | ``` 47 | 48 | ### Using disk 49 | To make time-series data persistent on disk, specify the path to directory that stores time-series data through [WithDataPath](https://pkg.go.dev/github.com/nakabonne/tstorage#WithDataPath) option. 50 | 51 | ```go 52 | storage, _ := tstorage.NewStorage( 53 | tstorage.WithDataPath("./data"), 54 | ) 55 | defer storage.Close() 56 | ``` 57 | 58 | ### Labeled metrics 59 | In tstorage, you can identify a metric with combination of metric name and optional labels. 60 | Here is an example of insertion a labeled metric to the disk. 
61 | 62 | ```go 63 | metric := "mem_alloc_bytes" 64 | labels := []tstorage.Label{ 65 | {Name: "host", Value: "host-1"}, 66 | } 67 | 68 | _ = storage.InsertRows([]tstorage.Row{ 69 | { 70 | Metric: metric, 71 | Labels: labels, 72 | DataPoint: tstorage.DataPoint{Timestamp: 1600000000, Value: 0.1}, 73 | }, 74 | }) 75 | points, _ := storage.Select(metric, labels, 1600000000, 1600000001) 76 | ``` 77 | 78 | For more examples see [the documentation](https://pkg.go.dev/github.com/nakabonne/tstorage#pkg-examples). 79 | 80 | ## Benchmarks 81 | Benchmark tests were made using Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz with 16GB of RAM on macOS 10.15.7 82 | 83 | ``` 84 | $ go version 85 | go version go1.16.2 darwin/amd64 86 | 87 | $ go test -benchtime=4s -benchmem -bench=. . 88 | goos: darwin 89 | goarch: amd64 90 | pkg: github.com/nakabonne/tstorage 91 | cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz 92 | BenchmarkStorage_InsertRows-8 14135685 305.9 ns/op 174 B/op 2 allocs/op 93 | BenchmarkStorage_SelectAmongThousandPoints-8 20548806 222.4 ns/op 56 B/op 2 allocs/op 94 | BenchmarkStorage_SelectAmongMillionPoints-8 16185709 292.2 ns/op 56 B/op 1 allocs/op 95 | PASS 96 | ok github.com/nakabonne/tstorage 16.501s 97 | ``` 98 | 99 | ## Internal 100 | Time-series database has specific characteristics in its workload. 101 | In terms of write operations, a time-series database has to ingest a tremendous amount of data points ordered by time. 102 | Time-series data is immutable, mostly an append-only workload with delete operations performed in batches on less recent data. 103 | In terms of read operations, in most cases, we want to retrieve multiple data points by specifying its time range, also, most recent first: query the recent data in real-time. 104 | Besides, time-series data is already indexed in time order. 
105 | 106 | Based on these characteristics, `tstorage` adopts a linear data model structure that partitions data points by time, totally different from the B-trees or LSM trees based storage engines. 107 | Each partition acts as a fully independent database containing all data points for its time range. 108 | 109 | 110 | ``` 111 | │ │ 112 | Read Write 113 | │ │ 114 | │ V 115 | │ ┌───────────────────┐ max: 1600010800 116 | ├─────> Memory Partition 117 | │ └───────────────────┘ min: 1600007201 118 | │ 119 | │ ┌───────────────────┐ max: 1600007200 120 | ├─────> Memory Partition 121 | │ └───────────────────┘ min: 1600003601 122 | │ 123 | │ ┌───────────────────┐ max: 1600003600 124 | └─────> Disk Partition 125 | └───────────────────┘ min: 1600000000 126 | ``` 127 | 128 | Key benefits: 129 | - We can easily ignore all data outside of the partition time range when querying data points. 130 | - Most read operations work fast because recent data get cached in heap. 131 | - When a partition gets full, we can persist the data from our in-memory database by sequentially writing just a handful of larger files. We avoid any write-amplification and serve SSDs and HDDs equally well. 132 | 133 | ### Memory partition 134 | The memory partition is writable and stores data points in heap. The head partition is always memory partition. Its next one is also memory partition to accept out-of-order data points. 135 | It stores data points in an ordered Slice, which offers excellent cache hit ratio compared to linked lists unless it gets updated way too often (like delete, add elements at random locations). 136 | 137 | All incoming data is written to a write-ahead log (WAL) right before inserting into a memory partition to prevent data loss. 138 | 139 | ### Disk partition 140 | The old memory partitions get compacted and persisted to the directory prefixed with `p-`, under the directory specified with the [WithDataPath](https://pkg.go.dev/github.com/nakabonne/tstorage#WithDataPath) option. 
141 | Here is the macro layout of disk partitions: 142 | 143 | ``` 144 | $ tree ./data 145 | ./data 146 | ├── p-1600000001-1600003600 147 | │   ├── data 148 | │   └── meta.json 149 | ├── p-1600003601-1600007200 150 | │   ├── data 151 | │   └── meta.json 152 | └── p-1600007201-1600010800 153 | ├── data 154 | └── meta.json 155 | ``` 156 | 157 | As you can see each partition holds two files: `meta.json` and `data`. 158 | The `data` is compressed, read-only and is memory-mapped with [mmap(2)](https://en.wikipedia.org/wiki/Mmap) that maps a kernel address space to a user address space. 159 | Therefore, what it has to store in heap is only partition's metadata. Just looking at `meta.json` gives us a good picture of what it stores: 160 | 161 | ```json 162 | $ cat ./data/p-1600000001-1600003600/meta.json 163 | { 164 | "minTimestamp": 1600000001, 165 | "maxTimestamp": 1600003600, 166 | "numDataPoints": 7200, 167 | "metrics": { 168 | "metric-1": { 169 | "name": "metric-1", 170 | "offset": 0, 171 | "minTimestamp": 1600000001, 172 | "maxTimestamp": 1600003600, 173 | "numDataPoints": 3600 174 | }, 175 | "metric-2": { 176 | "name": "metric-2", 177 | "offset": 36014, 178 | "minTimestamp": 1600000001, 179 | "maxTimestamp": 1600003600, 180 | "numDataPoints": 3600 181 | } 182 | } 183 | } 184 | ``` 185 | 186 | Each metric has its own file offset of the beginning. 187 | Data point slice for each metric is compressed separately, so all we have to do when reading is to seek, and read the points off. 188 | 189 | ### Out-of-order data points 190 | What data points get out-of-order in real-world applications is not uncommon because of network latency or clock synchronization issues; `tstorage` basically doesn't discard them. 191 | If out-of-order data points are within the range of the head memory partition, they get temporarily buffered and merged at flush time. 192 | Sometimes we should handle data points that cross a partition boundary. 
That is the reason why `tstorage` keeps more than one partition writable. 193 | 194 | ## More 195 | Want to know more details on tstorage internal? If so see the blog post: [Write a time-series database engine from scratch](https://nakabonne.dev/posts/write-tsdb-from-scratch). 196 | 197 | ## Acknowledgements 198 | This package is implemented based on tons of existing ideas. What I especially got inspired by are: 199 | - https://misfra.me/state-of-the-state-part-iii 200 | - https://fabxc.org/tsdb 201 | - https://questdb.io/blog/2020/11/26/why-timeseries-data 202 | - https://akumuli.org/akumuli/2017/04/29/nbplustree 203 | - https://github.com/VictoriaMetrics/VictoriaMetrics 204 | 205 | A big "thank you!" goes out to all of them. 206 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | tag: v0.3.6 2 | -------------------------------------------------------------------------------- /bstream.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015,2016 Damian Gryski 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright notice, 8 | // this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright notice, 11 | // this list of conditions and the following disclaimer in the documentation 12 | // and/or other materials provided with the distribution. 
13 | // 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | package tstorage 26 | 27 | import ( 28 | "encoding/binary" 29 | "io" 30 | ) 31 | 32 | // bstream is a stream of bits. 33 | type bstream struct { 34 | stream []byte // the data stream 35 | count uint8 // how many bits are valid in current byte 36 | } 37 | 38 | func (b *bstream) bytes() []byte { 39 | return b.stream 40 | } 41 | 42 | // reset resets the buffer to be empty, 43 | // but it retains the underlying storage for use by future writes. 
44 | func (b *bstream) reset() { 45 | b.stream = b.stream[:0] 46 | b.count = 0 47 | } 48 | 49 | type bit bool 50 | 51 | const ( 52 | zero bit = false 53 | one bit = true 54 | ) 55 | 56 | func (b *bstream) writeBit(bit bit) { 57 | if b.count == 0 { 58 | b.stream = append(b.stream, 0) 59 | b.count = 8 60 | } 61 | 62 | i := len(b.stream) - 1 63 | 64 | if bit { 65 | b.stream[i] |= 1 << (b.count - 1) 66 | } 67 | 68 | b.count-- 69 | } 70 | 71 | func (b *bstream) writeByte(byt byte) { 72 | if b.count == 0 { 73 | b.stream = append(b.stream, 0) 74 | b.count = 8 75 | } 76 | 77 | i := len(b.stream) - 1 78 | 79 | // fill up b.b with b.count bits from byt 80 | b.stream[i] |= byt >> (8 - b.count) 81 | 82 | b.stream = append(b.stream, 0) 83 | i++ 84 | b.stream[i] = byt << b.count 85 | } 86 | 87 | func (b *bstream) writeBits(u uint64, nbits int) { 88 | u <<= (64 - uint(nbits)) 89 | for nbits >= 8 { 90 | byt := byte(u >> 56) 91 | b.writeByte(byt) 92 | u <<= 8 93 | nbits -= 8 94 | } 95 | 96 | for nbits > 0 { 97 | b.writeBit((u >> 63) == 1) 98 | u <<= 1 99 | nbits-- 100 | } 101 | } 102 | 103 | type bstreamReader struct { 104 | stream []byte 105 | streamOffset int // The offset from which read the next byte from the stream. 106 | 107 | buffer uint64 // The current buffer, filled from the stream, containing up to 8 bytes from which read bits. 108 | valid uint8 // The number of bits valid to read (from left) in the current buffer. 109 | } 110 | 111 | func newBReader(b []byte) bstreamReader { 112 | return bstreamReader{ 113 | stream: b, 114 | } 115 | } 116 | 117 | func (b *bstreamReader) readBit() (bit, error) { 118 | if b.valid == 0 { 119 | if !b.loadNextBuffer(1) { 120 | return false, io.EOF 121 | } 122 | } 123 | 124 | return b.readBitFast() 125 | } 126 | 127 | // readBitFast is like readBit but can return io.EOF if the internal buffer is empty. 128 | // If it returns io.EOF, the caller should retry reading bits calling readBit(). 
129 | // This function must be kept small and a leaf in order to help the compiler inlining it 130 | // and further improve performances. 131 | func (b *bstreamReader) readBitFast() (bit, error) { 132 | if b.valid == 0 { 133 | return false, io.EOF 134 | } 135 | 136 | b.valid-- 137 | bitmask := uint64(1) << b.valid 138 | return (b.buffer & bitmask) != 0, nil 139 | } 140 | 141 | func (b *bstreamReader) readBits(nbits uint8) (uint64, error) { 142 | if b.valid == 0 { 143 | if !b.loadNextBuffer(nbits) { 144 | return 0, io.EOF 145 | } 146 | } 147 | 148 | if nbits <= b.valid { 149 | return b.readBitsFast(nbits) 150 | } 151 | 152 | // We have to read all remaining valid bits from the current buffer and a part from the next one. 153 | bitmask := (uint64(1) << b.valid) - 1 154 | nbits -= b.valid 155 | v := (b.buffer & bitmask) << nbits 156 | b.valid = 0 157 | 158 | if !b.loadNextBuffer(nbits) { 159 | return 0, io.EOF 160 | } 161 | 162 | bitmask = (uint64(1) << nbits) - 1 163 | v = v | ((b.buffer >> (b.valid - nbits)) & bitmask) 164 | b.valid -= nbits 165 | 166 | return v, nil 167 | } 168 | 169 | // readBitsFast is like readBits but can return io.EOF if the internal buffer is empty. 170 | // If it returns io.EOF, the caller should retry reading bits calling readBits(). 171 | // This function must be kept small and a leaf in order to help the compiler inlining it 172 | // and further improve performances. 173 | func (b *bstreamReader) readBitsFast(nbits uint8) (uint64, error) { 174 | if nbits > b.valid { 175 | return 0, io.EOF 176 | } 177 | 178 | bitmask := (uint64(1) << nbits) - 1 179 | b.valid -= nbits 180 | 181 | return (b.buffer >> b.valid) & bitmask, nil 182 | } 183 | 184 | func (b *bstreamReader) ReadByte() (byte, error) { 185 | v, err := b.readBits(8) 186 | if err != nil { 187 | return 0, err 188 | } 189 | return byte(v), nil 190 | } 191 | 192 | // loadNextBuffer loads the next bytes from the stream into the internal buffer. 
193 | // The input nbits is the minimum number of bits that must be read, but the implementation 194 | // can read more (if possible) to improve performances. 195 | func (b *bstreamReader) loadNextBuffer(nbits uint8) bool { 196 | if b.streamOffset >= len(b.stream) { 197 | return false 198 | } 199 | 200 | // Handle the case there are more then 8 bytes in the buffer (most common case) 201 | // in a optimized way. It's guaranteed that this branch will never read from the 202 | // very last byte of the stream (which suffers race conditions due to concurrent 203 | // writes). 204 | if b.streamOffset+8 < len(b.stream) { 205 | b.buffer = binary.BigEndian.Uint64(b.stream[b.streamOffset:]) 206 | b.streamOffset += 8 207 | b.valid = 64 208 | return true 209 | } 210 | 211 | // We're here if the are 8 or less bytes left in the stream. Since this reader needs 212 | // to handle race conditions with concurrent writes happening on the very last byte 213 | // we make sure to never over more than the minimum requested bits (rounded up to 214 | // the next byte). The following code is slower but called less frequently. 215 | nbytes := int((nbits / 8) + 1) 216 | if b.streamOffset+nbytes > len(b.stream) { 217 | nbytes = len(b.stream) - b.streamOffset 218 | } 219 | 220 | buffer := uint64(0) 221 | for i := 0; i < nbytes; i++ { 222 | buffer = buffer | (uint64(b.stream[b.streamOffset+i]) << uint(8*(nbytes-i-1))) 223 | } 224 | 225 | b.buffer = buffer 226 | b.streamOffset += nbytes 227 | b.valid = uint8(nbytes * 8) 228 | 229 | return true 230 | } 231 | -------------------------------------------------------------------------------- /bstream_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestBstreamReader(t *testing.T) { 10 | // Write to the bit stream. 
11 | w := bstream{} 12 | for _, bit := range []bit{true, false} { 13 | w.writeBit(bit) 14 | } 15 | for nbits := 1; nbits <= 64; nbits++ { 16 | w.writeBits(uint64(nbits), nbits) 17 | } 18 | for v := 1; v < 10000; v += 123 { 19 | w.writeBits(uint64(v), 29) 20 | } 21 | 22 | // Read back. 23 | r := newBReader(w.bytes()) 24 | for _, bit := range []bit{true, false} { 25 | v, err := r.readBitFast() 26 | if err != nil { 27 | v, err = r.readBit() 28 | } 29 | require.NoError(t, err) 30 | require.Equal(t, bit, v) 31 | } 32 | for nbits := uint8(1); nbits <= 64; nbits++ { 33 | v, err := r.readBitsFast(nbits) 34 | if err != nil { 35 | v, err = r.readBits(nbits) 36 | } 37 | require.NoError(t, err) 38 | require.Equal(t, uint64(nbits), v, "nbits=%d", nbits) 39 | } 40 | for v := 1; v < 10000; v += 123 { 41 | actual, err := r.readBitsFast(29) 42 | if err != nil { 43 | actual, err = r.readBits(29) 44 | } 45 | require.NoError(t, err) 46 | require.Equal(t, uint64(v), actual, "v=%d", v) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | range: 30..90 3 | round: up 4 | status: 5 | project: 6 | default: 7 | target: 70% 8 | threshold: 20% 9 | if_not_found: success 10 | if_ci_failed: success 11 | patch: 12 | default: 13 | target: 0% 14 | threshold: 20% 15 | if_no_uploads: success 16 | if_not_found: success 17 | if_ci_failed: success 18 | -------------------------------------------------------------------------------- /disk_partition.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "os" 10 | "path/filepath" 11 | "time" 12 | 13 | "github.com/nakabonne/tstorage/internal/syscall" 14 | ) 15 | 16 | const ( 17 | dataFileName = "data" 18 | metaFileName = "meta.json" 19 | ) 20 | 21 | var ( 22 | 
// Note that CreatedAt is always timestamped by tstorage, while the Min/Max timestamps are likely to be set by another process.
59 | func openDiskPartition(dirPath string, retention time.Duration) (partition, error) { 60 | if dirPath == "" { 61 | return nil, fmt.Errorf("dir path is required") 62 | } 63 | metaFilePath := filepath.Join(dirPath, metaFileName) 64 | _, err := os.Stat(metaFilePath) 65 | if errors.Is(err, os.ErrNotExist) { 66 | return nil, errInvalidPartition 67 | } 68 | 69 | // Map data to the memory 70 | dataPath := filepath.Join(dirPath, dataFileName) 71 | f, err := os.Open(dataPath) 72 | if err != nil { 73 | return nil, fmt.Errorf("failed to read data file: %w", err) 74 | } 75 | defer f.Close() 76 | info, err := f.Stat() 77 | if err != nil { 78 | return nil, fmt.Errorf("failed to fetch file info: %w", err) 79 | } 80 | if info.Size() == 0 { 81 | return nil, ErrNoDataPoints 82 | } 83 | mapped, err := syscall.Mmap(int(f.Fd()), int(info.Size())) 84 | if err != nil { 85 | return nil, fmt.Errorf("failed to perform mmap: %w", err) 86 | } 87 | 88 | // Read metadata to the heap 89 | m := meta{} 90 | mf, err := os.Open(metaFilePath) 91 | if err != nil { 92 | return nil, fmt.Errorf("failed to read metadata: %w", err) 93 | } 94 | defer mf.Close() 95 | decoder := json.NewDecoder(mf) 96 | if err := decoder.Decode(&m); err != nil { 97 | return nil, fmt.Errorf("failed to decode metadata: %w", err) 98 | } 99 | return &diskPartition{ 100 | dirPath: dirPath, 101 | meta: m, 102 | f: f, 103 | mappedFile: mapped, 104 | retention: retention, 105 | }, nil 106 | } 107 | 108 | func (d *diskPartition) insertRows(_ []Row) ([]Row, error) { 109 | return nil, fmt.Errorf("can't insert rows into disk partition") 110 | } 111 | 112 | func (d *diskPartition) selectDataPoints(metric string, labels []Label, start, end int64) ([]*DataPoint, error) { 113 | if d.expired() { 114 | return nil, fmt.Errorf("this partition is expired: %w", ErrNoDataPoints) 115 | } 116 | name := marshalMetricName(metric, labels) 117 | mt, ok := d.meta.Metrics[name] 118 | if !ok { 119 | return nil, ErrNoDataPoints 120 | } 121 | r := 
// TODO: Divide into fixed-length chunks when flushing, and index them.
161 | func (d *diskPartition) active() bool { 162 | return false 163 | } 164 | 165 | func (d *diskPartition) clean() error { 166 | if err := os.RemoveAll(d.dirPath); err != nil { 167 | return fmt.Errorf("failed to remove all files inside the partition (%d~%d): %w", d.minTimestamp(), d.maxTimestamp(), err) 168 | } 169 | 170 | return nil 171 | } 172 | 173 | func (d *diskPartition) expired() bool { 174 | diff := time.Since(d.meta.CreatedAt) 175 | if diff > d.retention { 176 | return true 177 | } 178 | return false 179 | } 180 | -------------------------------------------------------------------------------- /disk_partition_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestOpenDiskPartition(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | dirPath string 14 | retention time.Duration 15 | want partition 16 | wantErr bool 17 | }{ 18 | { 19 | name: "empty dir name given", 20 | dirPath: "", 21 | retention: 24 * time.Hour, 22 | wantErr: true, 23 | }, 24 | { 25 | name: "non-existent dir given", 26 | dirPath: "./non-existent-dir", 27 | retention: 24 * time.Hour, 28 | wantErr: true, 29 | }, 30 | } 31 | for _, tt := range tests { 32 | t.Run(tt.name, func(t *testing.T) { 33 | got, err := openDiskPartition(tt.dirPath, tt.retention) 34 | assert.Equal(t, tt.wantErr, err != nil) 35 | assert.Equal(t, tt.want, got) 36 | }) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /disk_wal.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "bufio" 5 | "encoding/binary" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "math" 11 | "os" 12 | "path/filepath" 13 | "strconv" 14 | "sync" 15 | "sync/atomic" 16 | ) 17 | 18 | // diskWAL contains multiple segment files. 
One segment is responsible for one partition. 19 | // They can be easily sorted because they are named using the created timestamp. 20 | // Macro layout is like: 21 | /* 22 | .wal/ 23 | ├── 0 24 | └── 1 25 | */ 26 | type diskWAL struct { 27 | dir string 28 | bufferedSize int 29 | // Buffered-writer to the active segment 30 | w *bufio.Writer 31 | // File descriptor to the active segment 32 | fd *os.File 33 | index uint32 34 | mu sync.Mutex 35 | } 36 | 37 | func newDiskWAL(dir string, bufferedSize int) (wal, error) { 38 | if err := os.MkdirAll(dir, fs.ModePerm); err != nil { 39 | return nil, fmt.Errorf("failed to make WAL dir: %w", err) 40 | } 41 | w := &diskWAL{ 42 | dir: dir, 43 | bufferedSize: bufferedSize, 44 | } 45 | f, err := w.createSegmentFile(dir) 46 | if err != nil { 47 | return nil, err 48 | } 49 | w.fd = f 50 | w.w = bufio.NewWriterSize(f, bufferedSize) 51 | 52 | return w, nil 53 | } 54 | 55 | // append appends the given entry to the end of a file via the file descriptor it has. 
// punctuate sets a boundary and creates a new segment.
// removeOldest removes only the oldest segment.
173 | func (w *diskWAL) createSegmentFile(dir string) (*os.File, error) { 174 | name := strconv.Itoa(int(atomic.LoadUint32(&w.index))) 175 | f, err := os.OpenFile(filepath.Join(dir, name), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) 176 | if err != nil { 177 | return nil, fmt.Errorf("failed to create segment file: %w", err) 178 | } 179 | atomic.AddUint32(&w.index, 1) 180 | return f, nil 181 | } 182 | 183 | type walRecord struct { 184 | op walOperation 185 | row Row 186 | } 187 | 188 | type diskWALReader struct { 189 | dir string 190 | files []os.DirEntry 191 | rowsToInsert []Row 192 | } 193 | 194 | func newDiskWALReader(dir string) (*diskWALReader, error) { 195 | files, err := os.ReadDir(dir) 196 | if err != nil { 197 | return nil, fmt.Errorf("failed to read the WAL dir: %w", err) 198 | } 199 | 200 | return &diskWALReader{ 201 | dir: dir, 202 | files: files, 203 | rowsToInsert: make([]Row, 0), 204 | }, nil 205 | } 206 | 207 | // readAll reads all segment files and caches the result for each operation. 208 | func (f *diskWALReader) readAll() error { 209 | for _, file := range f.files { 210 | if file.IsDir() { 211 | return fmt.Errorf("unexpected directory found under the WAL directory: %s", file.Name()) 212 | } 213 | fd, err := os.Open(filepath.Join(f.dir, file.Name())) 214 | if err != nil { 215 | return fmt.Errorf("failed to open WAL segment file: %w", err) 216 | } 217 | segment := &segment{ 218 | file: fd, 219 | r: bufio.NewReader(fd), 220 | } 221 | for segment.next() { 222 | rec := segment.record() 223 | switch rec.op { 224 | case operationInsert: 225 | f.rowsToInsert = append(f.rowsToInsert, rec.row) 226 | } 227 | } 228 | if err := segment.close(); err != nil { 229 | return err 230 | } 231 | 232 | err = segment.error() 233 | if errors.Is(err, io.ErrUnexpectedEOF) || errors.Is(err, io.EOF) { 234 | // It is not unusual for a line to be invalid, as it may well terminate in the middle of writing to the WAL. 
// error returns the error encountered while reading, if any.
307 | func (f *segment) error() error { 308 | return f.err 309 | } 310 | 311 | func (f *segment) record() *walRecord { 312 | return &f.current 313 | } 314 | 315 | func (f *segment) close() error { 316 | return f.file.Close() 317 | } 318 | -------------------------------------------------------------------------------- /disk_wal_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "strconv" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func Test_diskWAL_append_read(t *testing.T) { 14 | var ( 15 | op = operationInsert 16 | rows = []Row{ 17 | {Metric: "metric-1", DataPoint: DataPoint{Value: 0.1, Timestamp: 1600000000}}, 18 | {Metric: "metric-2", DataPoint: DataPoint{Value: 0.2, Timestamp: 1600000001}}, 19 | {Metric: "metric-1", DataPoint: DataPoint{Value: 0.1, Timestamp: 1600000001}}, 20 | {Metric: "metric-2", DataPoint: DataPoint{Value: 0.2, Timestamp: 1600000003}}, 21 | } 22 | ) 23 | // Append rows into wal 24 | tmpDir, err := os.MkdirTemp("", "tstorage-test") 25 | defer os.RemoveAll(tmpDir) 26 | require.NoError(t, err) 27 | path := filepath.Join(tmpDir, "wal") 28 | 29 | wal, err := newDiskWAL(path, 4096) 30 | require.NoError(t, err) 31 | 32 | // Append into two segments 33 | err = wal.append(op, rows[:2]) 34 | require.NoError(t, err) 35 | 36 | err = wal.punctuate() 37 | require.NoError(t, err) 38 | 39 | err = wal.append(op, rows[2:]) 40 | require.NoError(t, err) 41 | 42 | err = wal.flush() 43 | require.NoError(t, err) 44 | 45 | // Recover rows. 
46 | reader, err := newDiskWALReader(path) 47 | require.NoError(t, err) 48 | err = reader.readAll() 49 | require.NoError(t, err) 50 | got := reader.rowsToInsert 51 | assert.Equal(t, rows, got) 52 | } 53 | 54 | func Test_diskWAL_removeOldest(t *testing.T) { 55 | tmpDir, err := os.MkdirTemp("", "tstorage-test") 56 | require.NoError(t, err) 57 | for i := 0; i < 3; i++ { 58 | err := os.Mkdir(filepath.Join(tmpDir, strconv.Itoa(i)), os.ModePerm) 59 | require.NoError(t, err) 60 | } 61 | w := &diskWAL{ 62 | dir: tmpDir, 63 | } 64 | err = w.removeOldest() 65 | require.NoError(t, err) 66 | files, err := os.ReadDir(w.dir) 67 | require.NoError(t, err) 68 | want := []string{"1", "2"} 69 | got := []string{} 70 | for _, f := range files { 71 | got = append(got, f.Name()) 72 | } 73 | assert.Equal(t, want, got) 74 | } 75 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package tstorage provides goroutine safe capabilities of insertion into and retrieval 2 | // from the time-series storage. 3 | package tstorage 4 | -------------------------------------------------------------------------------- /encoding.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015,2016 Damian Gryski 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright notice, 8 | // this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright notice, 11 | // this list of conditions and the following disclaimer in the documentation 12 | // and/or other materials provided with the distribution. 
13 | // 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | package tstorage 26 | 27 | import ( 28 | "encoding/binary" 29 | "fmt" 30 | "io" 31 | "math" 32 | "math/bits" 33 | ) 34 | 35 | type seriesEncoder interface { 36 | encodePoint(point *DataPoint) error 37 | flush() error 38 | } 39 | 40 | func newSeriesEncoder(w io.Writer) seriesEncoder { 41 | return &gorillaEncoder{ 42 | w: w, 43 | buf: &bstream{stream: make([]byte, 0)}, 44 | } 45 | } 46 | 47 | // gorillaEncoder implements the Gorilla's time-series data compression. 
// encodePoint is not goroutine safe. It's the caller's responsibility to lock it.
105 | tDelta = uint64(point.Timestamp - e.t) 106 | deltaOfDelta := int64(tDelta - e.tDelta) 107 | switch { 108 | case deltaOfDelta == 0: 109 | e.buf.writeBit(zero) 110 | case -63 <= deltaOfDelta && deltaOfDelta <= 64: 111 | e.buf.writeBits(0x02, 2) // '10' 112 | e.buf.writeBits(uint64(deltaOfDelta), 7) 113 | case -255 <= deltaOfDelta && deltaOfDelta <= 256: 114 | e.buf.writeBits(0x06, 3) // '110' 115 | e.buf.writeBits(uint64(deltaOfDelta), 9) 116 | case -2047 <= deltaOfDelta && deltaOfDelta <= 2048: 117 | e.buf.writeBits(0x0e, 4) // '1110' 118 | e.buf.writeBits(uint64(deltaOfDelta), 12) 119 | default: 120 | e.buf.writeBits(0x0f, 4) // '1111' 121 | e.buf.writeBits(uint64(deltaOfDelta), 64) 122 | } 123 | // Write value delta. 124 | e.writeVDelta(point.Value) 125 | } 126 | 127 | e.t = point.Timestamp 128 | e.v = point.Value 129 | e.tDelta = tDelta 130 | return nil 131 | } 132 | 133 | // flush writes the buffered-bytes into the backend io.Writer 134 | // and resets everything used for computation. 135 | func (e *gorillaEncoder) flush() error { 136 | // TODO: Compress with ZStandard 137 | _, err := e.w.Write(e.buf.bytes()) 138 | if err != nil { 139 | return fmt.Errorf("failed to flush buffered bytes: %w", err) 140 | } 141 | 142 | e.buf.reset() 143 | e.t0 = 0 144 | e.t1 = 0 145 | e.t = 0 146 | e.tDelta = 0 147 | e.v = 0 148 | e.v = 0 149 | e.leading = 0 150 | e.trailing = 0 151 | 152 | return nil 153 | } 154 | 155 | func (e *gorillaEncoder) writeVDelta(v float64) { 156 | vDelta := math.Float64bits(v) ^ math.Float64bits(e.v) 157 | 158 | if vDelta == 0 { 159 | e.buf.writeBit(zero) 160 | return 161 | } 162 | e.buf.writeBit(one) 163 | 164 | leading := uint8(bits.LeadingZeros64(vDelta)) 165 | trailing := uint8(bits.TrailingZeros64(vDelta)) 166 | 167 | // Clamp number of leading zeros to avoid overflow when encoding. 
168 | if leading >= 32 { 169 | leading = 31 170 | } 171 | 172 | if e.leading != 0xff && leading >= e.leading && trailing >= e.trailing { 173 | e.buf.writeBit(zero) 174 | e.buf.writeBits(vDelta>>e.trailing, 64-int(e.leading)-int(e.trailing)) 175 | } else { 176 | e.leading, e.trailing = leading, trailing 177 | 178 | e.buf.writeBit(one) 179 | e.buf.writeBits(uint64(leading), 5) 180 | 181 | // Note that if leading == trailing == 0, then sigbits == 64. But that value doesn't actually fit into the 6 bits we have. 182 | // Luckily, we never need to encode 0 significant bits, since that would put us in the other case (vdelta == 0). 183 | // So instead we write out a 0 and adjust it back to 64 on unpacking. 184 | sigbits := 64 - leading - trailing 185 | e.buf.writeBits(uint64(sigbits), 6) 186 | e.buf.writeBits(vDelta>>trailing, int(sigbits)) 187 | } 188 | } 189 | 190 | type seriesDecoder interface { 191 | decodePoint(dst *DataPoint) error 192 | } 193 | 194 | // newSeriesDecoder decompress data from the given Reader, then holds the decompressed data 195 | func newSeriesDecoder(r io.Reader) (seriesDecoder, error) { 196 | // TODO: Stop copying entire bytes, then make it possible to to make bstreamReader from io.Reader 197 | b, err := io.ReadAll(r) 198 | if err != nil { 199 | return nil, fmt.Errorf("failed to read all bytes: %w", err) 200 | } 201 | return &gorillaDecoder{ 202 | br: newBReader(b), 203 | }, nil 204 | } 205 | 206 | type gorillaDecoder struct { 207 | br bstreamReader 208 | numRead uint16 209 | 210 | // timestamp of the Nth data point 211 | t int64 212 | tDelta uint64 213 | 214 | // value of the Nth data point 215 | v float64 216 | leading uint8 217 | trailing uint8 218 | } 219 | 220 | func (d *gorillaDecoder) decodePoint(dst *DataPoint) error { 221 | if d.numRead == 0 { 222 | t, err := binary.ReadVarint(&d.br) 223 | if err != nil { 224 | return fmt.Errorf("failed to read Timestamp of T0: %w", err) 225 | } 226 | v, err := d.br.readBits(64) 227 | if err != nil { 228 
// Do not use the fast path here because it's very unlikely to succeed.
// Sign-extend: values above the positive range of an sz-bit two's-complement
// number encode negative delta-of-deltas.
370 | bits, err = d.br.readBits(mbits) 371 | } 372 | if err != nil { 373 | return err 374 | } 375 | vbits := math.Float64bits(d.v) 376 | vbits ^= bits << d.trailing 377 | d.v = math.Float64frombits(vbits) 378 | } 379 | 380 | return nil 381 | } 382 | 383 | func bitRange(x int64, nbits uint8) bool { 384 | return -((1<<(nbits-1))-1) <= x && x <= 1<<(nbits-1) 385 | } 386 | -------------------------------------------------------------------------------- /encoding_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func Test_gorillaEncoder_encodePoint_decodePoint(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | input []*DataPoint // to be encoded 15 | want []*DataPoint 16 | wantEncodedByteSize int 17 | wantErr bool 18 | }{ 19 | { 20 | name: "one data point", 21 | input: []*DataPoint{ 22 | {Timestamp: 1600000000, Value: 0.1}, 23 | }, 24 | want: []*DataPoint{ 25 | {Timestamp: 1600000000, Value: 0.1}, 26 | }, 27 | wantEncodedByteSize: 14, 28 | wantErr: false, 29 | }, 30 | { 31 | name: "data points at regular intervals", 32 | input: []*DataPoint{ 33 | {Timestamp: 1600000000, Value: 0.1}, 34 | {Timestamp: 1600000060, Value: 0.1}, 35 | {Timestamp: 1600000120, Value: 0.1}, 36 | {Timestamp: 1600000180, Value: 0.1}, 37 | }, 38 | want: []*DataPoint{ 39 | {Timestamp: 1600000000, Value: 0.1}, 40 | {Timestamp: 1600000060, Value: 0.1}, 41 | {Timestamp: 1600000120, Value: 0.1}, 42 | {Timestamp: 1600000180, Value: 0.1}, 43 | }, 44 | wantEncodedByteSize: 15, 45 | wantErr: false, 46 | }, 47 | { 48 | name: "data points at random intervals", 49 | input: []*DataPoint{ 50 | {Timestamp: 1600000000, Value: 0.1}, 51 | {Timestamp: 1600000060, Value: 1.1}, 52 | {Timestamp: 1600000182, Value: 15.01}, 53 | {Timestamp: 1600000400, Value: 0.01}, 54 | {Timestamp: 1600002000, Value: 
10.8}, 55 | }, 56 | want: []*DataPoint{ 57 | {Timestamp: 1600000000, Value: 0.1}, 58 | {Timestamp: 1600000060, Value: 1.1}, 59 | {Timestamp: 1600000182, Value: 15.01}, 60 | {Timestamp: 1600000400, Value: 0.01}, 61 | {Timestamp: 1600002000, Value: 10.8}, 62 | }, 63 | wantEncodedByteSize: 52, 64 | wantErr: false, 65 | }, 66 | } 67 | for _, tt := range tests { 68 | t.Run(tt.name, func(t *testing.T) { 69 | // Encode 70 | var buf bytes.Buffer 71 | var num int 72 | encoder := newSeriesEncoder(&buf) 73 | for _, point := range tt.input { 74 | err := encoder.encodePoint(point) 75 | require.NoError(t, err) 76 | num++ 77 | } 78 | err := encoder.flush() 79 | require.NoError(t, err) 80 | 81 | assert.Equal(t, tt.wantEncodedByteSize, buf.Len()) 82 | 83 | // Decode 84 | decoder, err := newSeriesDecoder(&buf) 85 | require.NoError(t, err) 86 | got := make([]*DataPoint, 0, num) 87 | for i := 0; i < num; i++ { 88 | p := &DataPoint{} 89 | err := decoder.decodePoint(p) 90 | require.NoError(t, err) 91 | got = append(got, p) 92 | } 93 | assert.Equal(t, tt.want, got) 94 | }) 95 | } 96 | } 97 | 98 | func Test_bitRange(t *testing.T) { 99 | tests := []struct { 100 | name string 101 | x int64 102 | nbits uint8 103 | want bool 104 | }{ 105 | { 106 | name: "inside the range", 107 | x: 1, 108 | nbits: 1, 109 | want: true, 110 | }, 111 | } 112 | for _, tt := range tests { 113 | t.Run(tt.name, func(t *testing.T) { 114 | got := bitRange(tt.x, tt.nbits) 115 | assert.Equal(t, tt.want, got) 116 | }) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /fake_encoder.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | type fakeEncoder struct { 4 | encodePointFunc func(*DataPoint) error 5 | flushFunc func() error 6 | } 7 | 8 | func (f *fakeEncoder) encodePoint(p *DataPoint) error { 9 | if f.encodePointFunc == nil { 10 | return nil 11 | } 12 | return f.encodePointFunc(p) 13 | } 14 | 15 | func (f 
// fakePartition is a test double for the partition interface.
// It returns canned values (minT, maxT, numPoints, IsActive) from the
// read-only methods, and the configured err from the data-access methods.
type fakePartition struct {
	minT      int64
	maxT      int64
	numPoints int
	IsActive  bool

	err error
}

// insertRows returns no outdated rows and the configured error.
func (f *fakePartition) insertRows(_ []Row) ([]Row, error) {
	return nil, f.err
}

// selectDataPoints returns no points and the configured error.
func (f *fakePartition) selectDataPoints(_ string, _ []Label, _, _ int64) ([]*DataPoint, error) {
	return nil, f.err
}

// minTimestamp returns the canned minimum timestamp.
func (f *fakePartition) minTimestamp() int64 {
	return f.minT
}

// maxTimestamp returns the canned maximum timestamp.
func (f *fakePartition) maxTimestamp() int64 {
	return f.maxT
}

// size returns the canned number of data points.
func (f *fakePartition) size() int {
	return f.numPoints
}

// active reports the canned activity flag.
func (f *fakePartition) active() bool {
	return f.IsActive
}

// clean is a no-op that always succeeds.
func (f *fakePartition) clean() error {
	return nil
}

// expired always reports false.
func (f *fakePartition) expired() bool {
	return false
}
| github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 6 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 7 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 11 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 12 | -------------------------------------------------------------------------------- /internal/cgroup/cpu.go: -------------------------------------------------------------------------------- 1 | package cgroup 2 | 3 | import ( 4 | "os" 5 | "runtime" 6 | "strconv" 7 | "strings" 8 | "sync" 9 | ) 10 | 11 | // AvailableCPUs returns the number of available CPU cores for the app. 12 | func AvailableCPUs() int { 13 | availableCPUsOnce.Do(updateGOMAXPROCSToCPUQuota) 14 | return runtime.GOMAXPROCS(-1) 15 | } 16 | 17 | var availableCPUsOnce sync.Once 18 | 19 | // updateGOMAXPROCSToCPUQuota updates GOMAXPROCS to cgroup CPU quota if GOMAXPROCS isn't set in environment var. 20 | func updateGOMAXPROCSToCPUQuota() { 21 | if v := os.Getenv("GOMAXPROCS"); v != "" { 22 | // Do not override explicitly set GOMAXPROCS. 23 | return 24 | } 25 | q := getCPUQuota() 26 | if q <= 0 { 27 | // Do not change GOMAXPROCS 28 | return 29 | } 30 | gomaxprocs := int(q + 0.5) 31 | numCPU := runtime.NumCPU() 32 | if gomaxprocs > numCPU { 33 | // There is no sense in setting more GOMAXPROCS than the number of available CPU cores. 
// countCPUs parses the contents of /sys/devices/system/cpu/online
// (e.g. "0-3,8,10-11") and returns the number of CPUs it lists.
// It returns -1 when the input cannot be parsed.
func countCPUs(data string) int {
	total := 0
	for _, item := range strings.Split(strings.TrimSpace(data), ",") {
		lo, hi, isRange := strings.Cut(item, "-")
		if !isRange {
			// A bare CPU index contributes exactly one CPU.
			if _, err := strconv.Atoi(item); err != nil {
				return -1
			}
			total++
			continue
		}
		// An inclusive "start-end" range contributes end-start+1 CPUs.
		start, err := strconv.Atoi(lo)
		if err != nil {
			return -1
		}
		end, err := strconv.Atoi(hi)
		if err != nil {
			return -1
		}
		total += end - start + 1
	}
	return total
}
TestCountCPUs(t *testing.T) { 8 | f := func(s string, nExpected int) { 9 | t.Helper() 10 | n := countCPUs(s) 11 | if n != nExpected { 12 | t.Fatalf("unexpected result from countCPUs(%q); got %d; want %d", s, n, nExpected) 13 | } 14 | } 15 | f("", -1) 16 | f("1", 1) 17 | f("234", 1) 18 | f("1,2", 2) 19 | f("0-1", 2) 20 | f("0-0", 1) 21 | f("1-2,3,5-9,200-210", 19) 22 | f("0-3", 4) 23 | f("0-6", 7) 24 | } 25 | -------------------------------------------------------------------------------- /internal/cgroup/mem.go: -------------------------------------------------------------------------------- 1 | package cgroup 2 | 3 | import ( 4 | "strconv" 5 | ) 6 | 7 | // GetMemoryLimit returns cgroup memory limit 8 | func GetMemoryLimit() int64 { 9 | // Try determining the amount of memory inside docker container. 10 | // See https://stackoverflow.com/questions/42187085/check-mem-limit-within-a-docker-container 11 | // 12 | // Read memory limit according to https://unix.stackexchange.com/questions/242718/how-to-find-out-how-much-memory-lxc-container-is-allowed-to-consume 13 | // This should properly determine the limit inside lxc container. 
// getHierarchicalMemoryLimit extracts the "hierarchical_memory_limit"
// value (bytes) from the memory.stat file found under sysfsPrefix,
// resolving the cgroup sub-path via cgroupPath when the direct file
// is missing.
func getHierarchicalMemoryLimit(sysfsPrefix, cgroupPath string) (int64, error) {
	data, err := getFileContents("memory.stat", sysfsPrefix, cgroupPath, "memory")
	if err != nil {
		return 0, err
	}
	// memory.stat lines look like "hierarchical_memory_limit 120";
	// field index 1 is the numeric value.
	memStat, err := grepFirstMatch(data, "hierarchical_memory_limit", 1, " ")
	if err != nil {
		return 0, err
	}
	return strconv.ParseInt(memStat, 10, 64)
}
cgroupPath string) { 24 | t.Helper() 25 | got, err := getHierarchicalMemoryLimit(sysPath, cgroupPath) 26 | if err == nil { 27 | t.Fatalf("expecting non-nil error") 28 | } 29 | if got != 0 { 30 | t.Fatalf("unexpected result, got: %d, want 0", got) 31 | } 32 | } 33 | f("testdata/", "testdata/none_existing_folder") 34 | } 35 | -------------------------------------------------------------------------------- /internal/cgroup/testdata/cgroup/cpu.cfs_period_us: -------------------------------------------------------------------------------- 1 | 500000 -------------------------------------------------------------------------------- /internal/cgroup/testdata/cgroup/cpu.cfs_quota_us: -------------------------------------------------------------------------------- 1 | 10 -------------------------------------------------------------------------------- /internal/cgroup/testdata/cgroup/memory.limit_in_bytes: -------------------------------------------------------------------------------- 1 | 523372036854771712 -------------------------------------------------------------------------------- /internal/cgroup/testdata/cgroup/memory.stat: -------------------------------------------------------------------------------- 1 | rss 2 2 | rss_huge 3 3 | mapped_file 4 4 | dirty 5 5 | writeback 6 6 | pgpgin 7 7 | pgpgout 8 8 | pgfault 9 9 | pgmajfault 10 10 | inactive_anon 11 11 | active_anon 12 12 | inactive_file 13 13 | active_file 14 14 | unevictable 15 15 | hierarchical_memory_limit 120 16 | hierarchical_memsw_limit 17 17 | total_cache 18 18 | total_rss 19 19 | total_rss_huge 20 20 | total_mapped_file 21 21 | total_dirty 22 22 | total_writeback 23 23 | total_pgpgin 24 24 | total_pgpgout 25 25 | total_pgfault 26 26 | total_pgmajfault 27 27 | total_inactive_anon 28 28 | total_active_anon 29 29 | total_inactive_file 30 30 | total_active_file 31 31 | total_unevictable 32 -------------------------------------------------------------------------------- 
/internal/cgroup/testdata/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db/cpu.cfs_period_us: -------------------------------------------------------------------------------- 1 | 100000 -------------------------------------------------------------------------------- /internal/cgroup/testdata/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db/cpu.cfs_quota_us: -------------------------------------------------------------------------------- 1 | -1 -------------------------------------------------------------------------------- /internal/cgroup/testdata/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db/memory.limit_in_bytes: -------------------------------------------------------------------------------- 1 | 9223372036854771712 -------------------------------------------------------------------------------- /internal/cgroup/testdata/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db/memory.stat: -------------------------------------------------------------------------------- 1 | rss 2 2 | rss_huge 3 3 | mapped_file 4 4 | dirty 5 5 | writeback 6 6 | pgpgin 7 7 | pgpgout 8 8 | pgfault 9 9 | pgmajfault 10 10 | inactive_anon 11 11 | active_anon 12 12 | inactive_file 13 13 | active_file 14 14 | unevictable 15 15 | hierarchical_memory_limit 16 16 | hierarchical_memsw_limit 17 17 | total_cache 18 18 | total_rss 19 19 | total_rss_huge 20 20 | total_mapped_file 21 21 | total_dirty 22 22 | total_writeback 23 23 | total_pgpgin 24 24 | total_pgpgout 25 25 | total_pgfault 26 26 | total_pgmajfault 27 27 | total_inactive_anon 28 28 | total_active_anon 29 29 | total_inactive_file 30 30 | total_active_file 31 31 | total_unevictable 32 -------------------------------------------------------------------------------- /internal/cgroup/testdata/self/cgroup: -------------------------------------------------------------------------------- 1 | 
12:perf_event:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 2 | 11:rdma:/ 3 | 10:pids:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 4 | 9:freezer:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 5 | 8:memory:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 6 | 7:devices:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 7 | 6:cpuset:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 8 | 5:hugetlb:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 9 | 4:net_cls,net_prio:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 10 | 3:blkio:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 11 | 2:cpu,cpuacct:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 12 | 1:name=systemd:/docker/74c9abf42b88b9a35b1b56061b08303e56fd1707fe5c5b4df93324dedb36b5db 13 | 0::/system.slice/containerd.service -------------------------------------------------------------------------------- /internal/cgroup/util.go: -------------------------------------------------------------------------------- 1 | package cgroup 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | func getStatGeneric(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (int64, error) { 12 | data, err := getFileContents(statName, sysfsPrefix, cgroupPath, cgroupGrepLine) 13 | if err != nil { 14 | return 0, err 15 | } 16 | n, err := strconv.ParseInt(data, 10, 64) 17 | if err != nil { 18 | return 0, err 19 | } 20 | return n, nil 21 | } 22 | 23 | func getFileContents(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (string, error) { 24 | filepath := path.Join(sysfsPrefix, statName) 25 | data, err := os.ReadFile(filepath) 26 | if err == nil { 27 | return string(data), nil 28 | } 29 | cgroupData, err := os.ReadFile(cgroupPath) 30 | if err != nil { 31 | 
// grepFirstMatch searches match line at data and returns item from it by index with given delimiter.
// If no line both contains match and has enough delimited fields, an error
// is returned.
func grepFirstMatch(data string, match string, index int, delimiter string) (string, error) {
	for _, line := range strings.Split(data, "\n") {
		if !strings.Contains(line, match) {
			continue
		}
		fields := strings.Split(line, delimiter)
		if index >= len(fields) {
			// Matching line is too short; keep scanning later lines.
			continue
		}
		return strings.TrimSpace(fields[index]), nil
	}
	return "", fmt.Errorf("cannot find %q in %q", match, data)
}
// UnmarshalUint16 returns unmarshaled uint16 from src.
// src must hold at least 2 bytes in big-endian order (the format
// produced by MarshalUint16).
func UnmarshalUint16(src []byte) uint16 {
	// This is faster than the manual conversion.
	return binary.BigEndian.Uint16(src)
}
4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // +build windows 15 | 16 | package syscall 17 | 18 | const maxMapSize = 0xFFFFFFFFFFFF // 256TB 19 | -------------------------------------------------------------------------------- /internal/syscall/mmap_arm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | // +build windows 15 | 16 | package syscall 17 | 18 | const maxMapSize = 0x7FFFFFFF // 2GB 19 | -------------------------------------------------------------------------------- /internal/syscall/mmap_unix.go: -------------------------------------------------------------------------------- 1 | // +build !windows,!plan9 2 | 3 | package syscall 4 | 5 | import "syscall" 6 | 7 | func mmap(fd, length int) ([]byte, error) { 8 | return syscall.Mmap( 9 | fd, 10 | 0, 11 | length, 12 | syscall.PROT_READ, 13 | syscall.MAP_SHARED, 14 | ) 15 | } 16 | -------------------------------------------------------------------------------- /internal/syscall/mmap_windows.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
// Get returns a timer for the given duration d from the pool.
//
// Return back the timer to the pool with Put.
func Get(d time.Duration) *time.Timer {
	if v := timerPool.Get(); v != nil {
		t := v.(*time.Timer)
		// Reset reports true if the timer was still active. A pooled timer
		// must always be stopped/drained (Put guarantees this), so an
		// active timer here indicates misuse of the pool.
		if t.Reset(d) {
			panic("active timer trapped to the pool!")
		}
		return t
	}
	// Pool is empty: allocate a fresh timer.
	return time.NewTimer(d)
}
// marshalMetricName builds a unique bytes by encoding labels.
// Layout: uint16(len(metric)) + metric, then for each valid label
// uint16(len(Name)) + Name + uint16(len(Value)) + Value, with labels
// sorted by name so equal label sets always produce the same key.
//
// NOTE: labels is mutated in place — it is sorted, and over-long
// names/values are truncated to maxLabelNameLen/maxLabelValueLen.
// Labels with an empty name or value are skipped.
func marshalMetricName(metric string, labels []Label) string {
	if len(labels) == 0 {
		return metric
	}
	invalid := func(name, value string) bool {
		return name == "" || value == ""
	}

	// Determine the bytes size in advance.
	size := len(metric) + 2
	sort.Slice(labels, func(i, j int) bool {
		return labels[i].Name < labels[j].Name
	})
	for i := range labels {
		label := &labels[i]
		if invalid(label.Name, label.Value) {
			continue
		}
		if len(label.Name) > maxLabelNameLen {
			label.Name = label.Name[:maxLabelNameLen]
		}
		if len(label.Value) > maxLabelValueLen {
			label.Value = label.Value[:maxLabelValueLen]
		}
		size += len(label.Name)
		size += len(label.Value)
		size += 4 // two uint16 length prefixes per label
	}

	// Start building the bytes.
	out := make([]byte, 0, size)
	out = encoding.MarshalUint16(out, uint16(len(metric)))
	out = append(out, metric...)
	for i := range labels {
		label := &labels[i]
		if invalid(label.Name, label.Value) {
			continue
		}
		out = encoding.MarshalUint16(out, uint16(len(label.Name)))
		out = append(out, label.Name...)
		out = encoding.MarshalUint16(out, uint16(len(label.Value)))
		out = append(out, label.Value...)
	}
	return string(out)
}
// newMemoryPartition generates a new in-heap partition.
// A nil wal is replaced with a no-op WAL. partitionDuration is converted
// into units of the given timestamp precision; unknown precisions fall
// back to nanoseconds.
func newMemoryPartition(wal wal, partitionDuration time.Duration, precision TimestampPrecision) partition {
	if wal == nil {
		wal = &nopWAL{}
	}
	var d int64
	switch precision {
	case Nanoseconds:
		d = partitionDuration.Nanoseconds()
	case Microseconds:
		d = partitionDuration.Microseconds()
	case Milliseconds:
		d = partitionDuration.Milliseconds()
	case Seconds:
		d = int64(partitionDuration.Seconds())
	default:
		d = partitionDuration.Nanoseconds()
	}
	return &memoryPartition{
		partitionDuration:  d,
		wal:                wal,
		timestampPrecision: precision,
	}
}
56 | func (m *memoryPartition) insertRows(rows []Row) ([]Row, error) { 57 | if len(rows) == 0 { 58 | return nil, fmt.Errorf("no rows given") 59 | } 60 | // FIXME: Just emitting log is enough 61 | err := m.wal.append(operationInsert, rows) 62 | if err != nil { 63 | return nil, fmt.Errorf("failed to write to WAL: %w", err) 64 | } 65 | 66 | // Set min timestamp at only first. 67 | m.once.Do(func() { 68 | min := rows[0].Timestamp 69 | for i := range rows { 70 | row := rows[i] 71 | if row.Timestamp < min { 72 | min = row.Timestamp 73 | } 74 | } 75 | atomic.StoreInt64(&m.minT, min) 76 | }) 77 | 78 | outdatedRows := make([]Row, 0) 79 | maxTimestamp := rows[0].Timestamp 80 | var rowsNum int64 81 | for i := range rows { 82 | row := rows[i] 83 | if row.Timestamp < m.minTimestamp() { 84 | outdatedRows = append(outdatedRows, row) 85 | continue 86 | } 87 | if row.Timestamp == 0 { 88 | row.Timestamp = toUnix(time.Now(), m.timestampPrecision) 89 | } 90 | if row.Timestamp > maxTimestamp { 91 | maxTimestamp = row.Timestamp 92 | } 93 | name := marshalMetricName(row.Metric, row.Labels) 94 | mt := m.getMetric(name) 95 | mt.insertPoint(&row.DataPoint) 96 | rowsNum++ 97 | } 98 | atomic.AddInt64(&m.numPoints, rowsNum) 99 | 100 | // Make max timestamp up-to-date. 
101 | if atomic.LoadInt64(&m.maxT) < maxTimestamp { 102 | atomic.SwapInt64(&m.maxT, maxTimestamp) 103 | } 104 | 105 | return outdatedRows, nil 106 | } 107 | 108 | func toUnix(t time.Time, precision TimestampPrecision) int64 { 109 | switch precision { 110 | case Nanoseconds: 111 | return t.UnixNano() 112 | case Microseconds: 113 | return t.UnixNano() / 1e3 114 | case Milliseconds: 115 | return t.UnixNano() / 1e6 116 | case Seconds: 117 | return t.Unix() 118 | default: 119 | return t.UnixNano() 120 | } 121 | } 122 | 123 | func (m *memoryPartition) selectDataPoints(metric string, labels []Label, start, end int64) ([]*DataPoint, error) { 124 | name := marshalMetricName(metric, labels) 125 | mt := m.getMetric(name) 126 | return mt.selectPoints(start, end), nil 127 | } 128 | 129 | // getMetric gives back the reference to the metrics list whose name is the given one. 130 | // If none, it creates a new one. 131 | func (m *memoryPartition) getMetric(name string) *memoryMetric { 132 | value, ok := m.metrics.Load(name) 133 | if !ok { 134 | value = &memoryMetric{ 135 | name: name, 136 | points: make([]*DataPoint, 0, 1000), 137 | outOfOrderPoints: make([]*DataPoint, 0), 138 | } 139 | m.metrics.Store(name, value) 140 | } 141 | return value.(*memoryMetric) 142 | } 143 | 144 | func (m *memoryPartition) minTimestamp() int64 { 145 | return atomic.LoadInt64(&m.minT) 146 | } 147 | 148 | func (m *memoryPartition) maxTimestamp() int64 { 149 | return atomic.LoadInt64(&m.maxT) 150 | } 151 | 152 | func (m *memoryPartition) size() int { 153 | return int(atomic.LoadInt64(&m.numPoints)) 154 | } 155 | 156 | func (m *memoryPartition) active() bool { 157 | return m.maxTimestamp()-m.minTimestamp()+1 < m.partitionDuration 158 | } 159 | 160 | func (m *memoryPartition) clean() error { 161 | // What all data managed by memoryPartition is on heap that is automatically removed by GC. 162 | // So do nothing. 
163 | return nil 164 | } 165 | 166 | func (m *memoryPartition) expired() bool { 167 | return false 168 | } 169 | 170 | // memoryMetric has a list of ordered data points that belong to the memoryMetric 171 | type memoryMetric struct { 172 | name string 173 | size int64 174 | minTimestamp int64 175 | maxTimestamp int64 176 | // points must kept in order 177 | points []*DataPoint 178 | outOfOrderPoints []*DataPoint 179 | mu sync.RWMutex 180 | } 181 | 182 | func (m *memoryMetric) insertPoint(point *DataPoint) { 183 | size := atomic.LoadInt64(&m.size) 184 | // TODO: Consider to stop using mutex every time. 185 | // Instead, fix the capacity of points slice, kind of like: 186 | /* 187 | m.points := make([]*DataPoint, 1000) 188 | for i := 0; i < 1000; i++ { 189 | m.points[i] = point 190 | } 191 | */ 192 | m.mu.Lock() 193 | defer m.mu.Unlock() 194 | 195 | // First insertion 196 | if size == 0 { 197 | m.points = append(m.points, point) 198 | atomic.StoreInt64(&m.minTimestamp, point.Timestamp) 199 | atomic.StoreInt64(&m.maxTimestamp, point.Timestamp) 200 | atomic.AddInt64(&m.size, 1) 201 | return 202 | } 203 | // Insert point in order 204 | if m.points[size-1].Timestamp < point.Timestamp { 205 | m.points = append(m.points, point) 206 | atomic.StoreInt64(&m.maxTimestamp, point.Timestamp) 207 | atomic.AddInt64(&m.size, 1) 208 | return 209 | } 210 | 211 | m.outOfOrderPoints = append(m.outOfOrderPoints, point) 212 | } 213 | 214 | // selectPoints returns a new slice by re-slicing with [startIdx:endIdx]. 
215 | func (m *memoryMetric) selectPoints(start, end int64) []*DataPoint { 216 | size := atomic.LoadInt64(&m.size) 217 | minTimestamp := atomic.LoadInt64(&m.minTimestamp) 218 | maxTimestamp := atomic.LoadInt64(&m.maxTimestamp) 219 | var startIdx, endIdx int 220 | 221 | if end <= minTimestamp { 222 | return []*DataPoint{} 223 | } 224 | 225 | m.mu.RLock() 226 | defer m.mu.RUnlock() 227 | if start <= minTimestamp { 228 | startIdx = 0 229 | } else { 230 | // Use binary search because points are in-order. 231 | startIdx = sort.Search(int(size), func(i int) bool { 232 | return m.points[i].Timestamp >= start 233 | }) 234 | } 235 | 236 | if end > maxTimestamp { 237 | endIdx = int(size) 238 | } else { 239 | // Use binary search because points are in-order. 240 | endIdx = sort.Search(int(size), func(i int) bool { 241 | return m.points[i].Timestamp >= end 242 | }) 243 | } 244 | return m.points[startIdx:endIdx] 245 | } 246 | 247 | // encodeAllPoints uses the given seriesEncoder to encode all metric data points in order by timestamp, 248 | // including outOfOrderPoints. 
249 | func (m *memoryMetric) encodeAllPoints(encoder seriesEncoder) error { 250 | sort.Slice(m.outOfOrderPoints, func(i, j int) bool { 251 | return m.outOfOrderPoints[i].Timestamp < m.outOfOrderPoints[j].Timestamp 252 | }) 253 | 254 | var oi, pi int 255 | for oi < len(m.outOfOrderPoints) && pi < len(m.points) { 256 | if m.outOfOrderPoints[oi].Timestamp < m.points[pi].Timestamp { 257 | if err := encoder.encodePoint(m.outOfOrderPoints[oi]); err != nil { 258 | return err 259 | } 260 | oi++ 261 | } else { 262 | if err := encoder.encodePoint(m.points[pi]); err != nil { 263 | return err 264 | } 265 | pi++ 266 | } 267 | } 268 | for oi < len(m.outOfOrderPoints) { 269 | if err := encoder.encodePoint(m.outOfOrderPoints[oi]); err != nil { 270 | return err 271 | } 272 | oi++ 273 | } 274 | for pi < len(m.points) { 275 | if err := encoder.encodePoint(m.points[pi]); err != nil { 276 | return err 277 | } 278 | pi++ 279 | } 280 | 281 | return nil 282 | } 283 | -------------------------------------------------------------------------------- /memory_partition_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func Test_memoryPartition_InsertRows(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | memoryPartition *memoryPartition 16 | rows []Row 17 | wantErr bool 18 | wantDataPoints []*DataPoint 19 | wantOutOfOrderRows []Row 20 | }{ 21 | { 22 | name: "insert in-order rows", 23 | memoryPartition: newMemoryPartition(nil, 0, "").(*memoryPartition), 24 | rows: []Row{ 25 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: 1, Value: 0.1}}, 26 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: 2, Value: 0.1}}, 27 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: 3, Value: 0.1}}, 28 | }, 29 | wantDataPoints: []*DataPoint{ 30 | {Timestamp: 1, Value: 0.1}, 31 | 
{Timestamp: 2, Value: 0.1}, 32 | {Timestamp: 3, Value: 0.1}, 33 | }, 34 | wantOutOfOrderRows: []Row{}, 35 | }, 36 | { 37 | name: "insert out-of-order rows", 38 | memoryPartition: func() *memoryPartition { 39 | m := newMemoryPartition(nil, 0, "").(*memoryPartition) 40 | m.insertRows([]Row{ 41 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: 2, Value: 0.1}}, 42 | }) 43 | return m 44 | }(), 45 | rows: []Row{ 46 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: 1, Value: 0.1}}, 47 | }, 48 | wantDataPoints: []*DataPoint{ 49 | {Timestamp: 2, Value: 0.1}, 50 | }, 51 | wantOutOfOrderRows: []Row{ 52 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: 1, Value: 0.1}}, 53 | }, 54 | }, 55 | } 56 | for _, tt := range tests { 57 | t.Run(tt.name, func(t *testing.T) { 58 | gotOutOfOrder, err := tt.memoryPartition.insertRows(tt.rows) 59 | assert.Equal(t, tt.wantErr, err != nil) 60 | assert.Equal(t, tt.wantOutOfOrderRows, gotOutOfOrder) 61 | 62 | got, _ := tt.memoryPartition.selectDataPoints("metric1", nil, 0, 4) 63 | assert.Equal(t, tt.wantDataPoints, got) 64 | }) 65 | } 66 | } 67 | 68 | func Test_memoryPartition_SelectDataPoints(t *testing.T) { 69 | tests := []struct { 70 | name string 71 | metric string 72 | labels []Label 73 | start int64 74 | end int64 75 | memoryPartition *memoryPartition 76 | want []*DataPoint 77 | }{ 78 | { 79 | name: "given non-exist metric name", 80 | metric: "unknown", 81 | start: 1, 82 | end: 2, 83 | memoryPartition: newMemoryPartition(nil, 0, "").(*memoryPartition), 84 | want: []*DataPoint{}, 85 | }, 86 | { 87 | name: "select some points", 88 | metric: "metric1", 89 | start: 2, 90 | end: 5, 91 | memoryPartition: func() *memoryPartition { 92 | m := newMemoryPartition(nil, 0, "").(*memoryPartition) 93 | m.insertRows([]Row{ 94 | { 95 | Metric: "metric1", 96 | DataPoint: DataPoint{Timestamp: 1, Value: 0.1}, 97 | }, 98 | { 99 | Metric: "metric1", 100 | DataPoint: DataPoint{Timestamp: 2, Value: 0.1}, 101 | }, 102 | { 103 | Metric: "metric1", 104 | 
DataPoint: DataPoint{Timestamp: 3, Value: 0.1}, 105 | }, 106 | { 107 | Metric: "metric1", 108 | DataPoint: DataPoint{Timestamp: 4, Value: 0.1}, 109 | }, 110 | { 111 | Metric: "metric1", 112 | DataPoint: DataPoint{Timestamp: 5, Value: 0.1}, 113 | }, 114 | }) 115 | return m 116 | }(), 117 | want: []*DataPoint{ 118 | {Timestamp: 2, Value: 0.1}, 119 | {Timestamp: 3, Value: 0.1}, 120 | {Timestamp: 4, Value: 0.1}, 121 | }, 122 | }, 123 | { 124 | name: "select all points", 125 | metric: "metric1", 126 | start: 1, 127 | end: 4, 128 | memoryPartition: func() *memoryPartition { 129 | m := newMemoryPartition(nil, 0, "").(*memoryPartition) 130 | m.insertRows([]Row{ 131 | { 132 | Metric: "metric1", 133 | DataPoint: DataPoint{Timestamp: 1, Value: 0.1}, 134 | }, 135 | { 136 | Metric: "metric1", 137 | DataPoint: DataPoint{Timestamp: 2, Value: 0.1}, 138 | }, 139 | { 140 | Metric: "metric1", 141 | DataPoint: DataPoint{Timestamp: 3, Value: 0.1}, 142 | }, 143 | }) 144 | return m 145 | }(), 146 | want: []*DataPoint{ 147 | {Timestamp: 1, Value: 0.1}, 148 | {Timestamp: 2, Value: 0.1}, 149 | {Timestamp: 3, Value: 0.1}, 150 | }, 151 | }, 152 | } 153 | for _, tt := range tests { 154 | t.Run(tt.name, func(t *testing.T) { 155 | got, _ := tt.memoryPartition.selectDataPoints(tt.metric, tt.labels, tt.start, tt.end) 156 | assert.Equal(t, tt.want, got) 157 | }) 158 | } 159 | } 160 | 161 | func Test_memoryMetric_EncodeAllPoints_sorted(t *testing.T) { 162 | mt := memoryMetric{ 163 | points: []*DataPoint{ 164 | {Timestamp: 1, Value: 0.1}, 165 | {Timestamp: 3, Value: 0.1}, 166 | }, 167 | outOfOrderPoints: []*DataPoint{ 168 | {Timestamp: 4, Value: 0.1}, 169 | {Timestamp: 2, Value: 0.1}, 170 | }, 171 | } 172 | allTimestamps := make([]int64, 0, 4) 173 | encoder := fakeEncoder{ 174 | encodePointFunc: func(p *DataPoint) error { 175 | allTimestamps = append(allTimestamps, p.Timestamp) 176 | return nil 177 | }, 178 | } 179 | err := mt.encodeAllPoints(&encoder) 180 | require.NoError(t, err) 181 | 
assert.Equal(t, []int64{1, 2, 3, 4}, allTimestamps) 182 | } 183 | 184 | func Test_memoryMetric_EncodeAllPoints_error(t *testing.T) { 185 | mt := memoryMetric{ 186 | points: []*DataPoint{{Timestamp: 1, Value: 0.1}}, 187 | } 188 | encoder := fakeEncoder{ 189 | encodePointFunc: func(p *DataPoint) error { 190 | return fmt.Errorf("some error") 191 | }, 192 | } 193 | err := mt.encodeAllPoints(&encoder) 194 | assert.Error(t, err) 195 | } 196 | 197 | func Test_toUnix(t *testing.T) { 198 | tests := []struct { 199 | name string 200 | t time.Time 201 | precision TimestampPrecision 202 | want int64 203 | }{ 204 | { 205 | name: "to nanosecond", 206 | t: time.Unix(1600000000, 0), 207 | precision: Nanoseconds, 208 | want: 1600000000000000000, 209 | }, 210 | { 211 | name: "to microsecond", 212 | t: time.Unix(1600000000, 0), 213 | precision: Microseconds, 214 | want: 1600000000000000, 215 | }, 216 | { 217 | name: "to millisecond", 218 | t: time.Unix(1600000000, 0), 219 | precision: Milliseconds, 220 | want: 1600000000000, 221 | }, 222 | { 223 | name: "to second", 224 | t: time.Unix(1600000000, 0), 225 | precision: Seconds, 226 | want: 1600000000, 227 | }, 228 | } 229 | for _, tt := range tests { 230 | t.Run(tt.name, func(t *testing.T) { 231 | got := toUnix(tt.t, tt.precision) 232 | assert.Equal(t, tt.want, got) 233 | }) 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /partition.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | // partition is a chunk of time-series data with the timestamp range. 4 | // A partition acts as a fully independent database containing all data 5 | // points for its time range. 6 | // 7 | // The partition's lifecycle is: Writable -> ReadOnly. 8 | // *Writable*: 9 | // it can be written. Only one partition can be writable within a partition list. 10 | // *ReadOnly*: 11 | // it can't be written. 
Partitions will be ReadOnly if it exceeds the partition range. 12 | type partition interface { 13 | // Write operations 14 | // 15 | // insertRows is a goroutine safe way to insert data points into itself. 16 | // If data points older than its min timestamp were given, they won't be 17 | // ingested, instead, gave back as a first returned value. 18 | insertRows(rows []Row) (outdatedRows []Row, err error) 19 | // clean removes everything managed by this partition. 20 | clean() error 21 | 22 | // Read operations 23 | // 24 | // selectDataPoints gives back certain metric's data points within the given range. 25 | selectDataPoints(metric string, labels []Label, start, end int64) ([]*DataPoint, error) 26 | // minTimestamp returns the minimum Unix timestamp in milliseconds. 27 | minTimestamp() int64 28 | // maxTimestamp returns the maximum Unix timestamp in milliseconds. 29 | maxTimestamp() int64 30 | // size returns the number of data points the partition holds. 31 | size() int 32 | // active means not only writable but having the qualities to be the head partition. 33 | active() bool 34 | // expired means it should get removed. 35 | expired() bool 36 | } 37 | -------------------------------------------------------------------------------- /partition_list.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | "sync/atomic" 8 | ) 9 | 10 | // partitionList represents a linked list for partitions. 11 | // Each partition is arranged in order order of newest to oldest. 12 | // That is, the head node is always the newest, the tail node is the oldest. 13 | // 14 | // Head and its next partitions must be writable to accept out-of-order data points 15 | // even if it's inactive. 16 | type partitionList interface { 17 | // insert appends a new node to the head. 18 | insert(partition partition) 19 | // remove eliminates the given partition from the list. 
20 | remove(partition partition) error 21 | // swap replaces the old partition with the new one. 22 | swap(old, new partition) error 23 | // getHead gives back the head node which is the newest one. 24 | getHead() partition 25 | // size returns the number of partitions of itself. 26 | size() int 27 | // newIterator gives back the iterator object fot this list. 28 | // If you need to inspect all nodes within the list, use this one. 29 | newIterator() partitionIterator 30 | 31 | String() string 32 | } 33 | 34 | // Iterator represents an iterator for partition list. The basic usage is: 35 | /* 36 | for iterator.next() { 37 | partition, err := iterator.value() 38 | // Do something with partition 39 | } 40 | */ 41 | type partitionIterator interface { 42 | // next positions the iterator at the next node in the list. 43 | // It will be positioned at the head on the first call. 44 | // The return value will be true if a value can be read from the list. 45 | next() bool 46 | // value gives back the current partition in the iterator. 47 | // If it was called even though next() returns false, it will return nil. 
48 | value() partition 49 | 50 | currentNode() *partitionNode 51 | } 52 | 53 | type partitionListImpl struct { 54 | numPartitions int64 55 | head *partitionNode 56 | tail *partitionNode 57 | mu sync.RWMutex 58 | } 59 | 60 | func newPartitionList() partitionList { 61 | return &partitionListImpl{} 62 | } 63 | 64 | func (p *partitionListImpl) getHead() partition { 65 | if p.size() <= 0 { 66 | return nil 67 | } 68 | p.mu.RLock() 69 | defer p.mu.RUnlock() 70 | return p.head.value() 71 | } 72 | 73 | func (p *partitionListImpl) insert(partition partition) { 74 | node := &partitionNode{ 75 | val: partition, 76 | } 77 | p.mu.RLock() 78 | head := p.head 79 | p.mu.RUnlock() 80 | if head != nil { 81 | node.next = head 82 | } 83 | 84 | p.setHead(node) 85 | atomic.AddInt64(&p.numPartitions, 1) 86 | } 87 | 88 | func (p *partitionListImpl) remove(target partition) error { 89 | if p.size() <= 0 { 90 | return fmt.Errorf("empty partition") 91 | } 92 | 93 | // Iterate over itself from the head. 94 | var prev, next *partitionNode 95 | iterator := p.newIterator() 96 | for iterator.next() { 97 | current := iterator.currentNode() 98 | if !samePartitions(current.value(), target) { 99 | prev = current 100 | continue 101 | } 102 | 103 | // remove the current node. 
104 | 105 | iterator.next() 106 | next = iterator.currentNode() 107 | switch { 108 | case prev == nil: 109 | // removing the head node 110 | p.setHead(next) 111 | case next == nil: 112 | // removing the tail node 113 | prev.setNext(nil) 114 | p.setTail(prev) 115 | default: 116 | // removing the middle node 117 | prev.setNext(next) 118 | } 119 | atomic.AddInt64(&p.numPartitions, -1) 120 | 121 | if err := current.value().clean(); err != nil { 122 | return fmt.Errorf("failed to clean resources managed by partition to be removed: %w", err) 123 | } 124 | return nil 125 | } 126 | 127 | return fmt.Errorf("the given partition was not found") 128 | } 129 | 130 | func (p *partitionListImpl) swap(old, new partition) error { 131 | if p.size() <= 0 { 132 | return fmt.Errorf("empty partition") 133 | } 134 | 135 | // Iterate over itself from the head. 136 | var prev, next *partitionNode 137 | iterator := p.newIterator() 138 | for iterator.next() { 139 | current := iterator.currentNode() 140 | if !samePartitions(current.value(), old) { 141 | prev = current 142 | continue 143 | } 144 | 145 | // swap the current node. 
146 | 147 | newNode := &partitionNode{ 148 | val: new, 149 | next: current.getNext(), 150 | } 151 | iterator.next() 152 | next = iterator.currentNode() 153 | switch { 154 | case prev == nil: 155 | // swapping the head node 156 | p.setHead(newNode) 157 | case next == nil: 158 | // swapping the tail node 159 | prev.setNext(newNode) 160 | p.setTail(newNode) 161 | default: 162 | // swapping the middle node 163 | prev.setNext(newNode) 164 | } 165 | return nil 166 | } 167 | 168 | return fmt.Errorf("the given partition was not found") 169 | } 170 | 171 | func samePartitions(x, y partition) bool { 172 | return x.minTimestamp() == y.minTimestamp() 173 | } 174 | 175 | func (p *partitionListImpl) size() int { 176 | return int(atomic.LoadInt64(&p.numPartitions)) 177 | } 178 | 179 | func (p *partitionListImpl) newIterator() partitionIterator { 180 | p.mu.RLock() 181 | head := p.head 182 | p.mu.RUnlock() 183 | // Put a dummy node so that it positions the head on the first next() call. 184 | dummy := &partitionNode{ 185 | next: head, 186 | } 187 | return &partitionIteratorImpl{ 188 | current: dummy, 189 | } 190 | } 191 | 192 | func (p *partitionListImpl) setHead(node *partitionNode) { 193 | p.mu.Lock() 194 | defer p.mu.Unlock() 195 | p.head = node 196 | } 197 | 198 | func (p *partitionListImpl) setTail(node *partitionNode) { 199 | p.mu.Lock() 200 | defer p.mu.Unlock() 201 | p.tail = node 202 | } 203 | 204 | func (p *partitionListImpl) String() string { 205 | b := &strings.Builder{} 206 | iterator := p.newIterator() 207 | for iterator.next() { 208 | p := iterator.value() 209 | if _, ok := p.(*memoryPartition); ok { 210 | b.WriteString("[Memory Partition]") 211 | } else if _, ok := p.(*diskPartition); ok { 212 | b.WriteString("[Disk Partition]") 213 | } else { 214 | b.WriteString("[Unknown Partition]") 215 | } 216 | b.WriteString("->") 217 | } 218 | return strings.TrimSuffix(b.String(), "->") 219 | } 220 | 221 | // partitionNode wraps a partition to hold the pointer to the next 
one. 222 | type partitionNode struct { 223 | // val is immutable 224 | val partition 225 | next *partitionNode 226 | mu sync.RWMutex 227 | } 228 | 229 | // value gives back the actual partition of the node. 230 | func (p *partitionNode) value() partition { 231 | return p.val 232 | } 233 | 234 | func (p *partitionNode) setNext(node *partitionNode) { 235 | p.mu.Lock() 236 | defer p.mu.Unlock() 237 | p.next = node 238 | } 239 | 240 | func (p *partitionNode) getNext() *partitionNode { 241 | p.mu.RLock() 242 | defer p.mu.RUnlock() 243 | return p.next 244 | } 245 | 246 | type partitionIteratorImpl struct { 247 | current *partitionNode 248 | } 249 | 250 | func (i *partitionIteratorImpl) next() bool { 251 | if i.current == nil { 252 | return false 253 | } 254 | next := i.current.getNext() 255 | i.current = next 256 | return i.current != nil 257 | } 258 | 259 | func (i *partitionIteratorImpl) value() partition { 260 | if i.current == nil { 261 | return nil 262 | } 263 | return i.current.value() 264 | } 265 | 266 | func (i *partitionIteratorImpl) currentNode() *partitionNode { 267 | return i.current 268 | } 269 | -------------------------------------------------------------------------------- /partition_list_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_partitionList_Remove(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | partitionList partitionListImpl 13 | target partition 14 | wantErr bool 15 | wantPartitionList partitionListImpl 16 | }{ 17 | { 18 | name: "empty partition", 19 | partitionList: partitionListImpl{}, 20 | wantErr: true, 21 | }, 22 | { 23 | name: "remove the head node", 24 | partitionList: func() partitionListImpl { 25 | second := &partitionNode{ 26 | val: &fakePartition{ 27 | minT: 2, 28 | }, 29 | } 30 | 31 | first := &partitionNode{ 32 | val: &fakePartition{ 33 | minT: 1, 34 | }, 
35 | next: second, 36 | } 37 | return partitionListImpl{ 38 | numPartitions: 2, 39 | head: first, 40 | tail: second, 41 | } 42 | }(), 43 | target: &fakePartition{ 44 | minT: 1, 45 | }, 46 | wantPartitionList: partitionListImpl{ 47 | numPartitions: 1, 48 | head: &partitionNode{ 49 | val: &fakePartition{ 50 | minT: 2, 51 | }, 52 | }, 53 | tail: &partitionNode{ 54 | val: &fakePartition{ 55 | minT: 2, 56 | }, 57 | }, 58 | }, 59 | }, 60 | { 61 | name: "remove the tail node", 62 | partitionList: func() partitionListImpl { 63 | second := &partitionNode{ 64 | val: &fakePartition{ 65 | minT: 2, 66 | }, 67 | } 68 | 69 | first := &partitionNode{ 70 | val: &fakePartition{ 71 | minT: 1, 72 | }, 73 | next: second, 74 | } 75 | return partitionListImpl{ 76 | numPartitions: 2, 77 | head: first, 78 | tail: second, 79 | } 80 | }(), 81 | target: &fakePartition{ 82 | minT: 2, 83 | }, 84 | wantPartitionList: partitionListImpl{ 85 | numPartitions: 1, 86 | head: &partitionNode{ 87 | val: &fakePartition{ 88 | minT: 1, 89 | }, 90 | }, 91 | tail: &partitionNode{ 92 | val: &fakePartition{ 93 | minT: 1, 94 | }, 95 | }, 96 | }, 97 | }, 98 | { 99 | name: "remove the middle node", 100 | partitionList: func() partitionListImpl { 101 | third := &partitionNode{ 102 | val: &fakePartition{ 103 | minT: 3, 104 | }, 105 | } 106 | second := &partitionNode{ 107 | val: &fakePartition{ 108 | minT: 2, 109 | }, 110 | next: third, 111 | } 112 | first := &partitionNode{ 113 | val: &fakePartition{ 114 | minT: 1, 115 | }, 116 | next: second, 117 | } 118 | return partitionListImpl{ 119 | numPartitions: 3, 120 | head: first, 121 | tail: third, 122 | } 123 | }(), 124 | target: &fakePartition{ 125 | minT: 2, 126 | }, 127 | wantPartitionList: partitionListImpl{ 128 | numPartitions: 2, 129 | head: &partitionNode{ 130 | val: &fakePartition{ 131 | minT: 1, 132 | }, 133 | next: &partitionNode{ 134 | val: &fakePartition{ 135 | minT: 3, 136 | }, 137 | }, 138 | }, 139 | tail: &partitionNode{ 140 | val: &fakePartition{ 141 | 
minT: 3, 142 | }, 143 | }, 144 | }, 145 | }, 146 | { 147 | name: "given node not found", 148 | partitionList: func() partitionListImpl { 149 | second := &partitionNode{ 150 | val: &fakePartition{ 151 | minT: 2, 152 | }, 153 | } 154 | 155 | first := &partitionNode{ 156 | val: &fakePartition{ 157 | minT: 1, 158 | }, 159 | next: second, 160 | } 161 | return partitionListImpl{ 162 | numPartitions: 2, 163 | head: first, 164 | tail: second, 165 | } 166 | }(), 167 | target: &fakePartition{ 168 | minT: 3, 169 | }, 170 | wantPartitionList: func() partitionListImpl { 171 | second := &partitionNode{ 172 | val: &fakePartition{ 173 | minT: 2, 174 | }, 175 | } 176 | 177 | first := &partitionNode{ 178 | val: &fakePartition{ 179 | minT: 1, 180 | }, 181 | next: second, 182 | } 183 | return partitionListImpl{ 184 | numPartitions: 2, 185 | head: first, 186 | tail: second, 187 | } 188 | }(), 189 | wantErr: true, 190 | }, 191 | } 192 | for _, tt := range tests { 193 | t.Run(tt.name, func(t *testing.T) { 194 | err := tt.partitionList.remove(tt.target) 195 | assert.Equal(t, tt.wantErr, err != nil) 196 | assert.Equal(t, tt.wantPartitionList, tt.partitionList) 197 | }) 198 | } 199 | } 200 | 201 | func Test_partitionList_Swap(t *testing.T) { 202 | tests := []struct { 203 | name string 204 | partitionList partitionListImpl 205 | old partition 206 | new partition 207 | wantErr bool 208 | wantPartitionList partitionListImpl 209 | }{ 210 | { 211 | name: "empty partition", 212 | partitionList: partitionListImpl{}, 213 | wantErr: true, 214 | }, 215 | { 216 | name: "swap the head node", 217 | partitionList: func() partitionListImpl { 218 | second := &partitionNode{ 219 | val: &fakePartition{ 220 | minT: 2, 221 | }, 222 | } 223 | 224 | first := &partitionNode{ 225 | val: &fakePartition{ 226 | minT: 1, 227 | }, 228 | next: second, 229 | } 230 | return partitionListImpl{ 231 | numPartitions: 2, 232 | head: first, 233 | tail: second, 234 | } 235 | }(), 236 | old: &fakePartition{ 237 | minT: 1, 238 | 
}, 239 | new: &fakePartition{ 240 | minT: 100, 241 | }, 242 | wantPartitionList: partitionListImpl{ 243 | numPartitions: 2, 244 | head: &partitionNode{ 245 | val: &fakePartition{ 246 | minT: 100, 247 | }, 248 | next: &partitionNode{ 249 | val: &fakePartition{ 250 | minT: 2, 251 | }, 252 | }, 253 | }, 254 | tail: &partitionNode{ 255 | val: &fakePartition{ 256 | minT: 2, 257 | }, 258 | }, 259 | }, 260 | }, 261 | { 262 | name: "swap the tail node", 263 | partitionList: func() partitionListImpl { 264 | second := &partitionNode{ 265 | val: &fakePartition{ 266 | minT: 2, 267 | }, 268 | } 269 | 270 | first := &partitionNode{ 271 | val: &fakePartition{ 272 | minT: 1, 273 | }, 274 | next: second, 275 | } 276 | return partitionListImpl{ 277 | numPartitions: 2, 278 | head: first, 279 | tail: second, 280 | } 281 | }(), 282 | old: &fakePartition{ 283 | minT: 2, 284 | }, 285 | new: &fakePartition{ 286 | minT: 100, 287 | }, 288 | wantPartitionList: partitionListImpl{ 289 | numPartitions: 2, 290 | head: &partitionNode{ 291 | val: &fakePartition{ 292 | minT: 1, 293 | }, 294 | next: &partitionNode{ 295 | val: &fakePartition{ 296 | minT: 100, 297 | }, 298 | }, 299 | }, 300 | tail: &partitionNode{ 301 | val: &fakePartition{ 302 | minT: 100, 303 | }, 304 | }, 305 | }, 306 | }, 307 | { 308 | name: "swap the middle node", 309 | partitionList: func() partitionListImpl { 310 | third := &partitionNode{ 311 | val: &fakePartition{ 312 | minT: 3, 313 | }, 314 | } 315 | second := &partitionNode{ 316 | val: &fakePartition{ 317 | minT: 2, 318 | }, 319 | next: third, 320 | } 321 | 322 | first := &partitionNode{ 323 | val: &fakePartition{ 324 | minT: 1, 325 | }, 326 | next: second, 327 | } 328 | return partitionListImpl{ 329 | numPartitions: 3, 330 | head: first, 331 | tail: third, 332 | } 333 | }(), 334 | old: &fakePartition{ 335 | minT: 2, 336 | }, 337 | new: &fakePartition{ 338 | minT: 100, 339 | }, 340 | wantPartitionList: partitionListImpl{ 341 | numPartitions: 3, 342 | head: &partitionNode{ 
343 | val: &fakePartition{ 344 | minT: 1, 345 | }, 346 | next: &partitionNode{ 347 | val: &fakePartition{ 348 | minT: 100, 349 | }, 350 | next: &partitionNode{ 351 | val: &fakePartition{ 352 | minT: 3, 353 | }, 354 | }, 355 | }, 356 | }, 357 | tail: &partitionNode{ 358 | val: &fakePartition{ 359 | minT: 3, 360 | }, 361 | }, 362 | }, 363 | }, 364 | { 365 | name: "given node not found", 366 | partitionList: func() partitionListImpl { 367 | second := &partitionNode{ 368 | val: &fakePartition{ 369 | minT: 2, 370 | }, 371 | } 372 | 373 | first := &partitionNode{ 374 | val: &fakePartition{ 375 | minT: 1, 376 | }, 377 | next: second, 378 | } 379 | return partitionListImpl{ 380 | numPartitions: 2, 381 | head: first, 382 | tail: second, 383 | } 384 | }(), 385 | old: &fakePartition{ 386 | minT: 100, 387 | }, 388 | wantPartitionList: partitionListImpl{ 389 | numPartitions: 2, 390 | head: &partitionNode{ 391 | val: &fakePartition{ 392 | minT: 1, 393 | }, 394 | next: &partitionNode{ 395 | val: &fakePartition{ 396 | minT: 2, 397 | }, 398 | }, 399 | }, 400 | tail: &partitionNode{ 401 | val: &fakePartition{ 402 | minT: 2, 403 | }, 404 | }, 405 | }, 406 | wantErr: true, 407 | }, 408 | } 409 | for _, tt := range tests { 410 | t.Run(tt.name, func(t *testing.T) { 411 | err := tt.partitionList.swap(tt.old, tt.new) 412 | assert.Equal(t, tt.wantErr, err != nil) 413 | assert.Equal(t, tt.wantPartitionList, tt.partitionList) 414 | }) 415 | } 416 | } 417 | -------------------------------------------------------------------------------- /storage.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/fs" 9 | "os" 10 | "path/filepath" 11 | "regexp" 12 | "sort" 13 | "sync" 14 | "time" 15 | 16 | "github.com/nakabonne/tstorage/internal/cgroup" 17 | "github.com/nakabonne/tstorage/internal/timerpool" 18 | ) 19 | 20 | var ( 21 | ErrNoDataPoints = errors.New("no data points 
found")

	// Limit the concurrency for data ingestion to GOMAXPROCS, since this operation
	// is CPU bound, so there is no sense in running more than GOMAXPROCS concurrent
	// goroutines on data ingestion path.
	defaultWorkersLimit = cgroup.AvailableCPUs()

	// Matches on-disk partition directory names, which flushPartitions creates
	// as "p-<minTimestamp>-<maxTimestamp>".
	partitionDirRegex = regexp.MustCompile(`^p-.+`)
)

// TimestampPrecision represents precision of timestamps. See WithTimestampPrecision.
type TimestampPrecision string

const (
	Nanoseconds  TimestampPrecision = "ns"
	Microseconds TimestampPrecision = "us"
	Milliseconds TimestampPrecision = "ms"
	Seconds      TimestampPrecision = "s"

	defaultPartitionDuration  = 1 * time.Hour
	defaultRetention          = 336 * time.Hour
	defaultTimestampPrecision = Nanoseconds
	defaultWriteTimeout       = 30 * time.Second
	defaultWALBufferedSize    = 4096

	// Number of partitions at the head of the list that stay writable,
	// so slightly out-of-order points can still land in the previous partition.
	writablePartitionsNum = 2
	// How often the background goroutine scans for expired partitions.
	checkExpiredInterval = time.Hour

	// Sub-directory of the data path that holds WAL segment files.
	walDirName = "wal"
)

// Storage provides goroutine safe capabilities of insertion into and retrieval from the time-series storage.
type Storage interface {
	Reader
	// InsertRows ingests the given rows to the time-series storage.
	// If the timestamp is empty, it uses the machine's local timestamp in UTC.
	// The precision of timestamps is nanoseconds by default. It can be changed using WithTimestampPrecision.
	InsertRows(rows []Row) error
	// Close gracefully shuts down by flushing any unwritten data to the underlying disk partition.
	Close() error
}

// Reader provides reading access to time series data.
type Reader interface {
	// Select gives back a list of data points that matches a set of the given metric and
	// labels within the given start-end range. Keep in mind that start is inclusive, end is exclusive,
	// and both must be Unix timestamp. ErrNoDataPoints will be returned if no data points found.
	Select(metric string, labels []Label, start, end int64) (points []*DataPoint, err error)
}

// Row includes a data point along with properties to identify a kind of metrics.
type Row struct {
	// The unique name of metric.
	// This field must be set.
	Metric string
	// An optional key-value properties to further detailed identification.
	Labels []Label
	// This field must be set.
	DataPoint
}

// DataPoint represents a data point, the smallest unit of time series data.
type DataPoint struct {
	// The actual value. This field must be set.
	Value float64
	// Unix timestamp.
	Timestamp int64
}

// Option is an optional setting for NewStorage.
type Option func(*storage)

// WithDataPath specifies the path to directory that stores time-series data.
// Use this to make time-series data persistent on disk.
//
// Defaults to empty string which means no data will get persisted.
func WithDataPath(dataPath string) Option {
	return func(s *storage) {
		s.dataPath = dataPath
	}
}

// WithPartitionDuration specifies the timestamp range of partitions.
// Once it exceeds the given time range, the new partition gets inserted.
//
// A partition is a chunk of time-series data with the timestamp range.
// It acts as a fully independent database containing all data
// points for its time range.
//
// Defaults to 1h.
func WithPartitionDuration(duration time.Duration) Option {
	return func(s *storage) {
		s.partitionDuration = duration
	}
}

// WithRetention specifies when to remove old data.
// Data points will get automatically removed from the disk after a
// specified period of time after a disk partition was created.
// Defaults to 14d.
func WithRetention(retention time.Duration) Option {
	return func(s *storage) {
		s.retention = retention
	}
}

// WithTimestampPrecision specifies the precision of timestamps to be used by all operations.
//
// Defaults to Nanoseconds.
func WithTimestampPrecision(precision TimestampPrecision) Option {
	return func(s *storage) {
		s.timestampPrecision = precision
	}
}

// WithWriteTimeout specifies the timeout to wait when workers are busy.
//
// The storage limits the number of concurrent goroutines to prevent from out of memory
// errors and CPU trashing even if too many goroutines attempt to write.
//
// Defaults to 30s.
func WithWriteTimeout(timeout time.Duration) Option {
	return func(s *storage) {
		s.writeTimeout = timeout
	}
}

// WithLogger specifies the logger to emit verbose output.
//
// Defaults to a logger implementation that does nothing.
func WithLogger(logger Logger) Option {
	return func(s *storage) {
		s.logger = logger
	}
}

// WithWAL specifies the buffered byte size before flushing a WAL file.
// The larger the size, the less frequently the file is written and more write performance at the expense of durability.
// Giving 0 means it writes to a file whenever data point comes in.
// Giving -1 disables using WAL.
//
// Defaults to 4096.
func WithWALBufferedSize(size int) Option {
	return func(s *storage) {
		s.walBufferedSize = size
	}
}

// NewStorage gives back a new storage, which stores time-series data in the process memory by default.
//
// Give the WithDataPath option for running as a on-disk storage. Specify a directory with data already exists,
// then it will be read as the initial data.
173 | func NewStorage(opts ...Option) (Storage, error) { 174 | s := &storage{ 175 | partitionList: newPartitionList(), 176 | workersLimitCh: make(chan struct{}, defaultWorkersLimit), 177 | partitionDuration: defaultPartitionDuration, 178 | retention: defaultRetention, 179 | timestampPrecision: defaultTimestampPrecision, 180 | writeTimeout: defaultWriteTimeout, 181 | walBufferedSize: defaultWALBufferedSize, 182 | wal: &nopWAL{}, 183 | logger: &nopLogger{}, 184 | doneCh: make(chan struct{}, 0), 185 | } 186 | for _, opt := range opts { 187 | opt(s) 188 | } 189 | 190 | if s.inMemoryMode() { 191 | s.newPartition(nil, false) 192 | return s, nil 193 | } 194 | 195 | if err := os.MkdirAll(s.dataPath, fs.ModePerm); err != nil { 196 | return nil, fmt.Errorf("failed to make data directory %s: %w", s.dataPath, err) 197 | } 198 | 199 | walDir := filepath.Join(s.dataPath, walDirName) 200 | if s.walBufferedSize >= 0 { 201 | wal, err := newDiskWAL(walDir, s.walBufferedSize) 202 | if err != nil { 203 | return nil, err 204 | } 205 | s.wal = wal 206 | } 207 | 208 | // Read existent partitions from the disk. 
209 | dirs, err := os.ReadDir(s.dataPath) 210 | if err != nil { 211 | return nil, fmt.Errorf("failed to open data directory: %w", err) 212 | } 213 | if len(dirs) == 0 { 214 | s.newPartition(nil, false) 215 | return s, nil 216 | } 217 | isPartitionDir := func(f fs.DirEntry) bool { 218 | return f.IsDir() && partitionDirRegex.MatchString(f.Name()) 219 | } 220 | partitions := make([]partition, 0, len(dirs)) 221 | for _, e := range dirs { 222 | if !isPartitionDir(e) { 223 | continue 224 | } 225 | path := filepath.Join(s.dataPath, e.Name()) 226 | part, err := openDiskPartition(path, s.retention) 227 | if errors.Is(err, ErrNoDataPoints) { 228 | continue 229 | } 230 | if errors.Is(err, errInvalidPartition) { 231 | // It should be recovered by WAL 232 | continue 233 | } 234 | if err != nil { 235 | return nil, fmt.Errorf("failed to open disk partition for %s: %w", path, err) 236 | } 237 | partitions = append(partitions, part) 238 | } 239 | sort.Slice(partitions, func(i, j int) bool { 240 | return partitions[i].minTimestamp() < partitions[j].minTimestamp() 241 | }) 242 | for _, p := range partitions { 243 | s.newPartition(p, false) 244 | } 245 | // Start WAL recovery if there is. 246 | if err := s.recoverWAL(walDir); err != nil { 247 | return nil, fmt.Errorf("failed to recover WAL: %w", err) 248 | } 249 | s.newPartition(nil, false) 250 | 251 | // periodically check and permanently remove expired partitions. 
252 | go func() { 253 | ticker := time.NewTicker(checkExpiredInterval) 254 | defer ticker.Stop() 255 | for { 256 | select { 257 | case <-s.doneCh: 258 | return 259 | case <-ticker.C: 260 | err := s.removeExpiredPartitions() 261 | if err != nil { 262 | s.logger.Printf("%v\n", err) 263 | } 264 | } 265 | } 266 | }() 267 | return s, nil 268 | } 269 | 270 | type storage struct { 271 | partitionList partitionList 272 | 273 | walBufferedSize int 274 | wal wal 275 | partitionDuration time.Duration 276 | retention time.Duration 277 | timestampPrecision TimestampPrecision 278 | dataPath string 279 | writeTimeout time.Duration 280 | 281 | logger Logger 282 | workersLimitCh chan struct{} 283 | // wg must be incremented to guarantee all writes are done gracefully. 284 | wg sync.WaitGroup 285 | 286 | doneCh chan struct{} 287 | } 288 | 289 | func (s *storage) InsertRows(rows []Row) error { 290 | s.wg.Add(1) 291 | defer s.wg.Done() 292 | 293 | insert := func() error { 294 | defer func() { <-s.workersLimitCh }() 295 | if err := s.ensureActiveHead(); err != nil { 296 | return err 297 | } 298 | iterator := s.partitionList.newIterator() 299 | n := s.partitionList.size() 300 | rowsToInsert := rows 301 | // Starting at the head partition, try to insert rows, and loop to insert outdated rows 302 | // into older partitions. Any rows more than `writablePartitionsNum` partitions out 303 | // of date are dropped. 304 | for i := 0; i < n && i < writablePartitionsNum; i++ { 305 | if len(rowsToInsert) == 0 { 306 | break 307 | } 308 | if !iterator.next() { 309 | break 310 | } 311 | outdatedRows, err := iterator.value().insertRows(rowsToInsert) 312 | if err != nil { 313 | return fmt.Errorf("failed to insert rows: %w", err) 314 | } 315 | rowsToInsert = outdatedRows 316 | } 317 | return nil 318 | } 319 | 320 | // Limit the number of concurrent goroutines to prevent from out of memory 321 | // errors and CPU trashing even if too many goroutines attempt to write. 
322 | select { 323 | case s.workersLimitCh <- struct{}{}: 324 | return insert() 325 | default: 326 | } 327 | 328 | // Seems like all workers are busy; wait for up to writeTimeout 329 | 330 | t := timerpool.Get(s.writeTimeout) 331 | select { 332 | case s.workersLimitCh <- struct{}{}: 333 | timerpool.Put(t) 334 | return insert() 335 | case <-t.C: 336 | timerpool.Put(t) 337 | return fmt.Errorf("failed to write a data point in %s, since it is overloaded with %d concurrent writers", 338 | s.writeTimeout, defaultWorkersLimit) 339 | } 340 | } 341 | 342 | // ensureActiveHead ensures the head of partitionList is an active partition. 343 | // If none, it creates a new one. 344 | func (s *storage) ensureActiveHead() error { 345 | head := s.partitionList.getHead() 346 | if head != nil && head.active() { 347 | return nil 348 | } 349 | 350 | // All partitions seems to be inactive so add a new partition to the list. 351 | if err := s.newPartition(nil, true); err != nil { 352 | return err 353 | } 354 | go func() { 355 | if err := s.flushPartitions(); err != nil { 356 | s.logger.Printf("failed to flush in-memory partitions: %v", err) 357 | } 358 | }() 359 | return nil 360 | } 361 | 362 | func (s *storage) Select(metric string, labels []Label, start, end int64) ([]*DataPoint, error) { 363 | if metric == "" { 364 | return nil, fmt.Errorf("metric must be set") 365 | } 366 | if start >= end { 367 | return nil, fmt.Errorf("the given start is greater than end") 368 | } 369 | points := make([]*DataPoint, 0) 370 | 371 | // Iterate over all partitions from the newest one. 372 | iterator := s.partitionList.newIterator() 373 | for iterator.next() { 374 | part := iterator.value() 375 | if part == nil { 376 | return nil, fmt.Errorf("unexpected empty partition found") 377 | } 378 | if part.minTimestamp() == 0 { 379 | // Skip the partition that has no points. 
380 | continue 381 | } 382 | if part.maxTimestamp() < start { 383 | // No need to keep going anymore 384 | break 385 | } 386 | if part.minTimestamp() > end { 387 | continue 388 | } 389 | ps, err := part.selectDataPoints(metric, labels, start, end) 390 | if errors.Is(err, ErrNoDataPoints) { 391 | continue 392 | } 393 | if err != nil { 394 | return nil, fmt.Errorf("failed to select data points: %w", err) 395 | } 396 | // in order to keep the order in ascending. 397 | points = append(ps, points...) 398 | } 399 | if len(points) == 0 { 400 | return nil, ErrNoDataPoints 401 | } 402 | return points, nil 403 | } 404 | 405 | func (s *storage) Close() error { 406 | s.wg.Wait() 407 | close(s.doneCh) 408 | if err := s.wal.flush(); err != nil { 409 | return fmt.Errorf("failed to flush buffered WAL: %w", err) 410 | } 411 | 412 | // TODO: Prevent from new goroutines calling InsertRows(), for graceful shutdown. 413 | 414 | // Make all writable partitions read-only by inserting as same number of those. 415 | for i := 0; i < writablePartitionsNum; i++ { 416 | if err := s.newPartition(nil, true); err != nil { 417 | return err 418 | } 419 | } 420 | if err := s.flushPartitions(); err != nil { 421 | return fmt.Errorf("failed to close storage: %w", err) 422 | } 423 | if err := s.removeExpiredPartitions(); err != nil { 424 | return fmt.Errorf("failed to remove expired partitions: %w", err) 425 | } 426 | // All partitions have been flushed, so WAL isn't needed anymore. 
	if err := s.wal.removeAll(); err != nil {
		return fmt.Errorf("failed to remove WAL: %w", err)
	}
	return nil
}

// newPartition prepends the given partition to the list; a nil p means a fresh
// in-memory partition. When punctuateWal is true, the WAL is punctuated so the
// new partition writes into a fresh segment; that is the only error source.
func (s *storage) newPartition(p partition, punctuateWal bool) error {
	if p == nil {
		p = newMemoryPartition(s.wal, s.partitionDuration, s.timestampPrecision)
	}
	s.partitionList.insert(p)
	if punctuateWal {
		return s.wal.punctuate()
	}
	return nil
}

// flushPartitions persists all in-memory partitions ready to persisted.
// For the in-memory mode, just removes it from the partition list.
func (s *storage) flushPartitions() error {
	// Keep the first two partitions as is even if they are inactive,
	// to accept out-of-order data points.
	i := 0
	iterator := s.partitionList.newIterator()
	for iterator.next() {
		if i < writablePartitionsNum {
			i++
			continue
		}
		part := iterator.value()
		if part == nil {
			return fmt.Errorf("unexpected empty partition found")
		}
		// Disk partitions are already persisted; only memory partitions need work.
		memPart, ok := part.(*memoryPartition)
		if !ok {
			continue
		}

		if s.inMemoryMode() {
			// Nothing to persist: dropping the partition is the whole flush.
			if err := s.partitionList.remove(part); err != nil {
				return fmt.Errorf("failed to remove partition: %w", err)
			}
			continue
		}

		// Start swapping in-memory partition for disk one.
		// The disk partition will place at where in-memory one existed.

		dir := filepath.Join(s.dataPath, fmt.Sprintf("p-%d-%d", memPart.minTimestamp(), memPart.maxTimestamp()))
		if err := s.flush(dir, memPart); err != nil {
			return fmt.Errorf("failed to compact memory partition into %s: %w", dir, err)
		}
		newPart, err := openDiskPartition(dir, s.retention)
		if errors.Is(err, ErrNoDataPoints) {
			// The partition held nothing worth persisting; just drop it.
			if err := s.partitionList.remove(part); err != nil {
				return fmt.Errorf("failed to remove partition: %w", err)
			}
			continue
		}
		if err != nil {
			return fmt.Errorf("failed to generate disk partition for %s: %w", dir, err)
		}
		if err := s.partitionList.swap(part, newPart); err != nil {
			return fmt.Errorf("failed to swap partitions: %w", err)
		}

		// The flushed partition's WAL segment is no longer needed.
		// NOTE(review): this assumes the oldest WAL segment corresponds to the
		// partition just flushed — confirm against the WAL punctuation order.
		if err := s.wal.removeOldest(); err != nil {
			return fmt.Errorf("failed to remove oldest WAL segment: %w", err)
		}
	}
	return nil
}

// flush compacts the data points in the given partition and flushes them to the given directory.
501 | func (s *storage) flush(dirPath string, m *memoryPartition) error { 502 | if dirPath == "" { 503 | return fmt.Errorf("dir path is required") 504 | } 505 | 506 | if err := os.MkdirAll(dirPath, fs.ModePerm); err != nil { 507 | return fmt.Errorf("failed to make directory %q: %w", dirPath, err) 508 | } 509 | 510 | f, err := os.Create(filepath.Join(dirPath, dataFileName)) 511 | if err != nil { 512 | return fmt.Errorf("failed to create file %q: %w", dirPath, err) 513 | } 514 | defer f.Close() 515 | encoder := newSeriesEncoder(f) 516 | 517 | metrics := map[string]diskMetric{} 518 | m.metrics.Range(func(key, value interface{}) bool { 519 | mt, ok := value.(*memoryMetric) 520 | if !ok { 521 | s.logger.Printf("unknown value found\n") 522 | return false 523 | } 524 | offset, err := f.Seek(0, io.SeekCurrent) 525 | if err != nil { 526 | s.logger.Printf("failed to set file offset of metric %q: %v\n", mt.name, err) 527 | return false 528 | } 529 | 530 | if err := mt.encodeAllPoints(encoder); err != nil { 531 | s.logger.Printf("failed to encode a data point that metric is %q: %v\n", mt.name, err) 532 | return false 533 | } 534 | 535 | if err := encoder.flush(); err != nil { 536 | s.logger.Printf("failed to flush data points that metric is %q: %v\n", mt.name, err) 537 | return false 538 | } 539 | 540 | totalNumPoints := mt.size + int64(len(mt.outOfOrderPoints)) 541 | metrics[mt.name] = diskMetric{ 542 | Name: mt.name, 543 | Offset: offset, 544 | MinTimestamp: mt.minTimestamp, 545 | MaxTimestamp: mt.maxTimestamp, 546 | NumDataPoints: totalNumPoints, 547 | } 548 | return true 549 | }) 550 | 551 | b, err := json.Marshal(&meta{ 552 | MinTimestamp: m.minTimestamp(), 553 | MaxTimestamp: m.maxTimestamp(), 554 | NumDataPoints: m.size(), 555 | Metrics: metrics, 556 | CreatedAt: time.Now(), 557 | }) 558 | if err != nil { 559 | return fmt.Errorf("failed to encode metadata: %w", err) 560 | } 561 | 562 | // It should write the meta file at last because what valid meta file exists proves 
the disk partition is valid. 563 | metaPath := filepath.Join(dirPath, metaFileName) 564 | if err := os.WriteFile(metaPath, b, fs.ModePerm); err != nil { 565 | return fmt.Errorf("failed to write metadata to %s: %w", metaPath, err) 566 | } 567 | return nil 568 | } 569 | 570 | func (s *storage) removeExpiredPartitions() error { 571 | expiredList := make([]partition, 0) 572 | iterator := s.partitionList.newIterator() 573 | for iterator.next() { 574 | part := iterator.value() 575 | if part == nil { 576 | return fmt.Errorf("unexpected nil partition found") 577 | } 578 | if part.expired() { 579 | expiredList = append(expiredList, part) 580 | } 581 | } 582 | 583 | for i := range expiredList { 584 | if err := s.partitionList.remove(expiredList[i]); err != nil { 585 | return fmt.Errorf("failed to remove expired partition") 586 | } 587 | } 588 | return nil 589 | } 590 | 591 | // recoverWAL inserts all records within the given wal, and then removes all WAL segment files. 592 | func (s *storage) recoverWAL(walDir string) error { 593 | reader, err := newDiskWALReader(walDir) 594 | if errors.Is(err, os.ErrNotExist) { 595 | return nil 596 | } 597 | if err != nil { 598 | return err 599 | } 600 | 601 | if err := reader.readAll(); err != nil { 602 | return fmt.Errorf("failed to read WAL: %w", err) 603 | } 604 | 605 | if len(reader.rowsToInsert) == 0 { 606 | return nil 607 | } 608 | if err := s.InsertRows(reader.rowsToInsert); err != nil { 609 | return fmt.Errorf("failed to insert rows recovered from WAL: %w", err) 610 | } 611 | return s.wal.refresh() 612 | } 613 | 614 | func (s *storage) inMemoryMode() bool { 615 | return s.dataPath == "" 616 | } 617 | -------------------------------------------------------------------------------- /storage_benchmark_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func BenchmarkStorage_InsertRows(b 
*testing.B) { 10 | storage, err := NewStorage() 11 | require.NoError(b, err) 12 | b.ResetTimer() 13 | for i := 1; i < b.N; i++ { 14 | storage.InsertRows([]Row{ 15 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: int64(i), Value: 0.1}}, 16 | }) 17 | } 18 | } 19 | 20 | // Select data points among a thousand data in memory 21 | func BenchmarkStorage_SelectAmongThousandPoints(b *testing.B) { 22 | storage, err := NewStorage() 23 | require.NoError(b, err) 24 | for i := 1; i < 1000; i++ { 25 | storage.InsertRows([]Row{ 26 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: int64(i), Value: 0.1}}, 27 | }) 28 | } 29 | b.ResetTimer() 30 | for i := 1; i < b.N; i++ { 31 | _, _ = storage.Select("metric1", nil, 10, 100) 32 | } 33 | } 34 | 35 | // Select data points among a million data in memory 36 | func BenchmarkStorage_SelectAmongMillionPoints(b *testing.B) { 37 | storage, err := NewStorage() 38 | require.NoError(b, err) 39 | for i := 1; i < 1000000; i++ { 40 | storage.InsertRows([]Row{ 41 | {Metric: "metric1", DataPoint: DataPoint{Timestamp: int64(i), Value: 0.1}}, 42 | }) 43 | } 44 | b.ResetTimer() 45 | for i := 1; i < b.N; i++ { 46 | _, _ = storage.Select("metric1", nil, 10, 100) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /storage_examples_test.go: -------------------------------------------------------------------------------- 1 | package tstorage_test 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "sync" 8 | "time" 9 | 10 | "github.com/nakabonne/tstorage" 11 | ) 12 | 13 | func ExampleNewStorage_withDataPath() { 14 | // It will make time-series data persistent under "./data". 
	storage, err := tstorage.NewStorage(
		tstorage.WithDataPath("./data"),
	)
	if err != nil {
		panic(err)
	}
	storage.Close()
}

func ExampleNewStorage_withPartitionDuration() {
	storage, err := tstorage.NewStorage(
		tstorage.WithPartitionDuration(30*time.Minute),
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}
	defer storage.Close()
}

func ExampleStorage_InsertRows() {
	storage, err := tstorage.NewStorage(
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := storage.Close(); err != nil {
			panic(err)
		}
	}()
	err = storage.InsertRows([]tstorage.Row{
		{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000000, Value: 0.1}},
	})
	if err != nil {
		panic(err)
	}
	// Select is end-exclusive, so the range must extend one past the target timestamp.
	points, err := storage.Select("metric1", nil, 1600000000, 1600000001)
	if err != nil {
		panic(err)
	}
	for _, p := range points {
		fmt.Printf("timestamp: %v, value: %v\n", p.Timestamp, p.Value)
	}
	// Output:
	// timestamp: 1600000000, value: 0.1
}

// simulates writing and reading in concurrent.
func ExampleStorage_InsertRows_Select_concurrent() {
	storage, err := tstorage.NewStorage(
		tstorage.WithPartitionDuration(5*time.Hour),
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := storage.Close(); err != nil {
			panic(err)
		}
	}()

	var wg sync.WaitGroup

	// Start write workers that insert 10000 times in concurrent, as fast as possible.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := int64(1600000000); i < 1600010000; i++ {
			wg.Add(1)
			go func(timestamp int64) {
				defer wg.Done()
				if err := storage.InsertRows([]tstorage.Row{
					{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: timestamp}},
				}); err != nil {
					panic(err)
				}
			}(i)
		}
	}()

	// Start read workers that read 100 times in concurrent, as fast as possible.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := 0; i < 100; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				points, err := storage.Select("metric1", nil, 1600000000, 1600010000)
				// Readers may race ahead of the writers, so "no data yet" is expected.
				if errors.Is(err, tstorage.ErrNoDataPoints) {
					return
				}
				if err != nil {
					panic(err)
				}
				for _, p := range points {
					_ = p.Timestamp
					_ = p.Value
				}
			}()
		}
	}()
	wg.Wait()
}

// NOTE(review): despite the name, this example configures a data path (a temp
// dir) — the reads happen before any flush, i.e. while data is still in memory.
func ExampleStorage_Select_from_memory() {
	tmpDir, err := os.MkdirTemp("", "tstorage-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(tmpDir)

	storage, err := tstorage.NewStorage(
		tstorage.WithDataPath(tmpDir),
		tstorage.WithPartitionDuration(2*time.Hour),
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := storage.Close(); err != nil {
			panic(err)
		}
	}()

	// Ingest data points of metric1
	for timestamp := int64(1600000000); timestamp < 1600000050; timestamp++ {
		err := storage.InsertRows([]tstorage.Row{
			{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: timestamp, Value: 0.1}},
		})
		if err != nil {
			panic(err)
		}
	}
	// Ingest data points of metric2
	for timestamp := int64(1600000050); timestamp < 1600000100; timestamp++ {
		err := storage.InsertRows([]tstorage.Row{
			{Metric: "metric2", DataPoint: tstorage.DataPoint{Timestamp: timestamp, Value: 0.2}},
		})
		if err != nil {
			panic(err)
		}
	}

	points, err := storage.Select("metric1", nil, 1600000000, 1600000050)
	if errors.Is(err, tstorage.ErrNoDataPoints) {
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Println("Data points of metric1:")
	for _, p := range points {
		fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value)
	}

	points2, err := storage.Select("metric2", nil, 1600000050, 1600000100)
	if errors.Is(err, tstorage.ErrNoDataPoints) {
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Println("Data points of metric2:")
	for _, p := range points2 {
		fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value)
	}
	// Output:
	//Data points of metric1:
	//Timestamp: 1600000000, Value: 0.1
	//Timestamp: 1600000001, Value: 0.1
	//Timestamp: 1600000002, Value: 0.1
	//Timestamp: 1600000003, Value: 0.1
	//Timestamp: 1600000004, Value: 0.1
	//Timestamp: 1600000005, Value: 0.1
	//Timestamp: 1600000006, Value: 0.1
	//Timestamp: 1600000007, Value: 0.1
	//Timestamp: 1600000008, Value: 0.1
	//Timestamp: 1600000009, Value: 0.1
	//Timestamp: 1600000010, Value: 0.1
	//Timestamp: 1600000011, Value: 0.1
	//Timestamp: 1600000012, Value: 0.1
	//Timestamp: 1600000013, Value: 0.1
	//Timestamp: 1600000014, Value: 0.1
	//Timestamp: 1600000015, Value: 0.1
	//Timestamp: 1600000016, Value: 0.1
	//Timestamp: 1600000017, Value: 0.1
	//Timestamp: 1600000018, Value: 0.1
	//Timestamp: 1600000019, Value: 0.1
	//Timestamp: 1600000020, Value: 0.1
	//Timestamp: 1600000021, Value: 0.1
	//Timestamp: 1600000022, Value: 0.1
	//Timestamp: 1600000023, Value: 0.1
	//Timestamp: 1600000024, Value: 0.1
	//Timestamp: 1600000025, Value: 0.1
	//Timestamp: 1600000026, Value: 0.1
	//Timestamp: 1600000027, Value: 0.1
	//Timestamp: 1600000028, Value: 0.1
	//Timestamp: 1600000029, Value: 0.1
	//Timestamp: 1600000030, Value: 0.1
	//Timestamp: 1600000031, Value: 0.1
	//Timestamp: 1600000032, Value: 0.1
	//Timestamp: 1600000033, Value: 0.1
	//Timestamp: 1600000034, Value: 0.1
	//Timestamp: 1600000035, Value: 0.1
	//Timestamp: 1600000036, Value: 0.1
	//Timestamp: 1600000037, Value: 0.1
	//Timestamp: 1600000038, Value: 0.1
	//Timestamp: 1600000039, Value: 0.1
	//Timestamp: 1600000040, Value: 0.1
	//Timestamp: 1600000041, Value: 0.1
	//Timestamp: 1600000042, Value: 0.1
	//Timestamp: 1600000043, Value: 0.1
	//Timestamp: 1600000044, Value: 0.1
	//Timestamp: 1600000045, Value: 0.1
	//Timestamp: 1600000046, Value: 0.1
	//Timestamp: 1600000047, Value: 0.1
	//Timestamp: 1600000048, Value: 0.1
	//Timestamp: 1600000049, Value: 0.1
	//Data points of metric2:
	//Timestamp: 1600000050, Value: 0.2
	//Timestamp: 1600000051, Value: 0.2
	//Timestamp: 1600000052, Value: 0.2
	//Timestamp: 1600000053, Value: 0.2
	//Timestamp: 1600000054, Value: 0.2
	//Timestamp: 1600000055, Value: 0.2
	//Timestamp: 1600000056, Value: 0.2
	//Timestamp: 1600000057, Value: 0.2
	//Timestamp: 1600000058, Value: 0.2
	//Timestamp: 1600000059, Value: 0.2
	//Timestamp: 1600000060, Value: 0.2
	//Timestamp: 1600000061, Value: 0.2
	//Timestamp: 1600000062, Value: 0.2
	//Timestamp: 1600000063, Value: 0.2
	//Timestamp: 1600000064, Value: 0.2
	//Timestamp: 1600000065, Value: 0.2
	//Timestamp: 1600000066, Value: 0.2
	//Timestamp: 1600000067, Value: 0.2
	//Timestamp: 1600000068, Value: 0.2
	//Timestamp: 1600000069, Value: 0.2
	//Timestamp: 1600000070, Value: 0.2
	//Timestamp: 1600000071, Value: 0.2
	//Timestamp: 1600000072, Value: 0.2
	//Timestamp: 1600000073, Value: 0.2
	//Timestamp: 1600000074, Value: 0.2
	//Timestamp: 1600000075, Value: 0.2
	//Timestamp: 1600000076, Value: 0.2
	//Timestamp: 1600000077, Value: 0.2
	//Timestamp: 1600000078, Value: 0.2
	//Timestamp: 1600000079, Value: 0.2
	//Timestamp: 1600000080, Value: 0.2
	//Timestamp: 1600000081, Value: 0.2
	//Timestamp: 1600000082, Value: 0.2
	//Timestamp: 1600000083, Value: 0.2
	//Timestamp: 1600000084, Value: 0.2
	//Timestamp: 1600000085, Value: 0.2
	//Timestamp: 1600000086, Value: 0.2
	//Timestamp: 1600000087, Value: 0.2
	//Timestamp: 1600000088, Value: 0.2
	//Timestamp: 1600000089, Value: 0.2
	//Timestamp: 1600000090, Value: 0.2
	//Timestamp: 1600000091, Value: 0.2
	//Timestamp: 1600000092, Value: 0.2
	//Timestamp: 1600000093, Value: 0.2
	//Timestamp: 1600000094, Value: 0.2
	//Timestamp: 1600000095, Value: 0.2
	//Timestamp: 1600000096, Value: 0.2
	//Timestamp: 1600000097, Value: 0.2
	//Timestamp: 1600000098, Value: 0.2
	//Timestamp: 1600000099, Value: 0.2
}

// simulates writing and reading on disk.
// ExampleStorage_Select_from_disk ingests 50 data points each for two metrics,
// flushes everything to disk by closing the storage, then re-opens the storage
// from the persisted data and reads both series back with Select.
func ExampleStorage_Select_from_disk() {
	tmpDir, err := os.MkdirTemp("", "tstorage-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(tmpDir)

	storage, err := tstorage.NewStorage(
		tstorage.WithDataPath(tmpDir),
		tstorage.WithPartitionDuration(100*time.Second),
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}

	// Ingest data points
	for timestamp := int64(1600000000); timestamp < 1600000050; timestamp++ {
		err := storage.InsertRows([]tstorage.Row{
			{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: timestamp, Value: 0.1}},
		})
		if err != nil {
			panic(err)
		}
		err = storage.InsertRows([]tstorage.Row{
			{Metric: "metric2", DataPoint: tstorage.DataPoint{Timestamp: timestamp, Value: 0.2}},
		})
		if err != nil {
			panic(err)
		}
	}
	// Flush all data points
	if err := storage.Close(); err != nil {
		panic(err)
	}

	// Re-open storage from the persisted data
	// NOTE(review): the partition duration here (10s) differs from the one used
	// at write time (100s) — presumably to show that reading persisted
	// partitions is unaffected by this option; confirm this is intentional.
	storage, err = tstorage.NewStorage(
		tstorage.WithDataPath(tmpDir),
		tstorage.WithPartitionDuration(10*time.Second),
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := storage.Close(); err != nil {
			panic(err)
		}
	}()

	points, err := storage.Select("metric1", nil, 1600000000, 1600000050)
	if errors.Is(err, tstorage.ErrNoDataPoints) {
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Println("Data points of metric1:")
	for _, p := range points {
		fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value)
	}

	points2, err := storage.Select("metric2", nil, 1600000000, 1600000050)
	if errors.Is(err, tstorage.ErrNoDataPoints) {
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Println("Data points of metric2:")
	for _, p := range points2 {
		fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value)
	}
	// Output:
	//Data points of metric1:
	//Timestamp: 1600000000, Value: 0.1
	//Timestamp: 1600000001, Value: 0.1
	//Timestamp: 1600000002, Value: 0.1
	//Timestamp: 1600000003, Value: 0.1
	//Timestamp: 1600000004, Value: 0.1
	//Timestamp: 1600000005, Value: 0.1
	//Timestamp: 1600000006, Value: 0.1
	//Timestamp: 1600000007, Value: 0.1
	//Timestamp: 1600000008, Value: 0.1
	//Timestamp: 1600000009, Value: 0.1
	//Timestamp: 1600000010, Value: 0.1
	//Timestamp: 1600000011, Value: 0.1
	//Timestamp: 1600000012, Value: 0.1
	//Timestamp: 1600000013, Value: 0.1
	//Timestamp: 1600000014, Value: 0.1
	//Timestamp: 1600000015, Value: 0.1
	//Timestamp: 1600000016, Value: 0.1
	//Timestamp: 1600000017, Value: 0.1
	//Timestamp: 1600000018, Value: 0.1
	//Timestamp: 1600000019, Value: 0.1
	//Timestamp: 1600000020, Value: 0.1
	//Timestamp: 1600000021, Value: 0.1
	//Timestamp: 1600000022, Value: 0.1
	//Timestamp: 1600000023, Value: 0.1
	//Timestamp: 1600000024, Value: 0.1
	//Timestamp: 1600000025, Value: 0.1
	//Timestamp: 1600000026, Value: 0.1
	//Timestamp: 1600000027, Value: 0.1
	//Timestamp: 1600000028, Value: 0.1
	//Timestamp: 1600000029, Value: 0.1
	//Timestamp: 1600000030, Value: 0.1
	//Timestamp: 1600000031, Value: 0.1
	//Timestamp: 1600000032, Value: 0.1
	//Timestamp: 1600000033, Value: 0.1
	//Timestamp: 1600000034, Value: 0.1
	//Timestamp: 1600000035, Value: 0.1
	//Timestamp: 1600000036, Value: 0.1
	//Timestamp: 1600000037, Value: 0.1
	//Timestamp: 1600000038, Value: 0.1
	//Timestamp: 1600000039, Value: 0.1
	//Timestamp: 1600000040, Value: 0.1
	//Timestamp: 1600000041, Value: 0.1
	//Timestamp: 1600000042, Value: 0.1
	//Timestamp: 1600000043, Value: 0.1
	//Timestamp: 1600000044, Value: 0.1
	//Timestamp: 1600000045, Value: 0.1
	//Timestamp: 1600000046, Value: 0.1
	//Timestamp: 1600000047, Value: 0.1
	//Timestamp: 1600000048, Value: 0.1
	//Timestamp: 1600000049, Value: 0.1
	//Data points of metric2:
	//Timestamp: 1600000000, Value: 0.2
	//Timestamp: 1600000001, Value: 0.2
	//Timestamp: 1600000002, Value: 0.2
	//Timestamp: 1600000003, Value: 0.2
	//Timestamp: 1600000004, Value: 0.2
	//Timestamp: 1600000005, Value: 0.2
	//Timestamp: 1600000006, Value: 0.2
	//Timestamp: 1600000007, Value: 0.2
	//Timestamp: 1600000008, Value: 0.2
	//Timestamp: 1600000009, Value: 0.2
	//Timestamp: 1600000010, Value: 0.2
	//Timestamp: 1600000011, Value: 0.2
	//Timestamp: 1600000012, Value: 0.2
	//Timestamp: 1600000013, Value: 0.2
	//Timestamp: 1600000014, Value: 0.2
	//Timestamp: 1600000015, Value: 0.2
	//Timestamp: 1600000016, Value: 0.2
	//Timestamp: 1600000017, Value: 0.2
	//Timestamp: 1600000018, Value: 0.2
	//Timestamp: 1600000019, Value: 0.2
	//Timestamp: 1600000020, Value: 0.2
	//Timestamp: 1600000021, Value: 0.2
	//Timestamp: 1600000022, Value: 0.2
	//Timestamp: 1600000023, Value: 0.2
	//Timestamp: 1600000024, Value: 0.2
	//Timestamp: 1600000025, Value: 0.2
	//Timestamp: 1600000026, Value: 0.2
	//Timestamp: 1600000027, Value: 0.2
	//Timestamp: 1600000028, Value: 0.2
	//Timestamp: 1600000029, Value: 0.2
	//Timestamp: 1600000030, Value: 0.2
	//Timestamp: 1600000031, Value: 0.2
	//Timestamp: 1600000032, Value: 0.2
	//Timestamp: 1600000033, Value: 0.2
	//Timestamp: 1600000034, Value: 0.2
	//Timestamp: 1600000035, Value: 0.2
	//Timestamp: 1600000036, Value: 0.2
	//Timestamp: 1600000037, Value: 0.2
	//Timestamp: 1600000038, Value: 0.2
	//Timestamp: 1600000039, Value: 0.2
	//Timestamp: 1600000040, Value: 0.2
	//Timestamp: 1600000041, Value: 0.2
	//Timestamp: 1600000042, Value: 0.2
	//Timestamp: 1600000043, Value: 0.2
	//Timestamp: 1600000044, Value: 0.2
	//Timestamp: 1600000045, Value: 0.2
	//Timestamp: 1600000046, Value: 0.2
	//Timestamp: 1600000047, Value: 0.2
	//Timestamp: 1600000048, Value: 0.2
	//Timestamp: 1600000049, Value: 0.2
}

// Out of order data points that are not yet flushed are in the buffer
// but do not appear in select.
//
// ExampleStorage_Select_from_memory_out_of_order inserts four points with one
// timestamp (1600000001) delivered out of order; the expected output shows it
// is absent from Select until a flush merges the out-of-order buffer.
func ExampleStorage_Select_from_memory_out_of_order() {
	storage, err := tstorage.NewStorage(
		tstorage.WithTimestampPrecision(tstorage.Seconds),
	)
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := storage.Close(); err != nil {
			panic(err)
		}
	}()
	err = storage.InsertRows([]tstorage.Row{
		{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000000, Value: 0.1}},
		{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000002, Value: 0.1}},
		{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000001, Value: 0.1}},
		{Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000003, Value: 0.1}},
	})
	if err != nil {
		panic(err)
	}
	points, err := storage.Select("metric1", nil, 1600000000, 1600000003)
	if err != nil {
		panic(err)
	}
	for _, p := range points {
		fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value)
	}

	// Out-of-order data points are ignored because they will get merged when flushing.

	// Output:
	// Timestamp: 1600000000, Value: 0.1
	// Timestamp: 1600000002, Value: 0.1
	// Timestamp: 1600000003, Value: 0.1
}

// Out of order data points that are flushed should appear in select.
511 | func ExampleStorage_Select_from_disk_out_of_order() { 512 | tmpDir, err := os.MkdirTemp("", "tstorage-example") 513 | if err != nil { 514 | panic(err) 515 | } 516 | defer os.RemoveAll(tmpDir) 517 | 518 | storage, err := tstorage.NewStorage( 519 | tstorage.WithDataPath(tmpDir), 520 | tstorage.WithPartitionDuration(100*time.Second), 521 | tstorage.WithTimestampPrecision(tstorage.Seconds), 522 | ) 523 | if err != nil { 524 | panic(err) 525 | } 526 | 527 | err = storage.InsertRows([]tstorage.Row{ 528 | {Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000000, Value: 0.1}}, 529 | {Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000002, Value: 0.1}}, 530 | {Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000001, Value: 0.1}}, 531 | {Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000003, Value: 0.1}}, 532 | }) 533 | if err != nil { 534 | panic(err) 535 | } 536 | 537 | // Flush all data points 538 | if err := storage.Close(); err != nil { 539 | panic(err) 540 | } 541 | 542 | // Re-open storage from the persisted data 543 | storage, err = tstorage.NewStorage( 544 | tstorage.WithDataPath(tmpDir), 545 | tstorage.WithPartitionDuration(100*time.Second), 546 | tstorage.WithTimestampPrecision(tstorage.Seconds), 547 | ) 548 | if err != nil { 549 | panic(err) 550 | } 551 | defer func() { 552 | if err := storage.Close(); err != nil { 553 | panic(err) 554 | } 555 | }() 556 | 557 | points, err := storage.Select("metric1", nil, 1600000000, 1600000004) 558 | if errors.Is(err, tstorage.ErrNoDataPoints) { 559 | return 560 | } 561 | if err != nil { 562 | panic(err) 563 | } 564 | for _, p := range points { 565 | fmt.Printf("timestamp: %v, value: %v\n", p.Timestamp, p.Value) 566 | } 567 | // Output: 568 | // timestamp: 1600000000, value: 0.1 569 | // timestamp: 1600000001, value: 0.1 570 | // timestamp: 1600000002, value: 0.1 571 | // timestamp: 1600000003, value: 0.1 572 | } 573 | 574 | // Simulates inserting an outdated 
row that forces inserting into a non-head partition. 575 | func ExampleStorage_InsertRows_outdated() { 576 | tmpDir, err := os.MkdirTemp("", "tstorage-example") 577 | if err != nil { 578 | panic(err) 579 | } 580 | defer os.RemoveAll(tmpDir) 581 | 582 | storage, err := tstorage.NewStorage( 583 | tstorage.WithDataPath(tmpDir), 584 | tstorage.WithTimestampPrecision(tstorage.Seconds), 585 | tstorage.WithPartitionDuration(3*time.Second), 586 | ) 587 | if err != nil { 588 | panic(err) 589 | } 590 | 591 | // Force two partitions with timestamps: (min: 1, max: 3), (min: 4, max: 5) 592 | err = storage.InsertRows([]tstorage.Row{ 593 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000001, Value: 0.1}, Metric: "metric1"}, 594 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000003, Value: 0.1}, Metric: "metric1"}, 595 | }) 596 | if err != nil { 597 | panic(err) 598 | } 599 | err = storage.InsertRows([]tstorage.Row{ 600 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000004, Value: 0.1}, Metric: "metric1"}, 601 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000005, Value: 0.1}, Metric: "metric1"}, 602 | }) 603 | if err != nil { 604 | panic(err) 605 | } 606 | 607 | // Insert a data point that doesn't belong to the head partition. This will be inserted 608 | // into the next partition out of order. 
609 | err = storage.InsertRows([]tstorage.Row{ 610 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000002, Value: 0.1}, Metric: "metric1"}, 611 | }) 612 | if err != nil { 613 | panic(err) 614 | } 615 | 616 | // Flush all data points 617 | if err := storage.Close(); err != nil { 618 | panic(err) 619 | } 620 | 621 | // Re-open storage from the persisted data 622 | storage, err = tstorage.NewStorage( 623 | tstorage.WithDataPath(tmpDir), 624 | tstorage.WithTimestampPrecision(tstorage.Seconds), 625 | tstorage.WithPartitionDuration(3*time.Second), 626 | ) 627 | if err != nil { 628 | panic(err) 629 | } 630 | defer func() { 631 | if err := storage.Close(); err != nil { 632 | panic(err) 633 | } 634 | }() 635 | 636 | points, err := storage.Select("metric1", nil, 1600000001, 1600000006) 637 | if err != nil { 638 | panic(err) 639 | } 640 | for _, p := range points { 641 | fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value) 642 | } 643 | // Output: 644 | // Timestamp: 1600000001, Value: 0.1 645 | // Timestamp: 1600000002, Value: 0.1 646 | // Timestamp: 1600000003, Value: 0.1 647 | // Timestamp: 1600000004, Value: 0.1 648 | // Timestamp: 1600000005, Value: 0.1 649 | } 650 | 651 | // Simulates inserting a row that's outside of the writable time window. 652 | func ExampleStorage_InsertRows_expired() { 653 | tmpDir, err := os.MkdirTemp("", "tstorage-example") 654 | if err != nil { 655 | panic(err) 656 | } 657 | defer os.RemoveAll(tmpDir) 658 | 659 | storage, err := tstorage.NewStorage( 660 | tstorage.WithDataPath(tmpDir), 661 | tstorage.WithTimestampPrecision(tstorage.Seconds), 662 | tstorage.WithPartitionDuration(3*time.Second), 663 | ) 664 | if err != nil { 665 | panic(err) 666 | } 667 | 668 | // Force three partitions with timestamps: (min: 1, max: 3), (min: 4, max: 6), (min: 7, max: 8). 669 | // Inserting the third partition will force the first one to be flushed to disk and become unwritable. 
670 | err = storage.InsertRows([]tstorage.Row{ 671 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000001, Value: 0.1}, Metric: "metric1"}, 672 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000003, Value: 0.1}, Metric: "metric1"}, 673 | }) 674 | if err != nil { 675 | panic(err) 676 | } 677 | err = storage.InsertRows([]tstorage.Row{ 678 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000004, Value: 0.1}, Metric: "metric1"}, 679 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000005, Value: 0.1}, Metric: "metric1"}, 680 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000006, Value: 0.1}, Metric: "metric1"}, 681 | }) 682 | if err != nil { 683 | panic(err) 684 | } 685 | err = storage.InsertRows([]tstorage.Row{ 686 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000007, Value: 0.1}, Metric: "metric1"}, 687 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000008, Value: 0.1}, Metric: "metric1"}, 688 | }) 689 | if err != nil { 690 | panic(err) 691 | } 692 | 693 | // Try to insert a data point into an already flushed partition. 
694 | err = storage.InsertRows([]tstorage.Row{ 695 | {DataPoint: tstorage.DataPoint{Timestamp: 1600000002, Value: 0.1}, Metric: "metric1"}, 696 | }) 697 | if err != nil { 698 | panic(err) 699 | } 700 | 701 | // Flush all data points 702 | if err := storage.Close(); err != nil { 703 | panic(err) 704 | } 705 | 706 | // Re-open storage from the persisted data 707 | storage, err = tstorage.NewStorage( 708 | tstorage.WithDataPath(tmpDir), 709 | tstorage.WithTimestampPrecision(tstorage.Seconds), 710 | tstorage.WithPartitionDuration(3*time.Second), 711 | ) 712 | if err != nil { 713 | panic(err) 714 | } 715 | defer func() { 716 | if err := storage.Close(); err != nil { 717 | panic(err) 718 | } 719 | }() 720 | 721 | points, err := storage.Select("metric1", nil, 1600000001, 1600000009) 722 | if err != nil { 723 | panic(err) 724 | } 725 | for _, p := range points { 726 | fmt.Printf("Timestamp: %v, Value: %v\n", p.Timestamp, p.Value) 727 | } 728 | 729 | // Missing data point at 1600000002 because it was dropped. 
730 | 731 | // Output: 732 | // Timestamp: 1600000001, Value: 0.1 733 | // Timestamp: 1600000003, Value: 0.1 734 | // Timestamp: 1600000004, Value: 0.1 735 | // Timestamp: 1600000005, Value: 0.1 736 | // Timestamp: 1600000006, Value: 0.1 737 | // Timestamp: 1600000007, Value: 0.1 738 | // Timestamp: 1600000008, Value: 0.1 739 | } 740 | 741 | func ExampleStorage_InsertRows_concurrent() { 742 | storage, err := tstorage.NewStorage( 743 | tstorage.WithTimestampPrecision(tstorage.Seconds), 744 | ) 745 | if err != nil { 746 | panic(err) 747 | } 748 | defer storage.Close() 749 | 750 | // First insert in order to ensure min timestamp 751 | if err := storage.InsertRows([]tstorage.Row{ 752 | {Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: 1600000000}}, 753 | }); err != nil { 754 | panic(err) 755 | } 756 | 757 | var wg sync.WaitGroup 758 | for i := int64(1600000001); i < 1600000100; i++ { 759 | wg.Add(1) 760 | go func(timestamp int64) { 761 | if err := storage.InsertRows([]tstorage.Row{ 762 | {Metric: "metric1", DataPoint: tstorage.DataPoint{Timestamp: timestamp}}, 763 | }); err != nil { 764 | panic(err) 765 | } 766 | wg.Done() 767 | }(i) 768 | } 769 | wg.Wait() 770 | 771 | points, err := storage.Select("metric1", nil, 1600000000, 1600000100) 772 | if err != nil { 773 | panic(err) 774 | } 775 | for _, p := range points { 776 | fmt.Printf("timestamp: %v, value: %v\n", p.Timestamp, p.Value) 777 | } 778 | } 779 | -------------------------------------------------------------------------------- /storage_test.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test_storage_Select(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | storage storage 14 | metric string 15 | labels []Label 16 | start int64 17 | end int64 18 | want []*DataPoint 19 | wantErr bool 20 | }{ 21 | { 22 | name: "select from 
single partition", 23 | metric: "metric1", 24 | start: 1, 25 | end: 4, 26 | storage: func() storage { 27 | part1 := newMemoryPartition(nil, 1*time.Hour, Seconds) 28 | _, err := part1.insertRows([]Row{ 29 | {DataPoint: DataPoint{Timestamp: 1}, Metric: "metric1"}, 30 | {DataPoint: DataPoint{Timestamp: 2}, Metric: "metric1"}, 31 | {DataPoint: DataPoint{Timestamp: 3}, Metric: "metric1"}, 32 | }) 33 | if err != nil { 34 | panic(err) 35 | } 36 | list := newPartitionList() 37 | list.insert(part1) 38 | return storage{ 39 | partitionList: list, 40 | workersLimitCh: make(chan struct{}, defaultWorkersLimit), 41 | } 42 | }(), 43 | want: []*DataPoint{ 44 | {Timestamp: 1}, 45 | {Timestamp: 2}, 46 | {Timestamp: 3}, 47 | }, 48 | }, 49 | { 50 | name: "select from three partitions", 51 | metric: "metric1", 52 | start: 1, 53 | end: 10, 54 | storage: func() storage { 55 | part1 := newMemoryPartition(nil, 1*time.Hour, Seconds) 56 | _, err := part1.insertRows([]Row{ 57 | {DataPoint: DataPoint{Timestamp: 1}, Metric: "metric1"}, 58 | {DataPoint: DataPoint{Timestamp: 2}, Metric: "metric1"}, 59 | {DataPoint: DataPoint{Timestamp: 3}, Metric: "metric1"}, 60 | }) 61 | if err != nil { 62 | panic(err) 63 | } 64 | part2 := newMemoryPartition(nil, 1*time.Hour, Seconds) 65 | _, err = part2.insertRows([]Row{ 66 | {DataPoint: DataPoint{Timestamp: 4}, Metric: "metric1"}, 67 | {DataPoint: DataPoint{Timestamp: 5}, Metric: "metric1"}, 68 | {DataPoint: DataPoint{Timestamp: 6}, Metric: "metric1"}, 69 | }) 70 | if err != nil { 71 | panic(err) 72 | } 73 | part3 := newMemoryPartition(nil, 1*time.Hour, Seconds) 74 | _, err = part3.insertRows([]Row{ 75 | {DataPoint: DataPoint{Timestamp: 7}, Metric: "metric1"}, 76 | {DataPoint: DataPoint{Timestamp: 8}, Metric: "metric1"}, 77 | {DataPoint: DataPoint{Timestamp: 9}, Metric: "metric1"}, 78 | }) 79 | if err != nil { 80 | panic(err) 81 | } 82 | list := newPartitionList() 83 | list.insert(part1) 84 | list.insert(part2) 85 | list.insert(part3) 86 | 87 | return storage{ 
88 | partitionList: list, 89 | workersLimitCh: make(chan struct{}, defaultWorkersLimit), 90 | } 91 | }(), 92 | want: []*DataPoint{ 93 | {Timestamp: 1}, 94 | {Timestamp: 2}, 95 | {Timestamp: 3}, 96 | {Timestamp: 4}, 97 | {Timestamp: 5}, 98 | {Timestamp: 6}, 99 | {Timestamp: 7}, 100 | {Timestamp: 8}, 101 | {Timestamp: 9}, 102 | }, 103 | }, 104 | } 105 | for _, tt := range tests { 106 | t.Run(tt.name, func(t *testing.T) { 107 | got, err := tt.storage.Select(tt.metric, tt.labels, tt.start, tt.end) 108 | assert.Equal(t, tt.wantErr, err != nil) 109 | assert.Equal(t, tt.want, got) 110 | assert.Equal(t, tt.want, got) 111 | }) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /testdata/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "minTimestamp": 1600000000, 3 | "maxTimestamp": 1600000001, 4 | "numDatapoints": 2 5 | } 6 | -------------------------------------------------------------------------------- /wal.go: -------------------------------------------------------------------------------- 1 | package tstorage 2 | 3 | import ( 4 | "os" 5 | "sync" 6 | ) 7 | 8 | type walOperation byte 9 | 10 | const ( 11 | // The record format for operateInsert is as shown below: 12 | /* 13 | +--------+---------------------+--------+--------------------+----------------+ 14 | | op(1b) | len metric(varints) | metric | timestamp(varints) | value(varints) | 15 | +--------+---------------------+--------+--------------------+----------------+ 16 | */ 17 | operationInsert walOperation = iota 18 | ) 19 | 20 | // wal represents a write-ahead log, which offers durability guarantees. 
21 | type wal interface { 22 | append(op walOperation, rows []Row) error 23 | flush() error 24 | punctuate() error 25 | removeOldest() error 26 | removeAll() error 27 | refresh() error 28 | } 29 | 30 | type nopWAL struct { 31 | filename string 32 | f *os.File 33 | mu sync.Mutex 34 | } 35 | 36 | func (f *nopWAL) append(_ walOperation, _ []Row) error { 37 | return nil 38 | } 39 | 40 | func (f *nopWAL) flush() error { 41 | return nil 42 | } 43 | 44 | func (f *nopWAL) punctuate() error { 45 | return nil 46 | } 47 | 48 | func (f *nopWAL) removeOldest() error { 49 | return nil 50 | } 51 | 52 | func (f *nopWAL) removeAll() error { 53 | return nil 54 | } 55 | 56 | func (f *nopWAL) refresh() error { 57 | return nil 58 | } 59 | --------------------------------------------------------------------------------