├── .gitignore ├── Gopkg.lock ├── Gopkg.toml ├── Makefile ├── README.md ├── protos ├── .gen │ ├── rpc.pb.go │ └── rpc.pb.gw.go ├── google │ └── api │ │ ├── annotations.proto │ │ └── http.proto └── rpc.proto ├── resources ├── fdb_index.png ├── fdb_storage.png ├── fdb_time.png ├── m3db_storage.png └── m3db_time.png └── src ├── cmd ├── bench │ └── main.go └── server │ └── main.go ├── encoding ├── common.go ├── decoder.go ├── encoder.go ├── merge.go ├── merge_test.go ├── multi_decoder.go ├── multi_decoder_test.go ├── ostream.go └── round_trip_test.go └── layer ├── dircompress ├── layer.go └── layer_test.go ├── raw └── layer.go ├── rawblock ├── buffer.go ├── buffer_test.go ├── commitlog.go ├── commitlog_test.go ├── common_test.go └── layer.go ├── server └── server.go └── types.go /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor 2 | /main 3 | blog.md 4 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 2 | 3 | 4 | [[projects]] 5 | digest = "1:b39cf81d5f440b9c0757a25058432d33af867e5201109bf53621356d9dab4b73" 6 | name = "github.com/apache/thrift" 7 | packages = ["lib/go/thrift"] 8 | pruneopts = "UT" 9 | revision = "384647d290e2e4a55a14b1b7ef1b7e66293a2c33" 10 | version = "v0.12.0" 11 | 12 | [[projects]] 13 | digest = "1:010ac780f94633dc91ef721a395c5768ace4456b4833dea5ad7c026802ff143e" 14 | name = "github.com/apple/foundationdb" 15 | packages = [ 16 | "bindings/go/src/fdb", 17 | "bindings/go/src/fdb/tuple", 18 | ] 19 | pruneopts = "UT" 20 | revision = "a6c09645bf9ec2f2c5a00839bcf17a37cec87fdb" 21 | version = "6.1.9" 22 | 23 | [[projects]] 24 | digest = "1:d6afaeed1502aa28e80a4ed0981d570ad91b2579193404256ce672ed0a609e0d" 25 | name = "github.com/beorn7/perks" 26 | packages = ["quantile"] 27 | pruneopts = "UT" 28 | revision = "4b2b341e8d7715fae06375aa633dbb6e91b3fb46" 29 | version = "v1.0.0" 30 | 31 | [[projects]] 32 | digest = "1:998cf998358a303ac2430c386ba3fd3398477d6013153d3c6e11432765cc9ae6" 33 | name = "github.com/cespare/xxhash" 34 | packages = ["."] 35 | pruneopts = "UT" 36 | revision = "3b82fb7d186719faeedd0c2864f868c74fbf79a1" 37 | version = "v2.0.0" 38 | 39 | [[projects]] 40 | digest = "1:ffe9824d294da03b391f44e1ae8281281b4afc1bdaa9588c9097785e3af10cec" 41 | name = "github.com/davecgh/go-spew" 42 | packages = ["spew"] 43 | pruneopts = "UT" 44 | revision = "8991bc29aa16c548c550c7ff78260e27b9ab7c73" 45 | version = "v1.1.1" 46 | 47 | [[projects]] 48 | digest = "1:48092bf6632f55839850666c33469f546f6d45fdbd59a66759ec12e84d853dc2" 49 | name = "github.com/gogo/protobuf" 50 | packages = ["proto"] 51 | pruneopts = "UT" 52 | revision = "ba06b47c162d49f2af050fb4c75bcbc86a159d5c" 53 | version = "v1.2.1" 54 | 55 | [[projects]] 56 | digest = "1:be408f349cae090a7c17a279633d6e62b00068e64af66a582cae0983de8890ea" 57 | name = "github.com/golang/mock" 58 | packages = ["gomock"] 59 | pruneopts = "UT" 60 | revision = "9fa652df1129bef0e734c9cf9bf6dbae9ef3b9fa" 61 | version = "1.3.1" 62 | 63 | [[projects]] 64 | digest = "1:7033673f364203b103c09f683c83ec2881d3cd22740ccf21e73dd90145a4a3ec" 65 | name = "github.com/golang/protobuf" 66 | packages = [ 67 | "jsonpb", 68 | "proto", 69 | "protoc-gen-go/descriptor", 70 | "protoc-gen-go/plugin", 71 | "ptypes/any", 72 | "ptypes/duration", 73 | "ptypes/empty", 74 | "ptypes/struct", 75 | 
"ptypes/timestamp", 76 | "ptypes/wrappers", 77 | ] 78 | pruneopts = "UT" 79 | revision = "b5d812f8a3706043e23a9cd5babf2e5423744d30" 80 | version = "v1.3.1" 81 | 82 | [[projects]] 83 | digest = "1:73bea52c5e1bbd7d80166e9255183b6fd41cbae463f98a2bd32b7f58f7438975" 84 | name = "github.com/jhump/protoreflect" 85 | packages = [ 86 | "desc", 87 | "desc/internal", 88 | "desc/protoparse", 89 | "dynamic", 90 | "internal", 91 | ] 92 | pruneopts = "UT" 93 | revision = "92269e4a44a442365a8824f1e184b8ddbca3ec7a" 94 | version = "v1.4.1" 95 | 96 | [[projects]] 97 | digest = "1:f7b7cc4601639f90815fb2fe02b2775278a83b468d7dbbd800d861405de40d74" 98 | name = "github.com/m3db/m3" 99 | packages = [ 100 | "src/cluster/client", 101 | "src/cluster/generated/proto/metadatapb", 102 | "src/cluster/generated/proto/placementpb", 103 | "src/cluster/kv", 104 | "src/cluster/kv/util/runtime", 105 | "src/cluster/placement", 106 | "src/cluster/placement/algo", 107 | "src/cluster/placement/selector", 108 | "src/cluster/placement/service", 109 | "src/cluster/placement/storage", 110 | "src/cluster/services", 111 | "src/cluster/services/leader/campaign", 112 | "src/cluster/shard", 113 | "src/dbnode/encoding", 114 | "src/dbnode/encoding/m3tsz", 115 | "src/dbnode/generated/proto/namespace", 116 | "src/dbnode/namespace", 117 | "src/dbnode/retention", 118 | "src/dbnode/ts", 119 | "src/dbnode/x/xio", 120 | "src/dbnode/x/xpool", 121 | "src/metrics/metric/id", 122 | "src/x/checked", 123 | "src/x/clock", 124 | "src/x/close", 125 | "src/x/context", 126 | "src/x/errors", 127 | "src/x/ident", 128 | "src/x/instrument", 129 | "src/x/opentracing", 130 | "src/x/pool", 131 | "src/x/process", 132 | "src/x/resource", 133 | "src/x/serialize", 134 | "src/x/time", 135 | "src/x/watch", 136 | ] 137 | pruneopts = "UT" 138 | revision = "c9820911fadc15f64c3af2ee8a07a8cb429d5374" 139 | version = "v0.9.6" 140 | 141 | [[projects]] 142 | digest = "1:b8c7482d3a298ea2a4fc2cec50d04770180f2e5799170578c40500ae01f103e0" 143 | name = "github.com/m3db/prometheus_client_golang" 144 | packages = [ 145 | "prometheus", 146 | "prometheus/promhttp", 147 | ] 148 | pruneopts = "UT" 149 | revision = "8ae269d24972b8695572fa6b2e3718b5ea82d6b4" 150 | version = "v0.8.1" 151 | 152 | [[projects]] 153 | digest = "1:9cf4ac6c9a81579807b1cd6a394f22795aacaf25bd2ad4b8b7dd3f829aa206c9" 154 | name = "github.com/m3db/prometheus_client_model" 155 | packages = ["go"] 156 | pruneopts = "UT" 157 | revision = "d3fff8420252ef63bffb96f689d1a85096c97321" 158 | version = "v0.1.0" 159 | 160 | [[projects]] 161 | digest = "1:a4021f830c7bb25416a7acd0521e80aa7b05586ac9ef03c4ca58c016146ed12f" 162 | name = "github.com/m3db/prometheus_common" 163 | packages = [ 164 | "expfmt", 165 | "internal/bitbucket.org/ww/goautoneg", 166 | "model", 167 | ] 168 | pruneopts = "UT" 169 | revision = "d550673fc477123acb69017380567e8fafc765fc" 170 | version = "v0.1.0" 171 | 172 | [[projects]] 173 | digest = "1:195a65e93248ff74ff5959b2c9d8a19f47b6506284a7c826692b1352e9ad9f92" 174 | name = "github.com/m3db/prometheus_procfs" 175 | packages = ["."] 176 | pruneopts = "UT" 177 | revision = "1878d9fbb537119d24b21ca07effd591627cd160" 178 | version = "v0.8.1" 179 | 180 | [[projects]] 181 | digest = "1:ff5ebae34cfbf047d505ee150de27e60570e8c394b3b8fdbb720ff6ac71985fc" 182 | name = "github.com/matttproud/golang_protobuf_extensions" 183 | packages = ["pbutil"] 184 | pruneopts = "UT" 185 | revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c" 186 | version = "v1.0.1" 187 | 188 | [[projects]] 189 | digest = 
"1:11e62d6050198055e6cd87ed57e5d8c669e84f839c16e16f192374d913d1a70d" 190 | name = "github.com/opentracing/opentracing-go" 191 | packages = [ 192 | ".", 193 | "ext", 194 | "log", 195 | ] 196 | pruneopts = "UT" 197 | revision = "659c90643e714681897ec2521c60567dd21da733" 198 | version = "v1.1.0" 199 | 200 | [[projects]] 201 | digest = "1:cf31692c14422fa27c83a05292eb5cbe0fb2775972e8f1f8446a71549bd8980b" 202 | name = "github.com/pkg/errors" 203 | packages = ["."] 204 | pruneopts = "UT" 205 | revision = "ba968bfe8b2f7e042a574c888954fccecfa385b4" 206 | version = "v0.8.1" 207 | 208 | [[projects]] 209 | digest = "1:0028cb19b2e4c3112225cd871870f2d9cf49b9b4276531f03438a88e94be86fe" 210 | name = "github.com/pmezard/go-difflib" 211 | packages = ["difflib"] 212 | pruneopts = "UT" 213 | revision = "792786c7400a136282c1664665ae0a8db921c6c2" 214 | version = "v1.0.0" 215 | 216 | [[projects]] 217 | digest = "1:5da8ce674952566deae4dbc23d07c85caafc6cfa815b0b3e03e41979cedb8750" 218 | name = "github.com/stretchr/testify" 219 | packages = [ 220 | "assert", 221 | "require", 222 | ] 223 | pruneopts = "UT" 224 | revision = "ffdc059bfe9ce6a4e144ba849dbedead332c6053" 225 | version = "v1.3.0" 226 | 227 | [[projects]] 228 | digest = "1:5604990ce6c053672bf1c4666c867c65e53ec9bbe51327e7471f73974258bcf0" 229 | name = "github.com/uber-go/tally" 230 | packages = [ 231 | ".", 232 | "m3", 233 | "m3/customtransports", 234 | "m3/thrift", 235 | "m3/thriftudp", 236 | "multi", 237 | "prometheus", 238 | ] 239 | pruneopts = "UT" 240 | revision = "24c699f78afd17db5aac42f83c1c5cad70254294" 241 | version = "v3.3.10" 242 | 243 | [[projects]] 244 | digest = "1:57e707ba5fcbab4913a1c81e640ebb9f05f6327dcf88ab3b0e16dba3b8bb31fb" 245 | name = "github.com/uber/jaeger-client-go" 246 | packages = [ 247 | ".", 248 | "config", 249 | "internal/baggage", 250 | "internal/baggage/remote", 251 | "internal/spanlog", 252 | "internal/throttler", 253 | "internal/throttler/remote", 254 | "log", 255 | "log/zap", 256 | "rpcmetrics", 257 | "thrift", 258 | "thrift-gen/agent", 259 | "thrift-gen/baggage", 260 | "thrift-gen/jaeger", 261 | "thrift-gen/sampling", 262 | "thrift-gen/zipkincore", 263 | "transport", 264 | "utils", 265 | ] 266 | pruneopts = "UT" 267 | revision = "2f47546e3facd43297739439600bcf43f44cce5d" 268 | version = "v2.16.0" 269 | 270 | [[projects]] 271 | digest = "1:034f3a72349013b835bc829136f88204a2c0115df4b8d4b94b6ed4f0e1f4a9db" 272 | name = "github.com/uber/jaeger-lib" 273 | packages = [ 274 | "metrics", 275 | "metrics/tally", 276 | ] 277 | pruneopts = "UT" 278 | revision = "0e30338a695636fe5bcf7301e8030ce8dd2a8530" 279 | version = "v2.0.0" 280 | 281 | [[projects]] 282 | digest = "1:a5158647b553c61877aa9ae74f4015000294e47981e6b8b07525edcbb0747c81" 283 | name = "go.uber.org/atomic" 284 | packages = ["."] 285 | pruneopts = "UT" 286 | revision = "df976f2515e274675050de7b3f42545de80594fd" 287 | version = "v1.4.0" 288 | 289 | [[projects]] 290 | digest = "1:60bf2a5e347af463c42ed31a493d817f8a72f102543060ed992754e689805d1a" 291 | name = "go.uber.org/multierr" 292 | packages = ["."] 293 | pruneopts = "UT" 294 | revision = "3c4937480c32f4c13a875a1829af76c98ca3d40a" 295 | version = "v1.1.0" 296 | 297 | [[projects]] 298 | digest = "1:676160e6a4722b08e0e26b11521d575c2cb2b6f0c679e1ee6178c5d8dee51e5e" 299 | name = "go.uber.org/zap" 300 | packages = [ 301 | ".", 302 | "buffer", 303 | "internal/bufferpool", 304 | "internal/color", 305 | "internal/exit", 306 | "zapcore", 307 | ] 308 | pruneopts = "UT" 309 | revision = "27376062155ad36be76b0f12cf1572a221d3a48c" 310 
| version = "v1.10.0" 311 | 312 | [[projects]] 313 | branch = "master" 314 | digest = "1:9f915ece988ec60eb54677e0dcc77fd53a7f42a496d984c351416ffcfd16b8f7" 315 | name = "google.golang.org/genproto" 316 | packages = [ 317 | "protobuf/api", 318 | "protobuf/field_mask", 319 | "protobuf/ptype", 320 | "protobuf/source_context", 321 | ] 322 | pruneopts = "UT" 323 | revision = "eb0b1bdb6ae60fcfc41b8d907b50dfb346112301" 324 | 325 | [solve-meta] 326 | analyzer-name = "dep" 327 | analyzer-version = 1 328 | input-imports = [ 329 | "github.com/apple/foundationdb/bindings/go/src/fdb", 330 | "github.com/apple/foundationdb/bindings/go/src/fdb/tuple", 331 | "github.com/m3db/m3/src/dbnode/encoding", 332 | "github.com/m3db/m3/src/dbnode/encoding/m3tsz", 333 | "github.com/m3db/m3/src/x/time", 334 | "github.com/stretchr/testify/require", 335 | ] 336 | solver-name = "gps-cdcl" 337 | solver-version = 1 338 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | # Gopkg.toml example 2 | # 3 | # Refer to https://golang.github.io/dep/docs/Gopkg.toml.html 4 | # for detailed Gopkg.toml documentation. 5 | # 6 | # required = ["github.com/user/thing/cmd/thing"] 7 | # ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] 8 | # 9 | # [[constraint]] 10 | # name = "github.com/user/project" 11 | # version = "1.0.0" 12 | # 13 | # [[constraint]] 14 | # name = "github.com/user/project2" 15 | # branch = "dev" 16 | # source = "github.com/myfork/project2" 17 | # 18 | # [[override]] 19 | # name = "github.com/x/y" 20 | # version = "2.4.0" 21 | # 22 | # [prune] 23 | # non-go = false 24 | # go-tests = true 25 | # unused-packages = true 26 | 27 | 28 | [[constraint]] 29 | name = "github.com/apple/foundationdb" 30 | version = "6.1.8" 31 | 32 | [[constraint]] 33 | name = "github.com/m3db/m3" 34 | version = "0.9.6" 35 | 36 | [[constraint]] 37 | name = "github.com/stretchr/testify" 38 | version = "1.3.0" 39 | 40 | [[constraint]] 41 | name = "google.golang.org/grpc" 42 | 43 | [prune] 44 | go-tests = true 45 | unused-packages = true 46 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | gen-proto: install-go-proto-plugin 2 | protoc --proto_path=./protos --go_out=plugins=grpc:./protos/.gen/ ./protos/rpc.proto 3 | protoc --proto_path=./protos --grpc-gateway_out=logtostderr=true:./protos/.gen/ ./protos/rpc.proto 4 | 5 | install-go-proto-plugin: 6 | go get -u github.com/golang/protobuf/protoc-gen-go 7 | go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway 8 | go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger 9 | 10 | bench: 11 | go run ./src/cmd/bench/main.go -numSeries 10000 -batchSize 1 -numWorkers 1 -duration 30s -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Time Series and FoundationDB: Millions of writes/s and 10x compression in under 2,000 lines of Go 2 | 3 | ## Disclaimer 4 | 5 | I want to preface everything you’re about to read with the disclaimer that I built DiamondDB purely as a PoC to measure performance of different architectures for storing time series data in FoundationDB. It is in no way production ready. In fact, the code is littered with TODOs, cut corners, and missing features. 
The only thing DiamondDB is useful for in its current form is demonstrating how a performant time series database **could** be built on top of FDB and reminding me that I should go outside more often. If you want a distributed database with the functionality described in this blog post, you should just use [M3DB](https://github.com/m3db/m3) itself. 6 | 7 | ## Target Audience 8 | 9 | This blog post is targeted at engineers who either work on large-scale distributed systems or are curious about them. 10 | 11 | Throughout this post we’ll look at the problem of storing high volume time series data to illustrate how FoundationDB's excellent performance characteristics and strong consistency guarantees can be utilized to build reliable and performant distributed systems. 12 | 13 | In this case we're going to build a distributed time series database (modeled after [M3DB](https://github.com/m3db/m3)) that can handle millions of writes/s (with best in class compression!) on my 2018 MacBook Pro in less than 2,000 lines of Go code. 14 | 15 | ## High Volume Time Series Data 16 | 17 | At $DAYJOB I spend most of my time developing an open-source distributed time series database called [M3DB](https://github.com/m3db/m3). So naturally my first instinct was to see if I could replicate an M3DB-like system using FDB. 18 | 19 | Time series means different things to different people. In this case, I want to focus on the type of time series storage engine that could efficiently power an [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) system (strong consistency and immediately read your writes) or a monitoring / observability workload as opposed to a time series database designed for [OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) workloads. 20 | 21 | Primarily, our system should support the following two APIs: 22 | 23 | ```golang 24 | type Value struct { 25 | Timestamp int64 26 | Value float64 27 | } 28 | 29 | Write(seriesID string, value Value) 30 | 31 | Read(seriesID string) ([]Value) 32 | ``` 33 | 34 | Note that M3DB has support for several other important features, such as custom types and inverted indexing, but let's put that aside for a moment. 35 | 36 | At this point you may be wondering: why do we even need a fancy distributed system in the first place? Can’t we easily solve this problem using PostgreSQL with a simple table schema? An example being: 37 | 38 | ```sql 39 | CREATE TABLE timeseries ( 40 | series_id TEXT, 41 | timestamp integer, 42 | value double precision, 43 | PRIMARY KEY(series_id, timestamp) 44 | ); 45 | ``` 46 | 47 | This implementation would work for some small use-cases, but M3DB has three properties that the Postgres implementation does not: 48 | 49 | 1. Horizontal scalability (as additional machines are added the throughput of the system should increase in a roughly linear fashion) 50 | 2. High write throughput (millions of writes/s) 51 | 3. Efficient compression 52 | 53 | It’s possible for Postgres to partially address the compression requirement in a variety of ways. [This gist](https://gist.github.com/richardartoul/23b66ea6924f28fc6ec8dfcd06901302) is an example that demonstrates how a stored procedure can be used to perform time series compression, but the compression will never be as good as a custom-designed algorithm, such as [Gorilla](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf). In addition, the Postgres implementation will never achieve horizontal scalability or high write throughput without application layer sharding.
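To make the comparison concrete, here's roughly what the `Write` and `Read` APIs could look like on top of that naive schema using Go's standard `database/sql` package. This is a hypothetical sketch rather than code from this repository; the `lib/pq` driver import is an assumption, and the `timeseries` table is the one from the schema above:

```golang
package main

import (
	"database/sql"

	_ "github.com/lib/pq" // assumed Postgres driver
)

type Value struct {
	Timestamp int64
	Value     float64
}

type pgStore struct {
	db *sql.DB
}

// Write inserts a single datapoint as its own row. Every datapoint costs a
// full row plus a primary-key index entry, which is why compression and
// write throughput suffer at scale.
func (s *pgStore) Write(seriesID string, value Value) error {
	_, err := s.db.Exec(
		`INSERT INTO timeseries (series_id, timestamp, value) VALUES ($1, $2, $3)`,
		seriesID, value.Timestamp, value.Value,
	)
	return err
}

// Read returns all datapoints for a series in timestamp order.
func (s *pgStore) Read(seriesID string) ([]Value, error) {
	rows, err := s.db.Query(
		`SELECT timestamp, value FROM timeseries WHERE series_id = $1 ORDER BY timestamp`,
		seriesID,
	)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var values []Value
	for rows.Next() {
		var v Value
		if err := rows.Scan(&v.Timestamp, &v.Value); err != nil {
			return nil, err
		}
		values = append(values, v)
	}
	return values, rows.Err()
}
```

This is perfectly serviceable for small workloads; the rest of the post is about what happens when the three properties listed above actually matter.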
54 | 55 | If you're wondering why you would ever need to store so much data that you wouldn't be able to fit it all in a single large Postgres instance, consider the monitoring / observability use-case. Imagine you have a fleet of `50,000` servers and you want to monitor `100` different metrics (free disk space, CPU utilization, etc.) about each one at `10s` intervals. This would generate `5,000,000` unique time series and `500,000` data points per second, and you're still not even tracking any application-level metrics! 56 | 57 | Implementing the `Write` and `Read` interfaces, while also achieving the three properties listed above, is the crux of what M3DB and other distributed time series databases in this space seek to accomplish. 58 | 59 | ## A Software Foundation for Distributed Systems 60 | 61 | I first started paying attention to FoundationDB (FDB) when I listened to a [podcast](https://www.dataengineeringpodcast.com/foundationdb-distributed-systems-episode-80/) during which [Ryan Worl](https://twitter.com/ryanworl) explained how FDB can be used as an extremely powerful primitive for building distributed systems. This piqued my interest because distributed systems engineers are **severely** lacking in good primitives. 62 | 63 | But why does FDB make for such a good primitive? To answer that question, we first need to understand the data model of FoundationDB. FoundationDB is a distributed system that provides the following semantics: 64 | 65 | 1. Key/Value storage where keys and values can be arbitrary byte arrays. 66 | 2. Keys are "sorted" lexicographically such that reading and truncating large sorted ranges is efficient. 67 | 3. Automatic detection and redistribution of hot keys (this one is particularly notable and I’m not aware of many other systems that handle this gracefully). 68 | 4. Fully ACID transactions at the highest level of isolation (`strict serializability`) across arbitrary key/value pairs. 69 | 70 | This is basically the holy grail of primitives for building distributed systems. For example, the architecture of almost every "distributed SQL" database on the market right now boils down to some (admittedly really hairy) logic for dealing with SQL and transactions wrapped around a distributed key/value store: 71 | 72 | - [Exhibit A](https://pingcap.com/docs/v3.0/architecture/) 73 | - [Exhibit B](https://github.com/cockroachdb/cockroach/blob/master/docs/design.md) 74 | 75 | While there are other systems out there that offer semantics similar to FoundationDB's, FDB is notable for the fact that it was designed from the ground up with the idea of building other distributed systems on top of it, and this decision permeates the entire system, from its architecture and documentation to its performance characteristics and APIs. 76 | 77 | On top of that, it’s impossible to spend any amount of time with FDB and not come away with a deep appreciation for the level of careful consideration and engineering that went into it. 78 | 79 | The path to distributed systems hell is paved with good ideas ruined by mediocre and poorly tested implementations, and there is nothing mediocre or poorly tested about FoundationDB.
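To make that programming model concrete, here's a minimal sketch of what those guarantees look like through FDB's official Go bindings. The key names (`series/count`, `series/last-updated`) and the `bumpCounter` helper are made up for illustration; the rest is the standard API:

```golang
package main

import (
	"time"

	"github.com/apple/foundationdb/bindings/go/src/fdb"
)

// bumpCounter stands in for whatever application logic would be applied to
// the value that was just read (parse it, increment it, re-encode it, etc.).
func bumpCounter(prev []byte) []byte { return append(prev, '+') }

func main() {
	fdb.MustAPIVersion(610)
	db := fdb.MustOpenDefault()

	// A read-modify-write that touches multiple keys. Either every mutation
	// in the function commits or none of them do, and the read is strictly
	// serializable with respect to every other transaction in the cluster.
	// The bindings retry the function automatically on conflicts.
	_, err := db.Transact(func(tr fdb.Transaction) (interface{}, error) {
		current, err := tr.Get(fdb.Key("series/count")).Get()
		if err != nil {
			return nil, err
		}
		tr.Set(fdb.Key("series/count"), bumpCounter(current))
		tr.Set(fdb.Key("series/last-updated"), []byte(time.Now().Format(time.RFC3339)))
		return nil, nil
	})
	if err != nil {
		panic(err)
	}

	// Because keys are kept sorted, reading everything under a prefix is a
	// single efficient range read.
	_, err = db.ReadTransact(func(tr fdb.ReadTransaction) (interface{}, error) {
		pr, err := fdb.PrefixRange([]byte("series/"))
		if err != nil {
			return nil, err
		}
		return tr.GetRange(pr, fdb.RangeOptions{}).GetSliceWithError()
	})
	if err != nil {
		panic(err)
	}
}
```

That's essentially the whole mental model needed for the rest of this post: sorted keys, arbitrary byte values, and transactions that behave the way single-node transactions do.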
80 | 81 | Of course I don't want to spend this entire blog post gushing about how amazing FoundationDB is (although it really is quite good), so if you want to learn more about it, here are some resources to get started: 82 | 83 | - [(Video) Technical Overview of FoundationDB](https://www.youtube.com/watch?v=EMwhsGsxfPU) 84 | 85 | - [The Docs](https://apple.github.io/foundationdb/#documentation) 86 | 87 | Now that we have established some much-needed context, let’s switch gears and actually build something! 88 | 89 | ## The Design and Implementation of DiamondDB 90 | 91 | The question I wanted to explore was this: Could I build a system with the same API, compression, and performance characteristics as M3DB, but as a thin layer on top of FDB instead of a custom distributed system written from the ground up with its own storage engine (as M3DB is)? 92 | 93 | The most naive approach to storing time series data in FDB looks something like this: 94 | 95 | ```golang 96 | db.Transact(func(tr fdb.Transaction) (interface{}, error) { 97 | for _, w := range writes { 98 | key := fdb.Key(tuple.Tuple{w.ID, w.Timestamp.UnixNano()}.Pack()) 99 | tr.Set(key, tuple.Tuple{w.Value}.Pack()) 100 | } 101 | return nil, nil 102 | }) 103 | ``` 104 | 105 | Each datapoint is stored as an individual record in FDB where the key is a tuple in the form `(seriesID, timestamp)` and the value is a tuple in the form `(value)`. 106 | 107 | FDB keys are sorted so we can "efficiently" query for all the values for a given series by issuing a prefix query for all keys that begin with the specified time series ID. 108 | 109 | This design has several issues: 110 | 111 | 1. Compression is terrible because the time series ID is repeated for each record. This could be addressed by assigning each time series a unique integer ID so that each time series ID would only be stored once and all the datapoint entries would reference the integer. This is equivalent to a foreign key relationship in traditional relational databases and is easy to implement because of FDB’s strong transactional semantics; however, compression would still be poor compared to modern time series databases, as we'd still have to store the timestamp (8 bytes) and value (8 bytes) in their entirety, plus an additional 8 bytes for the time series ID "pointer" (assuming we used an unsigned 64-bit integer). 112 | 113 | 2. Write throughput is terrible because every write to FDB is a real transaction. Benchmarking on my laptop indicated that getting more than a few thousand writes per second per storage node on commodity hardware using the `ssd` engine would be difficult. We could use the `memory` engine, which is much faster while still being durable; however, that requires the entire working set of the database to fit in the memory of all the storage nodes, which is a constraint I didn’t want to impose on this project since RAM is much more expensive than disk. 114 | 115 | I didn’t expect this design to work, but it's always good to benchmark the simple approach first so you can measure exactly how much of an improvement you’re getting with the more complex solution and weigh the benefits of complexity vs. performance. 116 | 117 | The next design I attempted was to perform [Gorilla Compression](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) on the time series data. This turns out to be tricky because Gorilla compression is usually performed in memory since it involves writing individual bits at a time.
Despite this obstacle I was able to implement a prototype where each write was performed by loading the current state of a Gorilla encoder out of FDB, encoding the new value into the (now in-memory) encoder, and then finally writing the state of the encoder back to FDB. 118 | 119 | Here is a simplified version of the primary FDB transaction for this implementation: 120 | 121 | ```golang 122 | _, err := l.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 123 | metadataKey := newTimeseriesMetadataKeyFromID(write.seriesID) 124 | metadata, err := tr.Get(metadataKey).Get() 125 | if err != nil { 126 | return nil, err 127 | } 128 | 129 | var ( 130 | metaValue timeSeriesMetadata 131 | dataAppend []byte 132 | enc = encoding.NewEncoder() 133 | ) 134 | 135 | if len(metadataBytes) == 0 { 136 | // Never written. 137 | enc := encoding.NewEncoder() 138 | if err := enc.Encode(write.Timestamp, write.Value); err != nil { 139 | return nil, err 140 | } 141 | 142 | metaValue = timeSeriesMetadata{ 143 | State: enc.State(), 144 | } 145 | 146 | b := enc.Bytes() 147 | if len(b) > 1 { 148 | dataAppend = enc.Bytes()[:len(b)-1] 149 | } 150 | } else { 151 | if err := json.Unmarshal(metadataBytes, &metaValue); err != nil { 152 | return nil, err 153 | } 154 | 155 | // Has been written before, restore encoder state. 156 | if err := enc.Restore(metaValue.State); err != nil { 157 | return nil, err 158 | } 159 | 160 | if err := enc.Encode(write.Timestamp, write.Value); err != nil { 161 | return nil, err 162 | } 163 | 164 | // Ensure new state gets persisted. 165 | var ( 166 | newState = enc.State() 167 | b = enc.Bytes() 168 | ) 169 | if len(b) == 0 { 170 | return nil, errors.New("encoder bytes was length zero") 171 | } 172 | if len(b) == 1 { 173 | // The existing last byte was modified without adding any additional bytes. The last 174 | // byte is always tracked by the state so there is nothing to append here. 175 | } 176 | if len(b) > 1 { 177 | // The last byte will be kept track of by the state, but any bytes preceding it are 178 | // new "complete" bytes which should be appended to the compressed stream. 179 | dataAppend = b[:len(b)-1] 180 | } 181 | metaValue.LastByte = b[len(b)-1] 182 | metaValue.State = newState 183 | } 184 | 185 | newMetadataBytes, err := json.Marshal(&metaValue) 186 | if err != nil { 187 | return nil, err 188 | } 189 | 190 | tr.Set(metadataKey, newMetadataBytes) 191 | dataKey := newTimeseriesDataKeyFromID(write.ID) 192 | tr.AppendIfFits(dataKey, dataAppend) 193 | 194 | return nil, nil 195 | }) 196 | ``` 197 | 198 | Note that Gorilla compression operates at the bit (not byte) level so some care had to be taken to manage the last byte of the compressed stream (which could be partial). 199 | 200 | This implementation provided compression levels as good as any modern time series database, but still suffered from terrible write throughput. I couldn't get more than ~5000 writes per second on my laptop using this implementation which makes a lot of sense considering that even though sometimes I was only adding a bit or two to a compressed stream, FDB still had to read/write an entire page of data to add those two additional bits of information since it uses a modified version of SQLite as its storage engine. 201 | 202 | The conclusion I came to was that in order to achieve high write throughput I'd have to implement a semi-stateful system in front of FDB. 
I'd been trying to avoid doing this since it's much more complicated than implementing a simple stateless layer, but in the words of Spiderman: "With great scale comes great complexity". 203 | 204 | What do I mean by "semi-stateful"? Let’s start by looking at the architecture of a truly stateful system, such as M3DB. 205 | 206 | M3DB's storage system behaves similar to a [Log Structured Merge Tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree), except instead of compacting based on levels, compaction is based on time. 207 | 208 | ![](./resources/m3db_storage.png) 209 | 210 | To put that into concrete terms, as M3DB accepts writes they're immediately written to the commit log for durability. This ensures that all acknowledged writes can always be recovered in the case of a crash or failure. At the same time, incoming writes are also buffered in memory where they are actively compressed using Gorilla encoding. 211 | 212 | At regular intervals, data that has been compressed in memory is flushed to disk as immutable files (merging with any existing files if they already exist) where each set of files contains all of the data for all of the values during a fixed "block" period. For example, if the blocksize is configured to 2 hours then a set of files would contain all values with timestamps between 12pm and 2pm. 213 | 214 | This architecture allows M3DB to achieve high levels of compression AND also achieve high write throughput (since writes only need to be buffered in memory and written to a commitlog file before being acknowledged). The only caveat is that if an M3DB node fails (for whatever reason) when it then starts back up it will first need to read the commitlog in its entirety and rebuild the pre-failure in-memory state before it can begin serving reads. This can take some time. 215 | 216 | Another way to understand M3DB’s architecture is that at any point in time an acknowledged write must live in either an immutable fileset **or** a mutable encoder **and** a commit log file. 217 | 218 | ![](./resources/m3db_time.png) 219 | 220 | I decided that if I was going to achieve similar levels of performance as M3DB that I would need to replicate the architecture as well. 221 | 222 | ![](./resources/fdb_storage.png) 223 | 224 | Notice that the architecture looks very similar to M3DB's, except instead of using the filesystem we use FDB. This is why I refer to the architecture as "semi-stateful". It's stateful in the sense that it needs to hold some state in memory and if a node fails or reboots it will have to "bootstrap" that state from the commit logs just like M3DB does. 225 | 226 | However, since the commit logs and compressed data blocks are stored in FDB we don't have to worry about storage state. This is important because it greatly simplifies operational concerns. For example, imagine we wanted to run our database on Kubernetes. Accomplishing this with a completely stateful system like M3DB requires using [StatefulSets](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/), [Persistent Volumes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/), and also writing an [operator](https://github.com/m3db/m3db-operator) to manage it all. With this implementation where the storage is backed by FDB, running this on Kubernetes would be much more straightforward. 
As long as we had some way to assign each instance of our database a unique identifier, Kubernetes would be free to move the instances around since each instance could simply bootstrap itself from FDB after being moved. 227 | 228 | Of course, all of this relies upon the fact that you're able to maintain and operate an FDB cluster, but that's the point of building on top of FDB. Once you've figured out how to set up and operate FDB clusters, you can build all of your other distributed systems on top of it and let FDB handle the most complicated portions of the distributed systems so you can focus on the portions that are unique to the problem you're trying to solve. 229 | 230 | Let’s examine the implementation of this architecture in more detail, starting with the commit logs. If you want to read the code yourself, you can find it [here](https://github.com/richardartoul/tsdb-layer/blob/master/src/layer/rawblock/commitlog.go). The general idea with the commitlog is that we need to batch many writes together and encode them into a binary format that can be decoded in a linear fashion quickly. The commitlog format does **not** need to support random reads in an efficient manner. 231 | 232 | Implementing this is rather straightforward: we just need to gather writes together and then send them in large batches to FDB. Most of the code linked above concerns itself with making sure we can do this in a performant manner, as well as concurrent signaling (since we can't acknowledge writes back to the caller until a commitlog "chunk" containing all of their writes has been persisted in FDB). 233 | 234 | The other requirement of the commit log chunks is that we need to be able to: 235 | 236 | 1. Fetch all undeleted commitlog chunks from FDB (required to "bootstrap" an instance after a restart/failure/reschedule) 237 | 2. Delete all commitlog chunks before a provided chunk (the reason for this will become clear in a minute) 238 | 239 | Luckily, both of these operations are relatively efficient in FDB as long as the keys are formatted correctly. Remember, the abstraction provided by FDB is that of a sorted key/value store, so we just need to format the keys such that they sort lexicographically in a way that makes the two operations described above efficient. 240 | 241 | The way we accomplish this is very straightforward. We use FDB's [tuple layer](https://apple.github.io/foundationdb/data-modeling.html#tuples) to generate keys in the form `("commitlog", chunk_index)`, for example: `("commitlog", 0)` would be the key for the first chunk. The `commitlog` prefix is used to separate the commitlog from entries for other portions of the system, and the index provides a monotonically increasing number so that we can perform operations like: "delete all commitlog chunks before chunk number #127". 242 | 243 | The storage engine will be writing out commitlog chunks constantly so they need to be cleaned up regularly. But how do we know when it's safe to delete a given chunk? One easy way to do that is to take advantage of the fact that the chunks are ordered. 244 | 245 | We can set up a background process that runs at regular intervals and performs the following steps: 246 | 247 | 1. Wait for a new commitlog chunk to be written out and then take note of the index of the chunk. 248 | 2.
Flush all in-memory buffered data as compressed chunks to FDB (note that the storage engine will still be accepting writes while this operation is going on, but that’s fine; this flow only needs to ensure that all writes that were already acknowledged **before** the commitlog chunk from step #1 was written out are flushed to FDB). 249 | 3. Delete all commitlog chunks with an index **lower** than the chunk from step #1. Note that this operation is now safe because the previous step (if it succeeds) guarantees that all the data in all the commitlog chunks that will be deleted have already been persisted to FDB in the form of compressed data chunks. 250 | 251 | ![](./resources/fdb_time.png) 252 | 253 | Using the diagram above as an example, the persistence loop would wait for chunk #3 to be flushed, then the buffer would begin flushing everything that was currently in memory, and finally, once that completed, the storage engine could delete all commitlog chunks lower than 4 because all the data they contained was flushed to FDB as compressed chunks. 254 | 255 | In code, it looks like this: 256 | 257 | ```golang 258 | func (l *rawBlock) startPersistLoop() { 259 | for { 260 | // Prevent excessive activity when there are no incoming writes. 261 | time.Sleep(persistLoopInterval) 262 | 263 | // truncToken is opaque to the caller but the commit log can use it 264 | // to truncate all chunks whose index is lower than the chunk that 265 | // was just flushed as part of the commit log rotation. 266 | truncToken, err := l.cl.WaitForRotation() 267 | if err != nil { 268 | log.Printf("error waiting for commitlog rotation: %v", err) 269 | continue 270 | } 271 | 272 | if err := l.buffer.Flush(); err != nil { 273 | log.Printf("error flushing buffer: %v", err) 274 | continue 275 | } 276 | 277 | if err := l.cl.Truncate(truncToken); err != nil { 278 | log.Printf("error truncating commitlog: %v", err) 279 | continue 280 | } 281 | } 282 | } 283 | ``` 284 | 285 | The last thing to consider about the commit log chunks is that once an instance is restarted it will need to read all of the existing chunks before accepting any writes or reads. I didn’t implement this in the prototype to save time, and because I’m fairly certain it wouldn’t be an issue from a performance perspective since read performance is one of FDB’s strengths. 286 | 287 | The next thing we need to understand is how the `buffer` works, both in terms of read and write operations, as well as the `Flush()` mechanism that we alluded to in the snippet above. 288 | 289 | The `buffer` system's job is straightforward: buffer writes in memory (actively Gorilla compressing them to save memory) until the compressed block can be merged with an existing one in FDB, or inserted as a new chunk entirely. 290 | 291 | I won't go over the implementation of the encoders themselves because that's mainly just straightforward bit-fiddling and described well in the [Gorilla paper](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf). Also, the compression code is mostly just a knock-off of M3DB's ;). If you're really curious, you can check out the code for the [encoder](https://github.com/richardartoul/tsdb-layer/blob/master/src/encoding/encoder.go) and [decoder](https://github.com/richardartoul/tsdb-layer/blob/master/src/encoding/decoder.go) here.
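To give a flavor of the bit-fiddling involved, here's a tiny, self-contained illustration (not the real encoder) of the delta-of-delta transformation that Gorilla-style encoders apply to timestamps. For samples arriving at a regular interval, almost every encoded value collapses to zero, which is what makes it possible to spend only a bit or two per timestamp:

```golang
package main

import "fmt"

// deltaOfDeltas shows the first step a Gorilla-style encoder applies to
// timestamps: record the first timestamp and delta up front, then store only
// the *change* in the delta for every subsequent sample.
func deltaOfDeltas(timestamps []int64) []int64 {
	if len(timestamps) < 3 {
		return nil
	}
	out := make([]int64, 0, len(timestamps)-2)
	prevDelta := timestamps[1] - timestamps[0]
	for i := 2; i < len(timestamps); i++ {
		delta := timestamps[i] - timestamps[i-1]
		out = append(out, delta-prevDelta)
		prevDelta = delta
	}
	return out
}

func main() {
	// Samples arriving every 10 units, with one arriving slightly late.
	ts := []int64{1000, 1010, 1020, 1030, 1041, 1051}
	fmt.Println(deltaOfDeltas(ts)) // [0 0 1 -1]
}
```

The real encoders then bit-pack those near-zero values (a delta-of-delta of zero costs a single bit in the Gorilla scheme) and apply an analogous XOR-based trick to the float64 values, which is where all of the partial-byte bookkeeping in the linked code comes from.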
The only thing you really need to understand about the encoder and decoders are their (simplified) interfaces: 292 | 293 | ```golang 294 | type Encoder interface { 295 | Encode(timestamp time.Time, value float64) error 296 | Bytes() []byte 297 | } 298 | 299 | type Decoder interface { 300 | Next() bool 301 | Current() (time.Time, float64) 302 | Err() error 303 | Reset(b []byte) 304 | } 305 | ``` 306 | 307 | The implementation of the buffer itself is reasonably straightforward. The actual struct looks like this: 308 | 309 | ```golang 310 | type buffer struct { 311 | sync.Mutex 312 | encoders map[string][]encoding.Encoder 313 | } 314 | ``` 315 | 316 | The basic data structure is a synchronized hashmap from time series ID to an array of encoders. The existing implementation is simplified to make things easier and as a result has a few basic limitations (like the inability to write data points out of order) that would require a slightly more complicated data structure to solve, but the basic idea and performance would remain the same. 317 | 318 | Let's start by looking at the write path. This is the most straightforward part. All the encoders are treated as immutable (except the last one), so writing is as simple as finding the last encoder for a given seriesID (or creating one if necessary), and then encoding the newest value into it. 319 | 320 | ```golang 321 | func (b *buffer) Write(writes []layer.Write) error { 322 | b.Lock() 323 | defer b.Unlock() 324 | 325 | for _, w := range writes { 326 | encoders, ok := b.encoders[w.ID] 327 | if !ok { 328 | encoders = []encoding.Encoder{encoding.NewEncoder()} 329 | b.encoders[w.ID] = encoders 330 | } 331 | 332 | enc := encoders[len(encoders)-1] 333 | lastT, _, hasWrittenAnyValues := enc.LastEncoded() 334 | if hasWrittenAnyValues { 335 | if w.Timestamp.Before(lastT) { 336 | return fmt.Errorf( 337 | "cannot write data out of order, series: %s, prevTimestamp: %s, currTimestamp: %s", 338 | w.ID, lastT.String(), w.Timestamp.String()) 339 | } 340 | if w.Timestamp.Equal(lastT) { 341 | return fmt.Errorf( 342 | "cannot upsert existing values, series: %s, currTimestamp: %s", 343 | w.ID, lastT.String()) 344 | } 345 | } 346 | 347 | if err := enc.Encode(w.Timestamp, w.Value); err != nil { 348 | return err 349 | } 350 | } 351 | 352 | return nil 353 | } 354 | ``` 355 | 356 | Before we discuss the Read path, we need to go over the `Flush` path which is how data gets moved from temporary storage in the in-memory buffers to persistent storage in FDB. Remember from our earlier discussion of the background "persist loop" that the contract of the `Flush` method is that when it completes all writes that were already in the buffer when the function started **must** be persisted to FDB. 357 | 358 | The actual implementation (which you can read through [here](https://github.com/richardartoul/tsdb-layer/blob/master/src/layer/rawblock/buffer.go)) is unfortunately complicated by some complex synchronization and concurrency code that I don't want to delve into right now (mainly for performance reasons) but the basic idea is simple: iterate through every time series that was in memory when the function started, create a new encoder for it (making all previous encoders immutable) into which new writes will be encoded, and then flush all the immutable encoders to FDB. 359 | 360 | The first step for flushing the in-memory encoder data to FDB is to retrieve the existing metadata for that series from FDB. 
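Stripped of that synchronization, the skeleton of the flush flow might look something like the sketch below. This is a simplified illustration rather than the actual implementation, and the `flushEncoders` helper is hypothetical — it stands in for the per-series FDB transaction described next:

```golang
// Flush (simplified sketch): seal everything currently in memory, swap in
// fresh encoders for incoming writes, then persist the sealed encoders.
func (b *buffer) Flush() error {
	// Swap under the lock so that writes which arrive after this point go
	// into a brand new encoder and the sealed ones are never mutated again.
	b.Lock()
	sealed := make(map[string][]encoding.Encoder, len(b.encoders))
	for seriesID, encoders := range b.encoders {
		sealed[seriesID] = encoders
		b.encoders[seriesID] = append(encoders, encoding.NewEncoder())
	}
	b.Unlock()

	// Persist outside the lock. Each series gets its own FDB transaction:
	// read its metadata, merge or append a compressed chunk, and write the
	// updated metadata back.
	for seriesID, encoders := range sealed {
		if err := b.flushEncoders(seriesID, encoders); err != nil {
			return err
		}
	}
	// (Omitted: dropping the sealed encoders once they have been flushed,
	// plus all of the error handling and signaling in the real code.)
	return nil
}
```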
The metadata stored in FDB for each series looks like this: 361 | 362 | ```golang 363 | type tsMetadata struct { 364 | Chunks []chunkMetadata 365 | } 366 | ``` 367 | 368 | and each `chunkMetadata` looks like this: 369 | 370 | ```golang 371 | type chunkMetadata struct { 372 | Key []byte 373 | First time.Time 374 | Last time.Time 375 | SizeBytes int 376 | } 377 | ``` 378 | 379 | The series metadata entry serves as a sort of index for the series data by keeping track of all the compressed data chunks associated with that series. For each chunk it keeps track of: 380 | 381 | 1. The FDB key for that chunk (so that the chunk can be retrieved). 382 | 2. The timestamp for the first and last datapoint stored in the compressed block. This information is important for the read path as it informs us which chunks need to be retrieved to satisfy a query with a given time range. It can also be useful to the merge logic so that it can make better decisions about which chunks to merge together to form larger contiguous blocks. 383 | 3. The size (in bytes) of the chunk. This is used by the merging logic to determine when an encoder that is being flushed should be inserted as a new chunk or merged with an existing one. 384 | 385 | This is where FDB's unique programming model really shines. I said earlier that FoundationDB provides the abstraction of a sorted key/value storage system, but more importantly, it supports completely ACID transactions at the highest level of isolation `strict serializability` (which means you're not vulnerable to the types of bugs described in [this excellent blog post by the FaunaDB team](https://fauna.com/blog/demystifying-database-systems-correctness-anomalies-under-serializable-isolation)). 386 | 387 | Because of these guarantees, programming the flush logic is **almost** as simple as if we were programming against an in-memory system. In a single strict serializability ACID transaction we can do the following: 388 | 389 | 1. Read the existing metadata for the series being flushed. 390 | 2. Use the series metadata to decide if the data being flushed should be merged with an existing chunk or written out as a new, independent chunk (this makes experimenting with different compaction methods trivial since we don’t have to rewrite the underlying storage engine). 391 | 3. Read the existing chunk that we need to merge with (if necessary). 392 | 4. Write the merged (or new) chunk to FDB. 393 | 5. Write back the series metadata with the updated chunk information. 394 | 395 | Everything we’ve accomplished up until this point could probably have been accomplished on any distributed system with a sorted key/value interface (of which there are many). However, implementing the 5 steps described above 100% correctly with no edge-cases or race conditions using a loosely / eventually consistent distributed system like Cassandra would be a nightmare. Accomplishing it with FDB is a breeze. 396 | 397 | Finally, now that we've covered both the write and flush paths, we can discuss the read path. Implementing reads turns out to actually be quite straight forward. The steps are: 398 | 399 | 1. Read the latest version of the series metadata out of FDB. 400 | 2. Use the metadata to determine which chunk need to be pulled out of FDB to satisfy the provided query range (I.E if data points between the times of 12p.m and 2p.m are requested then any chunks where the `First`/`Last` data points intersect that range need to be pulled back). 401 | 3. 
Determine which in-memory encoders (which may not yet be flushed to FDB) also contain data points within the requested time range. 402 | 4. Return a decoder that will transparently iterate through all of the data points (returning them in order) by merging across all of the chunks retrieved from FDB as well as the in-memory encoders. This problem turns out to be equivalent to merging k sorted arrays and [this blog post](https://medium.com/outco/how-to-merge-k-sorted-arrays-c35d87aa298e) has a good explanation of how to accomplish that using a min heap. You can also take a look at my implementation [here](https://github.com/richardartoul/tsdb-layer/blob/master/src/encoding/multi_decoder.go). 403 | 404 | A lot of effort went into optimizing the write path, but we haven't done much of anything to optimize the read path. The reason for that is two-fold: 405 | 406 | 1. Systems like M3DB are designed for workloads where write throughput is much higher than read throughput. 407 | 2. FDB can perform reads at a much higher rate than writes by default, so less optimization is required. 408 | 409 | Let's pause for a moment and see if we’ve accomplished our goals. To reiterate, we wanted our system to implement the `Write` and `Read` interfaces (check) as well as satisfy the following properties: 410 | 411 | 1. Horizontal scalability - Check. Benchmarking shows that this design has a transaction conflict rate near zero which means the number of transactions we can do [should scale linearly as we add hardware](https://apple.github.io/foundationdb/performance.html). 412 | 2. High write throughput - Check. This implementation can easily handle over a million logical writes/s on my 2018 MacbookPro. 413 | 3. Efficient compression - Check. We’re using almost the exact same time series compression that M3DB and all the other popular time series databases use. 414 | 415 | ## Future Considerations and Extensions 416 | 417 | DiamondDB is missing a ton of features, but most notably it lacks: 418 | 419 | 1. The ability to store and compress custom types 420 | 2. Secondary indexing (Ex. Fetch all time series where `city` tag equals `san_francisco`) 421 | 3. Automatic TTL (time to live I.E data should “expire” after a certain period of time) 422 | 423 | ### Complex Types 424 | 425 | Storing and compressing custom types turns out to be the easiest to solve. All we have to do is replace our Gorilla encoder with one that can efficiently compress more complicated types. Fortunately, we had to solve that exact problem recently in M3DB as part of our plan to evolve it from a metrics store to a more general purpose time series database. The solution we came up with was to model our complex types as Protobufs and then write a general purpose compression scheme that can perform streaming delta compression of Protobuf messages much like Gorilla performs streaming delta compression of floats. The code for that solution is [open source](https://github.com/m3db/m3/tree/master/src/dbnode/encoding/proto) and could be lifted directly into DiamondDB. If you’re curious about how the bit-fiddly details of how the compression works, take a look at [this documentation](https://github.com/m3db/m3/blob/master/src/dbnode/encoding/proto/docs/encoding.md). 426 | 427 | ### Inverted / Secondary Indexing 428 | 429 | Next up is secondary indexing. 
We already got a brief glimpse of how to perform secondary indexing in FDB earlier with the `flush` code where we atomically wrote a new time series chunk and updated the series’ metadata entry (which is effectively a secondary index over the compressed data chunks). Implementing exact-match secondary indexing would be fairly straightforward. For example, let's say we wanted to implement a tag-based inverted index like the one M3DB supports. For each unique tag key/value pair in the index we would store an FDB entry containing a list of all the time series IDs that were tagged with that pair. The image below depicts a simple example of how to store and index two separate time series: 430 | 431 | ![](./resources/fdb_index.png) 432 | 433 | If we wanted to query for all the time series where the `city` tag is equal to `san_francisco` then we would retrieve the FDB entry with the key `("city", "san_francisco")`, which would immediately tell us that the applicable series are `sf_num_widgets` and `sf_num_people`. More complicated queries could be executed by unioning and intersecting the results of these individual term queries. For example, it's not difficult to imagine how this simple schema could evaluate the query: “fetch all time series where `city` equals `san_francisco` OR `type` equals `widgets`”. Ta da! We’ve just implemented a postings list on top of FDB. 434 | 435 | Of course, this is a fairly naive solution that could end up using a lot of disk space. If we’re willing to exchange complexity for better compression, we could assign each time series a unique ID (an operation that can be implemented efficiently in FDB) and then store a list of integers instead of time series IDs. This would reduce the size of secondary index entries substantially, but we could take it even further by storing a [roaring bitmap](https://roaringbitmap.org/) instead of a list of integers. 436 | 437 | Supporting regular expression queries (as M3DB does) gets more complicated, and if I’m being completely honest, I’d have to spend a few weeks building prototypes to come up with the best way to do this. Luckily, this is the internet, so I can just tell you my opinion with absolutely no evidence to back it up. 438 | 439 | First, the naive solution. In addition to the index entries from the previous example, we could also store entries where the key is the tag name and the value is a list of all the unique values that exist for that tag. We could then retrieve those index entries on-demand and run regular expressions on them in memory. This would tell us which unique tag/value pairs exist that match the regular expression, which we could then use to pull back the index entries from the previous example and look up the matching series IDs. For many use-cases this would be reasonably performant, but you didn’t put up with my rambling for this long to settle for anything reasonable! 440 | 441 | M3DB handles regular expression queries by maintaining a [finite state transducer](https://en.wikipedia.org/wiki/Finite-state_transducer) (FST) for each tag in the inverted index (in our example above there would be an FST for the `city` tag and another for the `type` tag). The FST itself stores a mapping between all the unique values for the tag (`widgets` and `num_people` for the `type` tag) and an integer. In M3DB the integer is an offset into a file where a [roaring bitmap](https://roaringbitmap.org/) is stored that contains the unique integer IDs of all the time series that contain that tag.
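To make the postings-list idea concrete, here's a rough sketch (not DiamondDB or M3DB code) of how those term queries could be evaluated once every series has been assigned an integer ID, using the [roaring](https://github.com/RoaringBitmap/roaring) Go library. The tag values and series IDs are made up for illustration:

```golang
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	// One postings bitmap per unique tag key/value pair. In the FDB version
	// each of these would live as the value of an index key rather than in
	// an in-memory map.
	postings := map[string]*roaring.Bitmap{
		"city=san_francisco": roaring.BitmapOf(1, 2), // e.g. sf_num_widgets, sf_num_people
		"city=new_york":      roaring.BitmapOf(3),
		"type=widgets":       roaring.BitmapOf(1, 3),
	}

	// city == san_francisco AND type == widgets -> intersect the bitmaps.
	and := roaring.And(postings["city=san_francisco"], postings["type=widgets"])
	fmt.Println(and.ToArray()) // [1]

	// city == san_francisco OR type == widgets -> union the bitmaps.
	or := roaring.Or(postings["city=san_francisco"], postings["type=widgets"])
	fmt.Println(or.ToArray()) // [1 2 3]
}
```

Each of these bitmaps serializes to a compact byte slice, so they map naturally onto FDB values as long as an individual bitmap stays under FDB's value size limit.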
Andrew Gallant’s now-famous [blog post](https://blog.burntsushi.net/transducers/) is a great resource to understand why FSTs solve this problem so effectively, but the short of it is that they’re incredibly efficient in this situation because they have the dual properties of: 442 | 443 | 1. Compressing extremely well. 444 | 2. Supporting performant regular expression matching against them. 445 | 446 | Could we leverage this solution in our FDB-backed system? Possibly. We’ve already discussed storing complex data structures like roaring bitmaps in FDB and there is no reason we couldn’t do something similar with FSTs. One limitation we might run into, however, is the fact that an individual value in FDB can’t exceed 100KB in size, which seems like a show-stopper, but we could probably work around it. For example, it’s not hard to imagine designing an mmap-like interface in the programming language of your choice that provides the abstraction of a byte array of arbitrary size that is transparently split and mapped onto FDB. You could then fork / modify existing FST libraries to execute against this interface since many of them are already designed with the ability to execute against FSTs stored in byte arrays or mmap’d files. 447 | 448 | ### Data Time To Live (TTL) 449 | 450 | Finally, let's talk about automatic TTLs (data expiry). I saved this one for last because it’s just a special case of secondary indexing and there are numerous ways you could build indices that would allow you to expire and clean up data in an efficient manner, but this blog post is already far too long. 451 | 452 | ## Conclusion 453 | 454 | There are lots of distributed systems problems that are difficult to solve, but that can be implemented trivially as stateless layers over FoundationDB. However, some problems that seem like a poor match for FDB at first glance can actually be solved with a semi-stateful layer. Of course, building a semi-stateful layer is significantly more complicated than building a stateless one, but it's also significantly **less** complicated than building a distributed system from scratch. While I cut a lot of corners in my implementation, I was still able to build a distributed system that can accept millions of time series writes per second (with competitive levels of compression) in under 2,000 lines of Go code. It's not hard to imagine that with a few more weeks or months of dedicated work and a few thousand more lines of code we could build this out into a production-ready system. 455 | 456 | FoundationDB will never be able to beat a purpose-built storage engine, but programming against it is much easier than programming against the operating system, filesystem, network, and physical hardware. In a lot of ways building a distributed system on top of FDB after having built one from scratch feels a lot like upgrading to Python from assembly. 457 | 458 | FoundationDB is fast, reliable, easy to use, and a lot of fun to program against. Next time you need to build a distributed system consider if FDB could make your job a little bit easier. You might be surprised by just how far you can push it with the right design. 459 | 460 | -------------------------------------------------------------------------------- /protos/.gen/rpc.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go. DO NOT EDIT.
2 | // source: rpc.proto 3 | 4 | package tsdblayer 5 | 6 | import ( 7 | context "context" 8 | fmt "fmt" 9 | proto "github.com/golang/protobuf/proto" 10 | _ "google.golang.org/genproto/googleapis/api/annotations" 11 | grpc "google.golang.org/grpc" 12 | codes "google.golang.org/grpc/codes" 13 | status "google.golang.org/grpc/status" 14 | math "math" 15 | ) 16 | 17 | // Reference imports to suppress errors if they are not otherwise used. 18 | var _ = proto.Marshal 19 | var _ = fmt.Errorf 20 | var _ = math.Inf 21 | 22 | // This is a compile-time assertion to ensure that this generated file 23 | // is compatible with the proto package it is being compiled against. 24 | // A compilation error at this line likely means your copy of the 25 | // proto package needs to be updated. 26 | const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package 27 | 28 | type WriteBatchRequest struct { 29 | Batch []*WriteRequest `protobuf:"bytes,1,rep,name=batch,proto3" json:"batch,omitempty"` 30 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 31 | XXX_unrecognized []byte `json:"-"` 32 | XXX_sizecache int32 `json:"-"` 33 | } 34 | 35 | func (m *WriteBatchRequest) Reset() { *m = WriteBatchRequest{} } 36 | func (m *WriteBatchRequest) String() string { return proto.CompactTextString(m) } 37 | func (*WriteBatchRequest) ProtoMessage() {} 38 | func (*WriteBatchRequest) Descriptor() ([]byte, []int) { 39 | return fileDescriptor_77a6da22d6a3feb1, []int{0} 40 | } 41 | 42 | func (m *WriteBatchRequest) XXX_Unmarshal(b []byte) error { 43 | return xxx_messageInfo_WriteBatchRequest.Unmarshal(m, b) 44 | } 45 | func (m *WriteBatchRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 46 | return xxx_messageInfo_WriteBatchRequest.Marshal(b, m, deterministic) 47 | } 48 | func (m *WriteBatchRequest) XXX_Merge(src proto.Message) { 49 | xxx_messageInfo_WriteBatchRequest.Merge(m, src) 50 | } 51 | func (m *WriteBatchRequest) XXX_Size() int { 52 | return xxx_messageInfo_WriteBatchRequest.Size(m) 53 | } 54 | func (m *WriteBatchRequest) XXX_DiscardUnknown() { 55 | xxx_messageInfo_WriteBatchRequest.DiscardUnknown(m) 56 | } 57 | 58 | var xxx_messageInfo_WriteBatchRequest proto.InternalMessageInfo 59 | 60 | func (m *WriteBatchRequest) GetBatch() []*WriteRequest { 61 | if m != nil { 62 | return m.Batch 63 | } 64 | return nil 65 | } 66 | 67 | type ReadBatchRequest struct { 68 | Batch []*ReadRequest `protobuf:"bytes,1,rep,name=batch,proto3" json:"batch,omitempty"` 69 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 70 | XXX_unrecognized []byte `json:"-"` 71 | XXX_sizecache int32 `json:"-"` 72 | } 73 | 74 | func (m *ReadBatchRequest) Reset() { *m = ReadBatchRequest{} } 75 | func (m *ReadBatchRequest) String() string { return proto.CompactTextString(m) } 76 | func (*ReadBatchRequest) ProtoMessage() {} 77 | func (*ReadBatchRequest) Descriptor() ([]byte, []int) { 78 | return fileDescriptor_77a6da22d6a3feb1, []int{1} 79 | } 80 | 81 | func (m *ReadBatchRequest) XXX_Unmarshal(b []byte) error { 82 | return xxx_messageInfo_ReadBatchRequest.Unmarshal(m, b) 83 | } 84 | func (m *ReadBatchRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 85 | return xxx_messageInfo_ReadBatchRequest.Marshal(b, m, deterministic) 86 | } 87 | func (m *ReadBatchRequest) XXX_Merge(src proto.Message) { 88 | xxx_messageInfo_ReadBatchRequest.Merge(m, src) 89 | } 90 | func (m *ReadBatchRequest) XXX_Size() int { 91 | return xxx_messageInfo_ReadBatchRequest.Size(m) 92 | } 93 | func (m *ReadBatchRequest) XXX_DiscardUnknown() { 94 | 
xxx_messageInfo_ReadBatchRequest.DiscardUnknown(m) 95 | } 96 | 97 | var xxx_messageInfo_ReadBatchRequest proto.InternalMessageInfo 98 | 99 | func (m *ReadBatchRequest) GetBatch() []*ReadRequest { 100 | if m != nil { 101 | return m.Batch 102 | } 103 | return nil 104 | } 105 | 106 | type ReadBatchResponse struct { 107 | Batch []*ReadResponse `protobuf:"bytes,1,rep,name=batch,proto3" json:"batch,omitempty"` 108 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 109 | XXX_unrecognized []byte `json:"-"` 110 | XXX_sizecache int32 `json:"-"` 111 | } 112 | 113 | func (m *ReadBatchResponse) Reset() { *m = ReadBatchResponse{} } 114 | func (m *ReadBatchResponse) String() string { return proto.CompactTextString(m) } 115 | func (*ReadBatchResponse) ProtoMessage() {} 116 | func (*ReadBatchResponse) Descriptor() ([]byte, []int) { 117 | return fileDescriptor_77a6da22d6a3feb1, []int{2} 118 | } 119 | 120 | func (m *ReadBatchResponse) XXX_Unmarshal(b []byte) error { 121 | return xxx_messageInfo_ReadBatchResponse.Unmarshal(m, b) 122 | } 123 | func (m *ReadBatchResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 124 | return xxx_messageInfo_ReadBatchResponse.Marshal(b, m, deterministic) 125 | } 126 | func (m *ReadBatchResponse) XXX_Merge(src proto.Message) { 127 | xxx_messageInfo_ReadBatchResponse.Merge(m, src) 128 | } 129 | func (m *ReadBatchResponse) XXX_Size() int { 130 | return xxx_messageInfo_ReadBatchResponse.Size(m) 131 | } 132 | func (m *ReadBatchResponse) XXX_DiscardUnknown() { 133 | xxx_messageInfo_ReadBatchResponse.DiscardUnknown(m) 134 | } 135 | 136 | var xxx_messageInfo_ReadBatchResponse proto.InternalMessageInfo 137 | 138 | func (m *ReadBatchResponse) GetBatch() []*ReadResponse { 139 | if m != nil { 140 | return m.Batch 141 | } 142 | return nil 143 | } 144 | 145 | type WriteRequest struct { 146 | SeriesId string `protobuf:"bytes,1,opt,name=series_id,json=seriesId,proto3" json:"series_id,omitempty"` 147 | Datapoint *Datapoint `protobuf:"bytes,2,opt,name=datapoint,proto3" json:"datapoint,omitempty"` 148 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 149 | XXX_unrecognized []byte `json:"-"` 150 | XXX_sizecache int32 `json:"-"` 151 | } 152 | 153 | func (m *WriteRequest) Reset() { *m = WriteRequest{} } 154 | func (m *WriteRequest) String() string { return proto.CompactTextString(m) } 155 | func (*WriteRequest) ProtoMessage() {} 156 | func (*WriteRequest) Descriptor() ([]byte, []int) { 157 | return fileDescriptor_77a6da22d6a3feb1, []int{3} 158 | } 159 | 160 | func (m *WriteRequest) XXX_Unmarshal(b []byte) error { 161 | return xxx_messageInfo_WriteRequest.Unmarshal(m, b) 162 | } 163 | func (m *WriteRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 164 | return xxx_messageInfo_WriteRequest.Marshal(b, m, deterministic) 165 | } 166 | func (m *WriteRequest) XXX_Merge(src proto.Message) { 167 | xxx_messageInfo_WriteRequest.Merge(m, src) 168 | } 169 | func (m *WriteRequest) XXX_Size() int { 170 | return xxx_messageInfo_WriteRequest.Size(m) 171 | } 172 | func (m *WriteRequest) XXX_DiscardUnknown() { 173 | xxx_messageInfo_WriteRequest.DiscardUnknown(m) 174 | } 175 | 176 | var xxx_messageInfo_WriteRequest proto.InternalMessageInfo 177 | 178 | func (m *WriteRequest) GetSeriesId() string { 179 | if m != nil { 180 | return m.SeriesId 181 | } 182 | return "" 183 | } 184 | 185 | func (m *WriteRequest) GetDatapoint() *Datapoint { 186 | if m != nil { 187 | return m.Datapoint 188 | } 189 | return nil 190 | } 191 | 192 | type ReadRequest struct { 193 | // TODO(rartoul): Time ranges. 
194 | SeriesId string `protobuf:"bytes,1,opt,name=series_id,json=seriesId,proto3" json:"series_id,omitempty"` 195 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 196 | XXX_unrecognized []byte `json:"-"` 197 | XXX_sizecache int32 `json:"-"` 198 | } 199 | 200 | func (m *ReadRequest) Reset() { *m = ReadRequest{} } 201 | func (m *ReadRequest) String() string { return proto.CompactTextString(m) } 202 | func (*ReadRequest) ProtoMessage() {} 203 | func (*ReadRequest) Descriptor() ([]byte, []int) { 204 | return fileDescriptor_77a6da22d6a3feb1, []int{4} 205 | } 206 | 207 | func (m *ReadRequest) XXX_Unmarshal(b []byte) error { 208 | return xxx_messageInfo_ReadRequest.Unmarshal(m, b) 209 | } 210 | func (m *ReadRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 211 | return xxx_messageInfo_ReadRequest.Marshal(b, m, deterministic) 212 | } 213 | func (m *ReadRequest) XXX_Merge(src proto.Message) { 214 | xxx_messageInfo_ReadRequest.Merge(m, src) 215 | } 216 | func (m *ReadRequest) XXX_Size() int { 217 | return xxx_messageInfo_ReadRequest.Size(m) 218 | } 219 | func (m *ReadRequest) XXX_DiscardUnknown() { 220 | xxx_messageInfo_ReadRequest.DiscardUnknown(m) 221 | } 222 | 223 | var xxx_messageInfo_ReadRequest proto.InternalMessageInfo 224 | 225 | func (m *ReadRequest) GetSeriesId() string { 226 | if m != nil { 227 | return m.SeriesId 228 | } 229 | return "" 230 | } 231 | 232 | type ReadResponse struct { 233 | SeriesId string `protobuf:"bytes,1,opt,name=series_id,json=seriesId,proto3" json:"series_id,omitempty"` 234 | Datapoints []*Datapoint `protobuf:"bytes,2,rep,name=datapoints,proto3" json:"datapoints,omitempty"` 235 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 236 | XXX_unrecognized []byte `json:"-"` 237 | XXX_sizecache int32 `json:"-"` 238 | } 239 | 240 | func (m *ReadResponse) Reset() { *m = ReadResponse{} } 241 | func (m *ReadResponse) String() string { return proto.CompactTextString(m) } 242 | func (*ReadResponse) ProtoMessage() {} 243 | func (*ReadResponse) Descriptor() ([]byte, []int) { 244 | return fileDescriptor_77a6da22d6a3feb1, []int{5} 245 | } 246 | 247 | func (m *ReadResponse) XXX_Unmarshal(b []byte) error { 248 | return xxx_messageInfo_ReadResponse.Unmarshal(m, b) 249 | } 250 | func (m *ReadResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 251 | return xxx_messageInfo_ReadResponse.Marshal(b, m, deterministic) 252 | } 253 | func (m *ReadResponse) XXX_Merge(src proto.Message) { 254 | xxx_messageInfo_ReadResponse.Merge(m, src) 255 | } 256 | func (m *ReadResponse) XXX_Size() int { 257 | return xxx_messageInfo_ReadResponse.Size(m) 258 | } 259 | func (m *ReadResponse) XXX_DiscardUnknown() { 260 | xxx_messageInfo_ReadResponse.DiscardUnknown(m) 261 | } 262 | 263 | var xxx_messageInfo_ReadResponse proto.InternalMessageInfo 264 | 265 | func (m *ReadResponse) GetSeriesId() string { 266 | if m != nil { 267 | return m.SeriesId 268 | } 269 | return "" 270 | } 271 | 272 | func (m *ReadResponse) GetDatapoints() []*Datapoint { 273 | if m != nil { 274 | return m.Datapoints 275 | } 276 | return nil 277 | } 278 | 279 | type Datapoint struct { 280 | TimestampNanos uint64 `protobuf:"varint,1,opt,name=timestamp_nanos,json=timestampNanos,proto3" json:"timestamp_nanos,omitempty"` 281 | Value float64 `protobuf:"fixed64,2,opt,name=value,proto3" json:"value,omitempty"` 282 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 283 | XXX_unrecognized []byte `json:"-"` 284 | XXX_sizecache int32 `json:"-"` 285 | } 286 | 287 | func (m *Datapoint) Reset() { *m = Datapoint{} } 288 | func (m *Datapoint) 
String() string { return proto.CompactTextString(m) } 289 | func (*Datapoint) ProtoMessage() {} 290 | func (*Datapoint) Descriptor() ([]byte, []int) { 291 | return fileDescriptor_77a6da22d6a3feb1, []int{6} 292 | } 293 | 294 | func (m *Datapoint) XXX_Unmarshal(b []byte) error { 295 | return xxx_messageInfo_Datapoint.Unmarshal(m, b) 296 | } 297 | func (m *Datapoint) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 298 | return xxx_messageInfo_Datapoint.Marshal(b, m, deterministic) 299 | } 300 | func (m *Datapoint) XXX_Merge(src proto.Message) { 301 | xxx_messageInfo_Datapoint.Merge(m, src) 302 | } 303 | func (m *Datapoint) XXX_Size() int { 304 | return xxx_messageInfo_Datapoint.Size(m) 305 | } 306 | func (m *Datapoint) XXX_DiscardUnknown() { 307 | xxx_messageInfo_Datapoint.DiscardUnknown(m) 308 | } 309 | 310 | var xxx_messageInfo_Datapoint proto.InternalMessageInfo 311 | 312 | func (m *Datapoint) GetTimestampNanos() uint64 { 313 | if m != nil { 314 | return m.TimestampNanos 315 | } 316 | return 0 317 | } 318 | 319 | func (m *Datapoint) GetValue() float64 { 320 | if m != nil { 321 | return m.Value 322 | } 323 | return 0 324 | } 325 | 326 | type Empty struct { 327 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 328 | XXX_unrecognized []byte `json:"-"` 329 | XXX_sizecache int32 `json:"-"` 330 | } 331 | 332 | func (m *Empty) Reset() { *m = Empty{} } 333 | func (m *Empty) String() string { return proto.CompactTextString(m) } 334 | func (*Empty) ProtoMessage() {} 335 | func (*Empty) Descriptor() ([]byte, []int) { 336 | return fileDescriptor_77a6da22d6a3feb1, []int{7} 337 | } 338 | 339 | func (m *Empty) XXX_Unmarshal(b []byte) error { 340 | return xxx_messageInfo_Empty.Unmarshal(m, b) 341 | } 342 | func (m *Empty) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 343 | return xxx_messageInfo_Empty.Marshal(b, m, deterministic) 344 | } 345 | func (m *Empty) XXX_Merge(src proto.Message) { 346 | xxx_messageInfo_Empty.Merge(m, src) 347 | } 348 | func (m *Empty) XXX_Size() int { 349 | return xxx_messageInfo_Empty.Size(m) 350 | } 351 | func (m *Empty) XXX_DiscardUnknown() { 352 | xxx_messageInfo_Empty.DiscardUnknown(m) 353 | } 354 | 355 | var xxx_messageInfo_Empty proto.InternalMessageInfo 356 | 357 | func init() { 358 | proto.RegisterType((*WriteBatchRequest)(nil), "tsdblayer.WriteBatchRequest") 359 | proto.RegisterType((*ReadBatchRequest)(nil), "tsdblayer.ReadBatchRequest") 360 | proto.RegisterType((*ReadBatchResponse)(nil), "tsdblayer.ReadBatchResponse") 361 | proto.RegisterType((*WriteRequest)(nil), "tsdblayer.WriteRequest") 362 | proto.RegisterType((*ReadRequest)(nil), "tsdblayer.ReadRequest") 363 | proto.RegisterType((*ReadResponse)(nil), "tsdblayer.ReadResponse") 364 | proto.RegisterType((*Datapoint)(nil), "tsdblayer.Datapoint") 365 | proto.RegisterType((*Empty)(nil), "tsdblayer.Empty") 366 | } 367 | 368 | func init() { proto.RegisterFile("rpc.proto", fileDescriptor_77a6da22d6a3feb1) } 369 | 370 | var fileDescriptor_77a6da22d6a3feb1 = []byte{ 371 | // 408 bytes of a gzipped FileDescriptorProto 372 | 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x7c, 0x93, 0xc1, 0xce, 0xd2, 0x40, 373 | 0x10, 0xc7, 0xb3, 0x28, 0xea, 0x0e, 0x04, 0x61, 0x43, 0xa0, 0x81, 0x9a, 0x90, 0xbd, 0x48, 0x88, 374 | 0x40, 0xac, 0x9e, 0x38, 0x99, 0x06, 0x0f, 0x1a, 0x63, 0x48, 0x35, 0xf1, 0xe0, 0x81, 0x2c, 0x74, 375 | 0x83, 0x4d, 0xa0, 0xbb, 0x76, 0x17, 0x0c, 0x57, 0x5f, 0xc1, 0xf7, 0xf2, 0xf2, 0xbd, 0xc2, 0xf7, 376 | 0x20, 0x5f, 0xba, 0xfb, 0xb5, 0x2c, 0x84, 0x70, 0xec, 0xcc, 0x7f, 0x7e, 0xf3, 0x9f, 
0xce, 0x2c, 377 | 0xe0, 0x4c, 0xae, 0x27, 0x32, 0x13, 0x5a, 0x10, 0xac, 0x55, 0xbc, 0xda, 0xb2, 0x23, 0xcf, 0x7a, 378 | 0xfe, 0x46, 0x88, 0xcd, 0x96, 0x4f, 0x99, 0x4c, 0xa6, 0x2c, 0x4d, 0x85, 0x66, 0x3a, 0x11, 0xa9, 379 | 0xb2, 0x42, 0x1a, 0x42, 0xeb, 0x47, 0x96, 0x68, 0x1e, 0x32, 0xbd, 0xfe, 0x15, 0xf1, 0xdf, 0x7b, 380 | 0xae, 0x34, 0x19, 0x43, 0x75, 0x95, 0x7f, 0x7b, 0x68, 0xf0, 0x64, 0x58, 0x0b, 0xba, 0x93, 0x92, 381 | 0x36, 0x31, 0xe2, 0x47, 0x5d, 0x64, 0x55, 0xf4, 0x03, 0x34, 0x23, 0xce, 0xe2, 0x33, 0xc4, 0x9b, 382 | 0x73, 0x44, 0xc7, 0x41, 0xe4, 0xda, 0x0b, 0x42, 0x08, 0x2d, 0x87, 0xa0, 0xa4, 0x48, 0x15, 0xbf, 383 | 0xe5, 0xc2, 0x22, 0xac, 0xae, 0x60, 0x2c, 0xa1, 0xee, 0x9a, 0x23, 0x7d, 0xc0, 0x8a, 0x67, 0x09, 384 | 0x57, 0xcb, 0x24, 0xf6, 0xd0, 0x00, 0x0d, 0x71, 0xf4, 0xc2, 0x06, 0x3e, 0xc5, 0x24, 0x00, 0x1c, 385 | 0x33, 0xcd, 0xa4, 0x48, 0x52, 0xed, 0x55, 0x06, 0x68, 0x58, 0x0b, 0xda, 0x0e, 0x7f, 0x5e, 0xe4, 386 | 0xa2, 0x93, 0x8c, 0x8e, 0xa0, 0xe6, 0x58, 0xbf, 0xc9, 0xa7, 0x0c, 0xea, 0xae, 0xc7, 0xdb, 0x66, 387 | 0xde, 0x03, 0x94, 0x5d, 0x94, 0x57, 0x31, 0xd3, 0x5e, 0x77, 0xe3, 0xe8, 0xe8, 0x67, 0xc0, 0x65, 388 | 0x82, 0xbc, 0x86, 0x97, 0x3a, 0xd9, 0x71, 0xa5, 0xd9, 0x4e, 0x2e, 0x53, 0x96, 0x0a, 0x65, 0xba, 389 | 0x3c, 0x8d, 0x1a, 0x65, 0xf8, 0x6b, 0x1e, 0x25, 0x6d, 0xa8, 0x1e, 0xd8, 0x76, 0xcf, 0xcd, 0xd0, 390 | 0x28, 0xb2, 0x1f, 0xf4, 0x39, 0x54, 0x3f, 0xee, 0xa4, 0x3e, 0x06, 0xff, 0x11, 0xe0, 0xef, 0xdf, 391 | 0xe6, 0xe1, 0x97, 0xbc, 0x31, 0xf9, 0x09, 0x70, 0x3a, 0x0e, 0xe2, 0x5f, 0x9e, 0x81, 0xbb, 0xf0, 392 | 0x5e, 0xd3, 0xc9, 0x1a, 0x16, 0x7d, 0xf5, 0xf7, 0xee, 0xfe, 0x5f, 0xa5, 0x4b, 0x89, 0x39, 0xbd, 393 | 0xc3, 0xdb, 0xe9, 0x9f, 0xb2, 0x68, 0x86, 0x46, 0x24, 0x06, 0x5c, 0xee, 0x9c, 0xf4, 0x2f, 0x96, 394 | 0x7b, 0x86, 0xf6, 0xaf, 0x27, 0xed, 0xaf, 0xa5, 0xbe, 0x69, 0xd3, 0xa1, 0xad, 0xa2, 0x4d, 0x56, 395 | 0x48, 0x66, 0x68, 0x14, 0x52, 0x68, 0xe4, 0xc5, 0x63, 0x5b, 0x9d, 0xc9, 0x75, 0xd8, 0x28, 0xe7, 396 | 0x5b, 0xe4, 0x2f, 0x60, 0x81, 0x56, 0xcf, 0xcc, 0x53, 0x78, 0xf7, 0x10, 0x00, 0x00, 0xff, 0xff, 397 | 0xb2, 0x3f, 0x6f, 0x0d, 0x40, 0x03, 0x00, 0x00, 398 | } 399 | 400 | // Reference imports to suppress errors if they are not otherwise used. 401 | var _ context.Context 402 | var _ grpc.ClientConn 403 | 404 | // This is a compile-time assertion to ensure that this generated file 405 | // is compatible with the grpc package it is being compiled against. 406 | const _ = grpc.SupportPackageIsVersion4 407 | 408 | // TSDBLayerClient is the client API for TSDBLayer service. 409 | // 410 | // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. 411 | type TSDBLayerClient interface { 412 | WriteBatch(ctx context.Context, in *WriteBatchRequest, opts ...grpc.CallOption) (*Empty, error) 413 | ReadBatch(ctx context.Context, in *ReadBatchRequest, opts ...grpc.CallOption) (*ReadBatchResponse, error) 414 | } 415 | 416 | type tSDBLayerClient struct { 417 | cc *grpc.ClientConn 418 | } 419 | 420 | func NewTSDBLayerClient(cc *grpc.ClientConn) TSDBLayerClient { 421 | return &tSDBLayerClient{cc} 422 | } 423 | 424 | func (c *tSDBLayerClient) WriteBatch(ctx context.Context, in *WriteBatchRequest, opts ...grpc.CallOption) (*Empty, error) { 425 | out := new(Empty) 426 | err := c.cc.Invoke(ctx, "/tsdblayer.TSDBLayer/WriteBatch", in, out, opts...) 
427 | if err != nil { 428 | return nil, err 429 | } 430 | return out, nil 431 | } 432 | 433 | func (c *tSDBLayerClient) ReadBatch(ctx context.Context, in *ReadBatchRequest, opts ...grpc.CallOption) (*ReadBatchResponse, error) { 434 | out := new(ReadBatchResponse) 435 | err := c.cc.Invoke(ctx, "/tsdblayer.TSDBLayer/ReadBatch", in, out, opts...) 436 | if err != nil { 437 | return nil, err 438 | } 439 | return out, nil 440 | } 441 | 442 | // TSDBLayerServer is the server API for TSDBLayer service. 443 | type TSDBLayerServer interface { 444 | WriteBatch(context.Context, *WriteBatchRequest) (*Empty, error) 445 | ReadBatch(context.Context, *ReadBatchRequest) (*ReadBatchResponse, error) 446 | } 447 | 448 | // UnimplementedTSDBLayerServer can be embedded to have forward compatible implementations. 449 | type UnimplementedTSDBLayerServer struct { 450 | } 451 | 452 | func (*UnimplementedTSDBLayerServer) WriteBatch(ctx context.Context, req *WriteBatchRequest) (*Empty, error) { 453 | return nil, status.Errorf(codes.Unimplemented, "method WriteBatch not implemented") 454 | } 455 | func (*UnimplementedTSDBLayerServer) ReadBatch(ctx context.Context, req *ReadBatchRequest) (*ReadBatchResponse, error) { 456 | return nil, status.Errorf(codes.Unimplemented, "method ReadBatch not implemented") 457 | } 458 | 459 | func RegisterTSDBLayerServer(s *grpc.Server, srv TSDBLayerServer) { 460 | s.RegisterService(&_TSDBLayer_serviceDesc, srv) 461 | } 462 | 463 | func _TSDBLayer_WriteBatch_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 464 | in := new(WriteBatchRequest) 465 | if err := dec(in); err != nil { 466 | return nil, err 467 | } 468 | if interceptor == nil { 469 | return srv.(TSDBLayerServer).WriteBatch(ctx, in) 470 | } 471 | info := &grpc.UnaryServerInfo{ 472 | Server: srv, 473 | FullMethod: "/tsdblayer.TSDBLayer/WriteBatch", 474 | } 475 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 476 | return srv.(TSDBLayerServer).WriteBatch(ctx, req.(*WriteBatchRequest)) 477 | } 478 | return interceptor(ctx, in, info, handler) 479 | } 480 | 481 | func _TSDBLayer_ReadBatch_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 482 | in := new(ReadBatchRequest) 483 | if err := dec(in); err != nil { 484 | return nil, err 485 | } 486 | if interceptor == nil { 487 | return srv.(TSDBLayerServer).ReadBatch(ctx, in) 488 | } 489 | info := &grpc.UnaryServerInfo{ 490 | Server: srv, 491 | FullMethod: "/tsdblayer.TSDBLayer/ReadBatch", 492 | } 493 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 494 | return srv.(TSDBLayerServer).ReadBatch(ctx, req.(*ReadBatchRequest)) 495 | } 496 | return interceptor(ctx, in, info, handler) 497 | } 498 | 499 | var _TSDBLayer_serviceDesc = grpc.ServiceDesc{ 500 | ServiceName: "tsdblayer.TSDBLayer", 501 | HandlerType: (*TSDBLayerServer)(nil), 502 | Methods: []grpc.MethodDesc{ 503 | { 504 | MethodName: "WriteBatch", 505 | Handler: _TSDBLayer_WriteBatch_Handler, 506 | }, 507 | { 508 | MethodName: "ReadBatch", 509 | Handler: _TSDBLayer_ReadBatch_Handler, 510 | }, 511 | }, 512 | Streams: []grpc.StreamDesc{}, 513 | Metadata: "rpc.proto", 514 | } 515 | -------------------------------------------------------------------------------- /protos/.gen/rpc.pb.gw.go: -------------------------------------------------------------------------------- 1 | // Code generated by 
protoc-gen-grpc-gateway. DO NOT EDIT. 2 | // source: rpc.proto 3 | 4 | /* 5 | Package tsdblayer is a reverse proxy. 6 | 7 | It translates gRPC into RESTful JSON APIs. 8 | */ 9 | package tsdblayer 10 | 11 | import ( 12 | "context" 13 | "io" 14 | "net/http" 15 | 16 | "github.com/golang/protobuf/proto" 17 | "github.com/grpc-ecosystem/grpc-gateway/runtime" 18 | "github.com/grpc-ecosystem/grpc-gateway/utilities" 19 | "google.golang.org/grpc" 20 | "google.golang.org/grpc/codes" 21 | "google.golang.org/grpc/grpclog" 22 | "google.golang.org/grpc/status" 23 | ) 24 | 25 | var _ codes.Code 26 | var _ io.Reader 27 | var _ status.Status 28 | var _ = runtime.String 29 | var _ = utilities.NewDoubleArray 30 | 31 | func request_TSDBLayer_WriteBatch_0(ctx context.Context, marshaler runtime.Marshaler, client TSDBLayerClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { 32 | var protoReq WriteBatchRequest 33 | var metadata runtime.ServerMetadata 34 | 35 | newReader, berr := utilities.IOReaderFactory(req.Body) 36 | if berr != nil { 37 | return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", berr) 38 | } 39 | if err := marshaler.NewDecoder(newReader()).Decode(&protoReq); err != nil && err != io.EOF { 40 | return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err) 41 | } 42 | 43 | msg, err := client.WriteBatch(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) 44 | return msg, metadata, err 45 | 46 | } 47 | 48 | func request_TSDBLayer_ReadBatch_0(ctx context.Context, marshaler runtime.Marshaler, client TSDBLayerClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { 49 | var protoReq ReadBatchRequest 50 | var metadata runtime.ServerMetadata 51 | 52 | newReader, berr := utilities.IOReaderFactory(req.Body) 53 | if berr != nil { 54 | return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", berr) 55 | } 56 | if err := marshaler.NewDecoder(newReader()).Decode(&protoReq); err != nil && err != io.EOF { 57 | return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err) 58 | } 59 | 60 | msg, err := client.ReadBatch(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) 61 | return msg, metadata, err 62 | 63 | } 64 | 65 | // RegisterTSDBLayerHandlerFromEndpoint is same as RegisterTSDBLayerHandler but 66 | // automatically dials to "endpoint" and closes the connection when "ctx" gets done. 67 | func RegisterTSDBLayerHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { 68 | conn, err := grpc.Dial(endpoint, opts...) 69 | if err != nil { 70 | return err 71 | } 72 | defer func() { 73 | if err != nil { 74 | if cerr := conn.Close(); cerr != nil { 75 | grpclog.Infof("Failed to close conn to %s: %v", endpoint, cerr) 76 | } 77 | return 78 | } 79 | go func() { 80 | <-ctx.Done() 81 | if cerr := conn.Close(); cerr != nil { 82 | grpclog.Infof("Failed to close conn to %s: %v", endpoint, cerr) 83 | } 84 | }() 85 | }() 86 | 87 | return RegisterTSDBLayerHandler(ctx, mux, conn) 88 | } 89 | 90 | // RegisterTSDBLayerHandler registers the http handlers for service TSDBLayer to "mux". 91 | // The handlers forward requests to the grpc endpoint over "conn". 
92 | func RegisterTSDBLayerHandler(ctx context.Context, mux *runtime.ServeMux, conn *grpc.ClientConn) error { 93 | return RegisterTSDBLayerHandlerClient(ctx, mux, NewTSDBLayerClient(conn)) 94 | } 95 | 96 | // RegisterTSDBLayerHandlerClient registers the http handlers for service TSDBLayer 97 | // to "mux". The handlers forward requests to the grpc endpoint over the given implementation of "TSDBLayerClient". 98 | // Note: the gRPC framework executes interceptors within the gRPC handler. If the passed in "TSDBLayerClient" 99 | // doesn't go through the normal gRPC flow (creating a gRPC client etc.) then it will be up to the passed in 100 | // "TSDBLayerClient" to call the correct interceptors. 101 | func RegisterTSDBLayerHandlerClient(ctx context.Context, mux *runtime.ServeMux, client TSDBLayerClient) error { 102 | 103 | mux.Handle("POST", pattern_TSDBLayer_WriteBatch_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { 104 | ctx, cancel := context.WithCancel(req.Context()) 105 | defer cancel() 106 | inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) 107 | rctx, err := runtime.AnnotateContext(ctx, mux, req) 108 | if err != nil { 109 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 110 | return 111 | } 112 | resp, md, err := request_TSDBLayer_WriteBatch_0(rctx, inboundMarshaler, client, req, pathParams) 113 | ctx = runtime.NewServerMetadataContext(ctx, md) 114 | if err != nil { 115 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 116 | return 117 | } 118 | 119 | forward_TSDBLayer_WriteBatch_0(ctx, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 120 | 121 | }) 122 | 123 | mux.Handle("POST", pattern_TSDBLayer_ReadBatch_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { 124 | ctx, cancel := context.WithCancel(req.Context()) 125 | defer cancel() 126 | inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) 127 | rctx, err := runtime.AnnotateContext(ctx, mux, req) 128 | if err != nil { 129 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 130 | return 131 | } 132 | resp, md, err := request_TSDBLayer_ReadBatch_0(rctx, inboundMarshaler, client, req, pathParams) 133 | ctx = runtime.NewServerMetadataContext(ctx, md) 134 | if err != nil { 135 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 136 | return 137 | } 138 | 139 | forward_TSDBLayer_ReadBatch_0(ctx, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 140 | 141 | }) 142 | 143 | return nil 144 | } 145 | 146 | var ( 147 | pattern_TSDBLayer_WriteBatch_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"api", "v1", "writeBatch"}, "")) 148 | 149 | pattern_TSDBLayer_ReadBatch_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2}, []string{"api", "v1", "readBatch"}, "")) 150 | ) 151 | 152 | var ( 153 | forward_TSDBLayer_WriteBatch_0 = runtime.ForwardResponseMessage 154 | 155 | forward_TSDBLayer_ReadBatch_0 = runtime.ForwardResponseMessage 156 | ) 157 | -------------------------------------------------------------------------------- /protos/google/api/annotations.proto: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015, Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | syntax = "proto3"; 16 | 17 | package google.api; 18 | 19 | import "google/api/http.proto"; 20 | import "google/protobuf/descriptor.proto"; 21 | 22 | option go_package = "google.golang.org/genproto/googleapis/api/annotations;annotations"; 23 | option java_multiple_files = true; 24 | option java_outer_classname = "AnnotationsProto"; 25 | option java_package = "com.google.api"; 26 | option objc_class_prefix = "GAPI"; 27 | 28 | extend google.protobuf.MethodOptions { 29 | // See `HttpRule`. 30 | HttpRule http = 72295728; 31 | } -------------------------------------------------------------------------------- /protos/google/api/http.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | 16 | syntax = "proto3"; 17 | 18 | package google.api; 19 | 20 | option cc_enable_arenas = true; 21 | option go_package = "google.golang.org/genproto/googleapis/api/annotations;annotations"; 22 | option java_multiple_files = true; 23 | option java_outer_classname = "HttpProto"; 24 | option java_package = "com.google.api"; 25 | option objc_class_prefix = "GAPI"; 26 | 27 | // Defines the HTTP configuration for an API service. It contains a list of 28 | // [HttpRule][google.api.HttpRule], each specifying the mapping of an RPC method 29 | // to one or more HTTP REST API methods. 30 | message Http { 31 | // A list of HTTP configuration rules that apply to individual API methods. 32 | // 33 | // **NOTE:** All service configuration rules follow "last one wins" order. 34 | repeated HttpRule rules = 1; 35 | 36 | // When set to true, URL path parameters will be fully URI-decoded except in 37 | // cases of single segment matches in reserved expansion, where "%2F" will be 38 | // left encoded. 39 | // 40 | // The default behavior is to not decode RFC 6570 reserved characters in multi 41 | // segment matches. 42 | bool fully_decode_reserved_expansion = 2; 43 | } 44 | 45 | // # gRPC Transcoding 46 | // 47 | // gRPC Transcoding is a feature for mapping between a gRPC method and one or 48 | // more HTTP REST endpoints. It allows developers to build a single API service 49 | // that supports both gRPC APIs and REST APIs. 
Many systems, including [Google 50 | // APIs](https://github.com/googleapis/googleapis), 51 | // [Cloud Endpoints](https://cloud.google.com/endpoints), [gRPC 52 | // Gateway](https://github.com/grpc-ecosystem/grpc-gateway), 53 | // and [Envoy](https://github.com/envoyproxy/envoy) proxy support this feature 54 | // and use it for large scale production services. 55 | // 56 | // `HttpRule` defines the schema of the gRPC/REST mapping. The mapping specifies 57 | // how different portions of the gRPC request message are mapped to the URL 58 | // path, URL query parameters, and HTTP request body. It also controls how the 59 | // gRPC response message is mapped to the HTTP response body. `HttpRule` is 60 | // typically specified as an `google.api.http` annotation on the gRPC method. 61 | // 62 | // Each mapping specifies a URL path template and an HTTP method. The path 63 | // template may refer to one or more fields in the gRPC request message, as long 64 | // as each field is a non-repeated field with a primitive (non-message) type. 65 | // The path template controls how fields of the request message are mapped to 66 | // the URL path. 67 | // 68 | // Example: 69 | // 70 | // service Messaging { 71 | // rpc GetMessage(GetMessageRequest) returns (Message) { 72 | // option (google.api.http) = { 73 | // get: "/v1/{name=messages/*}" 74 | // }; 75 | // } 76 | // } 77 | // message GetMessageRequest { 78 | // string name = 1; // Mapped to URL path. 79 | // } 80 | // message Message { 81 | // string text = 1; // The resource content. 82 | // } 83 | // 84 | // This enables an HTTP REST to gRPC mapping as below: 85 | // 86 | // HTTP | gRPC 87 | // -----|----- 88 | // `GET /v1/messages/123456` | `GetMessage(name: "messages/123456")` 89 | // 90 | // Any fields in the request message which are not bound by the path template 91 | // automatically become HTTP query parameters if there is no HTTP request body. 92 | // For example: 93 | // 94 | // service Messaging { 95 | // rpc GetMessage(GetMessageRequest) returns (Message) { 96 | // option (google.api.http) = { 97 | // get:"/v1/messages/{message_id}" 98 | // }; 99 | // } 100 | // } 101 | // message GetMessageRequest { 102 | // message SubMessage { 103 | // string subfield = 1; 104 | // } 105 | // string message_id = 1; // Mapped to URL path. 106 | // int64 revision = 2; // Mapped to URL query parameter `revision`. 107 | // SubMessage sub = 3; // Mapped to URL query parameter `sub.subfield`. 108 | // } 109 | // 110 | // This enables a HTTP JSON to RPC mapping as below: 111 | // 112 | // HTTP | gRPC 113 | // -----|----- 114 | // `GET /v1/messages/123456?revision=2&sub.subfield=foo` | 115 | // `GetMessage(message_id: "123456" revision: 2 sub: SubMessage(subfield: 116 | // "foo"))` 117 | // 118 | // Note that fields which are mapped to URL query parameters must have a 119 | // primitive type or a repeated primitive type or a non-repeated message type. 120 | // In the case of a repeated type, the parameter can be repeated in the URL 121 | // as `...?param=A¶m=B`. In the case of a message type, each field of the 122 | // message is mapped to a separate parameter, such as 123 | // `...?foo.a=A&foo.b=B&foo.c=C`. 124 | // 125 | // For HTTP methods that allow a request body, the `body` field 126 | // specifies the mapping. 
Consider a REST update method on the 127 | // message resource collection: 128 | // 129 | // service Messaging { 130 | // rpc UpdateMessage(UpdateMessageRequest) returns (Message) { 131 | // option (google.api.http) = { 132 | // patch: "/v1/messages/{message_id}" 133 | // body: "message" 134 | // }; 135 | // } 136 | // } 137 | // message UpdateMessageRequest { 138 | // string message_id = 1; // mapped to the URL 139 | // Message message = 2; // mapped to the body 140 | // } 141 | // 142 | // The following HTTP JSON to RPC mapping is enabled, where the 143 | // representation of the JSON in the request body is determined by 144 | // protos JSON encoding: 145 | // 146 | // HTTP | gRPC 147 | // -----|----- 148 | // `PATCH /v1/messages/123456 { "text": "Hi!" }` | `UpdateMessage(message_id: 149 | // "123456" message { text: "Hi!" })` 150 | // 151 | // The special name `*` can be used in the body mapping to define that 152 | // every field not bound by the path template should be mapped to the 153 | // request body. This enables the following alternative definition of 154 | // the update method: 155 | // 156 | // service Messaging { 157 | // rpc UpdateMessage(Message) returns (Message) { 158 | // option (google.api.http) = { 159 | // patch: "/v1/messages/{message_id}" 160 | // body: "*" 161 | // }; 162 | // } 163 | // } 164 | // message Message { 165 | // string message_id = 1; 166 | // string text = 2; 167 | // } 168 | // 169 | // 170 | // The following HTTP JSON to RPC mapping is enabled: 171 | // 172 | // HTTP | gRPC 173 | // -----|----- 174 | // `PATCH /v1/messages/123456 { "text": "Hi!" }` | `UpdateMessage(message_id: 175 | // "123456" text: "Hi!")` 176 | // 177 | // Note that when using `*` in the body mapping, it is not possible to 178 | // have HTTP parameters, as all fields not bound by the path end in 179 | // the body. This makes this option more rarely used in practice when 180 | // defining REST APIs. The common usage of `*` is in custom methods 181 | // which don't use the URL at all for transferring data. 182 | // 183 | // It is possible to define multiple HTTP methods for one RPC by using 184 | // the `additional_bindings` option. Example: 185 | // 186 | // service Messaging { 187 | // rpc GetMessage(GetMessageRequest) returns (Message) { 188 | // option (google.api.http) = { 189 | // get: "/v1/messages/{message_id}" 190 | // additional_bindings { 191 | // get: "/v1/users/{user_id}/messages/{message_id}" 192 | // } 193 | // }; 194 | // } 195 | // } 196 | // message GetMessageRequest { 197 | // string message_id = 1; 198 | // string user_id = 2; 199 | // } 200 | // 201 | // This enables the following two alternative HTTP JSON to RPC mappings: 202 | // 203 | // HTTP | gRPC 204 | // -----|----- 205 | // `GET /v1/messages/123456` | `GetMessage(message_id: "123456")` 206 | // `GET /v1/users/me/messages/123456` | `GetMessage(user_id: "me" message_id: 207 | // "123456")` 208 | // 209 | // ## Rules for HTTP mapping 210 | // 211 | // 1. Leaf request fields (recursive expansion nested messages in the request 212 | // message) are classified into three categories: 213 | // - Fields referred by the path template. They are passed via the URL path. 214 | // - Fields referred by the [HttpRule.body][google.api.HttpRule.body]. They are passed via the HTTP 215 | // request body. 216 | // - All other fields are passed via the URL query parameters, and the 217 | // parameter name is the field path in the request message. 
A repeated 218 | // field can be represented as multiple query parameters under the same 219 | // name. 220 | // 2. If [HttpRule.body][google.api.HttpRule.body] is "*", there is no URL query parameter, all fields 221 | // are passed via URL path and HTTP request body. 222 | // 3. If [HttpRule.body][google.api.HttpRule.body] is omitted, there is no HTTP request body, all 223 | // fields are passed via URL path and URL query parameters. 224 | // 225 | // ### Path template syntax 226 | // 227 | // Template = "/" Segments [ Verb ] ; 228 | // Segments = Segment { "/" Segment } ; 229 | // Segment = "*" | "**" | LITERAL | Variable ; 230 | // Variable = "{" FieldPath [ "=" Segments ] "}" ; 231 | // FieldPath = IDENT { "." IDENT } ; 232 | // Verb = ":" LITERAL ; 233 | // 234 | // The syntax `*` matches a single URL path segment. The syntax `**` matches 235 | // zero or more URL path segments, which must be the last part of the URL path 236 | // except the `Verb`. 237 | // 238 | // The syntax `Variable` matches part of the URL path as specified by its 239 | // template. A variable template must not contain other variables. If a variable 240 | // matches a single path segment, its template may be omitted, e.g. `{var}` 241 | // is equivalent to `{var=*}`. 242 | // 243 | // The syntax `LITERAL` matches literal text in the URL path. If the `LITERAL` 244 | // contains any reserved character, such characters should be percent-encoded 245 | // before the matching. 246 | // 247 | // If a variable contains exactly one path segment, such as `"{var}"` or 248 | // `"{var=*}"`, when such a variable is expanded into a URL path on the client 249 | // side, all characters except `[-_.~0-9a-zA-Z]` are percent-encoded. The 250 | // server side does the reverse decoding. Such variables show up in the 251 | // [Discovery 252 | // Document](https://developers.google.com/discovery/v1/reference/apis) as 253 | // `{var}`. 254 | // 255 | // If a variable contains multiple path segments, such as `"{var=foo/*}"` 256 | // or `"{var=**}"`, when such a variable is expanded into a URL path on the 257 | // client side, all characters except `[-_.~/0-9a-zA-Z]` are percent-encoded. 258 | // The server side does the reverse decoding, except "%2F" and "%2f" are left 259 | // unchanged. Such variables show up in the 260 | // [Discovery 261 | // Document](https://developers.google.com/discovery/v1/reference/apis) as 262 | // `{+var}`. 263 | // 264 | // ## Using gRPC API Service Configuration 265 | // 266 | // gRPC API Service Configuration (service config) is a configuration language 267 | // for configuring a gRPC service to become a user-facing product. The 268 | // service config is simply the YAML representation of the `google.api.Service` 269 | // proto message. 270 | // 271 | // As an alternative to annotating your proto file, you can configure gRPC 272 | // transcoding in your service config YAML files. You do this by specifying a 273 | // `HttpRule` that maps the gRPC method to a REST endpoint, achieving the same 274 | // effect as the proto annotation. This can be particularly useful if you 275 | // have a proto that is reused in multiple services. Note that any transcoding 276 | // specified in the service config will override any matching transcoding 277 | // configuration in the proto. 278 | // 279 | // Example: 280 | // 281 | // http: 282 | // rules: 283 | // # Selects a gRPC method and applies HttpRule to it. 
284 | // - selector: example.v1.Messaging.GetMessage 285 | // get: /v1/messages/{message_id}/{sub.subfield} 286 | // 287 | // ## Special notes 288 | // 289 | // When gRPC Transcoding is used to map a gRPC to JSON REST endpoints, the 290 | // proto to JSON conversion must follow the [proto3 291 | // specification](https://developers.google.com/protocol-buffers/docs/proto3#json). 292 | // 293 | // While the single segment variable follows the semantics of 294 | // [RFC 6570](https://tools.ietf.org/html/rfc6570) Section 3.2.2 Simple String 295 | // Expansion, the multi segment variable **does not** follow RFC 6570 Section 296 | // 3.2.3 Reserved Expansion. The reason is that the Reserved Expansion 297 | // does not expand special characters like `?` and `#`, which would lead 298 | // to invalid URLs. As the result, gRPC Transcoding uses a custom encoding 299 | // for multi segment variables. 300 | // 301 | // The path variables **must not** refer to any repeated or mapped field, 302 | // because client libraries are not capable of handling such variable expansion. 303 | // 304 | // The path variables **must not** capture the leading "/" character. The reason 305 | // is that the most common use case "{var}" does not capture the leading "/" 306 | // character. For consistency, all path variables must share the same behavior. 307 | // 308 | // Repeated message fields must not be mapped to URL query parameters, because 309 | // no client library can support such complicated mapping. 310 | // 311 | // If an API needs to use a JSON array for request or response body, it can map 312 | // the request or response body to a repeated field. However, some gRPC 313 | // Transcoding implementations may not support this feature. 314 | message HttpRule { 315 | // Selects a method to which this rule applies. 316 | // 317 | // Refer to [selector][google.api.DocumentationRule.selector] for syntax details. 318 | string selector = 1; 319 | 320 | // Determines the URL pattern is matched by this rules. This pattern can be 321 | // used with any of the {get|put|post|delete|patch} methods. A custom method 322 | // can be defined using the 'custom' field. 323 | oneof pattern { 324 | // Maps to HTTP GET. Used for listing and getting information about 325 | // resources. 326 | string get = 2; 327 | 328 | // Maps to HTTP PUT. Used for replacing a resource. 329 | string put = 3; 330 | 331 | // Maps to HTTP POST. Used for creating a resource or performing an action. 332 | string post = 4; 333 | 334 | // Maps to HTTP DELETE. Used for deleting a resource. 335 | string delete = 5; 336 | 337 | // Maps to HTTP PATCH. Used for updating a resource. 338 | string patch = 6; 339 | 340 | // The custom pattern is used for specifying an HTTP method that is not 341 | // included in the `pattern` field, such as HEAD, or "*" to leave the 342 | // HTTP method unspecified for this rule. The wild-card rule is useful 343 | // for services that provide content to Web (HTML) clients. 344 | CustomHttpPattern custom = 8; 345 | } 346 | 347 | // The name of the request field whose value is mapped to the HTTP request 348 | // body, or `*` for mapping all request fields not captured by the path 349 | // pattern to the HTTP body, or omitted for not having any HTTP request body. 350 | // 351 | // NOTE: the referred field must be present at the top-level of the request 352 | // message type. 353 | string body = 7; 354 | 355 | // Optional. The name of the response field whose value is mapped to the HTTP 356 | // response body. 
When omitted, the entire response message will be used 357 | // as the HTTP response body. 358 | // 359 | // NOTE: The referred field must be present at the top-level of the response 360 | // message type. 361 | string response_body = 12; 362 | 363 | // Additional HTTP bindings for the selector. Nested bindings must 364 | // not contain an `additional_bindings` field themselves (that is, 365 | // the nesting may only be one level deep). 366 | repeated HttpRule additional_bindings = 11; 367 | } 368 | 369 | // A custom pattern is used for defining custom HTTP verb. 370 | message CustomHttpPattern { 371 | // The name of this custom HTTP verb. 372 | string kind = 1; 373 | 374 | // The path matched by this custom verb. 375 | string path = 2; 376 | } -------------------------------------------------------------------------------- /protos/rpc.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option java_multiple_files = true; 4 | option java_package = "tsdb-layer.rpc"; 5 | option java_outer_classname = "TSDBLayerProto"; 6 | 7 | package tsdblayer; 8 | 9 | import "google/api/annotations.proto"; 10 | 11 | // Interface exported by the server. 12 | service TSDBLayer { 13 | rpc WriteBatch(WriteBatchRequest) returns (Empty) { 14 | option (google.api.http) = { 15 | post: "/api/v1/writeBatch" 16 | body: "*" 17 | }; 18 | } 19 | rpc ReadBatch(ReadBatchRequest) returns (ReadBatchResponse) { 20 | option (google.api.http) = { 21 | post: "/api/v1/readBatch" 22 | body: "*" 23 | }; 24 | } 25 | } 26 | 27 | 28 | message WriteBatchRequest { 29 | repeated WriteRequest batch = 1; 30 | } 31 | 32 | message ReadBatchRequest { 33 | repeated ReadRequest batch = 1; 34 | } 35 | 36 | message ReadBatchResponse { 37 | repeated ReadResponse batch = 1; 38 | } 39 | 40 | message WriteRequest { 41 | string series_id = 1; 42 | Datapoint datapoint = 2; 43 | } 44 | 45 | message ReadRequest { 46 | // TODO(rartoul): Time ranges. 
47 | string series_id = 1; 48 | } 49 | 50 | message ReadResponse { 51 | string series_id = 1; 52 | repeated Datapoint datapoints = 2; 53 | } 54 | 55 | message Datapoint { 56 | uint64 timestamp_nanos = 1; 57 | double value = 2; 58 | } 59 | 60 | message Empty {} 61 | 62 | -------------------------------------------------------------------------------- /resources/fdb_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/richardartoul/tsdb-layer/ae5d4df717e8d728bd764bbad452e37488b76576/resources/fdb_index.png -------------------------------------------------------------------------------- /resources/fdb_storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/richardartoul/tsdb-layer/ae5d4df717e8d728bd764bbad452e37488b76576/resources/fdb_storage.png -------------------------------------------------------------------------------- /resources/fdb_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/richardartoul/tsdb-layer/ae5d4df717e8d728bd764bbad452e37488b76576/resources/fdb_time.png -------------------------------------------------------------------------------- /resources/m3db_storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/richardartoul/tsdb-layer/ae5d4df717e8d728bd764bbad452e37488b76576/resources/m3db_storage.png -------------------------------------------------------------------------------- /resources/m3db_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/richardartoul/tsdb-layer/ae5d4df717e8d728bd764bbad452e37488b76576/resources/m3db_time.png -------------------------------------------------------------------------------- /src/cmd/bench/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "math/rand" 9 | "runtime/pprof" 10 | "sync" 11 | "sync/atomic" 12 | "time" 13 | 14 | "github.com/richardartoul/tsdb-layer/src/layer" 15 | "github.com/richardartoul/tsdb-layer/src/layer/dircompress" 16 | "github.com/richardartoul/tsdb-layer/src/layer/raw" 17 | "github.com/richardartoul/tsdb-layer/src/layer/rawblock" 18 | ) 19 | 20 | var ( 21 | numSeriesFlag = flag.Int("numSeries", 100000, "number of unique series") 22 | batchSizeFlag = flag.Int("batchSize", 128, "client batch size") 23 | numWorkersFlag = flag.Int("numWorkers", 100, "number of concurrent workers") 24 | durationFlag = flag.Duration("duration", time.Minute, "duration to run the load test") 25 | layerEngineFlag = flag.String("layerEngine", "raw-block", "layer engine to benchmark") 26 | ) 27 | 28 | func main() { 29 | flag.Parse() 30 | 31 | tempFile, err := ioutil.TempFile("", "bench_cpu ") 32 | if err != nil { 33 | panic(err) 34 | } 35 | 36 | pprof.StartCPUProfile(tempFile) 37 | defer func() { 38 | defer pprof.StopCPUProfile() 39 | fmt.Println("cpu profile at:", tempFile.Name()) 40 | }() 41 | 42 | var ( 43 | numSeries = *numSeriesFlag 44 | batchSize = *batchSizeFlag 45 | numWorkers = *numWorkersFlag 46 | duration = *durationFlag 47 | layerEngine = *layerEngineFlag 48 | ) 49 | fmt.Println("Running test with arguments:") 50 | fmt.Println(" layerEngine:", layerEngine) 51 | fmt.Println(" numSeries:", numSeries) 52 | fmt.Println(" batchSize:", batchSize) 53 | 
fmt.Println(" numWorkers:", numWorkers) 54 | fmt.Println(" duration:", duration) 55 | var layerClient layer.Layer 56 | switch layerEngine { 57 | case "direct-compress": 58 | layerClient = dircompress.NewLayer() 59 | case "raw": 60 | layerClient = raw.NewLayer() 61 | case "raw-block": 62 | layerClient = rawblock.NewLayer() 63 | default: 64 | log.Fatalf("invalid layer engine: %s", layerEngine) 65 | } 66 | 67 | seriesIDs := make([]string, 0, numSeries) 68 | for i := 0; i < numSeries; i++ { 69 | seriesIDs = append(seriesIDs, fmt.Sprintf("%s-%d", randomString(20), i)) 70 | } 71 | 72 | var ( 73 | wg sync.WaitGroup 74 | numWritesCompleted int64 75 | doneCh = make(chan struct{}) 76 | ) 77 | go func() { 78 | time.Sleep(duration) 79 | close(doneCh) 80 | }() 81 | for i := 0; i < numWorkers; i++ { 82 | wg.Add(1) 83 | // Chunk up the IDs into groups for each worker. 84 | idsBatchSize := len(seriesIDs) / numWorkers 85 | localIDs := seriesIDs[idsBatchSize*i : idsBatchSize*i+idsBatchSize] 86 | 87 | go func(localIDs []string) { 88 | defer wg.Done() 89 | 90 | var ( 91 | batch = make([]layer.Write, 0, batchSize) 92 | source = rand.NewSource(time.Now().UnixNano()) 93 | rng = rand.New(source) 94 | currVal int64 95 | ) 96 | for { 97 | select { 98 | case <-doneCh: 99 | atomic.AddInt64(&numWritesCompleted, currVal) 100 | return 101 | default: 102 | } 103 | batch = batch[:0] 104 | for j := 0; j < batchSize; j++ { 105 | idx := rng.Intn(len(localIDs)) 106 | batch = append( 107 | batch, 108 | layer.Write{ 109 | ID: localIDs[idx], 110 | Timestamp: time.Unix(0, int64(currVal)), 111 | Value: float64(currVal)}) 112 | currVal++ 113 | } 114 | if err := layerClient.WriteBatch(batch); err != nil { 115 | panic(err) 116 | } 117 | } 118 | }(localIDs) 119 | 120 | } 121 | wg.Wait() 122 | 123 | qps := float64(numWritesCompleted) / duration.Seconds() 124 | fmt.Println("QPS: ", qps) 125 | } 126 | 127 | func randomString(len int) string { 128 | bytes := make([]byte, len) 129 | for i := 0; i < len; i++ { 130 | bytes[i] = byte(65 + rand.Intn(25)) //A=65 and Z = 65+25 131 | } 132 | return string(bytes) 133 | } 134 | -------------------------------------------------------------------------------- /src/cmd/server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "net" 9 | "net/http" 10 | 11 | pb "github.com/richardartoul/tsdb-layer/protos/.gen" 12 | "github.com/richardartoul/tsdb-layer/src/layer/server" 13 | 14 | "github.com/grpc-ecosystem/grpc-gateway/runtime" 15 | "google.golang.org/grpc" 16 | "google.golang.org/grpc/credentials" 17 | ) 18 | 19 | var ( 20 | useTLS = flag.Bool("use_tls", false, "Connection uses TLS if true, else plain TCP") 21 | certFile = flag.String("cert_file", "", "The TLS cert file") 22 | keyFile = flag.String("key_file", "", "The TLS key file") 23 | port = flag.Int("port", 10000, "The server port") 24 | ) 25 | 26 | func main() { 27 | flag.Parse() 28 | var ( 29 | opts []grpc.ServerOption 30 | dopts []grpc.DialOption 31 | ) 32 | if *useTLS { 33 | if *certFile == "" { 34 | log.Fatalf("cert_file path is required") 35 | } 36 | if *keyFile == "" { 37 | log.Fatalf("key_file path is required") 38 | } 39 | creds, err := credentials.NewServerTLSFromFile(*certFile, *keyFile) 40 | if err != nil { 41 | log.Fatalf("Failed to generate credentials %v", err) 42 | } 43 | opts = []grpc.ServerOption{grpc.Creds(creds)} 44 | dopts = []grpc.DialOption{grpc.WithTransportCredentials(creds)} 45 | } else { 46 | dopts = 
[]grpc.DialOption{grpc.WithInsecure()} 47 | } 48 | 49 | conn, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) 50 | if err != nil { 51 | log.Fatalf("Failed to initial TCP listen : %v\n", err) 52 | } 53 | 54 | go func() { 55 | // Start gRPC. 56 | grpcServer := grpc.NewServer(opts...) 57 | pb.RegisterTSDBLayerServer(grpcServer, server.NewServer()) 58 | log.Printf("gRPC Listening on %s\n", conn.Addr().String()) 59 | if err := grpcServer.Serve(conn); err != nil { 60 | log.Fatalf("error initializing gRPC: %v", err) 61 | } 62 | }() 63 | 64 | connString := fmt.Sprintf("localhost:%d", *port) 65 | mux := runtime.NewServeMux() 66 | err = pb.RegisterTSDBLayerHandlerFromEndpoint(context.Background(), mux, connString, dopts) 67 | if err != nil { 68 | log.Fatalf("Failed to register http handler from endpoint: %v\n", err) 69 | } 70 | 71 | port := *port + 1 72 | log.Printf("HTTP Listening on %d\n", port) 73 | log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", port), mux)) 74 | } 75 | -------------------------------------------------------------------------------- /src/encoding/common.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | const ( 4 | hasMoreBit = 1 5 | ) 6 | -------------------------------------------------------------------------------- /src/encoding/decoder.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "io" 7 | "math" 8 | "time" 9 | 10 | "github.com/m3db/m3/src/dbnode/encoding" 11 | "github.com/m3db/m3/src/dbnode/encoding/m3tsz" 12 | xtime "github.com/m3db/m3/src/x/time" 13 | ) 14 | 15 | type Decoder interface { 16 | ReadableDecoder 17 | Reset(b []byte) 18 | } 19 | 20 | type ReadableDecoder interface { 21 | Next() bool 22 | Current() (time.Time, float64) 23 | Err() error 24 | } 25 | 26 | type decoder struct { 27 | tsDecoder m3tsz.TimestampIterator 28 | floatDecoder m3tsz.FloatEncoderAndIterator 29 | bReader *bytes.Reader 30 | stream encoding.IStream 31 | 32 | err error 33 | done bool 34 | } 35 | 36 | // NewDecoder creates a new decoder. 37 | func NewDecoder() Decoder { 38 | return &decoder{ 39 | bReader: bytes.NewReader(nil), 40 | stream: encoding.NewIStream(nil), 41 | } 42 | } 43 | 44 | func (d *decoder) Reset(b []byte) { 45 | d.tsDecoder = m3tsz.NewTimestampIterator(opts, true) 46 | d.tsDecoder.TimeUnit = xtime.Nanosecond 47 | d.floatDecoder = m3tsz.FloatEncoderAndIterator{} 48 | d.bReader.Reset(b) 49 | d.stream.Reset(d.bReader) 50 | d.done = false 51 | } 52 | 53 | func (d *decoder) Next() bool { 54 | if d.done || d.err != nil { 55 | return false 56 | } 57 | 58 | bit, err := d.stream.ReadBit() 59 | if err == io.EOF { 60 | d.done = true 61 | return false 62 | } 63 | if err != nil { 64 | d.err = err 65 | return false 66 | } 67 | if bit != hasMoreBit { 68 | d.done = true 69 | return false 70 | } 71 | 72 | _, done, err := d.tsDecoder.ReadTimestamp(d.stream) 73 | if done { 74 | // This should never happen since we never encode the EndOfStream marker. 
75 | d.err = errors.New("unexpected end of timestamp stream") 76 | return false 77 | } 78 | if err != nil { 79 | d.err = err 80 | return false 81 | } 82 | 83 | if err := d.floatDecoder.ReadFloat(d.stream); err != nil { 84 | d.err = err 85 | return false 86 | } 87 | 88 | return true 89 | } 90 | 91 | func (d *decoder) Current() (time.Time, float64) { 92 | return d.tsDecoder.PrevTime, math.Float64frombits(d.floatDecoder.PrevFloatBits) 93 | } 94 | 95 | func (d *decoder) Err() error { 96 | return d.err 97 | } 98 | -------------------------------------------------------------------------------- /src/encoding/encoder.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math" 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/m3db/m3/src/dbnode/encoding" 11 | "github.com/m3db/m3/src/dbnode/encoding/m3tsz" 12 | xtime "github.com/m3db/m3/src/x/time" 13 | ) 14 | 15 | var ( 16 | // TODO(rartoul): Eliminate the need for this. 17 | opts = encoding.NewOptions() 18 | ) 19 | 20 | type Encoder interface { 21 | Encode(timestamp time.Time, value float64) error 22 | LastEncoded() (time.Time, float64, bool) 23 | State() []byte 24 | Restore(b []byte) error 25 | Bytes() []byte 26 | } 27 | 28 | type marshalState struct { 29 | TSEncoder m3tsz.TimestampEncoder 30 | FloatEncoder m3tsz.FloatEncoderAndIterator 31 | LastByte byte 32 | BitPos int 33 | HasWrittenFirst bool 34 | } 35 | 36 | type encoder struct { 37 | tsEncoder m3tsz.TimestampEncoder 38 | floatEncoder m3tsz.FloatEncoderAndIterator 39 | stream OStream 40 | 41 | hasWrittenFirst bool 42 | } 43 | 44 | // NewEncoder creates a new encoder. 45 | func NewEncoder() Encoder { 46 | return &encoder{} 47 | } 48 | 49 | func (e *encoder) Encode(timestamp time.Time, value float64) error { 50 | if e.stream == nil { 51 | // Lazy init. 52 | e.stream = NewOStream() 53 | e.tsEncoder = m3tsz.NewTimestampEncoder(timestamp, xtime.Nanosecond, opts) 54 | } 55 | 56 | e.stream.WriteBit(hasMoreBit) 57 | 58 | var ( 59 | // Unsafe insanity to temporarily avoid having to fork upstream. 60 | encodingStream = *(*encoding.OStream)(unsafe.Pointer(&e.stream)) 61 | err error 62 | ) 63 | if !e.hasWrittenFirst { 64 | err = e.tsEncoder.WriteFirstTime(encodingStream, timestamp, nil, xtime.Nanosecond) 65 | } else { 66 | err = e.tsEncoder.WriteNextTime(encodingStream, timestamp, nil, xtime.Nanosecond) 67 | } 68 | if err != nil { 69 | return err 70 | } 71 | 72 | e.floatEncoder.WriteFloat(encodingStream, value) 73 | e.hasWrittenFirst = true 74 | return nil 75 | } 76 | 77 | func (e *encoder) LastEncoded() (time.Time, float64, bool) { 78 | return e.tsEncoder.PrevTime, math.Float64frombits(e.floatEncoder.PrevFloatBits), e.hasWrittenFirst 79 | } 80 | 81 | func (e *encoder) State() []byte { 82 | var ( 83 | raw, bitPos = e.stream.Rawbytes() 84 | lastByte byte 85 | ) 86 | if len(raw) > 0 { 87 | lastByte = raw[len(raw)-1] 88 | } 89 | 90 | marshalState := marshalState{ 91 | TSEncoder: e.tsEncoder, 92 | FloatEncoder: e.floatEncoder, 93 | HasWrittenFirst: e.hasWrittenFirst, 94 | LastByte: lastByte, 95 | BitPos: bitPos, 96 | } 97 | // Prevent JSON marshaling error. 98 | marshalState.TSEncoder.Options = nil 99 | 100 | // TODO(rartoul): Replace this with something efficient / performant. 101 | marshaled, err := json.Marshal(&marshalState) 102 | if err != nil { 103 | // TODO(rartoul): Remove this once there is a better encoding scheme. 
104 | panic(err) 105 | } 106 | 107 | return marshaled 108 | } 109 | 110 | func (e *encoder) Restore(b []byte) error { 111 | if b == nil { 112 | return fmt.Errorf("cannot restore from nil state") 113 | } 114 | 115 | marshalState := marshalState{} 116 | if err := json.Unmarshal(b, &marshalState); err != nil { 117 | return err 118 | } 119 | 120 | e.tsEncoder = marshalState.TSEncoder 121 | e.tsEncoder.Options = opts 122 | e.floatEncoder = marshalState.FloatEncoder 123 | e.hasWrittenFirst = marshalState.HasWrittenFirst 124 | 125 | if e.stream == nil { 126 | e.stream = NewOStream() 127 | } 128 | // TODO(rartoul): Fix this non-sense. 129 | e.stream.(*ostream).buf = []byte{marshalState.LastByte} 130 | e.stream.(*ostream).pos = marshalState.BitPos 131 | 132 | return nil 133 | } 134 | 135 | func (e *encoder) Bytes() []byte { 136 | if e.stream == nil { 137 | return nil 138 | } 139 | 140 | b, _ := e.stream.Rawbytes() 141 | return b 142 | } 143 | -------------------------------------------------------------------------------- /src/encoding/merge.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | // MergeStreams merges a list of streams into a a single stream. 4 | func MergeStreams(streams ...[]byte) ([]byte, error) { 5 | decoders := make([]Decoder, 0, len(streams)) 6 | for _, stream := range streams { 7 | dec := NewDecoder() 8 | dec.Reset(stream) 9 | decoders = append(decoders, dec) 10 | } 11 | 12 | multiDec := NewMultiDecoder() 13 | multiDec.Reset(decoders) 14 | 15 | mergedEnc := NewEncoder() 16 | for multiDec.Next() { 17 | mergedEnc.Encode(multiDec.Current()) 18 | } 19 | if err := multiDec.Err(); err != nil { 20 | return nil, err 21 | } 22 | 23 | return mergedEnc.Bytes(), nil 24 | } 25 | -------------------------------------------------------------------------------- /src/encoding/merge_test.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | type mergeStreamsTestCase struct { 13 | title string 14 | streams [][]testValue 15 | } 16 | 17 | func TestMergeStreams(t *testing.T) { 18 | testCases := []mergeStreamsTestCase{ 19 | { 20 | title: "Merge two in order streams", 21 | streams: [][]testValue{ 22 | []testValue{{timestamp: time.Unix(0, 0), value: 0}}, 23 | []testValue{{timestamp: time.Unix(1, 0), value: 1}}, 24 | }, 25 | }, 26 | { 27 | title: "Merge two out of order streams", 28 | streams: [][]testValue{ 29 | []testValue{{timestamp: time.Unix(1, 0), value: 1}}, 30 | []testValue{{timestamp: time.Unix(0, 0), value: 0}}, 31 | }, 32 | }, 33 | { 34 | title: "Merge multiple streams", 35 | streams: [][]testValue{ 36 | []testValue{{timestamp: time.Unix(10, 0), value: 10}, {timestamp: time.Unix(11, 0), value: 11}}, 37 | []testValue{{timestamp: time.Unix(7, 0), value: 7}}, 38 | []testValue{{timestamp: time.Unix(8, 0), value: 8}, {timestamp: time.Unix(9, 0), value: 9}}, 39 | []testValue{{timestamp: time.Unix(1, 0), value: 1}, {timestamp: time.Unix(3, 0), value: 3}}, 40 | }, 41 | }, 42 | } 43 | 44 | for _, tc := range testCases { 45 | t.Run(tc.title, func(t *testing.T) { 46 | streams := make([][]byte, 0, len(tc.streams)) 47 | expected := []testValue{} 48 | for _, stream := range tc.streams { 49 | enc := NewEncoder() 50 | for _, v := range stream { 51 | enc.Encode(v.timestamp, v.value) 52 | expected = append(expected, v) 53 | } 54 | 55 | streams = append(streams, enc.Bytes()) 
56 | } 57 | sort.Slice(expected, func(i, j int) bool { 58 | return expected[i].timestamp.Before(expected[j].timestamp) 59 | }) 60 | 61 | merged, err := MergeStreams(streams...) 62 | require.NoError(t, err) 63 | decoder := NewDecoder() 64 | decoder.Reset(merged) 65 | 66 | i := 0 67 | for decoder.Next() { 68 | currT, currV := decoder.Current() 69 | require.True( 70 | t, 71 | expected[i].timestamp.Equal(currT), 72 | fmt.Sprintf("expected %s but got %s", expected[i].timestamp.String(), currT.String())) 73 | require.Equal(t, expected[i].value, currV) 74 | i++ 75 | } 76 | require.NoError(t, decoder.Err()) 77 | require.Equal(t, len(expected), i) 78 | }) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/encoding/multi_decoder.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | import ( 4 | "container/heap" 5 | "time" 6 | ) 7 | 8 | type MultiDecoder interface { 9 | ReadableDecoder 10 | Reset(decs []Decoder) 11 | } 12 | 13 | type decState struct { 14 | dec Decoder 15 | } 16 | 17 | type multiDecoder struct { 18 | decs []decState 19 | currEntry heapEntry 20 | heap minHeap 21 | err error 22 | } 23 | 24 | func NewMultiDecoder() *multiDecoder { 25 | return &multiDecoder{} 26 | } 27 | 28 | func (m *multiDecoder) Next() bool { 29 | if m.err != nil { 30 | return false 31 | } 32 | if m.heap.Len() == 0 { 33 | return false 34 | } 35 | m.currEntry = heap.Pop(&m.heap).(heapEntry) 36 | dec := m.decs[m.currEntry.decIdx].dec 37 | if dec.Next() { 38 | t, v := dec.Current() 39 | heap.Push(&m.heap, heapEntry{t: t, v: v, decIdx: m.currEntry.decIdx}) 40 | } else { 41 | if dec.Err() != nil { 42 | m.err = dec.Err() 43 | } 44 | } 45 | return true 46 | } 47 | 48 | func (m *multiDecoder) Current() (time.Time, float64) { 49 | return m.currEntry.t, m.currEntry.v 50 | } 51 | 52 | func (m *multiDecoder) Err() error { 53 | return nil 54 | } 55 | 56 | func (m *multiDecoder) Reset(decs []Decoder) { 57 | m.err = nil 58 | for i := range m.decs { 59 | m.decs[i] = decState{} 60 | } 61 | m.decs = m.decs[:0] 62 | for _, dec := range decs { 63 | m.decs = append(m.decs, decState{dec: dec}) 64 | } 65 | 66 | m.heap.vals = m.heap.vals[:0] 67 | for i, dec := range m.decs { 68 | if dec.dec.Next() { 69 | t, v := dec.dec.Current() 70 | m.heap.vals = append(m.heap.vals, heapEntry{t: t, v: v, decIdx: i}) 71 | } else { 72 | if dec.dec.Err() != nil { 73 | m.err = dec.dec.Err() 74 | } 75 | } 76 | } 77 | heap.Init(&m.heap) 78 | } 79 | 80 | type minHeap struct { 81 | vals []heapEntry 82 | } 83 | 84 | type heapEntry struct { 85 | t time.Time 86 | v float64 87 | decIdx int 88 | } 89 | 90 | func (h *minHeap) Push(x interface{}) { 91 | h.vals = append(h.vals, x.(heapEntry)) 92 | } 93 | 94 | func (h *minHeap) Pop() interface{} { 95 | lastIdx := len(h.vals) - 1 96 | x := h.vals[lastIdx] 97 | h.vals = h.vals[:lastIdx] 98 | return x 99 | } 100 | 101 | func (h *minHeap) Len() int { 102 | if h == nil { 103 | return 0 104 | } 105 | return len(h.vals) 106 | } 107 | 108 | func (h *minHeap) Less(i, j int) bool { 109 | return h.vals[i].t.Before(h.vals[j].t) 110 | } 111 | 112 | func (h *minHeap) Swap(i, j int) { 113 | h.vals[i], h.vals[j] = h.vals[j], h.vals[i] 114 | } 115 | -------------------------------------------------------------------------------- /src/encoding/multi_decoder_test.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "testing" 7 | "time" 8 
| 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | type multiDecoderTestCase struct { 13 | title string 14 | streams [][]testValue 15 | } 16 | 17 | func TestMultiDecoder(t *testing.T) { 18 | testCases := []multiDecoderTestCase{ 19 | { 20 | title: "Merge two in order streams", 21 | streams: [][]testValue{ 22 | []testValue{{timestamp: time.Unix(0, 0), value: 0}}, 23 | []testValue{{timestamp: time.Unix(1, 0), value: 1}}, 24 | }, 25 | }, 26 | { 27 | title: "Merge two out of order streams", 28 | streams: [][]testValue{ 29 | []testValue{{timestamp: time.Unix(1, 0), value: 1}}, 30 | []testValue{{timestamp: time.Unix(0, 0), value: 0}}, 31 | }, 32 | }, 33 | { 34 | title: "Merge multiple streams", 35 | streams: [][]testValue{ 36 | []testValue{{timestamp: time.Unix(10, 0), value: 10}, {timestamp: time.Unix(11, 0), value: 11}}, 37 | []testValue{{timestamp: time.Unix(7, 0), value: 7}}, 38 | []testValue{{timestamp: time.Unix(8, 0), value: 8}, {timestamp: time.Unix(9, 0), value: 9}}, 39 | []testValue{{timestamp: time.Unix(1, 0), value: 1}, {timestamp: time.Unix(3, 0), value: 3}}, 40 | }, 41 | }, 42 | } 43 | 44 | for _, tc := range testCases { 45 | t.Run(tc.title, func(t *testing.T) { 46 | decs := make([]Decoder, 0, len(tc.streams)) 47 | expected := []testValue{} 48 | for _, stream := range tc.streams { 49 | enc := NewEncoder() 50 | for _, v := range stream { 51 | enc.Encode(v.timestamp, v.value) 52 | expected = append(expected, v) 53 | } 54 | 55 | dec := NewDecoder() 56 | dec.Reset(enc.Bytes()) 57 | decs = append(decs, dec) 58 | } 59 | sort.Slice(expected, func(i, j int) bool { 60 | return expected[i].timestamp.Before(expected[j].timestamp) 61 | }) 62 | 63 | multiDecoder := NewMultiDecoder() 64 | multiDecoder.Reset(decs) 65 | 66 | i := 0 67 | for multiDecoder.Next() { 68 | currT, currV := multiDecoder.Current() 69 | require.True( 70 | t, 71 | expected[i].timestamp.Equal(currT), 72 | fmt.Sprintf("expected %s but got %s", expected[i].timestamp.String(), currT.String())) 73 | require.Equal(t, expected[i].value, currV) 74 | i++ 75 | } 76 | require.NoError(t, multiDecoder.Err()) 77 | require.Equal(t, len(expected), i) 78 | }) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/encoding/ostream.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | // Frked from "github.com/m3db/m3/src/dbnode/encoding/ostream.go" to make some changes that 4 | // don't make sense to include upstream. 5 | 6 | type Bit byte 7 | 8 | // OStream encapsulates a writable stream. 9 | type OStream interface { 10 | Len() int 11 | Empty() bool 12 | WriteBit(v Bit) 13 | WriteBits(v uint64, numBits int) 14 | WriteByte(v byte) 15 | WriteBytes(bytes []byte) 16 | Write(bytes []byte) (int, error) 17 | Reset(buffer []byte) 18 | Discard() []byte 19 | Rawbytes() ([]byte, int) 20 | } 21 | 22 | const ( 23 | initAllocSize = 1024 24 | ) 25 | 26 | // Ostream encapsulates a writable stream. 
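// It buffers whole bytes in buf and tracks how many bits of the final byte are in use via
// pos; exposing that pair through Rawbytes() is what lets the encoder capture its state and
// later Restore() a stream that ends mid-byte.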
27 | type ostream struct { 28 | buf []byte 29 | pos int // how many bits have been used in the last byte 30 | } 31 | 32 | // NewOStream creates a new Ostream 33 | func NewOStream() OStream { 34 | return &ostream{} 35 | } 36 | 37 | // Len returns the length of the Ostream 38 | func (os *ostream) Len() int { 39 | return len(os.buf) 40 | } 41 | 42 | // Empty returns whether the Ostream is empty 43 | func (os *ostream) Empty() bool { 44 | return os.Len() == 0 && os.pos == 0 45 | } 46 | 47 | func (os *ostream) lastIndex() int { 48 | return os.Len() - 1 49 | } 50 | 51 | func (os *ostream) hasUnusedBits() bool { 52 | return os.pos > 0 && os.pos < 8 53 | } 54 | 55 | // grow appends the last byte of v to buf and sets pos to np. 56 | func (os *ostream) grow(v byte, np int) { 57 | os.ensureCapacityFor(1) 58 | os.buf = append(os.buf, v) 59 | 60 | os.pos = np 61 | } 62 | 63 | // ensureCapacity ensures that there is at least capacity for n more bytes. 64 | func (os *ostream) ensureCapacityFor(n int) { 65 | var ( 66 | currCap = cap(os.buf) 67 | currLen = len(os.buf) 68 | availableCap = currCap - currLen 69 | missingCap = n - availableCap 70 | ) 71 | if missingCap <= 0 { 72 | // Already have enough capacity. 73 | return 74 | } 75 | 76 | newCap := max(cap(os.buf)*2, currCap+missingCap) 77 | newbuf := make([]byte, 0, newCap) 78 | newbuf = append(newbuf, os.buf...) 79 | os.buf = newbuf 80 | } 81 | 82 | func (os *ostream) fillUnused(v byte) { 83 | os.buf[os.lastIndex()] |= v >> uint(os.pos) 84 | } 85 | 86 | // WriteBit writes the last bit of v. 87 | func (os *ostream) WriteBit(v Bit) { 88 | v <<= 7 89 | if !os.hasUnusedBits() { 90 | os.grow(byte(v), 1) 91 | return 92 | } 93 | os.fillUnused(byte(v)) 94 | os.pos++ 95 | } 96 | 97 | // WriteByte writes the last byte of v. 98 | func (os *ostream) WriteByte(v byte) { 99 | if !os.hasUnusedBits() { 100 | os.grow(v, 8) 101 | return 102 | } 103 | os.fillUnused(v) 104 | os.grow(v< 64 { 147 | numBits = 64 148 | } 149 | 150 | v <<= uint(64 - numBits) 151 | for numBits >= 8 { 152 | os.WriteByte(byte(v >> 56)) 153 | v <<= 8 154 | numBits -= 8 155 | } 156 | 157 | for numBits > 0 { 158 | os.WriteBit(Bit((v >> 63) & 1)) 159 | v <<= 1 160 | numBits-- 161 | } 162 | } 163 | 164 | // Discard takes the ref to the raw buffer from the ostream. 165 | func (os *ostream) Discard() []byte { 166 | buffer := os.buf 167 | 168 | os.buf = nil 169 | os.pos = 0 170 | 171 | return buffer 172 | } 173 | 174 | // Reset resets the ostream 175 | func (os *ostream) Reset(buffer []byte) { 176 | os.buf = buffer 177 | 178 | os.pos = 0 179 | if os.Len() > 0 { 180 | // If the byte array passed in is not empty, we set 181 | // pos to 8 indicating the last byte is fully used. 182 | os.pos = 8 183 | } 184 | } 185 | 186 | func (os *ostream) Rawbytes() ([]byte, int) { 187 | return os.buf, os.pos 188 | } 189 | 190 | func max(x, y int) int { 191 | if x > y { 192 | return x 193 | } 194 | return y 195 | } 196 | -------------------------------------------------------------------------------- /src/encoding/round_trip_test.go: -------------------------------------------------------------------------------- 1 | package encoding 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | type testValue struct { 11 | timestamp time.Time 12 | value float64 13 | } 14 | 15 | type roundTripTestCase struct { 16 | title string 17 | vals []testValue 18 | } 19 | 20 | // TODO(rartoul): This probably needs some kind of property test. 
21 | func TestRoundTripSimple(t *testing.T) { 22 | testCases := []roundTripTestCase{ 23 | { 24 | title: "simple in order", 25 | vals: []testValue{ 26 | { 27 | timestamp: time.Unix(0, 1), 28 | value: -1, 29 | }, 30 | { 31 | timestamp: time.Unix(0, 2), 32 | value: 0, 33 | }, 34 | { 35 | timestamp: time.Unix(0, 3), 36 | value: 1, 37 | }, 38 | }, 39 | }, 40 | { 41 | title: "simple out of order", 42 | vals: []testValue{ 43 | { 44 | timestamp: time.Unix(0, 3), 45 | value: -1, 46 | }, 47 | { 48 | timestamp: time.Unix(0, 2), 49 | value: 0, 50 | }, 51 | { 52 | timestamp: time.Unix(0, 1), 53 | value: 1, 54 | }, 55 | }, 56 | }, 57 | } 58 | 59 | for _, tc := range testCases { 60 | t.Run(tc.title, func(t *testing.T) { 61 | encoder := NewEncoder() 62 | // TODO(rartoul): This should probably be its own test. 63 | _, _, ok := encoder.LastEncoded() 64 | require.False(t, ok) 65 | 66 | for _, v := range tc.vals { 67 | err := encoder.Encode(v.timestamp, v.value) 68 | require.NoError(t, err) 69 | 70 | // TODO(rartoul): This should probably be its own test. 71 | lastEncodedT, lastEncodedV, ok := encoder.LastEncoded() 72 | require.True(t, ok) 73 | require.True(t, v.timestamp.Equal(lastEncodedT)) 74 | require.Equal(t, v.value, lastEncodedV) 75 | } 76 | 77 | encodedBytes := encoder.Bytes() 78 | require.Equal(t, 22, len(encodedBytes)) 79 | 80 | decoder := NewDecoder() 81 | decoder.Reset(encodedBytes) 82 | 83 | i := 0 84 | for decoder.Next() { 85 | currT, currV := decoder.Current() 86 | require.Equal(t, tc.vals[i].timestamp, currT) 87 | require.Equal(t, tc.vals[i].value, currV) 88 | i++ 89 | } 90 | require.NoError(t, decoder.Err()) 91 | require.Equal(t, len(tc.vals), i) 92 | }) 93 | } 94 | 95 | } 96 | 97 | func TestRoundTripWithStateAndRestore(t *testing.T) { 98 | values := []testValue{ 99 | { 100 | timestamp: time.Unix(0, 1), 101 | value: -1, 102 | }, 103 | { 104 | timestamp: time.Unix(0, 2), 105 | value: 0, 106 | }, 107 | { 108 | timestamp: time.Unix(0, 3), 109 | value: 1, 110 | }, 111 | } 112 | 113 | var ( 114 | accumulated []byte 115 | lastState []byte 116 | ) 117 | for _, v := range values { 118 | encoder := NewEncoder() 119 | if lastState != nil { 120 | err := encoder.Restore(lastState) 121 | require.NoError(t, err) 122 | } 123 | err := encoder.Encode(v.timestamp, v.value) 124 | require.NoError(t, err) 125 | lastState = encoder.State() 126 | 127 | b := encoder.Bytes() 128 | if accumulated == nil { 129 | accumulated = b 130 | } else { 131 | accumulated[len(accumulated)-1] = b[0] 132 | if len(b) > 1 { 133 | accumulated = append(accumulated, b[1:]...) 
134 | } 135 | } 136 | } 137 | 138 | require.Equal(t, 22, len(accumulated)) 139 | 140 | decoder := NewDecoder() 141 | decoder.Reset(accumulated) 142 | 143 | i := 0 144 | for decoder.Next() { 145 | currT, currV := decoder.Current() 146 | require.Equal(t, values[i].timestamp, currT) 147 | require.Equal(t, values[i].value, currV) 148 | i++ 149 | } 150 | require.NoError(t, decoder.Err()) 151 | require.Equal(t, len(values), i) 152 | } 153 | -------------------------------------------------------------------------------- /src/layer/dircompress/layer.go: -------------------------------------------------------------------------------- 1 | package dircompress 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/apple/foundationdb/bindings/go/src/fdb" 10 | "github.com/richardartoul/tsdb-layer/src/encoding" 11 | "github.com/richardartoul/tsdb-layer/src/layer" 12 | ) 13 | 14 | func NewLayer() layer.Layer { 15 | fdb.MustAPIVersion(610) 16 | // TODO(rartoul): Make this configurable. 17 | db := fdb.MustOpenDefault() 18 | return &directCompress{ 19 | db: db, 20 | } 21 | } 22 | 23 | type directCompress struct { 24 | db fdb.Database 25 | } 26 | 27 | type timeSeriesMetadata struct { 28 | State []byte 29 | LastByte byte 30 | } 31 | 32 | func (l *directCompress) Write(id string, timestamp time.Time, value float64) error { 33 | // TODO: Don't allocate 34 | return l.WriteBatch([]layer.Write{{ID: id, Timestamp: timestamp, Value: value}}) 35 | } 36 | 37 | func (l *directCompress) WriteBatch(writes []layer.Write) error { 38 | _, err := l.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 39 | metadataFutures := make([]fdb.FutureByteSlice, 0, len(writes)) 40 | for _, w := range writes { 41 | metadataKey := newTimeseriesMetadataKeyFromID(w.ID) 42 | metadataFuture := tr.Get(metadataKey) 43 | metadataFutures = append(metadataFutures, metadataFuture) 44 | } 45 | 46 | for i, f := range metadataFutures { 47 | var ( 48 | w = writes[i] 49 | metadataKey = newTimeseriesMetadataKeyFromID(w.ID) 50 | // TODO: Error handling 51 | metadataBytes = f.MustGet() 52 | metaValue timeSeriesMetadata 53 | dataAppend []byte 54 | enc = encoding.NewEncoder() 55 | ) 56 | if len(metadataBytes) == 0 { 57 | // Never written. 58 | enc := encoding.NewEncoder() 59 | if err := enc.Encode(w.Timestamp, w.Value); err != nil { 60 | return nil, err 61 | } 62 | 63 | metaValue = timeSeriesMetadata{ 64 | State: enc.State(), 65 | } 66 | 67 | // TODO: Should lastByte be set here too? 68 | b := enc.Bytes() 69 | if len(b) > 1 { 70 | dataAppend = enc.Bytes()[:len(b)-1] 71 | } 72 | } else { 73 | // TODO(rartoul): Don't use JSON. 74 | if err := json.Unmarshal(metadataBytes, &metaValue); err != nil { 75 | return nil, err 76 | } 77 | 78 | // Has been written before, restore encoder state. 79 | if err := enc.Restore(metaValue.State); err != nil { 80 | return nil, err 81 | } 82 | 83 | if err := enc.Encode(w.Timestamp, w.Value); err != nil { 84 | return nil, err 85 | } 86 | 87 | // Ensure new state gets persisted. 88 | var ( 89 | newState = enc.State() 90 | b = enc.Bytes() 91 | ) 92 | if len(b) == 0 { 93 | return nil, errors.New("encoder bytes was length zero") 94 | } 95 | if len(b) == 1 { 96 | // The existing last byte was modified without adding any additional bytes. The last 97 | // byte is always tracked by the state so there is nothing to append here. 
98 | } 99 | if len(b) > 1 { 100 | // The last byte will be kept track of by the state, but any bytes preceding it are 101 | // new "complete" bytes which should be appended to the compressed stream. 102 | dataAppend = b[:len(b)-1] 103 | } 104 | metaValue.LastByte = b[len(b)-1] 105 | metaValue.State = newState 106 | } 107 | 108 | // TODO(rartoul): Don't use JSON. 109 | newMetadataBytes, err := json.Marshal(&metaValue) 110 | if err != nil { 111 | return nil, err 112 | } 113 | 114 | tr.Set(metadataKey, newMetadataBytes) 115 | // TODO(rartoul): Ensure it fits and if not split into new keys. 116 | dataKey := newTimeseriesDataKeyFromID(w.ID) 117 | tr.AppendIfFits(dataKey, dataAppend) 118 | } 119 | 120 | return nil, nil 121 | }) 122 | 123 | if err != nil { 124 | return err 125 | } 126 | return nil 127 | } 128 | 129 | func (l *directCompress) Read(id string) (encoding.ReadableDecoder, error) { 130 | stream, err := l.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 131 | var ( 132 | metadataKey = newTimeseriesMetadataKeyFromID(id) 133 | dataKey = newTimeseriesDataKeyFromID(id) 134 | metadataFuture = tr.Get(metadataKey) 135 | dataFuture = tr.Get(dataKey) 136 | ) 137 | 138 | // TODO(rartoul): Proper error handling instead of Must() 139 | metadataBytes := metadataFuture.MustGet() 140 | dataBytes := dataFuture.MustGet() 141 | 142 | if len(metadataBytes) == 0 { 143 | // Does not exist. 144 | return nil, nil 145 | } 146 | 147 | var metaValue timeSeriesMetadata 148 | if err := json.Unmarshal(metadataBytes, &metaValue); err != nil { 149 | return nil, err 150 | } 151 | stream := append(dataBytes, metaValue.LastByte) 152 | return stream, nil 153 | }) 154 | if err != nil { 155 | return nil, err 156 | } 157 | 158 | dec := encoding.NewDecoder() 159 | dec.Reset(stream.([]byte)) 160 | return dec, nil 161 | } 162 | 163 | func newTimeseriesDataKeyFromID(id string) fdb.KeyConvertible { 164 | // TODO(rartoul): This function will need to be much more intelligent to handle 165 | // the fact that the data may be spread across multiple values. 166 | return fdb.Key(fmt.Sprintf("%s-data", id)) 167 | } 168 | 169 | func newTimeseriesMetadataKeyFromID(id string) fdb.KeyConvertible { 170 | return fdb.Key(fmt.Sprintf("%s-metadata", id)) 171 | } 172 | -------------------------------------------------------------------------------- /src/layer/dircompress/layer_test.go: -------------------------------------------------------------------------------- 1 | package dircompress 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | type testValue struct { 11 | timestamp time.Time 12 | value float64 13 | } 14 | 15 | // TODO(rartoul): This probably needs some kind of property test. 
16 | func TestRoundTripSimple(t *testing.T) { 17 | tsID := "test-id-1" 18 | values := []testValue{ 19 | { 20 | timestamp: time.Unix(0, 1), 21 | value: -1, 22 | }, 23 | { 24 | timestamp: time.Unix(0, 2), 25 | value: 0, 26 | }, 27 | { 28 | timestamp: time.Unix(0, 3), 29 | value: 1, 30 | }, 31 | } 32 | 33 | layer := NewLayer() 34 | for _, v := range values { 35 | err := layer.Write(tsID, v.timestamp, v.value) 36 | require.NoError(t, err) 37 | } 38 | 39 | decoder, err := layer.Read(tsID) 40 | require.NoError(t, err) 41 | 42 | i := 0 43 | for decoder.Next() { 44 | currT, currV := decoder.Current() 45 | require.Equal(t, values[i].timestamp, currT) 46 | require.Equal(t, values[i].value, currV) 47 | i++ 48 | } 49 | require.NoError(t, decoder.Err()) 50 | require.Equal(t, len(values), i) 51 | } 52 | -------------------------------------------------------------------------------- /src/layer/raw/layer.go: -------------------------------------------------------------------------------- 1 | package raw 2 | 3 | import ( 4 | "errors" 5 | "time" 6 | 7 | "github.com/apple/foundationdb/bindings/go/src/fdb" 8 | "github.com/apple/foundationdb/bindings/go/src/fdb/tuple" 9 | "github.com/richardartoul/tsdb-layer/src/encoding" 10 | "github.com/richardartoul/tsdb-layer/src/layer" 11 | ) 12 | 13 | func NewLayer() layer.Layer { 14 | fdb.MustAPIVersion(610) 15 | // TODO(rartoul): Make this configurable. 16 | db := fdb.MustOpenDefault() 17 | return &raw{ 18 | db: db, 19 | } 20 | } 21 | 22 | type raw struct { 23 | db fdb.Database 24 | } 25 | 26 | func (l *raw) Write(id string, timestamp time.Time, value float64) error { 27 | // TODO: Don't allocate 28 | return l.WriteBatch([]layer.Write{{ID: id, Timestamp: timestamp, Value: value}}) 29 | } 30 | 31 | func (l *raw) WriteBatch(writes []layer.Write) error { 32 | _, err := l.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 33 | for _, w := range writes { 34 | key := tuple.Tuple{w.ID, w.Timestamp.UnixNano()} 35 | tr.Set(key, tuple.Tuple{w.Value}.Pack()) 36 | } 37 | return nil, nil 38 | }) 39 | 40 | return err 41 | } 42 | 43 | func (l *raw) Read(id string) (encoding.ReadableDecoder, error) { 44 | return nil, errors.New("not-implemented") 45 | } 46 | -------------------------------------------------------------------------------- /src/layer/rawblock/buffer.go: -------------------------------------------------------------------------------- 1 | package rawblock 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/apple/foundationdb/bindings/go/src/fdb" 10 | "github.com/apple/foundationdb/bindings/go/src/fdb/tuple" 11 | "github.com/richardartoul/tsdb-layer/src/encoding" 12 | "github.com/richardartoul/tsdb-layer/src/layer" 13 | ) 14 | 15 | const ( 16 | bufferKeyPrefix = "b-" 17 | metadataKeyPostfix = "-meta" 18 | tsChunkKeyPrefix = "-chunk-" 19 | 20 | targetChunkSize = 4096 21 | flushBatchSize = 128 22 | ) 23 | 24 | type tsMetadata struct { 25 | Chunks []chunkMetadata 26 | } 27 | 28 | func newTSMetadata() tsMetadata { 29 | return tsMetadata{} 30 | } 31 | 32 | type chunkMetadata struct { 33 | Key []byte 34 | First time.Time 35 | Last time.Time 36 | SizeBytes int 37 | } 38 | 39 | func newChunkMetadata(key []byte, first, last time.Time, sizeBytes int) chunkMetadata { 40 | return chunkMetadata{ 41 | Key: key, 42 | First: first, 43 | Last: last, 44 | SizeBytes: sizeBytes, 45 | } 46 | } 47 | 48 | type Buffer interface { 49 | Write(writes []layer.Write) error 50 | Read(id string) (encoding.MultiDecoder, bool, error) 51 | Flush() error 52 | } 
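// Illustrative usage sketch (assumes a reachable FoundationDB cluster; the series ID and
// values below are arbitrary):
//
//	db := fdb.MustOpenDefault()
//	buf := NewBuffer(db)
//	err := buf.Write([]layer.Write{{ID: "some-series", Timestamp: time.Now(), Value: 42}})
//	// ... handle err ...
//	dec, ok, err := buf.Read("some-series")
//	if err == nil && ok {
//		for dec.Next() {
//			t, v := dec.Current()
//			_, _ = t, v
//		}
//	}
//	// A background caller is expected to invoke buf.Flush() periodically to move
//	// buffered data into FDB chunks.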
53 | 54 | // TODO(rartoul): This entire thing needs to be refactored to support creating 55 | // new encoders (not just during flush) so that encoders can be split when: 56 | // 1. An existing encoder gets too big (so we don't end up with huge streams 57 | // that later need to be broken up during flush into smaller streams) 58 | // 2. An out-of-order write comes in. 59 | type buffer struct { 60 | sync.Mutex 61 | db fdb.Database 62 | encoders map[string][]encoding.Encoder 63 | } 64 | 65 | func NewBuffer(db fdb.Database) Buffer { 66 | return &buffer{ 67 | db: db, 68 | encoders: map[string][]encoding.Encoder{}, 69 | } 70 | } 71 | 72 | // TODO(rartoul): This should split up writes into a new encoder once the existing 73 | // encoder has reached a certain size so that a given stream cant grow too large 74 | // inbetween flushes (which is an issue because fdb has maximum sizes for a given 75 | // value). 76 | // TODO(rartoul): Should have per-write error handling. 77 | func (b *buffer) Write(writes []layer.Write) error { 78 | b.Lock() 79 | defer b.Unlock() 80 | 81 | for _, w := range writes { 82 | encoders, ok := b.encoders[w.ID] 83 | if !ok { 84 | encoders = []encoding.Encoder{encoding.NewEncoder()} 85 | b.encoders[w.ID] = encoders 86 | } 87 | 88 | enc := encoders[len(encoders)-1] 89 | lastT, _, hasWrittenAnyValues := enc.LastEncoded() 90 | if hasWrittenAnyValues { 91 | if w.Timestamp.Before(lastT) { 92 | // TODO(rartoul): Remove this restriction with multiple encoders. 93 | return fmt.Errorf( 94 | "cannot write data out of order, series: %s, prevTimestamp: %s, currTimestamp: %s", 95 | w.ID, lastT.String(), w.Timestamp.String()) 96 | } 97 | if w.Timestamp.Equal(lastT) { 98 | return fmt.Errorf( 99 | "cannot upsert existing values, series: %s, currTimestamp: %s", 100 | w.ID, lastT.String()) 101 | } 102 | } 103 | 104 | if err := enc.Encode(w.Timestamp, w.Value); err != nil { 105 | return err 106 | } 107 | } 108 | 109 | return nil 110 | } 111 | 112 | // TODO(rartoul): This should accept a time range to query and use that information 113 | // to determine which chunks to pull back instead of just reading all of them. 114 | func (b *buffer) Read(id string) (encoding.MultiDecoder, bool, error) { 115 | var decoders []encoding.Decoder 116 | _, err := b.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 117 | metadataKey := metadataKey(id) 118 | metaBytes, err := tr.Get(metadataKey).Get() 119 | if err != nil { 120 | return nil, err 121 | } 122 | if metaBytes == nil { 123 | return nil, nil 124 | } 125 | 126 | var metadata tsMetadata 127 | if err := json.Unmarshal(metaBytes, &metadata); err != nil { 128 | return nil, err 129 | } 130 | 131 | for _, chunk := range metadata.Chunks { 132 | chunkBytes, err := tr.Get(fdb.Key(chunk.Key)).Get() 133 | if err != nil { 134 | return nil, err 135 | } 136 | dec := encoding.NewDecoder() 137 | dec.Reset(chunkBytes) 138 | decoders = append(decoders, dec) 139 | } 140 | return nil, nil 141 | }) 142 | if err != nil { 143 | return nil, false, err 144 | } 145 | 146 | encoders, ok := b.encoders[id] 147 | if ok { 148 | decoders = append(decoders, encodersToDecoders(encoders)...) 
149 | } 150 | 151 | if len(decoders) == 0 { 152 | return nil, false, nil 153 | } 154 | 155 | multiDec := encoding.NewMultiDecoder() 156 | multiDec.Reset(decoders) 157 | return multiDec, true, nil 158 | } 159 | 160 | func encodersToDecoders(encs []encoding.Encoder) []encoding.Decoder { 161 | decs := make([]encoding.Decoder, 0, len(encs)) 162 | for _, enc := range encs { 163 | dec := encoding.NewDecoder() 164 | dec.Reset(enc.Bytes()) 165 | decs = append(decs, dec) 166 | } 167 | return decs 168 | } 169 | 170 | // TODO(rartoul): Instead of performing one transaction per series it would be more efficient 171 | // to collect "batches" of series and then write them all together in one fdb transaction. 172 | func (b *buffer) Flush() error { 173 | // Manually control locking so map can be iterated while still being concurrently 174 | // accessed. 175 | b.Lock() 176 | 177 | var pendingFlush []toFlush 178 | for seriesID, encoders := range b.encoders { 179 | if len(encoders) == 0 { 180 | continue 181 | } 182 | 183 | // Append a new encoder to the list of existing encoders. Only the last encoder 184 | // in the list is ever written to so this effectively renders all previous 185 | // encoders immutable which can be taken advantage of to flush them without 186 | // holding a lock on the entire map. 187 | encoders = append(encoders, encoding.NewEncoder()) 188 | encodersToFlush := encoders[:len(encoders)-1] 189 | b.encoders[seriesID] = encoders 190 | 191 | var streams [][]byte 192 | for _, enc := range encodersToFlush { 193 | streams = append(streams, enc.Bytes()) 194 | } 195 | pendingFlush = append(pendingFlush, toFlush{ 196 | id: seriesID, 197 | streams: streams, 198 | }) 199 | 200 | if len(pendingFlush) < flushBatchSize { 201 | continue 202 | } 203 | 204 | b.Unlock() 205 | if err := b.flush(pendingFlush); err != nil { 206 | return err 207 | } 208 | pendingFlush = pendingFlush[:0] 209 | 210 | // Hold the lock for the next iteration. 211 | b.Lock() 212 | } 213 | b.Unlock() 214 | if err := b.flush(pendingFlush); err != nil { 215 | return err 216 | } 217 | return nil 218 | } 219 | 220 | type toFlush struct { 221 | id string 222 | streams [][]byte 223 | } 224 | 225 | func (b *buffer) flush(toFlush []toFlush) error { 226 | if len(toFlush) == 0 { 227 | return nil 228 | } 229 | 230 | _, err := b.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 231 | var metadataFutures []fdb.FutureByteSlice 232 | // Start parallel fetches for each metadata. 233 | for _, series := range toFlush { 234 | metadataKey := metadataKey(series.id) 235 | metadataFuture := tr.Get(metadataKey) 236 | metadataFutures = append(metadataFutures, metadataFuture) 237 | } 238 | 239 | for i, series := range toFlush { 240 | metaBytes, err := metadataFutures[i].Get() 241 | if err != nil { 242 | return nil, err 243 | } 244 | 245 | var metadata tsMetadata 246 | if metaBytes == nil { 247 | metadata = newTSMetadata() 248 | } else { 249 | // TODO(rartoul): Don't use JSON. 250 | if err := json.Unmarshal(metaBytes, &metadata); err != nil { 251 | return nil, err 252 | } 253 | } 254 | 255 | stream, err := encoding.MergeStreams(series.streams...) 256 | if err != nil { 257 | return nil, err 258 | } 259 | 260 | var newChunkKey fdb.Key 261 | if len(metadata.Chunks) == 0 { 262 | newChunkKey = tsChunkKey(series.id, 0) 263 | metadata.Chunks = append(metadata.Chunks, newChunkMetadata( 264 | newChunkKey, 265 | time.Unix(0, 0), // TODO(rartoul): Fill this in. 266 | time.Unix(0, 0), // TODO(rartoul): Fill this in. 
267 | len(stream), 268 | )) 269 | } else { 270 | lastChunkIdx := len(metadata.Chunks) - 1 271 | lastChunk := metadata.Chunks[lastChunkIdx] 272 | // TODO(rartoul): Make compaction/merge logic more intelligent. 273 | if lastChunk.SizeBytes+len(stream) <= targetChunkSize { 274 | // Merge with last chunk. 275 | newChunkKey = fdb.Key(lastChunk.Key) 276 | // TODO(rartoul): This is inefficient because it forces a synchronous wait 277 | // on a read from fdb. This should be refactored so that all of the chunks 278 | // that need to be read can be fetched in parallel similar to how the metadata 279 | // futures are fetched in parallel above. 280 | existingStream, err := tr.Get(newChunkKey).Get() 281 | if err != nil { 282 | return nil, err 283 | } 284 | stream, err = encoding.MergeStreams(existingStream, stream) 285 | if err != nil { 286 | return nil, err 287 | } 288 | // TODO(rartoul): Update first and last properties here as well. 289 | metadata.Chunks[lastChunkIdx].SizeBytes = len(stream) 290 | } else { 291 | // Insert new chunk. 292 | newChunkKey = tsChunkKey(series.id, lastChunkIdx) 293 | metadata.Chunks = append(metadata.Chunks, newChunkMetadata( 294 | newChunkKey, 295 | time.Unix(0, 0), // TODO(rartoul): Fill this in. 296 | time.Unix(0, 0), // TODO(rartoul): Fill this in. 297 | len(stream), 298 | )) 299 | } 300 | } 301 | 302 | newMetadataBytes, err := json.Marshal(metadata) 303 | if err != nil { 304 | return nil, err 305 | } 306 | 307 | metadataKey := metadataKey(series.id) 308 | tr.Set(metadataKey, newMetadataBytes) 309 | tr.Set(newChunkKey, stream) 310 | } 311 | return nil, nil 312 | }) 313 | if err != nil { 314 | return err 315 | } 316 | 317 | b.Lock() 318 | defer b.Unlock() 319 | for _, series := range toFlush { 320 | encoders, ok := b.encoders[series.id] 321 | if !ok { 322 | return fmt.Errorf("flushed series %s which does not exist in encoders", series.id) 323 | } 324 | 325 | // Now that all of the immutable encoders have been flushed, they can be removed 326 | // from the list of existing encoders because they can now be read from FDB directly. 327 | // 328 | // TODO(rartoul): This logic works right now because the only thing that can 329 | // trigger creating a new encoder for an existing series is a flush and because flushing 330 | // is single-threaded. Once there is support for out-of-order writes, this logic will need 331 | // to change since there will be no way to determine if all of the encoder except the last 332 | // have been flushed yet (or could just force out of order writes to merge on demand?). 
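// Removing the flushed encoders also prevents reads from returning the same samples twice
// (once from the newly written FDB chunk and once from the stale in-memory encoder).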
333 | b.encoders[series.id] = encoders[len(encoders)-1:] 334 | } 335 | return nil 336 | } 337 | 338 | func metadataKey(id string) fdb.Key { 339 | // TODO(rartoul): Not sure if this is ideal key structure/ 340 | return tuple.Tuple{bufferKeyPrefix, id, metadataKeyPostfix}.Pack() 341 | } 342 | 343 | func tsChunkKey(id string, chunkNum int) fdb.Key { 344 | return tuple.Tuple{bufferKeyPrefix, id, tsChunkKeyPrefix, chunkNum}.Pack() 345 | } 346 | -------------------------------------------------------------------------------- /src/layer/rawblock/buffer_test.go: -------------------------------------------------------------------------------- 1 | package rawblock 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/richardartoul/tsdb-layer/src/layer" 9 | 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | const ( 14 | testID = "test-id" 15 | ) 16 | 17 | type testValue struct { 18 | timestamp time.Time 19 | value float64 20 | } 21 | 22 | type bufferWriteReadTestCase struct { 23 | title string 24 | vals []testValue 25 | } 26 | 27 | func TestBufferWriteRead(t *testing.T) { 28 | testCases := []bufferWriteReadTestCase{ 29 | { 30 | title: "in order values", 31 | vals: []testValue{{timestamp: time.Unix(0, 0), value: 0}, {timestamp: time.Unix(1, 0), value: 1}}, 32 | }, 33 | // TODO(rartoul): Not supported right now. 34 | // { 35 | // title: "out of order values", 36 | // vals: []testValue{{timestamp: time.Unix(1, 0), value: 1}, {timestamp: time.Unix(0, 0), value: 0}}, 37 | // }, 38 | } 39 | 40 | for _, tc := range testCases { 41 | t.Run(tc.title, func(t *testing.T) { 42 | db, cleanup := newTestFDB() 43 | defer cleanup() 44 | 45 | buffer := NewBuffer(db) 46 | writes := []layer.Write{} 47 | for _, val := range tc.vals { 48 | writes = append( 49 | writes, 50 | layer.Write{ 51 | ID: testID, 52 | Timestamp: val.timestamp, 53 | Value: val.value}) 54 | } 55 | require.NoError(t, buffer.Write(writes)) 56 | 57 | assertReadFn := func() { 58 | multiDec, ok, err := buffer.Read(testID) 59 | require.NoError(t, err) 60 | require.True(t, ok) 61 | 62 | i := 0 63 | for multiDec.Next() { 64 | currT, currV := multiDec.Current() 65 | require.True( 66 | t, 67 | tc.vals[i].timestamp.Equal(currT), 68 | fmt.Sprintf("expected %s but got %s", tc.vals[i].timestamp.String(), currT.String())) 69 | require.Equal(t, tc.vals[i].value, currV) 70 | i++ 71 | } 72 | require.NoError(t, multiDec.Err()) 73 | require.Equal(t, len(tc.vals), i) 74 | } 75 | 76 | // Ensure reads work correctly before and after flushing. 77 | assertReadFn() 78 | require.NoError(t, buffer.Flush()) 79 | assertReadFn() 80 | }) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/layer/rawblock/commitlog.go: -------------------------------------------------------------------------------- 1 | package rawblock 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "log" 7 | "math" 8 | "sync" 9 | "time" 10 | 11 | "github.com/apple/foundationdb/bindings/go/src/fdb" 12 | "github.com/apple/foundationdb/bindings/go/src/fdb/tuple" 13 | ) 14 | 15 | const ( 16 | // Multiple of fdb page size. 
17 | defaultBatchSize = 4096 * 24 18 | defaultMaxPendingBytes = 10000000 19 | defaultFlushEvery = 10 * time.Millisecond 20 | 21 | commitLogKey = "commitlog-" 22 | commitLogKeyTupleLength = 2 23 | ) 24 | 25 | type clStatus int 26 | 27 | const ( 28 | clStatusUnopened clStatus = iota 29 | clStatusOpen 30 | clStatusClosed 31 | ) 32 | 33 | // truncationToken is a token that can be passed to the commitlog to truncate the commitlogs up to 34 | // a specific point. It should be treated as opaque by external callers. 35 | type truncationToken struct { 36 | upTo tuple.Tuple 37 | } 38 | 39 | // Commitlog is the interface for an FDB-backed commitlog. 40 | type Commitlog interface { 41 | Write([]byte) error 42 | Open() error 43 | Close() error 44 | WaitForRotation() (truncationToken, error) 45 | Truncate(token truncationToken) error 46 | } 47 | 48 | // CommitlogOptions encapsulates the options for the commit log. 49 | type CommitlogOptions struct { 50 | IdealBatchSize int 51 | MaxPendingBytes int 52 | FlushEvery time.Duration 53 | } 54 | 55 | // NewCommitlogOptions creates a new CommitlogOptions. 56 | func NewCommitlogOptions() CommitlogOptions { 57 | return CommitlogOptions{ 58 | IdealBatchSize: defaultBatchSize, 59 | MaxPendingBytes: defaultMaxPendingBytes, 60 | FlushEvery: defaultFlushEvery, 61 | } 62 | } 63 | 64 | type flushOutcome struct { 65 | // TODO(rartoul): Fix this, but last ID can be nil in the case 66 | // that there was no data to flush. This is useful because it 67 | // enables the WaitForRotation() API. 68 | lastID tuple.Tuple 69 | nextID tuple.Tuple 70 | err error 71 | doneCh chan struct{} 72 | } 73 | 74 | func newFlushOutcome() *flushOutcome { 75 | return &flushOutcome{ 76 | doneCh: make(chan struct{}, 0), 77 | } 78 | } 79 | 80 | func (f *flushOutcome) waitForFlush() error { 81 | <-f.doneCh 82 | return f.err 83 | } 84 | 85 | func (f *flushOutcome) notify(lastID, nextID tuple.Tuple, err error) { 86 | f.lastID = lastID 87 | f.nextID = nextID 88 | f.err = err 89 | close(f.doneCh) 90 | } 91 | 92 | type commitlog struct { 93 | sync.Mutex 94 | status clStatus 95 | db fdb.Database 96 | prevBatch []byte 97 | currBatch []byte 98 | lastFlushTime time.Time 99 | lastIdx int64 100 | flushOutcome *flushOutcome 101 | closeCh chan struct{} 102 | closeDoneCh chan error 103 | opts CommitlogOptions 104 | } 105 | 106 | // NewCommitlog creates a new commitlog. 107 | func NewCommitlog(db fdb.Database, opts CommitlogOptions) Commitlog { 108 | return &commitlog{ 109 | status: clStatusUnopened, 110 | db: db, 111 | flushOutcome: newFlushOutcome(), 112 | closeCh: make(chan struct{}, 1), 113 | closeDoneCh: make(chan error, 1), 114 | opts: opts, 115 | } 116 | } 117 | 118 | func (c *commitlog) Open() error { 119 | c.Lock() 120 | defer c.Unlock() 121 | if c.status != clStatusUnopened { 122 | return errors.New("commitlog cannot be opened more than once") 123 | } 124 | 125 | // "Bootstrap" the latest existing index to maintain a monotonically increasing 126 | // value for the commitlog chunk indices. 
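// Without this a restarted process would begin again at index 0 and could overwrite
// commitlog chunks that have not yet been flushed and truncated.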
127 | existingIdx, ok, err := c.getLatestExistingIndex() 128 | if err != nil { 129 | return err 130 | } 131 | if !ok { 132 | existingIdx = -1 133 | } 134 | c.lastIdx = existingIdx 135 | fmt.Println("last existing IDX", c.lastIdx) 136 | 137 | c.status = clStatusOpen 138 | 139 | go func() { 140 | for { 141 | i := 0 142 | select { 143 | case <-c.closeCh: 144 | c.closeDoneCh <- c.flush() 145 | return 146 | default: 147 | } 148 | time.Sleep(time.Millisecond) 149 | if err := c.flush(); err != nil { 150 | log.Printf("error flushing commitlog: %v", err) 151 | } 152 | i++ 153 | } 154 | }() 155 | 156 | return nil 157 | } 158 | 159 | func (c *commitlog) Close() error { 160 | c.Lock() 161 | if c.status != clStatusOpen { 162 | c.Unlock() 163 | return errors.New("cannot close commit log that is not open") 164 | } 165 | c.status = clStatusClosed 166 | c.Unlock() 167 | 168 | c.closeCh <- struct{}{} 169 | return <-c.closeDoneCh 170 | } 171 | 172 | // TODO(rartoul): Kind of gross that this just takes a []byte but more 173 | // flexible for now. 174 | func (c *commitlog) Write(b []byte) error { 175 | if len(b) == 0 { 176 | return errors.New("commit log can not write empty chunk") 177 | } 178 | 179 | c.Lock() 180 | if c.status != clStatusOpen { 181 | c.Unlock() 182 | return errors.New("cannot write into commit log that is not open") 183 | } 184 | 185 | if len(c.currBatch)+len(b) > c.opts.MaxPendingBytes { 186 | c.Unlock() 187 | return errors.New("commit log queue is full") 188 | } 189 | 190 | c.currBatch = append(c.currBatch, b...) 191 | currFlushOutcome := c.flushOutcome 192 | c.Unlock() 193 | return currFlushOutcome.waitForFlush() 194 | } 195 | 196 | func (c *commitlog) Truncate(token truncationToken) error { 197 | if token.upTo == nil { 198 | // This can occur in the situation where there were no existing commitlogs when 199 | // the truncationToken was generated by a call to WaitForRotation(). 200 | return nil 201 | } 202 | 203 | _, err := c.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 204 | tr.ClearRange(fdb.KeyRange{Begin: tuple.Tuple{commitLogKey}, End: token.upTo}) 205 | return nil, nil 206 | }) 207 | 208 | return err 209 | } 210 | 211 | func (c *commitlog) WaitForRotation() (truncationToken, error) { 212 | c.Lock() 213 | if c.status != clStatusOpen { 214 | c.Unlock() 215 | return truncationToken{}, errors.New("cannot wait for commit log rotation if commit log is not open") 216 | } 217 | currFlushOutcome := c.flushOutcome 218 | c.Unlock() 219 | 220 | if err := currFlushOutcome.waitForFlush(); err != nil { 221 | return truncationToken{}, err 222 | } 223 | 224 | // nextID instead of lastID because fdb clear ranges are exclusive on the end. 225 | return truncationToken{upTo: currFlushOutcome.nextID}, nil 226 | } 227 | 228 | func (c *commitlog) flush() error { 229 | c.Lock() 230 | currFlushOutcome := c.flushOutcome 231 | c.flushOutcome = newFlushOutcome() 232 | 233 | var ( 234 | lastKey tuple.Tuple 235 | nextKey tuple.Tuple 236 | ) 237 | if !(time.Since(c.lastFlushTime) >= c.opts.FlushEvery && len(c.currBatch) > 0) { 238 | c.Unlock() 239 | // Notify anyways so that the WaitForRotation() API can function. 
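// If lastIdx is still -1 nothing has ever been written, so waiters receive a zero-valued
// token which Truncate() treats as a no-op.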
240 | if c.lastIdx >= 0 { 241 | lastKey = commitlogKeyFromIdx(c.lastIdx) 242 | nextKey = commitlogKeyFromIdx(c.lastIdx + 1) 243 | } 244 | currFlushOutcome.notify(lastKey, nextKey, nil) 245 | return nil 246 | } 247 | 248 | toWrite := c.currBatch 249 | c.currBatch, c.prevBatch = c.prevBatch, c.currBatch 250 | c.currBatch = c.currBatch[:0] 251 | c.Unlock() 252 | 253 | _, err := c.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 254 | // TODO(rartoul): Need to be smarter about this because don't want to actually 255 | // break chunks across writes I.E every call to WriteBatch() should end up 256 | // in one key so that each key is a complete unit. 257 | startIdx := 0 258 | for startIdx < len(toWrite) { 259 | lastKey = c.nextKey() 260 | nextKey = commitlogKeyFromIdx(c.lastIdx + 1) 261 | endIdx := startIdx + c.opts.IdealBatchSize 262 | if endIdx > len(toWrite) { 263 | endIdx = len(toWrite) 264 | } 265 | tr.Set(lastKey, toWrite[startIdx:endIdx]) 266 | startIdx = endIdx 267 | } 268 | 269 | return nil, nil 270 | }) 271 | currFlushOutcome.notify(lastKey, nextKey, err) 272 | return err 273 | } 274 | 275 | func (c *commitlog) nextKey() tuple.Tuple { 276 | // TODO(rartoul): This should have some kind of host identifier in it. 277 | nextKey := commitlogKeyFromIdx(c.lastIdx + 1) 278 | // Safe to update this optimistically since even if the write ends up failing 279 | // its ok to have "gaps". 280 | // 281 | // Also safe to do this without any locking as this function is always called 282 | // in a single-threaded manner. 283 | c.lastIdx++ 284 | return nextKey 285 | } 286 | 287 | // TODO(rartoul): This could run afoul of fdb transction time and/or size limits if there 288 | // are too many commitlog chunks. Should be refactored to use a limit and break into multiple 289 | // transactions if necessary. 
290 | func (c *commitlog) getLatestExistingIndex() (int64, bool, error) { 291 | key, err := c.db.Transact(func(tr fdb.Transaction) (interface{}, error) { 292 | var ( 293 | rangeResult = tr.GetRange(fdb.KeyRange{ 294 | Begin: tuple.Tuple{commitLogKey, 0}, 295 | End: tuple.Tuple{commitLogKey, math.MaxInt64}}, fdb.RangeOptions{}) 296 | iter = rangeResult.Iterator() 297 | key fdb.Key 298 | ) 299 | for iter.Advance() { 300 | curr, err := iter.Get() 301 | if err != nil { 302 | return nil, err 303 | } 304 | key = curr.Key 305 | } 306 | 307 | if key == nil { 308 | return nil, nil 309 | } 310 | return key, nil 311 | }) 312 | 313 | if err != nil { 314 | return -1, false, err 315 | } 316 | if key == nil { 317 | return -1, false, nil 318 | } 319 | 320 | keyTuple, err := tuple.Unpack(key.(fdb.Key)) 321 | if err != nil { 322 | return -1, false, err 323 | } 324 | 325 | if len(keyTuple) != commitLogKeyTupleLength { 326 | return -1, false, fmt.Errorf( 327 | "malformed commitlog key tuple, expected len: %d, but was: %d, raw: %v", 328 | commitLogKeyTupleLength, len(keyTuple), key) 329 | } 330 | idx, ok := keyTuple[1].(int64) 331 | if !ok { 332 | return -1, false, errors.New("malformed commitlog key tuple, expected second value to be of type int64") 333 | } 334 | return idx, true, nil 335 | } 336 | 337 | type commitlogKey struct { 338 | index int 339 | } 340 | 341 | // func (k *commitLogKey) 342 | func commitlogKeyFromIdx(idx int64) tuple.Tuple { 343 | return tuple.Tuple{commitLogKey, idx} 344 | } 345 | -------------------------------------------------------------------------------- /src/layer/rawblock/commitlog_test.go: -------------------------------------------------------------------------------- 1 | package rawblock 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestCommitlogBootstrapLastIndex(t *testing.T) { 10 | db, cleanup := newTestFDB() 11 | defer cleanup() 12 | 13 | cl := NewCommitlog(db, NewCommitlogOptions()) 14 | require.NoError(t, cl.Open()) 15 | 16 | clImpl := cl.(*commitlog) 17 | // Verify it starts at -1. 18 | require.Equal(t, int64(-1), clImpl.lastIdx) 19 | // Issue two writes sequentially so it will increase by 2 (+1 for each flush). 20 | require.NoError(t, cl.Write([]byte("some-data"))) 21 | require.Equal(t, int64(0), clImpl.lastIdx) 22 | require.NoError(t, cl.Write([]byte("some-data"))) 23 | require.Equal(t, int64(1), clImpl.lastIdx) 24 | 25 | require.NoError(t, cl.Close()) 26 | 27 | // Ensure correct value is bootstrapped. 28 | cl = NewCommitlog(db, NewCommitlogOptions()) 29 | require.NoError(t, cl.Open()) 30 | require.Equal(t, int64(1), clImpl.lastIdx) 31 | require.NoError(t, cl.Close()) 32 | } 33 | 34 | func TestCommitlogTruncation(t *testing.T) { 35 | db, cleanup := newTestFDB() 36 | defer cleanup() 37 | 38 | cl := NewCommitlog(db, NewCommitlogOptions()).(*commitlog) 39 | require.NoError(t, cl.Open()) 40 | 41 | // Verify it starts at -1. 42 | require.Equal(t, int64(-1), cl.lastIdx) 43 | // Issue two writes sequentially so it will increase by 2 (+1 for each flush). 44 | require.NoError(t, cl.Write([]byte("some-data"))) 45 | require.Equal(t, int64(0), cl.lastIdx) 46 | require.NoError(t, cl.Write([]byte("some-data"))) 47 | require.Equal(t, int64(1), cl.lastIdx) 48 | 49 | truncToken, err := cl.WaitForRotation() 50 | require.NoError(t, err) 51 | // Use the truncation token to truncate all commitlog chunks before 2. 
52 | require.NoError(t, cl.Truncate(truncToken)) 53 | require.NoError(t, cl.Close()) 54 | 55 | // Ensure all commitlog chunks were cleared. 56 | cl = NewCommitlog(db, NewCommitlogOptions()).(*commitlog) 57 | require.NoError(t, cl.Open()) 58 | require.Equal(t, int64(-1), cl.lastIdx) 59 | 60 | // Issue a write before waiting for rotation (this should be cleared by the 61 | // call to Truncate()). 62 | require.NoError(t, cl.Write([]byte("some-data"))) 63 | require.Equal(t, int64(0), cl.lastIdx) 64 | 65 | truncToken, err = cl.WaitForRotation() 66 | require.NoError(t, err) 67 | // Issue one write after waiting for rotation so that there is one commitlog 68 | // chunk that should be deleted by truncation (0) and one that should remain(1). 69 | require.NoError(t, cl.Write([]byte("some-data"))) 70 | require.Equal(t, int64(1), cl.lastIdx) 71 | require.NoError(t, cl.Truncate(truncToken)) 72 | require.NoError(t, cl.Close()) 73 | 74 | // Ensure that chunk 0 (written before WaitForRotation()) was cleared but chunk 1 75 | // (writen after WaitForRotation()) remains. 76 | cl = NewCommitlog(db, NewCommitlogOptions()).(*commitlog) 77 | require.NoError(t, cl.Open()) 78 | require.Equal(t, int64(1), cl.lastIdx) 79 | } 80 | -------------------------------------------------------------------------------- /src/layer/rawblock/common_test.go: -------------------------------------------------------------------------------- 1 | package rawblock 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/apple/foundationdb/bindings/go/src/fdb" 7 | "github.com/apple/foundationdb/bindings/go/src/fdb/tuple" 8 | ) 9 | 10 | type cleanupFn func() 11 | 12 | func newTestFDB() (fdb.Database, cleanupFn) { 13 | fdb.MustAPIVersion(610) 14 | // TODO(rartoul): Should truncate database before and after. 15 | db := fdb.MustOpenDefault() 16 | truncateFDB(db) 17 | cleanupFn := func() { truncateFDB(db) } 18 | return db, cleanupFn 19 | } 20 | 21 | func truncateFDB(db fdb.Database) { 22 | _, err := db.Transact(func(tr fdb.Transaction) (interface{}, error) { 23 | tr.ClearRange(fdb.KeyRange{Begin: tuple.Tuple{""}, End: tuple.Tuple{0xFF}}) 24 | return nil, nil 25 | }) 26 | if err != nil { 27 | panic(fmt.Sprintf("error truncating fdb: %v", err)) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/layer/rawblock/layer.go: -------------------------------------------------------------------------------- 1 | package rawblock 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "log" 7 | "math" 8 | "sync" 9 | "time" 10 | 11 | "github.com/apple/foundationdb/bindings/go/src/fdb" 12 | "github.com/richardartoul/tsdb-layer/src/encoding" 13 | "github.com/richardartoul/tsdb-layer/src/layer" 14 | ) 15 | 16 | const ( 17 | persistLoopInterval = 100 * time.Millisecond 18 | ) 19 | 20 | func NewLayer() layer.Layer { 21 | fdb.MustAPIVersion(610) 22 | // TODO(rartoul): Make this configurable. 
23 | db := fdb.MustOpenDefault() 24 | cl := NewCommitlog(db, NewCommitlogOptions()) 25 | if err := cl.Open(); err != nil { 26 | // TODO(rartoul): Clean this up 27 | panic(err) 28 | } 29 | buffer := NewBuffer(db) 30 | 31 | l := &rawBlock{ 32 | db: db, 33 | cl: cl, 34 | buffer: buffer, 35 | bytesPool: newBytesPool(1024, 16000, 4096), 36 | } 37 | go l.startPersistLoop() 38 | return l 39 | } 40 | 41 | type rawBlock struct { 42 | db fdb.Database 43 | cl Commitlog 44 | buffer Buffer 45 | bytesPool *bytesPool 46 | } 47 | 48 | func (l *rawBlock) Write(id string, timestamp time.Time, value float64) error { 49 | // TODO: Don't allocate 50 | return l.WriteBatch([]layer.Write{{ID: id, Timestamp: timestamp, Value: value}}) 51 | } 52 | 53 | func (l *rawBlock) WriteBatch(writes []layer.Write) error { 54 | if err := l.buffer.Write(writes); err != nil { 55 | return err 56 | } 57 | 58 | b := l.bytesPool.Get() 59 | for _, w := range writes { 60 | b = encodeWrite(b, w) 61 | } 62 | err := l.cl.Write(b) 63 | l.bytesPool.Put(b) 64 | return err 65 | } 66 | 67 | func (l *rawBlock) Read(id string) (encoding.ReadableDecoder, error) { 68 | decoder, _, err := l.buffer.Read(id) 69 | return decoder, err 70 | } 71 | 72 | // TODO(rartoul): Add clean shutdown logic. 73 | func (l *rawBlock) startPersistLoop() { 74 | for { 75 | // Prevent excessive activity when there are no incoming writes. 76 | time.Sleep(persistLoopInterval) 77 | 78 | truncToken, err := l.cl.WaitForRotation() 79 | if err != nil { 80 | log.Printf("error waiting for commitlog rotation: %v", err) 81 | continue 82 | } 83 | start := time.Now() 84 | if err := l.buffer.Flush(); err != nil { 85 | log.Printf("error flushing buffer: %v", err) 86 | continue 87 | } 88 | fmt.Println("flush took: ", time.Now().Sub(start)) 89 | if err := l.cl.Truncate(truncToken); err != nil { 90 | log.Printf("error truncating commitlog: %v", err) 91 | continue 92 | } 93 | } 94 | } 95 | 96 | // TODO(rartoul): Bucketized would be more efficient 97 | type bytesPool struct { 98 | sync.Mutex 99 | pool [][]byte 100 | size int 101 | maxCapacity int 102 | defaultAllocSize int 103 | } 104 | 105 | func newBytesPool(size, maxCapacity, defaultAllocSize int) *bytesPool { 106 | return &bytesPool{ 107 | defaultAllocSize: defaultAllocSize, 108 | size: size, 109 | maxCapacity: maxCapacity, 110 | } 111 | } 112 | 113 | func (p *bytesPool) Get() []byte { 114 | p.Lock() 115 | var b []byte 116 | if len(p.pool) == 0 { 117 | b = make([]byte, 0, p.defaultAllocSize) 118 | } else { 119 | b = p.pool[len(p.pool)-1] 120 | p.pool = p.pool[:len(p.pool)-1] 121 | } 122 | p.Unlock() 123 | return b 124 | } 125 | 126 | func (p *bytesPool) Put(b []byte) { 127 | p.Lock() 128 | if len(p.pool) >= p.size || cap(b) > p.maxCapacity { 129 | p.Unlock() 130 | return 131 | } 132 | p.pool = append(p.pool, b[:0]) 133 | p.Unlock() 134 | } 135 | 136 | // TODO: This needs to be length prefixed and all that other nice stuff so it can actually be decoded 137 | func encodeWrite(b []byte, w layer.Write) []byte { 138 | b = append(b, w.ID...) 
139 | 	var buf [binary.MaxVarintLen64]byte
140 | 	// binary.PutVarint writes into the destination slice rather than appending to it,
141 | 	// so encode into a scratch buffer first and append only the bytes that were used.
142 | 	n := binary.PutVarint(buf[:], w.Timestamp.UnixNano())
143 | 	b = append(b, buf[:n]...)
144 | 	n = binary.PutUvarint(buf[:], math.Float64bits(w.Value))
145 | 	b = append(b, buf[:n]...)
146 | 	return b
147 | }
148 | 
--------------------------------------------------------------------------------
/src/layer/server/server.go:
--------------------------------------------------------------------------------
1 | package server
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 
7 | 	pb "github.com/richardartoul/tsdb-layer/protos/.gen"
8 | )
9 | 
10 | var _ pb.TSDBLayerServer = &server{}
11 | 
12 | type server struct {
13 | }
14 | 
15 | func NewServer() pb.TSDBLayerServer {
16 | 	return &server{}
17 | }
18 | 
19 | func (s *server) WriteBatch(context.Context, *pb.WriteBatchRequest) (*pb.Empty, error) {
20 | 	fmt.Println("hmm1")
21 | 	return nil, nil
22 | }
23 | 
24 | func (s *server) ReadBatch(context.Context, *pb.ReadBatchRequest) (*pb.ReadBatchResponse, error) {
25 | 	fmt.Println("hmm2")
26 | 	return nil, nil
27 | }
28 | 
--------------------------------------------------------------------------------
/src/layer/types.go:
--------------------------------------------------------------------------------
1 | package layer
2 | 
3 | import (
4 | 	"time"
5 | 
6 | 	"github.com/richardartoul/tsdb-layer/src/encoding"
7 | )
8 | 
9 | type Write struct {
10 | 	ID        string
11 | 	Timestamp time.Time
12 | 	Value     float64
13 | }
14 | 
15 | type Layer interface {
16 | 	Write(id string, timestamp time.Time, value float64) error
17 | 	WriteBatch(writes []Write) error
18 | 	Read(id string) (encoding.ReadableDecoder, error)
19 | }
20 | 
--------------------------------------------------------------------------------
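A minimal end-to-end sketch of how the pieces above fit together, assuming the rawblock layer and a locally running FoundationDB cluster reachable via the default cluster file (the series ID and sample values below are arbitrary):

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/richardartoul/tsdb-layer/src/layer/rawblock"
)

func main() {
	// NewLayer opens the default FDB cluster, starts the commitlog and the
	// background persist loop.
	l := rawblock.NewLayer()

	// Write two samples for a single series; timestamps must be increasing.
	if err := l.Write("some-series", time.Unix(0, 1), 1.0); err != nil {
		log.Fatal(err)
	}
	if err := l.Write("some-series", time.Unix(0, 2), 2.0); err != nil {
		log.Fatal(err)
	}

	// Read the series back; the returned decoder merges buffered (unflushed)
	// data with any chunks already persisted to FDB.
	dec, err := l.Read("some-series")
	if err != nil {
		log.Fatal(err)
	}
	for dec.Next() {
		t, v := dec.Current()
		fmt.Println(t, v)
	}
	if err := dec.Err(); err != nil {
		log.Fatal(err)
	}
}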