├── .cargo └── config ├── .github └── workflows │ └── actions.yml ├── .gitignore ├── .travis.yml ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── LICENSE ├── README.md ├── statsrelay-protobuf ├── Cargo.toml ├── build.rs ├── proto │ └── ext │ │ └── github.com │ │ ├── gogo │ │ └── protobuf │ │ │ └── gogoproto │ │ │ └── gogo.proto │ │ └── prometheus │ │ └── prometheus │ │ └── prompb │ │ ├── remote.proto │ │ └── types.proto └── src │ └── lib.rs └── statsrelay ├── Cargo.toml ├── benches └── statsd_benchmark.rs ├── build.rs ├── examples ├── processors-basic.json ├── source-example1.json ├── tugboat-discovery.json └── tugboat-legacy-basic.json └── src ├── admin.rs ├── backend.rs ├── backend_client ├── mod.rs ├── prom_client.rs └── statsd_client.rs ├── backends.rs ├── cmd ├── loadgen.rs └── statsrelay.rs ├── config.rs ├── cuckoofilter ├── LICENSE ├── bucket.rs ├── mod.rs └── util.rs ├── discovery.rs ├── lib.rs ├── processors ├── cardinality.rs ├── mod.rs ├── regex_filter.rs ├── sampler.rs └── tag.rs ├── shard.rs ├── stats.rs ├── statsd_proto.rs └── statsd_server.rs /.cargo/config: -------------------------------------------------------------------------------- 1 | [profile.release] 2 | lto = true 3 | -------------------------------------------------------------------------------- /.github/workflows/actions.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | jobs: 3 | build: 4 | runs-on: ubuntu-20.04 5 | steps: 6 | - uses: actions/checkout@v2 7 | - run: docker build . 8 | 9 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.o 3 | *.py[co] 4 | *.swp 5 | *.trs 6 | .deps 7 | .idea 8 | .dirstamp 9 | /target 10 | .vscode/settings.json 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | dist: bionic 3 | services: 4 | - docker 5 | before_install: 6 | - docker build . 7 | 8 | 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "statsrelay", 4 | "statsrelay-protobuf" 5 | ] 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM fedora:33 2 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 3 | 4 | ENV PATH="$PATH:$HOME/.cargo/bin" 5 | RUN dnf install -y gcc make g++ openssl-devel 6 | 7 | COPY . /code 8 | WORKDIR /code 9 | 10 | RUN $HOME/.cargo/bin/cargo test --release && \ 11 | $HOME/.cargo/bin/cargo build --release 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | tugboat 2 | Copyright (c) 2020 Lyft Inc. 
3 | The MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 7 | deal in the Software without restriction, including without limitation the 8 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 9 | sell copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # statsrelay (3.0) 2 | A veritable toolkit, sidecar, or daemon(set) for sharding, aggregating, relaying, 3 | and working with statsd and Prometheus-based metrics sources, at scale. 4 | 5 | ## License 6 | MIT License 7 | Copyright (c) 2015-2020 Lyft Inc. 8 | 9 | Originally based on statsrelay: 10 | Copyright (c) 2014 Uber Technologies, Inc. 11 | 12 | ## What's different in this version 13 | 14 | Statsrelay 3.0 is a port of the original C statsrelay to Rust, with a number of 15 | new features designed to improve the operability and scalability of the 16 | original daemon, moving beyond pure "relaying" and focusing instead on both 17 | sharding and cascading aggregation. The original C daemon in this fork 18 | featured sampling support, but was limited by statsd being its only 19 | output format. 20 | 21 | ## Build 22 | 23 | Dependencies: 24 | - Rust (stable, 1.46+) 25 | 26 | ## Use 27 | 28 | ``` 29 | statsrelay 3.1.0 30 | 31 | USAGE: 32 | statsrelay [OPTIONS] 33 | 34 | FLAGS: 35 | -h, --help Prints help information 36 | -V, --version Prints version information 37 | 38 | OPTIONS: 39 | -c, --config <config> [default: /etc/statsrelay.json] 40 | ``` 41 | 42 | Statsrelay logging is handled by the env_logger crate, which inherits a number of 43 | logging options from the environment. Consult the [crate 44 | documentation](https://docs.rs/env_logger/0.8.1/env_logger/#enabling-logging) 45 | for more information on options you can set. 46 | 47 | ### Protocols 48 | 49 | Statsrelay understands: 50 | 51 | - statsd text line protocol 52 | - with sampling support (`@sampling`) 53 | - with extended data types (map, kv, sets, etc.) 54 | - with "DogStatsD" extended tags (`|#tags`) 55 | - with Lyft internal tags (`metric.__tag=value`) 56 | 57 | ### Configuration file 58 | 59 | The configuration file is a JSON file originating from the original statsrelay 60 | structure. The original statsrelay configuration contract has been broken as of 61 | version 3.1 in order to fix a number of features.
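For example, a typical invocation pointing at the default configuration path, with debug logging enabled through env_logger's standard `RUST_LOG` variable (the values here are only illustrative), might look like:

```
RUST_LOG=debug statsrelay --config /etc/statsrelay.json
```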
62 | 63 | #### Basic structure 64 | 65 | ```json 66 | { 67 | "statsd": { 68 | "bind": "127.0.0.1:8129", 69 | "validate": true, 70 | "backends": { 71 | "b1": { 72 | "shard_map": ["127.0.0.1:1234"], 73 | "prefix": "myapp.", 74 | "suffix": ".suffix" 75 | } 76 | } 77 | }, 78 | "discovery": { 79 | "sources": { 80 | "source1": { 81 | "type": "s3", 82 | "bucket": "my-bucket", 83 | "key": "file.json", 84 | "interval": 10 85 | } 86 | } 87 | } 88 | } 89 | ``` 90 | 91 | Statsd inputs and routing are defined in the outer `statsd` block. 92 | 93 | - `bind`: sets the server bind address to accept statsd protocol messages. 94 | Statsrelay will bind on both UDP and TCP ports. 95 | - `validate`: turns on extended, more expensive validation of statsd line 96 | protocol messages, such as parsing of numerical fields, which may not be 97 | required for a pure relaying case. 98 | - `backends`: forks the incoming statsd metrics down a number of parallel 99 | processing pipelines. By default, all incoming protocol lines from the statsd 100 | server are sent to all backends. 101 | 102 | #### `backends` options 103 | 104 | Each backend is named and can accept a number of options and rewrite steps for 105 | sending and processing StatsD messages. 106 | 107 | - `shard_map`: list of socket addresses that defines where to send statsd 108 | output. The same server can be specified more than once (allowing for 109 | virtual sharding). Output statsd lines are consistently hashed and sent to 110 | the corresponding server based on a standard hash ring, in a format 111 | compatible with the original statsrelay code (Murmur3 hash). This list 112 | can be empty to not relay statsd messages. 113 | - `shard_map_source`: string value which defines a discovery source to use 114 | in lieu of `shard_map`. 115 | - `prefix`: prepend this string to every metric/statsd line before 116 | forwarding it to the `shard_map` servers. Useful for tagging metrics coming 117 | from a sidecar. 118 | - `suffix`: append a suffix. Works like `prefix`, just at the end. 119 | - `max_queue`: maximum number of messages to queue before dropping. Allows 120 | the sender to keep making overall progress when one backend is down. 121 | Defaults to 10,000. 122 | 123 | #### `discovery` options 124 | 125 | Each key in the discovery sources section defines a source which can be used by 126 | most backends to locate servers, shards, or other network resources to 127 | communicate with. Each named discovery source is listed in the `sources` subkey: 128 | 129 | ```json 130 | { 131 | "sources": { 132 | "source_name_1": { 133 | "type": "static_file" 134 | }, 135 | "source_name_2": { 136 | "type": "s3" 137 | } 138 | } 139 | } 140 | ``` 141 | 142 | For sources supporting a file input (s3, static_file), the following schema is 143 | assumed: 144 | 145 | ```json 146 | { 147 | "hosts": ["host:port", "host:port"] 148 | } 149 | ``` 150 | 151 | Some sources may support rewriting to transform the input string into an output 152 | string (e.g., to add a port). 153 | 154 | ##### s3 source 155 | 156 | An S3 source represents an AWS S3-compatible source. Statsrelay uses `rusoto_s3` 157 | to access S3 and supports the vast majority of metadata sources, configuration, 158 | and environment variables in order to locate credentials.
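As a sketch combining the `shard_map_source` backend option with the s3 keys documented below (the source name, bucket, key, interval, and port here are placeholders, not values from this repository):

```json
{
  "statsd": {
    "bind": "127.0.0.1:8129",
    "backends": {
      "b1": { "shard_map_source": "s3_shards" }
    }
  },
  "discovery": {
    "sources": {
      "s3_shards": {
        "type": "s3",
        "bucket": "example-bucket",
        "key": "statsd/shards.json",
        "interval": 30,
        "format": "{}:8125"
      }
    }
  }
}
```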
159 | 160 | The following keys are supported for the S3 source: 161 | 162 | - `bucket` - The S3 bucket where the file lives 163 | - `key` - The key/path inside the S3 bucket 164 | - `interval` - An integer number of seconds to wait before re-polling the 165 | contents of the S3 key to detect changes. 166 | - `format` - A simple text substitution to run on the incoming text, where `{}` is 167 | replaced by the value of each host entry. Useful for appending information, such 168 | as a port number, by specifying `"format": "{}:8125"` 169 | -------------------------------------------------------------------------------- /statsrelay-protobuf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "statsrelay-protobuf" 3 | version = "3.1.0-dev" 4 | edition = "2018" 5 | 6 | [dependencies] 7 | tonic = "0.6" 8 | prost = "0.9" 9 | 10 | [build-dependencies] 11 | tonic-build = "0.6" 12 | -------------------------------------------------------------------------------- /statsrelay-protobuf/build.rs: -------------------------------------------------------------------------------- 1 | fn main() -> Result<(), Box<dyn std::error::Error>> { 2 | tonic_build::configure() 3 | .protoc_arg("--experimental_allow_proto3_optional") 4 | .compile( 5 | &["proto/ext/github.com/prometheus/prometheus/prompb/remote.proto"], 6 | &[ 7 | "proto/", 8 | "proto/ext/github.com/prometheus/prometheus/prompb", 9 | "proto/ext/github.com/gogo/protobuf/", 10 | ], 11 | )?; 12 | 13 | Ok(()) 14 | } 15 | -------------------------------------------------------------------------------- /statsrelay-protobuf/proto/ext/github.com/gogo/protobuf/gogoproto/gogo.proto: -------------------------------------------------------------------------------- 1 | // Protocol Buffers for Go with Gadgets 2 | // 3 | // Copyright (c) 2013, The GoGo Authors. All rights reserved. 4 | // http://github.com/gogo/protobuf 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following conditions are 8 | // met: 9 | // 10 | // * Redistributions of source code must retain the above copyright 11 | // notice, this list of conditions and the following disclaimer. 12 | // * Redistributions in binary form must reproduce the above 13 | // copyright notice, this list of conditions and the following disclaimer 14 | // in the documentation and/or other materials provided with the 15 | // distribution. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 29 | syntax = "proto2"; 30 | package gogoproto; 31 | 32 | import "google/protobuf/descriptor.proto"; 33 | 34 | option java_package = "com.google.protobuf"; 35 | option java_outer_classname = "GoGoProtos"; 36 | option go_package = "github.com/gogo/protobuf/gogoproto"; 37 | 38 | extend google.protobuf.EnumOptions { 39 | optional bool goproto_enum_prefix = 62001; 40 | optional bool goproto_enum_stringer = 62021; 41 | optional bool enum_stringer = 62022; 42 | optional string enum_customname = 62023; 43 | optional bool enumdecl = 62024; 44 | } 45 | 46 | extend google.protobuf.EnumValueOptions { 47 | optional string enumvalue_customname = 66001; 48 | } 49 | 50 | extend google.protobuf.FileOptions { 51 | optional bool goproto_getters_all = 63001; 52 | optional bool goproto_enum_prefix_all = 63002; 53 | optional bool goproto_stringer_all = 63003; 54 | optional bool verbose_equal_all = 63004; 55 | optional bool face_all = 63005; 56 | optional bool gostring_all = 63006; 57 | optional bool populate_all = 63007; 58 | optional bool stringer_all = 63008; 59 | optional bool onlyone_all = 63009; 60 | 61 | optional bool equal_all = 63013; 62 | optional bool description_all = 63014; 63 | optional bool testgen_all = 63015; 64 | optional bool benchgen_all = 63016; 65 | optional bool marshaler_all = 63017; 66 | optional bool unmarshaler_all = 63018; 67 | optional bool stable_marshaler_all = 63019; 68 | 69 | optional bool sizer_all = 63020; 70 | 71 | optional bool goproto_enum_stringer_all = 63021; 72 | optional bool enum_stringer_all = 63022; 73 | 74 | optional bool unsafe_marshaler_all = 63023; 75 | optional bool unsafe_unmarshaler_all = 63024; 76 | 77 | optional bool goproto_extensions_map_all = 63025; 78 | optional bool goproto_unrecognized_all = 63026; 79 | optional bool gogoproto_import = 63027; 80 | optional bool protosizer_all = 63028; 81 | optional bool compare_all = 63029; 82 | optional bool typedecl_all = 63030; 83 | optional bool enumdecl_all = 63031; 84 | 85 | optional bool goproto_registration = 63032; 86 | optional bool messagename_all = 63033; 87 | 88 | optional bool goproto_sizecache_all = 63034; 89 | optional bool goproto_unkeyed_all = 63035; 90 | } 91 | 92 | extend google.protobuf.MessageOptions { 93 | optional bool goproto_getters = 64001; 94 | optional bool goproto_stringer = 64003; 95 | optional bool verbose_equal = 64004; 96 | optional bool face = 64005; 97 | optional bool gostring = 64006; 98 | optional bool populate = 64007; 99 | optional bool stringer = 67008; 100 | optional bool onlyone = 64009; 101 | 102 | optional bool equal = 64013; 103 | optional bool description = 64014; 104 | optional bool testgen = 64015; 105 | optional bool benchgen = 64016; 106 | optional bool marshaler = 64017; 107 | optional bool unmarshaler = 64018; 108 | optional bool stable_marshaler = 64019; 109 | 110 | optional bool sizer = 64020; 111 | 112 | optional bool unsafe_marshaler = 64023; 113 | optional bool unsafe_unmarshaler = 64024; 114 | 115 | optional bool goproto_extensions_map = 64025; 116 | optional bool goproto_unrecognized = 64026; 117 | 118 | optional bool protosizer = 64028; 119 | optional bool compare = 64029; 120 | 121 | optional bool typedecl = 64030; 122 | 123 | optional bool messagename = 64033; 124 | 125 | optional bool goproto_sizecache = 64034; 126 | optional bool goproto_unkeyed = 64035; 127 | } 128 | 129 | extend google.protobuf.FieldOptions { 130 | optional bool nullable = 65001; 131 | optional bool embed = 65002; 132 | optional string customtype = 65003; 133 | optional string 
customname = 65004; 134 | optional string jsontag = 65005; 135 | optional string moretags = 65006; 136 | optional string casttype = 65007; 137 | optional string castkey = 65008; 138 | optional string castvalue = 65009; 139 | 140 | optional bool stdtime = 65010; 141 | optional bool stdduration = 65011; 142 | optional bool wktpointer = 65012; 143 | 144 | } -------------------------------------------------------------------------------- /statsrelay-protobuf/proto/ext/github.com/prometheus/prometheus/prompb/remote.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Prometheus Team 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | syntax = "proto3"; 15 | package prometheus; 16 | 17 | option go_package = "prompb"; 18 | 19 | import "types.proto"; 20 | import "gogoproto/gogo.proto"; 21 | 22 | message WriteRequest { 23 | repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false]; 24 | // Cortex uses this field to determine the source of the write request. 25 | // We reserve it to avoid any compatibility issues. 26 | reserved 2; 27 | repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false]; 28 | 29 | // Added as an extension, this field allows us to forward statsd PDUs that fail to parse. 30 | repeated string failthrough_statsd_lines = 17; 31 | } 32 | 33 | // ReadRequest represents a remote read request. 34 | message ReadRequest { 35 | repeated Query queries = 1; 36 | 37 | enum ResponseType { 38 | // Server will return a single ReadResponse message with matched series that includes list of raw samples. 39 | // It's recommended to use streamed response types instead. 40 | // 41 | // Response headers: 42 | // Content-Type: "application/x-protobuf" 43 | // Content-Encoding: "snappy" 44 | SAMPLES = 0; 45 | // Server will stream a delimited ChunkedReadResponse message that contains XOR encoded chunks for a single series. 46 | // Each message is following varint size and fixed size bigendian uint32 for CRC32 Castagnoli checksum. 47 | // 48 | // Response headers: 49 | // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" 50 | // Content-Encoding: "" 51 | STREAMED_XOR_CHUNKS = 1; 52 | } 53 | 54 | // accepted_response_types allows negotiating the content type of the response. 55 | // 56 | // Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is 57 | // implemented by server, error is returned. 58 | // For request that do not contain `accepted_response_types` field the SAMPLES response type will be used. 59 | repeated ResponseType accepted_response_types = 2; 60 | } 61 | 62 | // ReadResponse is a response when response_type equals SAMPLES. 63 | message ReadResponse { 64 | // In same order as the request's queries. 
65 | repeated QueryResult results = 1; 66 | } 67 | 68 | message Query { 69 | int64 start_timestamp_ms = 1; 70 | int64 end_timestamp_ms = 2; 71 | repeated prometheus.LabelMatcher matchers = 3; 72 | prometheus.ReadHints hints = 4; 73 | } 74 | 75 | message QueryResult { 76 | // Samples within a time series must be ordered by time. 77 | repeated prometheus.TimeSeries timeseries = 1; 78 | } 79 | 80 | // ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS. 81 | // We strictly stream full series after series, optionally split by time. This means that a single frame can contain 82 | // partition of the single series, but once a new series is started to be streamed it means that no more chunks will 83 | // be sent for previous one. Series are returned sorted in the same way TSDB block are internally. 84 | message ChunkedReadResponse { 85 | repeated prometheus.ChunkedSeries chunked_series = 1; 86 | 87 | // query_index represents an index of the query from ReadRequest.queries these chunks relates to. 88 | int64 query_index = 2; 89 | } -------------------------------------------------------------------------------- /statsrelay-protobuf/proto/ext/github.com/prometheus/prometheus/prompb/types.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Prometheus Team 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | syntax = "proto3"; 15 | package prometheus; 16 | 17 | option go_package = "prompb"; 18 | 19 | import "gogoproto/gogo.proto"; 20 | 21 | message MetricMetadata { 22 | enum MetricType { 23 | UNKNOWN = 0; 24 | COUNTER = 1; 25 | GAUGE = 2; 26 | HISTOGRAM = 3; 27 | GAUGEHISTOGRAM = 4; 28 | SUMMARY = 5; 29 | INFO = 6; 30 | STATESET = 7; 31 | 32 | // added as extensions 33 | DIRECTGAUGE = 100; 34 | DELTAGAUGE = 101; 35 | } 36 | 37 | // Represents the metric type, these match the set from Prometheus. 38 | // Refer to model/textparse/interface.go for details. 39 | MetricType type = 1; 40 | string metric_family_name = 2; 41 | string help = 4; 42 | string unit = 5; 43 | } 44 | 45 | message Sample { 46 | double value = 1; 47 | // timestamp is in ms format, see model/timestamp/timestamp.go for 48 | // conversion from time.Time to Prometheus timestamp. 49 | int64 timestamp = 2; 50 | 51 | // added as an extension, sample_rate is a ratio in the range (0,1]. 52 | double sample_rate = 100; 53 | } 54 | 55 | message Exemplar { 56 | // Optional, can be empty. 57 | repeated Label labels = 1 [(gogoproto.nullable) = false]; 58 | double value = 2; 59 | // timestamp is in ms format, see model/timestamp/timestamp.go for 60 | // conversion from time.Time to Prometheus timestamp. 61 | int64 timestamp = 3; 62 | } 63 | 64 | // TimeSeries represents samples and labels for a single time series. 65 | message TimeSeries { 66 | // For a timeseries to be valid, and for the samples and exemplars 67 | // to be ingested by the remote system properly, the labels field is required. 
68 | repeated Label labels = 1 [(gogoproto.nullable) = false]; 69 | repeated Sample samples = 2 [(gogoproto.nullable) = false]; 70 | repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false]; 71 | } 72 | 73 | message Label { 74 | string name = 1; 75 | string value = 2; 76 | } 77 | 78 | message Labels { 79 | repeated Label labels = 1 [(gogoproto.nullable) = false]; 80 | } 81 | 82 | // Matcher specifies a rule, which can match or set of labels or not. 83 | message LabelMatcher { 84 | enum Type { 85 | EQ = 0; 86 | NEQ = 1; 87 | RE = 2; 88 | NRE = 3; 89 | } 90 | Type type = 1; 91 | string name = 2; 92 | string value = 3; 93 | } 94 | 95 | message ReadHints { 96 | int64 step_ms = 1; // Query step size in milliseconds. 97 | string func = 2; // String representation of surrounding function or aggregation. 98 | int64 start_ms = 3; // Start time in milliseconds. 99 | int64 end_ms = 4; // End time in milliseconds. 100 | repeated string grouping = 5; // List of label names used in aggregation. 101 | bool by = 6; // Indicate whether it is without or by. 102 | int64 range_ms = 7; // Range vector selector range in milliseconds. 103 | } 104 | 105 | // Chunk represents a TSDB chunk. 106 | // Time range [min, max] is inclusive. 107 | message Chunk { 108 | int64 min_time_ms = 1; 109 | int64 max_time_ms = 2; 110 | 111 | // We require this to match chunkenc.Encoding. 112 | enum Encoding { 113 | UNKNOWN = 0; 114 | XOR = 1; 115 | } 116 | Encoding type = 3; 117 | bytes data = 4; 118 | } 119 | 120 | // ChunkedSeries represents single, encoded time series. 121 | message ChunkedSeries { 122 | // Labels should be sorted. 123 | repeated Label labels = 1 [(gogoproto.nullable) = false]; 124 | // Chunks will be in start time order and may overlap. 125 | repeated Chunk chunks = 2 [(gogoproto.nullable) = false]; 126 | } -------------------------------------------------------------------------------- /statsrelay-protobuf/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod prometheus { 2 | tonic::include_proto!("prometheus"); 3 | } 4 | 5 | #[cfg(test)] 6 | mod tests { 7 | use crate::prometheus; 8 | 9 | #[test] 10 | fn compiles_protobufs() { 11 | // Just build some default protobuf objects from the .proto file to check 12 | // everything was compiled by build.rs. 
While its not a runtime error, 13 | // maybe reduces some hair pulling later 14 | let _write_request = prometheus::WriteRequest::default(); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /statsrelay/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "statsrelay" 3 | version = "3.1.0-dev" 4 | authors = ["Yann Ramin "] 5 | edition = "2018" 6 | description = "Swiss army knife for aggregation, filtering, relaying, sharding of statsd, prometheus, and other metrics systems" 7 | license = "MIT" 8 | keywords = ["statsd", "metrics", "aggregation"] 9 | categories = ["development-tools"] 10 | repository = "https://github.com/lyft/statsrelay" 11 | 12 | [[bin]] 13 | name = "statsrelay" 14 | path = "src/cmd/statsrelay.rs" 15 | 16 | [[bin]] 17 | name = "sr-loadgen" 18 | path = "src/cmd/loadgen.rs" 19 | 20 | [dependencies] 21 | murmur3 = "0.5" 22 | tokio = { version = "1", features = ["full", "parking_lot"] } 23 | tokio-stream = "0" 24 | futures = "0.3" 25 | hyper = { version = "0.14", features = ["server", "client", "runtime", "http2", "stream", "http1"] } 26 | structopt = "0.3" 27 | serde = { version = "1.0", features = ["derive"] } 28 | serde_json = "1.0" 29 | anyhow = "1.0" 30 | thiserror = "1.0" 31 | memchr = "2" 32 | stream-cancel = "0.8" 33 | bytes = "1" 34 | parking_lot = "0.11" 35 | regex = "1" 36 | chrono = "0.4" 37 | dashmap = "4" 38 | async-stream = "0.3" 39 | lexical = "5" 40 | prost = "0.9" 41 | reqwest = "0.11" 42 | smallvec = "1" 43 | snap = "1" 44 | statsrelay-protobuf = { path = "../statsrelay-protobuf" } 45 | async-trait = "0.1.56" 46 | backoff = { version = "0.4.0", features = ["tokio"] } 47 | 48 | # For discovery 49 | rusoto_core = "0.46" 50 | rusoto_credential = "0.46.0" 51 | rusoto_s3 = "0.46.0" 52 | rusoto_sts = "0.46.0" 53 | 54 | log = "0.4" 55 | env_logger = "0.8" 56 | 57 | # Internal stats 58 | prometheus = "0.11" 59 | 60 | # malloc 61 | jemallocator = "0.3.0" 62 | 63 | # Samplers 64 | byteorder = "1" 65 | hyperloglog = "1" 66 | ahash = "0.7" 67 | fastrand = "1" 68 | rand = { version = "0.8", features = ["small_rng"] } 69 | 70 | [[bench]] 71 | name = "statsd_benchmark" 72 | harness = false 73 | 74 | [dev-dependencies] 75 | criterion = { version = "0.3", features = ["html_reports"] } 76 | tempfile = "3.1" 77 | quickcheck = "1" 78 | quickcheck_async = "0.1.1" 79 | quickcheck_macros = "1" 80 | 81 | [build-dependencies] 82 | built = { version = "0.4", features = ["git2"] } 83 | -------------------------------------------------------------------------------- /statsrelay/benches/statsd_benchmark.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 3 | use std::convert::TryInto; 4 | 5 | fn parse( 6 | line: &Bytes, 7 | ) -> Result { 8 | statsrelay::statsd_proto::Pdu::parse(line.clone()) 9 | } 10 | 11 | fn criterion_benchmark(c: &mut Criterion) { 12 | let by = Bytes::from_static( 13 | b"hello_world.worldworld_i_am_a_pumpkin:3|c|@1.0|#tags:tags,tags:tags,tags:tags,tags:tags", 14 | ); 15 | c.bench_function("statsd pdu parsing", |b| b.iter(|| parse(black_box(&by)))); 16 | c.bench_function("statsd pdu conversion", |b| { 17 | b.iter(|| { 18 | let _: statsrelay::statsd_proto::Owned = 19 | parse(black_box(&by)).unwrap().try_into().unwrap(); 20 | }) 21 | }); 22 | } 23 | 24 | criterion_group!(benches, criterion_benchmark); 25 | 
criterion_main!(benches); 26 | -------------------------------------------------------------------------------- /statsrelay/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::path; 3 | 4 | fn main() { 5 | let src = env::var("CARGO_MANIFEST_DIR").unwrap(); 6 | let dst = path::Path::new(&env::var("OUT_DIR").unwrap()).join("built.rs"); 7 | let mut options = built::Options::default(); 8 | options.set_git(true); 9 | built::write_built_file_with_opts(&options, src.as_ref(), &dst) 10 | .expect("Failed to acquire build-time information"); 11 | } 12 | -------------------------------------------------------------------------------- /statsrelay/examples/processors-basic.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "admin": { 4 | "port": 9001 5 | }, 6 | "processors": { 7 | "tag": { 8 | "type": "tag_converter", 9 | "route": ["processor:cardinality"] 10 | }, 11 | "sampler": { 12 | "type": "sampler", 13 | "window": 5, 14 | "timer_reservoir_size": 2, 15 | "route": ["statsd:simple"] 16 | }, 17 | "cardinality": { 18 | "type": "cardinality", 19 | "route": ["processor:sampler"], 20 | "size_limit": 100, 21 | "rotate_after_seconds": 10, 22 | "buckets": 3 23 | } 24 | }, 25 | "statsd": { 26 | "servers": { 27 | "default": { 28 | "bind": "127.0.0.1:8129", 29 | "route": ["processor:tag"] 30 | } 31 | }, 32 | "backends": { 33 | "simple": { 34 | "type": "statsd", 35 | "shard_map": [ 36 | "127.0.0.1:8122" 37 | ] 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /statsrelay/examples/source-example1.json: -------------------------------------------------------------------------------- 1 | { 2 | "hosts": ["127.0.0.1:9000", "127.0.0.1:9002"] 3 | } 4 | -------------------------------------------------------------------------------- /statsrelay/examples/tugboat-discovery.json: -------------------------------------------------------------------------------- 1 | { 2 | "statsd": { 3 | "servers": { 4 | "default": { 5 | "bind": "127.0.0.1:8129", 6 | "socket": "/tmp/statsrelay", 7 | "route": ["statsd:simple", "statsd:path_discovery"] 8 | } 9 | }, 10 | "backends": { 11 | "simple": { 12 | "shard_map": [ 13 | "127.0.0.1:8122" 14 | ] 15 | }, 16 | "path_discovery": { 17 | "shard_map_source": "path_discovery" 18 | } 19 | } 20 | }, 21 | "discovery": { 22 | "sources": { 23 | "path_discovery": { 24 | "type": "static_file", 25 | "path": "examples/source-example1.json", 26 | "interval": 5, 27 | "transforms": [ 28 | { 29 | "type": "repeat", 30 | "count": 2 31 | }, 32 | { 33 | "type": "format", 34 | "pattern": "{}:cheese" 35 | }, 36 | { 37 | "type": "format", 38 | "pattern": "{}:cheese2" 39 | } 40 | ] 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /statsrelay/examples/tugboat-legacy-basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "admin": { 3 | "port": 9001 4 | }, 5 | "statsd": { 6 | "servers": { 7 | "default": { 8 | "bind": "127.0.0.1:8129", 9 | "route": ["statsd:simple"] 10 | } 11 | }, 12 | "backends": { 13 | "simple": { 14 | "shard_map": [ 15 | "127.0.0.1:8122" 16 | ] 17 | } 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /statsrelay/src/admin.rs: -------------------------------------------------------------------------------- 1 | use log::info; 2 | 3 | use 
hyper::service::{make_service_fn, service_fn}; 4 | use hyper::{Body, Method, Request, Response, Server}; 5 | use tokio::runtime; 6 | 7 | use std::boxed::Box; 8 | use std::convert::Infallible; 9 | 10 | use crate::stats::Collector; 11 | 12 | #[derive(Clone)] 13 | struct AdminState { 14 | collector: Collector, 15 | } 16 | 17 | async fn metric_response( 18 | state: AdminState, 19 | _req: Request, 20 | ) -> Result, Infallible> { 21 | let buffer = state.collector.prometheus_output().unwrap(); 22 | Ok(Response::builder() 23 | .header(hyper::header::CONTENT_TYPE, prometheus::TEXT_FORMAT) 24 | .body(Body::from(buffer)) 25 | .unwrap()) 26 | } 27 | 28 | async fn request_handler( 29 | state: AdminState, 30 | req: Request, 31 | ) -> Result, Infallible> { 32 | match (req.method(), req.uri().path()) { 33 | (&Method::GET, "/") => Ok(Response::builder() 34 | .body(Body::from("statsrelay admin server")) 35 | .unwrap()), 36 | (&Method::GET, "/healthcheck") => Ok(Response::builder().body(Body::from("OK")).unwrap()), 37 | (&Method::GET, "/metrics") => metric_response(state, req).await, 38 | _ => Ok(Response::builder() 39 | .status(404) 40 | .body(Body::from("not found")) 41 | .unwrap()), 42 | } 43 | } 44 | 45 | async fn hyper_server(port: u16, collector: Collector) -> Result<(), Box> { 46 | let addr = format!("[::]:{}", port).parse().unwrap(); 47 | let admin_state = AdminState { collector }; 48 | let make_svc = make_service_fn(move |_conn| { 49 | let service_capture = admin_state.clone(); 50 | async { 51 | Ok::<_, Infallible>(service_fn(move |req| { 52 | request_handler(service_capture.clone(), req) 53 | })) 54 | } 55 | }); 56 | info!("admin server starting on port {}", port); 57 | Server::bind(&addr).serve(make_svc).await?; 58 | Ok(()) 59 | } 60 | 61 | pub fn spawn_admin_server(port: u16, collector: Collector) { 62 | let rt = runtime::Builder::new_current_thread() 63 | .enable_all() 64 | .build() 65 | .unwrap(); 66 | std::thread::spawn(move || rt.block_on(hyper_server(port, collector)).unwrap()); 67 | } 68 | -------------------------------------------------------------------------------- /statsrelay/src/backend.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::sync::atomic::AtomicU64; 3 | use std::sync::Arc; 4 | 5 | use regex::bytes::RegexSet; 6 | use tokio::sync::mpsc; 7 | 8 | use crate::backend_client::prom_client::PromClient; 9 | use crate::backend_client::statsd_client::StatsdClient; 10 | use crate::backend_client::BackendClient; 11 | use crate::config; 12 | use crate::config::BackendClientConfig; 13 | use crate::discovery; 14 | use crate::shard::{statsrelay_compat_hash, Ring}; 15 | use crate::stats; 16 | use crate::statsd_proto::Event; 17 | use crate::statsd_proto::{self, Pdu}; 18 | 19 | use log::warn; 20 | 21 | type ArcBackendClient = 22 | Arc> + Send + Sync>; 23 | 24 | pub struct Backend { 25 | conf: config::BackendConfig, 26 | ring: Ring, 27 | input_filter: Option, 28 | warning_log: AtomicU64, 29 | backend_sends: stats::Counter, 30 | backend_fails: stats::Counter, 31 | } 32 | 33 | impl Backend { 34 | pub fn new( 35 | stats: stats::Scope, 36 | conf: &config::BackendConfig, 37 | client_ref: Option<&Backend>, 38 | discovery_update: Option<&discovery::Update>, 39 | shutdown_send: Option>, 40 | ) -> anyhow::Result { 41 | let input_filter = conf.generate_input_filter(); 42 | let mut ring: Ring = Ring::new(); 43 | 44 | // Use the same backend for the same endpoint address, caching the lookup locally 45 | let mut memoize: 
HashMap = 46 | client_ref.map_or_else(HashMap::new, |b| b.clients()); 47 | 48 | let use_endpoints = discovery_update 49 | .map(|u| u.sources()) 50 | .unwrap_or(&conf.shard_map); 51 | for endpoint in use_endpoints { 52 | if endpoint.is_empty() { 53 | continue; 54 | } 55 | if let Some(client) = memoize.get(endpoint) { 56 | ring.push(client.clone()) 57 | } else { 58 | let client: ArcBackendClient = match &conf.client_config { 59 | BackendClientConfig::Statsd(client_config) => Arc::new(StatsdClient::new( 60 | stats.scope("statsd_client"), 61 | endpoint.as_str(), 62 | shutdown_send.clone(), 63 | client_config.clone(), 64 | )), 65 | BackendClientConfig::PromRemoteWrite(client_config) => { 66 | Arc::new(PromClient::new( 67 | stats.scope("prom_client"), 68 | endpoint, 69 | client_config.clone(), 70 | shutdown_send.clone(), 71 | )) 72 | } 73 | }; 74 | memoize.insert(endpoint.clone(), client.clone()); 75 | ring.push(client); 76 | } 77 | } 78 | 79 | let backend = Backend { 80 | conf: conf.clone(), 81 | ring, 82 | input_filter, 83 | warning_log: AtomicU64::new(0), 84 | backend_fails: stats.counter("backend_fails").unwrap(), 85 | backend_sends: stats.counter("backend_sends").unwrap(), 86 | }; 87 | 88 | Ok(backend) 89 | } 90 | 91 | // Capture the old ring contents into a memoization map by endpoint, 92 | // letting us re-use any old client connections and buffers. Note we 93 | // won't start tearing down connections until the memoization buffer and 94 | // old ring are both dropped. 95 | fn clients(&self) -> HashMap { 96 | let mut memoize: HashMap = HashMap::new(); 97 | for i in 0..self.ring.len() { 98 | let client = self.ring.pick_from(i as u32); 99 | memoize.insert(String::from(client.endpoint()), client.clone()); 100 | } 101 | memoize 102 | } 103 | 104 | pub fn provide_statsd(&self, input: &Event) { 105 | let pdu: statsd_proto::Pdu = input.into(); 106 | if !self 107 | .input_filter 108 | .as_ref() 109 | .map_or(true, |inf| inf.is_match(pdu.name())) 110 | { 111 | return; 112 | } 113 | 114 | let ring_read = &self.ring; 115 | let code = match ring_read.len() { 116 | 0 => return, // In case of nothing to send, do nothing 117 | 1 => 1_u32, 118 | _ => statsrelay_compat_hash(&pdu), 119 | }; 120 | let client = ring_read.pick_from(code); 121 | 122 | // Assign prefix and/or suffix 123 | let pdu_clone = if self.conf.prefix.is_some() || self.conf.suffix.is_some() { 124 | pdu.with_prefix_suffix( 125 | self.conf 126 | .prefix 127 | .as_ref() 128 | .map(|p| p.as_bytes()) 129 | .unwrap_or_default(), 130 | self.conf 131 | .suffix 132 | .as_ref() 133 | .map(|s| s.as_bytes()) 134 | .unwrap_or_default(), 135 | ) 136 | } else { 137 | pdu 138 | }; 139 | match client.try_send(pdu_clone) { 140 | Err(_e) => { 141 | self.backend_fails.inc(); 142 | let count = self 143 | .warning_log 144 | .fetch_add(1, std::sync::atomic::Ordering::Relaxed); 145 | if count % 1000 == 0 { 146 | warn!( 147 | "error pushing to queue full (endpoint {}, total failures {})", 148 | client.endpoint(), 149 | count 150 | ); 151 | } 152 | } 153 | Ok(_) => { 154 | self.backend_sends.inc(); 155 | } 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /statsrelay/src/backend_client/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::statsd_proto::Pdu; 2 | 3 | pub mod prom_client; 4 | pub mod statsd_client; 5 | 6 | pub trait BackendClient { 7 | type Error; 8 | 9 | fn try_send(&self, pdu: Pdu) -> Result<(), Self::Error>; 10 | 11 | fn endpoint(&self) -> 
&str; 12 | } 13 | -------------------------------------------------------------------------------- /statsrelay/src/backend_client/prom_client.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | use std::sync::{Arc, Mutex}; 3 | use std::time::Duration; 4 | 5 | use async_trait::async_trait; 6 | use backoff::future::retry_notify; 7 | use backoff::ExponentialBackoff; 8 | use log::{error, info, warn}; 9 | use prost::Message; 10 | use reqwest::Client; 11 | use statsrelay_protobuf::prometheus::WriteRequest; 12 | use tokio::select; 13 | use tokio::sync::{mpsc, watch, Semaphore}; 14 | use tokio::time::sleep; 15 | 16 | use crate::config::PromBackendClientConfig; 17 | use crate::stats::{self, Counter, Gauge, Scope}; 18 | use crate::statsd_proto::{Owned, Pdu}; 19 | 20 | use super::BackendClient; 21 | 22 | #[derive(Clone, Debug)] 23 | struct Stats { 24 | bytes_sent_total: Counter, 25 | requests_compression_fail: Counter, 26 | requests_fail: Counter, 27 | requests_in_flight: Gauge, 28 | requests_retry: Counter, 29 | requests_total: Counter, 30 | samples_parse_fail: Counter, 31 | samples_send_fail: Counter, 32 | samples_total: Counter, 33 | } 34 | 35 | impl Stats { 36 | fn new(scope: Scope) -> Self { 37 | Self { 38 | bytes_sent_total: scope.counter("bytes_sent_total").unwrap(), 39 | requests_compression_fail: scope.counter("requests_compression_fail").unwrap(), 40 | requests_fail: scope.counter("requests_fail").unwrap(), 41 | requests_in_flight: scope.gauge("requests_in_flight").unwrap(), 42 | requests_retry: scope.counter("requests_retry").unwrap(), 43 | requests_total: scope.counter("requests_total").unwrap(), 44 | samples_parse_fail: scope.counter("samples_parse_fail").unwrap(), 45 | samples_send_fail: scope.counter("samples_send_fail").unwrap(), 46 | samples_total: scope.counter("samples_total").unwrap(), 47 | } 48 | } 49 | } 50 | 51 | pub struct PromClient { 52 | endpoint: String, 53 | _inner: Arc>, 54 | sender: mpsc::Sender, 55 | } 56 | 57 | impl PromClient { 58 | /// Constructs a new [PromClient]. 59 | /// 60 | /// This method panics if a [reqwest::Client] cannot be initialized. 
61 | pub fn new( 62 | scope: stats::Scope, 63 | endpoint: &str, 64 | config: PromBackendClientConfig, 65 | shutdown_send: Option>, 66 | ) -> Self { 67 | let inner = Client::builder() 68 | .timeout(Duration::from_millis(config.request_timeout_ms)) 69 | .build() 70 | .expect("failed to build PromClient"); 71 | let client = ReqwestPromWriteRequestClient { 72 | inner, 73 | endpoint: String::from(endpoint), 74 | }; 75 | Self::new_with_client(scope, client, endpoint, config, shutdown_send) 76 | } 77 | 78 | fn new_with_client( 79 | scope: stats::Scope, 80 | client: T, 81 | endpoint: &str, 82 | config: PromBackendClientConfig, 83 | shutdown_send: Option>, 84 | ) -> Self { 85 | let client = Arc::new(client); 86 | let stats = Stats::new(scope); 87 | let (sender, pdu_rx) = mpsc::channel::(config.max_queue); 88 | let (abort_retries_tx, abort_retries_rx) = watch::channel(false); 89 | tokio::spawn(client_task( 90 | stats, 91 | client, 92 | pdu_rx, 93 | abort_retries_rx, 94 | config, 95 | shutdown_send, 96 | )); 97 | 98 | let inner = Arc::new(Mutex::new(PromClientInner { abort_retries_tx })); 99 | 100 | Self { 101 | endpoint: String::from(endpoint), 102 | _inner: inner, 103 | sender, 104 | } 105 | } 106 | } 107 | 108 | impl BackendClient for PromClient { 109 | type Error = mpsc::error::TrySendError; 110 | 111 | fn try_send(&self, pdu: Pdu) -> Result<(), Self::Error> { 112 | self.sender.try_send(pdu) 113 | } 114 | 115 | fn endpoint(&self) -> &str { 116 | self.endpoint.as_str() 117 | } 118 | } 119 | 120 | struct PromClientInner { 121 | abort_retries_tx: watch::Sender, 122 | } 123 | 124 | impl Drop for PromClientInner { 125 | fn drop(&mut self) { 126 | let _ = self.abort_retries_tx.send(true); 127 | } 128 | } 129 | 130 | /// A thin client wrapper used for mocking in tests 131 | #[async_trait] 132 | trait PromWriteRequestClient { 133 | async fn send_write_request( 134 | &self, 135 | compressed_write_request: bytes::Bytes, 136 | ) -> Result; 137 | } 138 | 139 | #[derive(Clone, Debug)] 140 | struct ReqwestPromWriteRequestClient { 141 | inner: Client, 142 | endpoint: String, 143 | } 144 | 145 | #[async_trait] 146 | impl PromWriteRequestClient for ReqwestPromWriteRequestClient { 147 | async fn send_write_request( 148 | &self, 149 | compressed_write_request: bytes::Bytes, 150 | ) -> Result { 151 | self.inner 152 | .post(self.endpoint.clone()) 153 | .header("Content-Type", "application/x-protobuf") 154 | .header("Content-Encoding", "snappy") 155 | .header("X-Prometheus-Remote-Write-Version", "0.1.0") 156 | .body(compressed_write_request) 157 | .send() 158 | .await 159 | } 160 | } 161 | 162 | async fn client_task( 163 | stats: Stats, 164 | client: Arc, 165 | mut pdu_rx: mpsc::Receiver, 166 | abort_retries_rx: watch::Receiver, 167 | config: PromBackendClientConfig, 168 | shutdown_send: Option>, 169 | ) { 170 | let semaphore = Arc::new(Semaphore::new(config.max_in_flight)); 171 | // The receiving end of the flush channel blocks until all spawned tasks have terminated. 
172 | let (flush_tx, mut flush_rx) = mpsc::channel::<()>(1); 173 | 174 | loop { 175 | let (batch, shutdown) = construct_batch( 176 | &mut pdu_rx, 177 | Duration::from_millis(config.batch_creation_timeout_ms), 178 | config.max_batch_size, 179 | ) 180 | .await; 181 | 182 | if !batch.is_empty() { 183 | let permit = semaphore.clone().acquire_owned().await.unwrap(); 184 | stats 185 | .requests_in_flight 186 | .set((config.max_in_flight - semaphore.available_permits()) as f64); 187 | 188 | let stats = stats.clone(); 189 | let client = client.clone(); 190 | let flush_tx = flush_tx.clone(); 191 | let abort_retries_rx = abort_retries_rx.clone(); 192 | 193 | tokio::spawn(async move { 194 | let samples_in_batch = batch.len() as f64; 195 | stats.requests_total.inc(); 196 | stats.samples_total.inc_by(samples_in_batch); 197 | 198 | let mut owned_batch: Vec = Vec::with_capacity(batch.len()); 199 | let mut failthrough_statsd_lines: Vec = Vec::new(); 200 | for pdu in batch { 201 | match Owned::try_from(&pdu) { 202 | Ok(owned) => owned_batch.push(owned), 203 | Err(e) => { 204 | info!("unparsable line ({:?}): {:?}", pdu.as_bytes(), e); 205 | stats.samples_parse_fail.inc(); 206 | if let Ok(line) = String::from_utf8(pdu.as_bytes().to_vec()) { 207 | failthrough_statsd_lines.push(line); 208 | } 209 | } 210 | } 211 | } 212 | 213 | let mut write_request = Owned::to_write_request(&owned_batch); 214 | write_request 215 | .failthrough_statsd_lines 216 | .append(&mut failthrough_statsd_lines); 217 | // N.B. According to the snap documentation, this compession should never fail for 218 | // our use case. 219 | let compressed_write_request = match compress_write_request(write_request) { 220 | Ok(c) => c, 221 | Err(e) => { 222 | stats.requests_compression_fail.inc(); 223 | error!( 224 | "failed to compress prometheus WriteRequest, dropping samples: {:?}", 225 | e 226 | ); 227 | return; 228 | } 229 | }; 230 | stats 231 | .bytes_sent_total 232 | .inc_by(compressed_write_request.len() as f64); 233 | 234 | let res = retry_notify( 235 | ExponentialBackoff::default(), 236 | || async { 237 | client 238 | .send_write_request(compressed_write_request.clone()) 239 | .await 240 | .map_err(|e| { 241 | if *abort_retries_rx.borrow() { 242 | return backoff::Error::permanent(e); 243 | } 244 | match should_retry(&e) { 245 | true => backoff::Error::transient(e), 246 | false => backoff::Error::permanent(e), 247 | } 248 | }) 249 | }, 250 | |_, _| { 251 | stats.requests_retry.inc(); 252 | }, 253 | ) 254 | .await; 255 | 256 | if let Err(e) = res { 257 | stats.requests_fail.inc(); 258 | stats.samples_send_fail.inc_by(samples_in_batch); 259 | warn!( 260 | "prometheus remote write request failed, dropping samples: {:?}", 261 | e 262 | ); 263 | } 264 | 265 | // Force move permit and flush_tx into this closure. 266 | drop(permit); 267 | drop(flush_tx); 268 | }); 269 | } 270 | 271 | if shutdown { 272 | // Wait for all spawned tasks to terminate. 273 | drop(flush_tx); 274 | let _ = flush_rx.recv().await; 275 | drop(shutdown_send); 276 | return; 277 | } 278 | } 279 | } 280 | 281 | async fn construct_batch( 282 | pdu_rx: &mut mpsc::Receiver, 283 | timeout: Duration, 284 | max_batch_size: usize, 285 | ) -> (Vec, bool) { 286 | let mut batch = Vec::::with_capacity(max_batch_size); 287 | let sleep = sleep(timeout); 288 | tokio::pin!(sleep); 289 | loop { 290 | select! 
{ 291 | pdu = pdu_rx.recv() => { 292 | if let Some(pdu) = pdu { 293 | batch.push(pdu); 294 | if batch.len() >= max_batch_size { 295 | return (batch, false); 296 | } 297 | } else { 298 | return (batch, true) 299 | } 300 | } 301 | _ = &mut sleep => return (batch, false) 302 | } 303 | } 304 | } 305 | 306 | fn compress_write_request(write_request: WriteRequest) -> Result { 307 | let proto_encoded = write_request.encode_to_vec(); 308 | let max_size = snap::raw::max_compress_len(proto_encoded.len()); 309 | let mut buf = bytes::BytesMut::new(); 310 | buf.resize(max_size, 0x0); 311 | // This compression shouldn't fail given our use case. An error is returned if 312 | // the total number of bytes to compress exceeds 2^32 - 1, but we limit the size 313 | // of the request. 314 | let acutal_len = snap::raw::Encoder::new().compress(&proto_encoded, &mut buf)?; 315 | buf.truncate(acutal_len); 316 | Ok(buf.freeze()) 317 | } 318 | 319 | fn should_retry(e: &reqwest::Error) -> bool { 320 | match e.status() { 321 | Some(s) => s.is_server_error(), 322 | None => false, 323 | } 324 | } 325 | 326 | #[cfg(test)] 327 | mod tests { 328 | use std::{cmp, io::Cursor}; 329 | 330 | use bytes::Bytes; 331 | use hyper::http::response::Builder; 332 | 333 | use crate::{stats::Collector, statsd_proto::test::compare_owned_pdu_vecs}; 334 | 335 | use super::*; 336 | 337 | pub mod arbitraries { 338 | use quickcheck::Arbitrary; 339 | 340 | use crate::config::PromBackendClientConfig; 341 | 342 | impl Arbitrary for PromBackendClientConfig { 343 | fn arbitrary(g: &mut quickcheck::Gen) -> Self { 344 | let max_batch_size = u8::arbitrary(g) as usize + 1; 345 | 346 | PromBackendClientConfig { 347 | max_queue: 1000, 348 | request_timeout_ms: 1000, 349 | batch_creation_timeout_ms: 1000, 350 | max_batch_size, 351 | max_in_flight: 1, 352 | } 353 | } 354 | } 355 | } 356 | 357 | #[derive(Clone, Default)] 358 | struct MockPromWriteRequestClient { 359 | status: hyper::StatusCode, 360 | requests: Arc>>, 361 | } 362 | 363 | impl MockPromWriteRequestClient { 364 | fn new(status: hyper::StatusCode) -> Self { 365 | MockPromWriteRequestClient { 366 | status, 367 | requests: Arc::default(), 368 | } 369 | } 370 | } 371 | 372 | #[async_trait] 373 | impl PromWriteRequestClient for MockPromWriteRequestClient { 374 | async fn send_write_request( 375 | &self, 376 | compressed_write_request: bytes::Bytes, 377 | ) -> Result { 378 | self.requests.lock().unwrap().push(compressed_write_request); 379 | let r: reqwest::Response = Builder::new() 380 | .status(self.status) 381 | .body("{}") 382 | .unwrap() 383 | .into(); 384 | r.error_for_status() 385 | } 386 | } 387 | 388 | fn decode_response(compressed_write_request: &bytes::Bytes) -> anyhow::Result { 389 | let decompressed = snap::raw::Decoder::new().decompress_vec(compressed_write_request)?; 390 | let r = WriteRequest::decode(Cursor::new(decompressed))?; 391 | Ok(r) 392 | } 393 | 394 | #[quickcheck_async::tokio] 395 | async fn prom_client_no_failures( 396 | config: PromBackendClientConfig, 397 | input: Vec, 398 | ) -> anyhow::Result<()> { 399 | let mock_client = MockPromWriteRequestClient::new(hyper::StatusCode::OK); 400 | let scope = Collector::default().scope("stats"); 401 | let endpoint = "endpoint"; 402 | let (shutdown_send, mut shutdown_recv) = mpsc::channel::<()>(1); 403 | 404 | let prom_client = PromClient::new_with_client( 405 | scope, 406 | mock_client.clone(), 407 | endpoint, 408 | config.clone(), 409 | Some(shutdown_send), 410 | ); 411 | 412 | let pdus: Vec = input.iter().map(Pdu::from).collect(); 413 
| for pdu in pdus { 414 | prom_client.try_send(pdu)?; 415 | } 416 | 417 | drop(prom_client); 418 | shutdown_recv.recv().await; 419 | 420 | let requests = mock_client.requests.lock().unwrap(); 421 | 422 | if requests.len() != (input.len() as f64 / config.max_batch_size as f64).ceil() as usize { 423 | return Err(anyhow::anyhow!( 424 | "unexpected number of requests: {:?}", 425 | requests 426 | )); 427 | } 428 | 429 | for (i, request) in requests.iter().enumerate() { 430 | let write_request = decode_response(request)?; 431 | let actual = Owned::try_from_write_request(&write_request)?; 432 | 433 | let start_idx = i * config.max_batch_size; 434 | let end_idx = cmp::min(start_idx + config.max_batch_size, input.len()); 435 | 436 | let expected = &input[start_idx..end_idx]; 437 | 438 | compare_owned_pdu_vecs(expected, &actual)?; 439 | } 440 | 441 | Ok(()) 442 | } 443 | 444 | #[tokio::test] 445 | async fn prom_client_abort_retries() { 446 | let config: PromBackendClientConfig = PromBackendClientConfig { 447 | max_queue: 10000, 448 | request_timeout_ms: 10000, 449 | batch_creation_timeout_ms: 500, 450 | max_batch_size: 5, 451 | max_in_flight: 1, 452 | }; 453 | let mock_client = MockPromWriteRequestClient::new(hyper::StatusCode::INTERNAL_SERVER_ERROR); 454 | let scope = Collector::default().scope("stats"); 455 | let endpoint = "endpoint"; 456 | let (shutdown_send, mut shutdown_recv) = mpsc::channel::<()>(1); 457 | 458 | let prom_client = PromClient::new_with_client( 459 | scope, 460 | mock_client.clone(), 461 | endpoint, 462 | config.clone(), 463 | Some(shutdown_send), 464 | ); 465 | 466 | let pdu = Pdu::parse(Bytes::from_static(b"foo.bar:3|c|@1.0|#tags")).unwrap(); 467 | 468 | prom_client.try_send(pdu).expect("failed to send pdu"); 469 | 470 | drop(prom_client); 471 | 472 | let sleep = sleep(Duration::from_secs(1)); 473 | tokio::pin!(sleep); 474 | tokio::select! { 475 | // shutdown_recv.recv() should return almost immediately if we successfully abort retries. 
476 | _ = shutdown_recv.recv() => {} 477 | _ = &mut sleep => { 478 | panic!("client did not shutdown") 479 | } 480 | } 481 | } 482 | } 483 | -------------------------------------------------------------------------------- /statsrelay/src/backend_client/statsd_client.rs: -------------------------------------------------------------------------------- 1 | use bytes::{BufMut, Bytes, BytesMut}; 2 | use memchr::memchr; 3 | use stream_cancel::{Trigger, Tripwire}; 4 | use tokio::io::AsyncWriteExt; 5 | use tokio::net::TcpStream; 6 | use tokio::select; 7 | use tokio::sync::mpsc; 8 | use tokio::time::{sleep, timeout}; 9 | 10 | use std::sync::Arc; 11 | use std::sync::Mutex; 12 | use std::time::Duration; 13 | 14 | use crate::stats; 15 | use crate::statsd_proto::Pdu; 16 | 17 | use crate::config::StatsdBackendClientConfig; 18 | use log::{debug, info, warn}; 19 | use tokio::sync::mpsc::error::TryRecvError; 20 | use tokio::time::error::Elapsed; 21 | 22 | use super::BackendClient; 23 | 24 | pub struct StatsdClient { 25 | sender: mpsc::Sender, 26 | endpoint: String, 27 | inner: Arc>, 28 | } 29 | 30 | struct StatsdClientInner { 31 | _sender: mpsc::Sender, 32 | _trig: Trigger, 33 | } 34 | 35 | const RECONNECT_DELAY: Duration = Duration::from_secs(5); 36 | const CONNECT_TIMEOUT: Duration = Duration::from_secs(15); 37 | const BUFFER_FILL_WAIT: Duration = Duration::from_millis(50); 38 | const TCP_IDLE_DISCONNECT: Duration = Duration::from_secs(60); 39 | 40 | // SEND_THRESHOLD should correspond with the statsd_server.READ_BUFFER = 8KiB 41 | // We set the threshold around 1 metric worth of bytes (256) lower because 42 | // the receiver waits to exceed the threshold before sending rather than 43 | // cutting a new buffer for the metric that exceeds the threshold 44 | const SEND_THRESHOLD: usize = 8192 - 256; 45 | const INITIAL_BUF_CAPACITY: usize = SEND_THRESHOLD + 1024; 46 | 47 | impl StatsdClient { 48 | pub fn new( 49 | stats: stats::Scope, 50 | endpoint: &str, 51 | shutdown_send: Option>, 52 | config: StatsdBackendClientConfig, 53 | ) -> Self { 54 | // Currently, we need this tripwire to abort connection looping. This can probably be refactored 55 | let (trig, trip) = Tripwire::new(); 56 | let (sender_chan, recv_chan) = mpsc::channel::(config.max_queue); 57 | 58 | let eps = String::from(endpoint); 59 | let (ticker_sender, ticker_recv) = mpsc::channel::(1); 60 | tokio::spawn(ticker(eps.clone(), ticker_sender)); 61 | tokio::spawn(client_task( 62 | stats, 63 | eps.clone(), 64 | trip, 65 | recv_chan, 66 | ticker_recv, 67 | shutdown_send, 68 | config.trim_on_disconnect, 69 | )); 70 | let inner = StatsdClientInner { 71 | _sender: sender_chan.clone(), 72 | _trig: trig, 73 | }; 74 | 75 | StatsdClient { 76 | endpoint: eps, 77 | inner: Arc::new(Mutex::new(inner)), 78 | sender: sender_chan, 79 | } 80 | } 81 | } 82 | 83 | impl BackendClient for StatsdClient { 84 | type Error = mpsc::error::TrySendError; 85 | 86 | fn try_send(&self, pdu: Pdu) -> Result<(), Self::Error> { 87 | self.sender.try_send(pdu) 88 | } 89 | 90 | fn endpoint(&self) -> &str { 91 | self.endpoint.as_str() 92 | } 93 | } 94 | 95 | impl Clone for StatsdClient { 96 | fn clone(&self) -> Self { 97 | StatsdClient { 98 | endpoint: self.endpoint.clone(), 99 | inner: self.inner.clone(), 100 | sender: self.sender.clone(), 101 | } 102 | } 103 | } 104 | 105 | /// Repeatedly try to form a connection to and endpoint with backoff. If the 106 | /// tripwire is set, this function will then abort and return none. 
107 | async fn form_connection( 108 | stats: stats::Scope, 109 | endpoint: &str, 110 | mut connect_tripwire: Tripwire, 111 | ) -> Option { 112 | let connections_made = stats.counter("connections_made").unwrap(); 113 | let connections_failed = stats.counter("connections_failed").unwrap(); 114 | loop { 115 | let connect_attempt = timeout(CONNECT_TIMEOUT, TcpStream::connect(endpoint)); 116 | 117 | let stream = match select!( 118 | connect = connect_attempt => connect, 119 | _ = (&mut connect_tripwire) => { 120 | debug!("aborting connection attempts to {:?}", endpoint); 121 | return None; 122 | }, 123 | ) { 124 | Err(_e) => { 125 | warn!("connect timeout to {:?}", endpoint); 126 | connections_failed.inc(); 127 | tokio::time::sleep(RECONNECT_DELAY).await; 128 | continue; 129 | } 130 | Ok(Err(e)) => { 131 | warn!("connect error to {:?} error {:?}", endpoint, e); 132 | connections_failed.inc(); 133 | tokio::time::sleep(RECONNECT_DELAY).await; 134 | continue; 135 | } 136 | Ok(Ok(s)) => { 137 | info!("statsd client connect {:?}", endpoint); 138 | s 139 | } 140 | }; 141 | connections_made.inc(); 142 | return Some(stream); 143 | } 144 | } 145 | 146 | // Since statsd has no notion of when a message is actually received, we have to 147 | // assume a buffer write is incomplete and just drop it here. This simply 148 | // advances to the next newline in the buffer if found. 149 | fn trim_to_next_newline(buf: &mut Bytes) { 150 | match memchr(b'\n', buf) { 151 | None => (), 152 | Some(pos) => { 153 | let _b = buf.split_to(pos + 1); 154 | } 155 | } 156 | } 157 | 158 | async fn client_sender( 159 | stats: stats::Scope, 160 | endpoint: String, 161 | connect_tripwire: Tripwire, 162 | mut recv: mpsc::Receiver, 163 | trim_on_disconnect: bool, 164 | ) { 165 | let bytes_sent = stats.counter("bytes_sent").unwrap(); 166 | let connections_aborted = stats.counter("connections_aborted").unwrap(); 167 | let send_error = stats.counter("send_error").unwrap(); 168 | 169 | let first_connect_tripwire = connect_tripwire.clone(); 170 | let mut lazy_connect: Option = 171 | form_connection(stats.clone(), endpoint.as_str(), first_connect_tripwire).await; 172 | 173 | loop { 174 | // We have a connection but this could get stale before we receive data. 175 | // First check if there's data ready and if not, give time for it to appear 176 | // before disconnecting the current connection so the next recv() forces a reconnect. 177 | // This shouldn't be necessary but there's an issue with some of the buffer 178 | // going missing when we try to write to a stale connection and get a broken pipe. 179 | // Ultimately we should use a tcp keepalive here. 180 | // See: https://users.rust-lang.org/t/tcpstream-write-silently-loses-one-message/38206 181 | let mut buf = match recv.try_recv() { 182 | Ok(b) => b, 183 | Err(TryRecvError::Empty) => { 184 | // We could add a branch to recv() without a timeout here 185 | // if the connection is already closed but the code gets messy. 186 | // This will just recreate the timer every ~10s instead. 
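// The idle window used below is TCP_IDLE_DISCONNECT; when it elapses with no
// data we drop the current connection (lazy_connect = None) and loop, so the
// next buffer that arrives lazily re-forms a connection.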
187 | match timeout(TCP_IDLE_DISCONNECT, recv.recv()).await { 188 | Ok(Some(b)) => b, 189 | Ok(None) => { 190 | debug!("recv() is None - sender task {} exiting", endpoint); 191 | return; 192 | } 193 | Err(_) => { 194 | lazy_connect = None; 195 | continue; 196 | } 197 | } 198 | } 199 | Err(TryRecvError::Disconnected) => { 200 | debug!("recv disconnect - sender task {} exiting", endpoint); 201 | return; 202 | } 203 | }; 204 | 205 | loop { 206 | // keep flushing to the network until buffer is empty 207 | if buf.is_empty() { 208 | break; 209 | } 210 | let connect = match lazy_connect.as_mut() { 211 | None => { 212 | let reconnect_tripwire = connect_tripwire.clone(); 213 | lazy_connect = 214 | form_connection(stats.clone(), endpoint.as_str(), reconnect_tripwire).await; 215 | if lazy_connect.is_none() { 216 | // Early check to see if the tripwire is set and bail 217 | debug!("sender task {} exiting", endpoint); 218 | return; 219 | } 220 | lazy_connect.as_mut().unwrap() 221 | } 222 | Some(c) => c, 223 | }; 224 | // Write the buffer until success 225 | let result = connect.write_buf(&mut buf).await; 226 | match result { 227 | Ok(0) if !buf.is_empty() => { 228 | // Write 0 error, abort the connection and try again 229 | lazy_connect = None; 230 | 231 | if trim_on_disconnect { 232 | trim_to_next_newline(&mut buf); 233 | } 234 | connections_aborted.inc(); 235 | continue; 236 | } 237 | Ok(bytes) if buf.is_empty() => { 238 | bytes_sent.inc_by(bytes as f64); 239 | drop(buf); 240 | break; 241 | } 242 | Ok(bytes) => { 243 | bytes_sent.inc_by(bytes as f64); 244 | continue; 245 | } 246 | Err(e) => { 247 | warn!( 248 | "write error {} - {:?}, reforming a connection with this buffer", 249 | endpoint, e 250 | ); 251 | if trim_on_disconnect { 252 | trim_to_next_newline(&mut buf); 253 | } 254 | lazy_connect = None; 255 | send_error.inc(); 256 | continue; 257 | } 258 | }; 259 | } 260 | } 261 | } 262 | 263 | /// 264 | /// Ticker is responsible for making sure the statsd channel emits a payload at 265 | /// a particular rate (allowing for write combining). Due to an issue with 266 | /// non-async mpsc try_send being used to trigger the primary sender queue, the 267 | /// ticker is needed as opposed to a timeout() wrapper over a queue.recv, which 268 | /// does not reliably get woken by try_send. The upside of this we also form one 269 | /// less short lived timer, not that its really a major advantage. 
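/// The ticker simply sleeps for `BUFFER_FILL_WAIT` between sends and exits
/// once the receiving side of the channel has gone away.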
270 | async fn ticker(endpoint: String, sender: mpsc::Sender) { 271 | loop { 272 | sleep(BUFFER_FILL_WAIT).await; 273 | if sender.send(true).await.is_err() { 274 | debug!("ticker task {} exiting", endpoint); 275 | return; 276 | } 277 | } 278 | } 279 | 280 | /// Receive metrics until the buffer fills up or timeout happens 281 | /// then send the buffer over the network with client_sender above 282 | async fn client_task( 283 | stats: stats::Scope, 284 | endpoint: String, 285 | connect_tripwire: Tripwire, 286 | mut recv: mpsc::Receiver, 287 | mut ticker_recv: mpsc::Receiver, 288 | shutdown_send: Option>, 289 | trim_on_disconnect: bool, 290 | ) { 291 | let partial_buffer_send = stats.counter("partial_buffer_send").unwrap(); 292 | let messages_queued = stats.counter("messages_queued").unwrap(); 293 | 294 | let mut buf = BytesMut::with_capacity(INITIAL_BUF_CAPACITY); 295 | let (buf_sender, buf_recv) = mpsc::channel(10); 296 | let sender_join = tokio::spawn(client_sender( 297 | stats, 298 | endpoint.clone(), 299 | connect_tripwire, 300 | buf_recv, 301 | trim_on_disconnect, 302 | )); 303 | 304 | loop { 305 | let (pdu, timeout) = select! { 306 | p = recv.recv() => (p, false), 307 | _ = ticker_recv.recv() => (None, true), 308 | }; 309 | 310 | match (pdu, timeout) { 311 | (Some(pdu), _) => { 312 | let pdu_bytes = pdu.as_bytes(); 313 | if buf.remaining_mut() < pdu_bytes.len() { 314 | buf.reserve(pdu_bytes.len() + 10); 315 | } 316 | buf.put(pdu_bytes); 317 | buf.put(b"\n".as_ref()); 318 | messages_queued.inc(); 319 | if buf.len() < SEND_THRESHOLD { 320 | continue; // Do not send yet 321 | } 322 | } 323 | (None, false) => { 324 | if buf.is_empty() { 325 | // No more queue, no more bytes, exit 326 | drop(buf_sender); 327 | let _ = sender_join.await; 328 | debug!("client task {} exiting", endpoint); 329 | drop(shutdown_send); 330 | return; 331 | } 332 | } 333 | (None, true) if buf.is_empty() => { 334 | continue; 335 | } 336 | (None, true) => { 337 | partial_buffer_send.inc(); 338 | // Timeout! 
Just go ahead and send whats in the buf now 339 | } 340 | }; 341 | 342 | // Flush the buffer to the mpsc channel for the client_sender to send over the network 343 | if let Err(send_error) = buf_sender.send(buf.freeze()).await { 344 | debug!("client task {} exiting", endpoint); 345 | warn!("error flushing buffer to client_sender {}", send_error); 346 | let _ = sender_join.await; 347 | drop(shutdown_send); 348 | return; 349 | } 350 | buf = BytesMut::with_capacity(INITIAL_BUF_CAPACITY); 351 | } 352 | } 353 | -------------------------------------------------------------------------------- /statsrelay/src/backends.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | use std::sync::Arc; 3 | 4 | use parking_lot::RwLock; 5 | use stream_cancel::Tripwire; 6 | use thiserror::Error; 7 | use tokio::sync::mpsc; 8 | 9 | use crate::backend::Backend; 10 | use crate::discovery; 11 | use crate::stats; 12 | use crate::statsd_proto::Event; 13 | use crate::{config, processors}; 14 | 15 | #[derive(Error, Debug)] 16 | pub enum BackendError { 17 | #[error("Index not valid for backend {0}")] 18 | InvalidIndex(usize), 19 | } 20 | 21 | struct BackendsInner { 22 | backends: HashMap, 23 | processors: HashMap>, 24 | stats: stats::Scope, 25 | shutdown_recv: Option>, 26 | shutdown_send: Option>, 27 | } 28 | 29 | impl BackendsInner { 30 | fn new(stats: stats::Scope) -> Self { 31 | let (shutdown_send, shutdown_recv) = mpsc::channel::<()>(1); 32 | 33 | BackendsInner { 34 | backends: HashMap::new(), 35 | processors: HashMap::new(), 36 | stats, 37 | shutdown_recv: Some(shutdown_recv), 38 | shutdown_send: Some(shutdown_send), 39 | } 40 | } 41 | 42 | fn replace_processor( 43 | &mut self, 44 | name: &str, 45 | processor: Box, 46 | ) -> anyhow::Result<()> { 47 | self.processors.insert(name.to_owned(), processor); 48 | Ok(()) 49 | } 50 | 51 | fn replace_backend( 52 | &mut self, 53 | name: &str, 54 | c: &config::BackendConfig, 55 | discovery_update: Option<&discovery::Update>, 56 | ) -> anyhow::Result<()> { 57 | let previous = self.backends.get(name); 58 | let backend = Backend::new( 59 | self.stats.scope(name), 60 | c, 61 | previous, 62 | discovery_update, 63 | self.shutdown_send.clone(), 64 | )?; 65 | self.backends.insert(name.to_owned(), backend); 66 | Ok(()) 67 | } 68 | 69 | fn len(&self) -> usize { 70 | self.backends.len() 71 | } 72 | 73 | fn remove_backend(&mut self, name: &str) -> anyhow::Result<()> { 74 | self.backends.remove(name); 75 | // With the backend removed should we ensure it's flushed? 76 | Ok(()) 77 | } 78 | 79 | fn backend_names(&self) -> HashSet<&String> { 80 | self.backends.keys().collect() 81 | } 82 | 83 | fn provide_statsd(&self, pdu: &Event, route: &[config::Route]) { 84 | for dest in route { 85 | match dest.route_type { 86 | config::RouteType::Statsd => { 87 | if let Some(backend) = self.backends.get(dest.route_to.as_str()) { 88 | backend.provide_statsd(pdu) 89 | } 90 | } 91 | config::RouteType::Processor => { 92 | if let Some(chain) = self 93 | .processors 94 | .get(dest.route_to.as_str()) 95 | .and_then(|proc| proc.provide_statsd(pdu)) 96 | { 97 | match chain.new_events { 98 | None => self.provide_statsd(pdu, chain.route), 99 | Some(sv) => { 100 | for pdu in sv.as_ref() { 101 | self.provide_statsd(pdu, chain.route); 102 | } 103 | } 104 | } 105 | } 106 | } 107 | } 108 | } 109 | } 110 | 111 | /// Provide a periodic "tick" function to drive processors background 112 | /// housekeeping tasks asynchronously. 
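/// This is driven by the `ticker` task later in this module, which invokes
/// `Backends::processor_tick` roughly once per second on a blocking worker
/// thread with the current `SystemTime`.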
113 | fn processor_tick(&self, now: std::time::SystemTime, backends: &Backends) { 114 | for (_, proc) in self.processors.iter() { 115 | proc.tick(now, backends); 116 | } 117 | } 118 | 119 | /// Perform a flush operation on the processors, forcing an immediate state 120 | /// transfer. Each processor is flushed n_processors times to ensure no 121 | /// processor maintains state in a cascading pipeline. 122 | fn flush(&self, backends: &Backends) { 123 | for _i in 0..self.processors.len() { 124 | for (_, proc) in self.processors.iter() { 125 | proc.flush(backends); 126 | } 127 | } 128 | } 129 | 130 | /// Drain all backends, and return a receiver to ensure they complete 131 | fn drain_backends(&mut self) -> Option> { 132 | self.backends.clear(); 133 | drop(self.shutdown_send.take()); 134 | self.shutdown_recv.take() 135 | } 136 | } 137 | 138 | /// 139 | /// Backends provides a cloneable container for various protocol backends, 140 | /// handling logic like sharding, sampling, and other detectors. 141 | /// 142 | #[derive(Clone)] 143 | pub struct Backends { 144 | inner: Arc>, 145 | } 146 | 147 | impl Backends { 148 | pub fn new(stats: stats::Scope) -> Self { 149 | Backends { 150 | inner: Arc::new(RwLock::new(BackendsInner::new(stats))), 151 | } 152 | } 153 | 154 | pub fn replace_processor( 155 | &self, 156 | name: &str, 157 | processor: Box, 158 | ) -> anyhow::Result<()> { 159 | self.inner.write().replace_processor(name, processor) 160 | } 161 | 162 | pub fn replace_backend( 163 | &self, 164 | name: &str, 165 | c: &config::BackendConfig, 166 | discovery_update: Option<&discovery::Update>, 167 | ) -> anyhow::Result<()> { 168 | self.inner 169 | .write() 170 | .replace_backend(name, c, discovery_update) 171 | } 172 | 173 | pub fn remove_backend(&self, name: &str) -> anyhow::Result<()> { 174 | self.inner.write().remove_backend(name) 175 | } 176 | 177 | pub fn backend_names(&self) -> HashSet { 178 | self.inner 179 | .read() 180 | .backend_names() 181 | .iter() 182 | .map(|s| (*s).clone()) 183 | .collect() 184 | } 185 | 186 | pub fn len(&self) -> usize { 187 | self.inner.read().len() 188 | } 189 | 190 | pub fn is_empty(&self) -> bool { 191 | self.len() == 0 192 | } 193 | 194 | pub fn provide_statsd(&self, pdu: &Event, route: &[config::Route]) { 195 | self.inner.read().provide_statsd(pdu, route) 196 | } 197 | 198 | pub fn provide_statsd_slice(&self, pdu: &[Event], route: &[config::Route]) { 199 | let lock = self.inner.read(); 200 | for p in pdu { 201 | lock.provide_statsd(p, route); 202 | } 203 | } 204 | 205 | pub fn processor_tick(&self, now: std::time::SystemTime) { 206 | self.inner.read().processor_tick(now, self); 207 | } 208 | 209 | pub fn flush(&self) { 210 | self.inner.read().flush(self); 211 | } 212 | 213 | pub fn drain_backends(&mut self) -> Option> { 214 | self.inner.write().drain_backends() 215 | } 216 | } 217 | 218 | pub async fn ticker(tripwire: Tripwire, backends: Backends) { 219 | let mut ticker = tokio::time::interval_at( 220 | tokio::time::Instant::now(), 221 | tokio::time::Duration::from_secs(1), 222 | ); 223 | loop { 224 | tokio::select! 
{ 225 | _ = tripwire.clone() => { return; } 226 | _ = ticker.tick() => { 227 | let back = backends.clone(); 228 | tokio::task::spawn_blocking(move || { 229 | back.processor_tick(std::time::SystemTime::now()) 230 | }).await.unwrap(); 231 | } 232 | } 233 | } 234 | } 235 | 236 | #[cfg(test)] 237 | pub mod test { 238 | 239 | use super::*; 240 | use crate::processors::{self, Processor}; 241 | use crate::statsd_proto; 242 | use crate::statsd_proto::Parsed; 243 | 244 | use std::convert::TryInto; 245 | use std::sync::atomic::AtomicU32; 246 | use std::sync::atomic::Ordering; 247 | use std::sync::Arc; 248 | 249 | struct AssertProc 250 | where 251 | T: Fn(&Event), 252 | { 253 | proc: T, 254 | count: Arc, 255 | } 256 | 257 | impl processors::Processor for AssertProc { 258 | fn provide_statsd(&self, sample: &Event) -> Option { 259 | (self.proc)(sample); 260 | self.count.fetch_add(1, Ordering::Acquire); 261 | None 262 | } 263 | } 264 | 265 | #[test] 266 | fn simple_nil_backend() { 267 | let scope = crate::stats::Collector::default().scope("prefix"); 268 | let _backend = Backends::new(scope); 269 | } 270 | 271 | fn make_counting_mock() -> (Arc, Box) { 272 | let counter = Arc::new(AtomicU32::new(0)); 273 | let proc = Box::new(AssertProc { 274 | proc: |_| {}, 275 | count: counter.clone(), 276 | }); 277 | (counter, proc) 278 | } 279 | 280 | fn make_asserting_mock( 281 | t: T, 282 | ) -> (Arc, Box) { 283 | let counter = Arc::new(AtomicU32::new(0)); 284 | let proc = Box::new(AssertProc { 285 | proc: t, 286 | count: counter.clone(), 287 | }); 288 | (counter, proc) 289 | } 290 | 291 | fn insert_proc(backend: &Backends, name: &str, proc: Box) { 292 | backend 293 | .inner 294 | .write() 295 | .processors 296 | .insert(name.to_owned(), proc); 297 | } 298 | 299 | #[test] 300 | fn processor_tag_test() { 301 | // Create the backend 302 | let scope = crate::stats::Collector::default().scope("prefix"); 303 | let backend = Backends::new(scope); 304 | 305 | // Create a mock processor to receive all messages 306 | let route_final = vec![config::Route { 307 | route_type: config::RouteType::Processor, 308 | route_to: "final".to_owned(), 309 | }]; 310 | let (counter, proc) = make_asserting_mock(|sample| { 311 | let owned: statsd_proto::Owned = sample.try_into().unwrap(); 312 | assert_eq!(owned.name(), b"foo.bar.__tags=value"); 313 | }); 314 | 315 | // Insert the assert processors 316 | insert_proc(&backend, "final", proc); 317 | 318 | // Create the processor under test 319 | let tn = processors::tag::Normalizer::new(&route_final); 320 | insert_proc(&backend, "tag", Box::new(tn)); 321 | 322 | let pdu = 323 | statsd_proto::Pdu::parse(bytes::Bytes::from_static(b"foo.bar:3|c|#tags:value|@1.0")) 324 | .unwrap(); 325 | let route = vec![config::Route { 326 | route_type: config::RouteType::Processor, 327 | route_to: "tag".to_owned(), 328 | }]; 329 | backend.provide_statsd(&Event::Pdu(pdu), &route); 330 | 331 | // Check how many messages the mock has received 332 | let actual_count = counter.load(Ordering::Acquire); 333 | assert_eq!(1, actual_count); 334 | } 335 | 336 | #[test] 337 | fn processor_fanout_test() { 338 | // Create the backend 339 | let scope = crate::stats::Collector::default().scope("prefix"); 340 | let backend = Backends::new(scope); 341 | 342 | // Create a mock processor to receive all messages, 2x over 343 | let route_final = vec![ 344 | config::Route { 345 | route_type: config::RouteType::Processor, 346 | route_to: "final1".to_owned(), 347 | }, 348 | config::Route { 349 | route_type: config::RouteType::Processor, 
350 | route_to: "final2".to_owned(), 351 | }, 352 | ]; 353 | let (counter1, proc1) = make_counting_mock(); 354 | let (counter2, proc2) = make_counting_mock(); 355 | 356 | // Insert the assert processors 357 | insert_proc(&backend, "final1", proc1); 358 | insert_proc(&backend, "final2", proc2); 359 | 360 | // Create the processor under test 361 | let tn = processors::tag::Normalizer::new(&route_final); 362 | insert_proc(&backend, "tag", Box::new(tn)); 363 | 364 | let pdu = 365 | statsd_proto::Pdu::parse(bytes::Bytes::from_static(b"foo.bar:3|c|#tags:value|@1.0")) 366 | .unwrap(); 367 | let route = vec![config::Route { 368 | route_type: config::RouteType::Processor, 369 | route_to: "tag".to_owned(), 370 | }]; 371 | backend.provide_statsd(&Event::Pdu(pdu), &route); 372 | 373 | // Check how many messages the mock has received 374 | let actual_count = counter1.load(Ordering::Acquire); 375 | assert_eq!(1, actual_count); 376 | let actual_count2 = counter2.load(Ordering::Acquire); 377 | assert_eq!(1, actual_count2); 378 | } 379 | } 380 | -------------------------------------------------------------------------------- /statsrelay/src/cmd/loadgen.rs: -------------------------------------------------------------------------------- 1 | use bytes::{BufMut, BytesMut}; 2 | use chrono::prelude::*; 3 | use std::time::Duration; 4 | use structopt::StructOpt; 5 | use tokio::io::AsyncWriteExt; 6 | use tokio::net::TcpStream; 7 | 8 | const PRINT_INTERVAL: u64 = 100000; 9 | 10 | #[derive(StructOpt, Debug)] 11 | struct Options { 12 | #[structopt(short = "e", long = "--endpoint", default_value = "localhost:8129")] 13 | pub endpoint: String, 14 | } 15 | 16 | #[tokio::main] 17 | async fn main() { 18 | let options = Options::from_args(); 19 | let mut stream = TcpStream::connect(options.endpoint).await.unwrap(); 20 | let mut buf = BytesMut::with_capacity(131072); 21 | let mut counter = 0_u64; 22 | let mut last_time = Local::now(); 23 | loop { 24 | for _ in 0..1 { 25 | buf.put( 26 | format!( 27 | "hello.hello.hello.hello.hello.hello.hello.hello.hello:{}|c\n", 28 | counter 29 | ) 30 | .as_bytes() 31 | .as_ref(), 32 | ); 33 | } 34 | stream.write_buf(&mut buf).await.unwrap(); 35 | counter += 1; 36 | 37 | if counter % PRINT_INTERVAL == 0 { 38 | let now_time = Local::now(); 39 | let diff = now_time - last_time; 40 | last_time = now_time; 41 | println!( 42 | "{}: sent {:15} lines in {:5}ms ({:.0} l/s)", 43 | now_time.format("%H:%M:%S"), 44 | counter, 45 | diff.num_milliseconds(), 46 | PRINT_INTERVAL as f64 / (diff.num_milliseconds() as f64 / 1000.0) 47 | ); 48 | tokio::time::sleep(Duration::from_millis(40)).await; 49 | }; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /statsrelay/src/cmd/statsrelay.rs: -------------------------------------------------------------------------------- 1 | extern crate jemallocator; 2 | 3 | #[global_allocator] 4 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; 5 | 6 | use anyhow::Context; 7 | use futures::StreamExt; 8 | use futures::{stream::FuturesUnordered, FutureExt}; 9 | use stream_cancel::Tripwire; 10 | use structopt::StructOpt; 11 | 12 | use std::collections::HashMap; 13 | use std::collections::HashSet; 14 | 15 | use tokio::runtime; 16 | use tokio::select; 17 | use tokio::signal::unix::{signal, SignalKind}; 18 | use tokio::time::{sleep_until, Duration, Instant}; 19 | 20 | use env_logger::Env; 21 | use log::{debug, error, info, warn}; 22 | 23 | use statsrelay::config; 24 | use statsrelay::discovery; 25 | use 
statsrelay::processors; 26 | use statsrelay::stats; 27 | use statsrelay::statsd_server; 28 | use statsrelay::{admin, config::Config}; 29 | use statsrelay::{backends, stats::Scope}; 30 | 31 | #[derive(StructOpt, Debug, Clone)] 32 | struct Options { 33 | #[structopt(short = "c", long = "--config", default_value = "/etc/statsrelay.json")] 34 | pub config: String, 35 | 36 | #[structopt(long = "--config-check-and-exit")] 37 | pub config_check: bool, 38 | 39 | #[structopt(long = "--shutdown-delay", default_value = "0")] 40 | pub shutdown_delay: u32, 41 | 42 | #[structopt(long = "--shutdown-wait", default_value = "5")] 43 | pub shutdown_wait: u32, 44 | 45 | #[structopt(short = "t", long = "--threaded")] 46 | pub threaded: bool, 47 | 48 | #[structopt(long = "--version")] 49 | pub version: bool, 50 | } 51 | 52 | /// The main server invocation, for a given configuration, options and stats 53 | /// scope. The server will spawn any listeners, initialize a backend 54 | /// configuration update loop, as well as register signal handlers. 55 | async fn server(scope: stats::Scope, config: Config, opts: Options) { 56 | let backend_reloads = scope.counter("backend_reloads").unwrap(); 57 | let config_load_failures = scope.counter("backend_reloads_failure").unwrap(); 58 | let mut backends = backends::Backends::new(scope.scope("backends")); 59 | 60 | // Load processors 61 | if let Some(processors) = config.processors.as_ref() { 62 | load_processors(scope.scope("processors"), &backends, processors) 63 | .await 64 | .unwrap(); 65 | } 66 | 67 | // Build a pair of unrelated tripwires, which can shut down the input and 68 | // output portions of the stack at different times. 69 | let (server_cancel, server_tripwire) = Tripwire::new(); 70 | let (backend_ticker_cancel, backend_ticker_tripwire) = Tripwire::new(); 71 | 72 | let mut servers: FuturesUnordered<_> = config 73 | .statsd 74 | .servers 75 | .iter() 76 | .map({ 77 | |(server_name, server_config)| { 78 | let name = server_name.clone(); 79 | statsd_server::run( 80 | scope.scope("statsd_server").scope(server_name), 81 | server_tripwire.clone(), 82 | server_config.clone(), 83 | backends.clone(), 84 | ) 85 | .map(|_| name) 86 | } 87 | }) 88 | .collect(); 89 | 90 | // Trap ctrl+c and sigterm messages and perform a clean shutdown 91 | let mut sigint = signal(SignalKind::interrupt()).unwrap(); 92 | let mut sigterm = signal(SignalKind::terminate()).unwrap(); 93 | let shutdown_delay = opts.shutdown_delay; 94 | tokio::spawn(async move { 95 | select! { 96 | _ = sigint.recv() => info!("received sigint"), 97 | _ = sigterm.recv() => info!("received sigterm"), 98 | } 99 | if shutdown_delay > 0 { 100 | info!( 101 | "waiting {} seconds before shutting down (--shutdown-delay set", 102 | shutdown_delay 103 | ); 104 | tokio::time::sleep(Duration::from_secs(shutdown_delay as u64)).await; 105 | } 106 | // We only want to abort the server/input side of the system, let it 107 | // drain, then work on shutting down the output side. Cancelling the 108 | // server will complete the server future, which then allow the runtime 109 | // loop to proceed with an orderly shutdown. 110 | server_cancel.cancel(); 111 | }); 112 | 113 | // Trap sighup to support manual file reloading 114 | let mut sighup = signal(SignalKind::hangup()).unwrap(); 115 | // This task is designed to asynchronously build backend configurations, 116 | // which may in turn come from other data sources or discovery sources. 
117 | // This inherently races with bringing up servers, to the point where a 118 | // server may not have any backends to dispatch to yet, if discovery is 119 | // very slow. This is the intended state, as configuration of processors 120 | // and any buffers should have already been performed. 121 | // 122 | // SIGHUP will attempt to reload backend configurations as well as any 123 | // discovery changes. 124 | let discovery_backends = backends.clone(); 125 | let options_reload = opts.clone(); 126 | tokio::spawn(async move { 127 | let mut last_config = config.clone(); 128 | let dconfig = config.discovery.unwrap_or_default(); 129 | let discovery_cache = discovery::Cache::new(); 130 | let mut discovery_stream = 131 | discovery::reflector(discovery_cache.clone(), discovery::as_stream(&dconfig)); 132 | loop { 133 | info!("loading configuration and updating backends"); 134 | backend_reloads.inc(); 135 | let config = match load_backend_configs( 136 | &discovery_cache, 137 | &discovery_backends, 138 | options_reload.config.as_ref(), 139 | ) 140 | .await 141 | { 142 | Ok(config) => { 143 | last_config = config.clone(); 144 | config 145 | } 146 | Err(e) => { 147 | config_load_failures.inc(); 148 | error!("error reloading configuration from disk, using original configuration: {:?}", e); 149 | last_config.clone() 150 | } 151 | }; 152 | let dconfig = config.discovery.unwrap_or_default(); 153 | 154 | tokio::select! { 155 | _ = sighup.recv() => { 156 | info!("received sighup"); 157 | discovery_stream = discovery::reflector(discovery_cache.clone(), discovery::as_stream(&dconfig)); 158 | info!("reloaded discovery stream"); 159 | } 160 | Some(event) = discovery_stream.next() => { 161 | info!("updating discovery for map {}", event.0); 162 | } 163 | }; 164 | } 165 | }); 166 | 167 | // Start processing processor tickers 168 | let ticker_backends = backends.clone(); 169 | tokio::spawn(backends::ticker( 170 | backend_ticker_tripwire.clone(), 171 | ticker_backends, 172 | )); 173 | 174 | // Wait for the server to finish 175 | while let Some(name) = servers.next().await { 176 | debug!("server {} exited", name) 177 | } 178 | debug!("forcing processor tick to flush"); 179 | backends.flush(); 180 | debug!("stopping backend ticker"); 181 | backend_ticker_cancel.cancel(); 182 | let shutdown_recv = backends.drain_backends(); 183 | match shutdown_recv { 184 | Some(mut shutdown_recv) => { 185 | let drain_timeout = Instant::now() + Duration::from_secs(opts.shutdown_wait as u64); 186 | select! 
{ 187 | _ = sleep_until(drain_timeout) => { 188 | info!("backends didn't finish draining within {:?} seconds, aborting", opts.shutdown_wait); 189 | } 190 | _ = shutdown_recv.recv() => { 191 | info!("all backends finished"); 192 | } 193 | } 194 | } 195 | None => { 196 | warn!("backends already drained"); 197 | } 198 | } 199 | } 200 | 201 | fn main() -> anyhow::Result<()> { 202 | env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); 203 | let opts = Options::from_args(); 204 | 205 | if opts.version { 206 | println!( 207 | "statsrelay - {} - {}", 208 | statsrelay::built_info::PKG_VERSION, 209 | statsrelay::built_info::GIT_COMMIT_HASH.unwrap_or("unknown") 210 | ); 211 | return Ok(()); 212 | } 213 | info!( 214 | "statsrelay loading - {} - {}", 215 | statsrelay::built_info::PKG_VERSION, 216 | statsrelay::built_info::GIT_COMMIT_HASH.unwrap_or("unknown") 217 | ); 218 | 219 | let config = statsrelay::config::load(opts.config.as_ref()) 220 | .with_context(|| format!("can't load config file from {}", opts.config))?; 221 | info!("loaded config file {}", opts.config); 222 | debug!("servers defined: {:?}", config.statsd.servers); 223 | if opts.config_check { 224 | info!("--config-check-and-exit set, exiting"); 225 | return Ok(()); 226 | } 227 | 228 | let collector = stats::Collector::default(); 229 | 230 | if let Some(admin) = &config.admin { 231 | admin::spawn_admin_server(admin.port, collector.clone()); 232 | info!("spawned admin server on port {}", admin.port); 233 | } 234 | debug!("installed metrics receiver"); 235 | 236 | let mut builder = match opts.threaded { 237 | true => runtime::Builder::new_multi_thread(), 238 | false => runtime::Builder::new_current_thread(), 239 | }; 240 | 241 | let runtime = builder.enable_all().build().unwrap(); 242 | info!("tokio runtime built, threaded: {}", opts.threaded); 243 | 244 | let scope = collector.scope("statsrelay"); 245 | 246 | runtime.block_on(server(scope, config, opts)); 247 | 248 | drop(runtime); 249 | info!("runtime terminated"); 250 | Ok(()) 251 | } 252 | 253 | /// Load processors from a given config structure and pack them into the given 254 | /// backend set. Currently processors can't be reloaded at runtime. 255 | async fn load_processors( 256 | scope: Scope, 257 | backends: &backends::Backends, 258 | processors: &HashMap, 259 | ) -> anyhow::Result<()> { 260 | for (name, cp) in processors.iter() { 261 | let proc: Box = match cp { 262 | config::Processor::TagConverter(tc) => { 263 | info!("processor tag_converter: {:?}", tc); 264 | Box::new(processors::tag::Normalizer::new(tc.route.as_ref())) 265 | } 266 | config::Processor::Sampler(sampler) => { 267 | info!("processor sampler: {:?}", sampler); 268 | Box::new(processors::sampler::Sampler::new(sampler)?) 269 | } 270 | config::Processor::Cardinality(cardinality) => { 271 | info!("processor cardinality: {:?}", cardinality); 272 | Box::new(processors::cardinality::Cardinality::new( 273 | scope.scope(name), 274 | cardinality, 275 | )) 276 | } 277 | config::Processor::RegexFilter(regex) => { 278 | info!("processor regex_filter: {:?}", regex); 279 | Box::new(processors::regex_filter::RegexFilter::new( 280 | scope.scope(name), 281 | regex, 282 | )?) 
283 | } 284 | }; 285 | backends.replace_processor(name.as_str(), proc)?; 286 | } 287 | Ok(()) 288 | } 289 | 290 | async fn load_backend_configs( 291 | discovery_cache: &discovery::Cache, 292 | backends: &backends::Backends, 293 | path: &str, 294 | ) -> anyhow::Result { 295 | // Check if we have to load the configuration file 296 | let config = match statsrelay::config::load(path) 297 | .with_context(|| format!("can't load config file from {}", path)) 298 | { 299 | Err(e) => { 300 | error!("failed to reload configuration: {}", e); 301 | return Err(e).context("failed to reload configuration file"); 302 | } 303 | Ok(ok) => ok, 304 | }; 305 | 306 | let duplicate = &config.statsd.backends; 307 | for (name, dp) in duplicate.iter() { 308 | let discovery_data = if let Some(discovery_name) = &dp.shard_map_source { 309 | discovery_cache.get(discovery_name) 310 | } else { 311 | None 312 | }; 313 | if let Err(e) = backends.replace_backend(name, dp, discovery_data.as_ref()) { 314 | error!("failed to replace backend index {} error {}", name, e); 315 | continue; 316 | } 317 | } 318 | let existing_backends = backends.backend_names(); 319 | let config_backends: HashSet = duplicate.keys().cloned().collect(); 320 | let difference = existing_backends.difference(&config_backends); 321 | for remove in difference { 322 | if let Err(e) = backends.remove_backend(remove) { 323 | error!("failed to remove backend {} with error {:?}", remove, e); 324 | } 325 | } 326 | 327 | info!("backends reloaded"); 328 | Ok(config) 329 | } 330 | -------------------------------------------------------------------------------- /statsrelay/src/config.rs: -------------------------------------------------------------------------------- 1 | use regex::bytes::RegexSet; 2 | use serde::{Deserialize, Deserializer, Serialize, Serializer}; 3 | use std::collections::HashMap; 4 | use std::convert::{AsRef, TryFrom, TryInto}; 5 | use std::fmt; 6 | use thiserror::Error; 7 | 8 | #[derive(Debug, Clone, PartialEq)] 9 | pub enum RouteType { 10 | Statsd, 11 | Processor, 12 | } 13 | 14 | impl TryFrom<&str> for RouteType { 15 | type Error = Error; 16 | 17 | fn try_from(value: &str) -> Result { 18 | match value { 19 | "statsd" => Ok(RouteType::Statsd), 20 | "processor" => Ok(RouteType::Processor), 21 | _ => Err(Error::UnknownRouteType(value.to_string())), 22 | } 23 | } 24 | } 25 | 26 | impl From<&RouteType> for &str { 27 | fn from(t: &RouteType) -> Self { 28 | match t { 29 | RouteType::Statsd => "statsd", 30 | RouteType::Processor => "processor", 31 | } 32 | } 33 | } 34 | 35 | impl fmt::Display for RouteType { 36 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 37 | let s: &str = self.into(); 38 | write!(f, "{}", s) 39 | } 40 | } 41 | 42 | #[derive(Debug, Clone, PartialEq)] 43 | pub struct Route { 44 | pub route_type: RouteType, 45 | pub route_to: String, 46 | } 47 | 48 | impl fmt::Display for Route { 49 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 50 | write!(f, "{}:{}", self.route_type, self.route_to) 51 | } 52 | } 53 | 54 | impl<'de> Deserialize<'de> for Route { 55 | fn deserialize(deserializer: D) -> Result 56 | where 57 | D: Deserializer<'de>, 58 | { 59 | let s: &str = Deserialize::deserialize(deserializer)?; 60 | let parts: Vec<&str> = s.split(':').collect(); 61 | if let [ty, to] = &parts[..] 
{ 62 | Ok(Route { 63 | route_type: (*ty).try_into().map_err(serde::de::Error::custom)?, 64 | route_to: (*to).into(), 65 | }) 66 | } else { 67 | Err(Error::MalformedRoute(s.to_string())).map_err(serde::de::Error::custom) 68 | } 69 | } 70 | } 71 | 72 | impl Serialize for Route { 73 | fn serialize(&self, serializer: S) -> Result 74 | where 75 | S: Serializer, 76 | { 77 | serializer.serialize_str(format!("{}:{}", self.route_type, self.route_to).as_str()) 78 | } 79 | } 80 | 81 | pub mod processor { 82 | use super::*; 83 | 84 | #[derive(Debug, Serialize, Deserialize, Clone)] 85 | pub struct Sampler { 86 | pub window: u32, 87 | pub timer_reservoir_size: Option, 88 | 89 | pub route: Vec, 90 | } 91 | 92 | #[derive(Serialize, Deserialize, Debug, Clone)] 93 | pub struct TagConverter { 94 | pub route: Vec, 95 | } 96 | 97 | #[derive(Serialize, Deserialize, Debug, Clone)] 98 | pub struct Cardinality { 99 | pub size_limit: usize, 100 | pub rotate_after_seconds: u64, 101 | pub buckets: usize, 102 | pub route: Vec, 103 | } 104 | 105 | #[derive(Serialize, Deserialize, Debug, Clone)] 106 | pub struct RegexFilter { 107 | pub remove: Option>, 108 | pub allow: Option>, 109 | pub route: Vec, 110 | } 111 | } 112 | 113 | #[derive(Serialize, Deserialize, Debug, Clone)] 114 | #[serde(tag = "type", rename_all = "snake_case")] 115 | pub enum Processor { 116 | Sampler(processor::Sampler), 117 | TagConverter(processor::TagConverter), 118 | Cardinality(processor::Cardinality), 119 | RegexFilter(processor::RegexFilter), 120 | } 121 | 122 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] 123 | pub struct BackendConfig { 124 | #[serde(flatten)] 125 | pub client_config: BackendClientConfig, 126 | #[serde(default)] 127 | pub shard_map: Vec, 128 | pub shard_map_source: Option, 129 | pub suffix: Option, 130 | pub prefix: Option, 131 | pub input_blocklist: Option, 132 | pub input_filter: Option, 133 | } 134 | 135 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] 136 | #[serde(tag = "type", rename_all = "snake_case")] 137 | pub enum BackendClientConfig { 138 | Statsd(StatsdBackendClientConfig), 139 | PromRemoteWrite(PromBackendClientConfig), 140 | } 141 | 142 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] 143 | pub struct StatsdBackendClientConfig { 144 | #[serde(default = "default_max_queue")] 145 | pub max_queue: usize, 146 | #[serde(default = "default_trim_on_disconnect")] 147 | pub trim_on_disconnect: bool, 148 | } 149 | 150 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] 151 | pub struct PromBackendClientConfig { 152 | #[serde(default = "default_max_queue")] 153 | pub max_queue: usize, 154 | #[serde(default = "default_request_timeout_ms")] 155 | pub request_timeout_ms: u64, 156 | #[serde(default = "default_batch_creation_timeout_ms")] 157 | pub batch_creation_timeout_ms: u64, 158 | #[serde(default = "default_max_batch_size")] 159 | pub max_batch_size: usize, 160 | #[serde(default = "default_max_in_flight")] 161 | pub max_in_flight: usize, 162 | } 163 | 164 | fn default_max_queue() -> usize { 165 | 100000 166 | } 167 | fn default_request_timeout_ms() -> u64 { 168 | 10000 169 | } 170 | fn default_batch_creation_timeout_ms() -> u64 { 171 | 50 172 | } 173 | fn default_max_batch_size() -> usize { 174 | 500 175 | } 176 | fn default_max_in_flight() -> usize { 177 | 16 178 | } 179 | fn default_trim_on_disconnect() -> bool { 180 | false 181 | } 182 | 183 | impl BackendConfig { 184 | pub fn generate_input_filter(&self) -> Option { 185 | let mut filters: Vec = Vec::new(); 186 | 187 | if 
let Some(ref blocklist) = self.input_blocklist { 188 | filters.push(blocklist.clone()); 189 | } 190 | if let Some(ref filter) = self.input_filter { 191 | filters.push(filter.clone()); 192 | } 193 | 194 | if !filters.is_empty() { 195 | Some(RegexSet::new(filters).unwrap()) 196 | } else { 197 | None 198 | } 199 | } 200 | } 201 | 202 | #[derive(Serialize, Deserialize, Debug, Clone)] 203 | pub struct StatsdServerConfig { 204 | pub bind: String, 205 | pub socket: Option, 206 | pub read_buffer: Option, 207 | pub route: Vec, 208 | pub read_timeout_secs: Option, 209 | } 210 | 211 | #[derive(Serialize, Deserialize, Debug, Clone)] 212 | pub struct StatsdConfig { 213 | pub servers: HashMap, 214 | pub backends: HashMap, 215 | } 216 | 217 | #[derive(Debug, Serialize, Deserialize, Clone)] 218 | #[serde(tag = "type", rename_all = "snake_case")] 219 | pub enum DiscoveryTransform { 220 | Format { pattern: String }, 221 | Repeat { count: u32 }, 222 | } 223 | 224 | #[derive(Serialize, Deserialize, Debug, Clone)] 225 | pub struct S3DiscoverySource { 226 | pub bucket: String, 227 | pub key: String, 228 | pub interval: u32, 229 | pub transforms: Option>, 230 | } 231 | 232 | #[derive(Serialize, Deserialize, Debug, Clone)] 233 | pub struct PathDiscoverySource { 234 | pub path: String, 235 | pub interval: u32, 236 | pub transforms: Option>, 237 | } 238 | 239 | #[derive(Serialize, Deserialize, Debug, Clone)] 240 | #[serde(tag = "type", rename_all = "snake_case")] 241 | pub enum DiscoverySource { 242 | StaticFile(PathDiscoverySource), 243 | S3(S3DiscoverySource), 244 | } 245 | 246 | #[derive(Debug, Serialize, Deserialize, Clone, Default)] 247 | pub struct Discovery { 248 | pub sources: HashMap, 249 | } 250 | 251 | #[derive(Debug, Serialize, Deserialize, Clone)] 252 | pub struct AdminConfig { 253 | pub port: u16, 254 | } 255 | 256 | #[derive(Serialize, Deserialize, Debug, Clone)] 257 | pub struct Config { 258 | pub admin: Option, 259 | pub statsd: StatsdConfig, 260 | pub discovery: Option, 261 | pub processors: Option>, 262 | } 263 | 264 | #[derive(Error, Debug)] 265 | pub enum Error { 266 | #[error("could not locate discovery source {0}")] 267 | UnknownDiscoverySource(String), 268 | #[error("malformed route {0}")] 269 | MalformedRoute(String), 270 | #[error("invalid route type {0}")] 271 | UnknownRouteType(String), 272 | #[error("invalid routing destination {0}")] 273 | UnknownRoutingDestination(Route), 274 | } 275 | 276 | fn check_routes(config: &Config, routes: &[Route]) -> Result<(), Error> { 277 | let result: Result, Error> = routes 278 | .iter() 279 | .map(|route| match route.route_type { 280 | RouteType::Statsd => config 281 | .statsd 282 | .backends 283 | .get(route.route_to.as_str()) 284 | .ok_or_else(|| Error::UnknownRoutingDestination(route.clone())) 285 | .map(|_| ()), 286 | RouteType::Processor => { 287 | if let Some(procs) = &config.processors { 288 | return procs 289 | .get(route.route_to.as_str()) 290 | .ok_or_else(|| Error::UnknownRoutingDestination(route.clone())) 291 | .map(|_| ()); 292 | } else { 293 | Err(Error::UnknownRoutingDestination(route.clone())) 294 | } 295 | } 296 | }) 297 | .collect(); 298 | result.map(|_| ()) 299 | } 300 | 301 | fn check_config_route(config: &Config) -> Result<(), Error> { 302 | for (_, statsd) in config.statsd.servers.iter() { 303 | check_routes(config, statsd.route.as_ref())?; 304 | } 305 | let routes: Result, Error> = config 306 | .clone() 307 | .processors 308 | .unwrap_or_default() 309 | .iter() 310 | .map(|(_, proc)| match proc { 311 | Processor::Sampler(sampler) 
=> check_routes(config, sampler.route.as_ref()), 312 | Processor::TagConverter(tc) => check_routes(config, tc.route.as_ref()), 313 | Processor::Cardinality(c) => check_routes(config, c.route.as_ref()), 314 | Processor::RegexFilter(filter) => check_routes(config, filter.route.as_ref()), 315 | }) 316 | .collect(); 317 | routes.map(|_| ()) 318 | } 319 | 320 | fn check_config_discovery(config: &Config, discovery: &Discovery) -> anyhow::Result<()> { 321 | for (_, statsd_dupl) in config.statsd.backends.iter() { 322 | if let Some(source) = &statsd_dupl.shard_map_source { 323 | if discovery.sources.get(source).is_none() { 324 | return Err(Error::UnknownDiscoverySource(source.clone()).into()); 325 | } 326 | } 327 | } 328 | Ok(()) 329 | } 330 | 331 | fn check_config(config: &Config) -> anyhow::Result<()> { 332 | let default = Discovery::default(); 333 | let discovery = &config.discovery.as_ref().unwrap_or(&default); 334 | // Every reference to a shard_map needs a reference to a valid discovery block 335 | check_config_discovery(config, discovery)?; 336 | check_config_route(config)?; 337 | Ok(()) 338 | } 339 | 340 | pub fn load(path: &str) -> anyhow::Result { 341 | let input = std::fs::read_to_string(path)?; 342 | let config: Config = serde_json::from_str(input.as_ref())?; 343 | // Perform some high level validation 344 | check_config(&config)?; 345 | Ok(config) 346 | } 347 | 348 | #[cfg(test)] 349 | pub mod test { 350 | use super::*; 351 | use std::io::Write; 352 | use tempfile::NamedTempFile; 353 | 354 | #[test] 355 | fn load_example_config() { 356 | let config = r#" 357 | { 358 | "statsd": { 359 | "servers": { 360 | "default": 361 | { 362 | "bind": "127.0.0.1:BIND_STATSD_PORT", 363 | "route": ["statsd:test1"], 364 | "read_buffer": 65535 365 | } 366 | }, 367 | "backends": { 368 | "test1": 369 | { 370 | "type": "statsd", 371 | "prefix": "test-1.", 372 | "shard_map": [ 373 | "127.0.0.1:SEND_STATSD_PORT" 374 | ], 375 | "suffix": ".suffix" 376 | }, 377 | "mapsource": 378 | { 379 | "type": "statsd", 380 | "input_filter": "^(?=dontmatchme)", 381 | "prefix": "test-2.", 382 | "shard_map_source": "my_s3" 383 | }, 384 | "prom": 385 | { 386 | "type": "prom_remote_write", 387 | "request_timeout_ms": 60000, 388 | "max_batch_size": 200000 389 | } 390 | } 391 | }, 392 | "processors": { 393 | "tag1": { 394 | "type": "tag_converter", 395 | "route": ["statsd:test1"] 396 | }, 397 | "regex": { 398 | "type": "regex_filter", 399 | "allow": [".*"], 400 | "route": ["statsd:test1"] 401 | } 402 | }, 403 | "discovery": { 404 | "sources": { 405 | "file": { 406 | "type":"static_file", 407 | "path":"/tmp/file", 408 | "interval":5 409 | }, 410 | "my_s3": { 411 | "type": "s3", 412 | "bucket": "foo", 413 | "key": "bar", 414 | "interval": 3, 415 | "transforms": [ 416 | { 417 | "type": "repeat", 418 | "count": 3 419 | }, 420 | { 421 | "type": "format", 422 | "pattern": "{}:123" 423 | } 424 | ] 425 | } 426 | } 427 | } 428 | } 429 | "#; 430 | let mut tf = NamedTempFile::new().unwrap(); 431 | tf.write_all(config.as_bytes()).unwrap(); 432 | let config = load(tf.path().to_str().unwrap()).unwrap(); 433 | // Check servers 434 | let default_server = config.statsd.servers.get("default").unwrap(); 435 | assert_eq!( 436 | default_server.bind, 437 | "127.0.0.1:BIND_STATSD_PORT".to_string() 438 | ); 439 | // Check backends 440 | let statsd_backend = config.statsd.backends.get("test1").unwrap(); 441 | assert_eq!( 442 | statsd_backend, 443 | &BackendConfig { 444 | client_config: BackendClientConfig::Statsd(StatsdBackendClientConfig { 445 | 
max_queue: 100000, 446 | trim_on_disconnect: false, 447 | },), 448 | prefix: Some("test-1.".to_string()), 449 | shard_map: vec!["127.0.0.1:SEND_STATSD_PORT".to_string()], 450 | suffix: Some(".suffix".to_string()), 451 | shard_map_source: None, 452 | input_blocklist: None, 453 | input_filter: None, 454 | }, 455 | ); 456 | let prom_backend = config.statsd.backends.get("prom").unwrap(); 457 | assert_eq!( 458 | prom_backend, 459 | &BackendConfig { 460 | client_config: BackendClientConfig::PromRemoteWrite(PromBackendClientConfig { 461 | max_queue: 100000, 462 | request_timeout_ms: 60000, 463 | batch_creation_timeout_ms: 50, 464 | max_batch_size: 200000, 465 | max_in_flight: 16, 466 | }), 467 | prefix: None, 468 | shard_map: vec![], 469 | suffix: None, 470 | shard_map_source: None, 471 | input_blocklist: None, 472 | input_filter: None, 473 | }, 474 | ); 475 | // Check processors 476 | assert_eq!(2, config.clone().processors.unwrap_or_default().len()); 477 | // Check discovery 478 | let discovery = config.discovery.unwrap(); 479 | assert_eq!(2, discovery.sources.len()); 480 | let s3_source = discovery.sources.get("my_s3").unwrap(); 481 | match s3_source { 482 | DiscoverySource::S3(source) => { 483 | assert!(source.bucket == "foo"); 484 | } 485 | _ => panic!("not an s3 source"), 486 | }; 487 | } 488 | } 489 | -------------------------------------------------------------------------------- /statsrelay/src/cuckoofilter/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Seif Lotfy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /statsrelay/src/cuckoofilter/bucket.rs: -------------------------------------------------------------------------------- 1 | pub const FINGERPRINT_SIZE: usize = 1; 2 | pub const BUCKET_SIZE: usize = 4; 3 | const EMPTY_FINGERPRINT_DATA: [u8; FINGERPRINT_SIZE] = [100; FINGERPRINT_SIZE]; 4 | 5 | // Fingerprint Size is 1 byte so lets remove the Vec 6 | #[derive(PartialEq, Copy, Clone, Hash)] 7 | pub struct Fingerprint { 8 | pub data: [u8; FINGERPRINT_SIZE], 9 | } 10 | 11 | impl Fingerprint { 12 | /// Attempts to create a new Fingerprint based on the given 13 | /// number. If the created Fingerprint would be equal to the 14 | /// empty Fingerprint, None is returned. 
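/// With `FINGERPRINT_SIZE == 1` the empty pattern is `[100]`, so for example
/// `from_data([100])` yields `None` while `from_data([7])` yields `Some(_)`.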
15 | pub fn from_data(data: [u8; FINGERPRINT_SIZE]) -> Option { 16 | let result = Self { data }; 17 | if result.is_empty() { 18 | None 19 | } else { 20 | Some(result) 21 | } 22 | } 23 | 24 | /// Returns the empty Fingerprint. 25 | pub fn empty() -> Self { 26 | Self { 27 | data: EMPTY_FINGERPRINT_DATA, 28 | } 29 | } 30 | 31 | /// Checks if this is the empty Fingerprint. 32 | pub fn is_empty(&self) -> bool { 33 | self.data == EMPTY_FINGERPRINT_DATA 34 | } 35 | 36 | /// Sets the fingerprint value to a previously exported one via an in-memory copy. 37 | fn slice_copy(&mut self, fingerprint: &[u8]) { 38 | self.data.copy_from_slice(fingerprint); 39 | } 40 | } 41 | 42 | /// Manages `BUCKET_SIZE` fingerprints at most. 43 | #[derive(Clone)] 44 | pub struct Bucket { 45 | pub buffer: [Fingerprint; BUCKET_SIZE], 46 | } 47 | 48 | impl Bucket { 49 | /// Creates a new bucket with a pre-allocated buffer. 50 | pub fn new() -> Self { 51 | Self { 52 | buffer: [Fingerprint::empty(); BUCKET_SIZE], 53 | } 54 | } 55 | 56 | /// Inserts the fingerprint into the buffer if the buffer is not full. 57 | /// This operation is O(1). 58 | pub fn insert(&mut self, fp: Fingerprint) -> bool { 59 | for entry in &mut self.buffer { 60 | if entry.is_empty() { 61 | *entry = fp; 62 | return true; 63 | } 64 | } 65 | false 66 | } 67 | 68 | /// Deletes the given fingerprint from the bucket. This operation is O(1). 69 | pub fn delete(&mut self, fp: Fingerprint) -> bool { 70 | match self.get_fingerprint_index(fp) { 71 | Some(index) => { 72 | self.buffer[index] = Fingerprint::empty(); 73 | true 74 | } 75 | None => false, 76 | } 77 | } 78 | 79 | /// Returns the index of the given fingerprint, if its found. O(1) 80 | pub fn get_fingerprint_index(&self, fp: Fingerprint) -> Option { 81 | self.buffer.iter().position(|e| *e == fp) 82 | } 83 | 84 | /// Returns all current fingerprint data of the current buffer for storage. 85 | pub fn get_fingerprint_data(&self) -> Vec { 86 | self.buffer 87 | .iter() 88 | .flat_map(|f| f.data.iter()) 89 | .cloned() 90 | .collect() 91 | } 92 | 93 | /// Empties the bucket by setting each used entry to Fingerprint::empty(). Returns the number of entries that were modified. 94 | #[inline(always)] 95 | pub fn clear(&mut self) { 96 | *self = Self::new() 97 | } 98 | } 99 | 100 | impl From<&[u8]> for Bucket { 101 | /// Constructs a buffer of fingerprints from a set of previously exported fingerprints. 102 | fn from(fingerprints: &[u8]) -> Self { 103 | let mut buffer = [Fingerprint::empty(); BUCKET_SIZE]; 104 | for (idx, value) in fingerprints.chunks(FINGERPRINT_SIZE).enumerate() { 105 | buffer[idx].slice_copy(value); 106 | } 107 | Self { buffer } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /statsrelay/src/cuckoofilter/mod.rs: -------------------------------------------------------------------------------- 1 | //! Cuckoo filter probabilistic data structure for membership testing and cardinality counting. 2 | //! 3 | //! # Usage 4 | //! 5 | //! This crate is [on crates.io](https://crates.io/crates/cuckoofilter) and can be 6 | //! used by adding `cuckoofilter` to the dependencies in your project's `Cargo.toml`. 7 | //! 8 | //! ```toml 9 | //! [dependencies] 10 | //! cuckoofilter = "0.3" 11 | //! ``` 12 | //! 13 | //! And this in your crate root: 14 | //! 15 | //! ```rust 16 | //! use statsrelay::cuckoofilter; 17 | //! 
``` 18 | 19 | mod bucket; 20 | mod util; 21 | 22 | use bucket::{Bucket, Fingerprint, BUCKET_SIZE, FINGERPRINT_SIZE}; 23 | use util::{get_alt_index, get_fai, FaI}; 24 | 25 | use std::cmp; 26 | use std::collections::hash_map::DefaultHasher; 27 | use std::error::Error as StdError; 28 | use std::fmt; 29 | use std::hash::{Hash, Hasher}; 30 | use std::iter::repeat; 31 | use std::marker::PhantomData; 32 | use std::mem; 33 | 34 | use rand::{Rng, SeedableRng}; 35 | #[cfg(feature = "serde_support")] 36 | use serde_derive::{Deserialize, Serialize}; 37 | 38 | /// If insertion fails, we will retry this many times. 39 | pub const MAX_REBUCKET: u32 = 10; 40 | 41 | /// The default number of buckets. 42 | pub const DEFAULT_CAPACITY: usize = (1 << 20) - 1; 43 | 44 | #[derive(Debug)] 45 | pub enum CuckooError { 46 | NotEnoughSpace, 47 | } 48 | 49 | impl fmt::Display for CuckooError { 50 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 51 | f.write_str("NotEnoughSpace") 52 | } 53 | } 54 | 55 | impl StdError for CuckooError { 56 | fn description(&self) -> &str { 57 | "Not enough space to store this item, rebucketing failed." 58 | } 59 | } 60 | 61 | /// A cuckoo filter class exposes a Bloomier filter interface, 62 | /// providing methods of add, delete, contains. 63 | /// 64 | /// # Examples 65 | /// 66 | /// ``` 67 | /// use statsrelay::cuckoofilter; 68 | /// 69 | /// let words = vec!["foo", "bar", "xylophone", "milagro"]; 70 | /// let mut cf = cuckoofilter::CuckooFilter::new(); 71 | /// 72 | /// let mut insertions = 0; 73 | /// for s in &words { 74 | /// if cf.test_and_add(s).unwrap() { 75 | /// insertions += 1; 76 | /// } 77 | /// } 78 | /// 79 | /// assert_eq!(insertions, words.len()); 80 | /// assert_eq!(cf.len(), words.len()); 81 | /// 82 | /// // Re-add the first element. 83 | /// cf.add(words[0]); 84 | /// 85 | /// assert_eq!(cf.len(), words.len() + 1); 86 | /// 87 | /// for s in &words { 88 | /// cf.delete(s); 89 | /// } 90 | /// 91 | /// assert_eq!(cf.len(), 1); 92 | /// assert!(!cf.is_empty()); 93 | /// 94 | /// cf.delete(words[0]); 95 | /// 96 | /// assert_eq!(cf.len(), 0); 97 | /// assert!(cf.is_empty()); 98 | /// 99 | /// for s in &words { 100 | /// if cf.test_and_add(s).unwrap() { 101 | /// insertions += 1; 102 | /// } 103 | /// } 104 | /// 105 | /// cf.clear(); 106 | /// 107 | /// assert!(cf.is_empty()); 108 | /// 109 | /// ``` 110 | pub struct CuckooFilter { 111 | buckets: Box<[Bucket]>, 112 | len: usize, 113 | rng: rand::rngs::SmallRng, 114 | _hasher: std::marker::PhantomData, 115 | } 116 | 117 | impl Default for CuckooFilter { 118 | fn default() -> Self { 119 | Self::new() 120 | } 121 | } 122 | 123 | impl CuckooFilter { 124 | /// Construct a CuckooFilter with default capacity and hasher. 125 | pub fn new() -> Self { 126 | Self::with_capacity(DEFAULT_CAPACITY) 127 | } 128 | } 129 | 130 | impl CuckooFilter 131 | where 132 | H: Hasher + Default, 133 | { 134 | /// Constructs a Cuckoo Filter with a given max capacity 135 | pub fn with_capacity(cap: usize) -> Self { 136 | let capacity = cmp::max(1, cap.next_power_of_two() / BUCKET_SIZE); 137 | 138 | Self { 139 | buckets: repeat(Bucket::new()) 140 | .take(capacity) 141 | .collect::>() 142 | .into_boxed_slice(), 143 | len: 0, 144 | rng: rand::rngs::SmallRng::from_entropy(), 145 | _hasher: PhantomData, 146 | } 147 | } 148 | 149 | /// Checks if `data` is in the filter. 
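/// A minimal usage sketch (mirroring the module-level example above):
///
/// ```
/// use statsrelay::cuckoofilter;
///
/// let mut cf = cuckoofilter::CuckooFilter::new();
/// cf.add(&"foo").unwrap();
/// assert!(cf.contains(&"foo"));
/// ```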
150 | pub fn contains(&self, data: &T) -> bool { 151 | let FaI { fp, i1, i2 } = get_fai::(data); 152 | let len = self.buckets.len(); 153 | self.buckets[i1 % len] 154 | .get_fingerprint_index(fp) 155 | .or_else(|| self.buckets[i2 % len].get_fingerprint_index(fp)) 156 | .is_some() 157 | } 158 | 159 | /// Adds `data` to the filter. Returns `Ok` if the insertion was successful, 160 | /// but could fail with a `NotEnoughSpace` error, especially when the filter 161 | /// is nearing its capacity. 162 | /// Note that while you can put any hashable type in the same filter, beware 163 | /// for side effects like that the same number can have diferent hashes 164 | /// depending on the type. 165 | /// So for the filter, 4711i64 isn't the same as 4711u64. 166 | /// 167 | /// **Note:** When this returns `NotEnoughSpace`, the element given was 168 | /// actually added to the filter, but some random *other* element was 169 | /// removed. This might improve in the future. 170 | pub fn add(&mut self, data: &T) -> Result<(), CuckooError> { 171 | let fai = get_fai::(data); 172 | if self.put(fai.fp, fai.i1) || self.put(fai.fp, fai.i2) { 173 | return Ok(()); 174 | } 175 | let len = self.buckets.len(); 176 | let mut i = fai.random_index(&mut self.rng); 177 | let mut fp = fai.fp; 178 | for _ in 0..MAX_REBUCKET { 179 | let other_fp; 180 | { 181 | let loc = &mut self.buckets[i % len].buffer[self.rng.gen_range(0..BUCKET_SIZE)]; 182 | other_fp = *loc; 183 | *loc = fp; 184 | i = get_alt_index::(other_fp, i); 185 | } 186 | if self.put(other_fp, i) { 187 | return Ok(()); 188 | } 189 | fp = other_fp; 190 | } 191 | // fp is dropped here, which means that the last item that was 192 | // rebucketed gets removed from the filter. 193 | // TODO: One could introduce a single-item cache for this element, 194 | // check this cache in all methods additionally to the actual filter, 195 | // and return NotEnoughSpace if that cache is already in use. 196 | // This would complicate the code, but stop random elements from 197 | // getting removed and result in nicer behaviour for the user. 198 | Err(CuckooError::NotEnoughSpace) 199 | } 200 | 201 | /// Adds `data` to the filter if it does not exist in the filter yet. 202 | /// Returns `Ok(true)` if `data` was not yet present in the filter and added 203 | /// successfully. 204 | pub fn test_and_add(&mut self, data: &T) -> Result { 205 | if self.contains(data) { 206 | Ok(false) 207 | } else { 208 | self.add(data).map(|_| true) 209 | } 210 | } 211 | 212 | /// Number of items in the filter. 213 | pub fn len(&self) -> usize { 214 | self.len 215 | } 216 | 217 | /// Exports fingerprints in all buckets, along with the filter's length for storage. 218 | /// The filter can be recovered by passing the `ExportedCuckooFilter` struct to the 219 | /// `from` method of `CuckooFilter`. 220 | pub fn export(&self) -> ExportedCuckooFilter { 221 | self.into() 222 | } 223 | 224 | /// Number of bytes the filter occupies in memory 225 | pub fn memory_usage(&self) -> usize { 226 | mem::size_of_val(self) + self.buckets.len() * mem::size_of::() 227 | } 228 | 229 | /// Check if filter is empty 230 | pub fn is_empty(&self) -> bool { 231 | self.len == 0 232 | } 233 | 234 | /// Deletes `data` from the filter. Returns true if `data` existed in the 235 | /// filter before. 236 | pub fn delete(&mut self, data: &T) -> bool { 237 | let FaI { fp, i1, i2 } = get_fai::(data); 238 | self.remove(fp, i1) || self.remove(fp, i2) 239 | } 240 | 241 | /// Empty all the buckets in a filter and reset the number of items. 
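/// Afterwards `len()` returns 0 and `is_empty()` is true; calling it on an
/// already-empty filter is a no-op.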
242 | pub fn clear(&mut self) { 243 | if self.is_empty() { 244 | return; 245 | } 246 | 247 | for bucket in self.buckets.iter_mut() { 248 | bucket.clear(); 249 | } 250 | self.len = 0; 251 | } 252 | 253 | /// Extracts fingerprint values from all buckets, used for exporting the filters data. 254 | fn values(&self) -> Vec { 255 | self.buckets 256 | .iter() 257 | .flat_map(|b| b.get_fingerprint_data().into_iter()) 258 | .collect() 259 | } 260 | 261 | /// Removes the item with the given fingerprint from the bucket indexed by i. 262 | fn remove(&mut self, fp: Fingerprint, i: usize) -> bool { 263 | let len = self.buckets.len(); 264 | if self.buckets[i % len].delete(fp) { 265 | self.len -= 1; 266 | true 267 | } else { 268 | false 269 | } 270 | } 271 | 272 | fn put(&mut self, fp: Fingerprint, i: usize) -> bool { 273 | let len = self.buckets.len(); 274 | if self.buckets[i % len].insert(fp) { 275 | self.len += 1; 276 | true 277 | } else { 278 | false 279 | } 280 | } 281 | } 282 | 283 | /// A minimal representation of the CuckooFilter which can be transfered or stored, then recovered at a later stage. 284 | #[derive(Debug)] 285 | #[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))] 286 | pub struct ExportedCuckooFilter { 287 | #[cfg_attr(feature = "serde_support", serde(with = "serde_bytes"))] 288 | pub values: Vec, 289 | pub length: usize, 290 | } 291 | 292 | impl From for CuckooFilter { 293 | /// Converts a simplified representation of a filter used for export to a 294 | /// fully functioning version. 295 | /// 296 | /// # Contents 297 | /// 298 | /// * `values` - A serialized version of the `CuckooFilter`'s memory, where the 299 | /// fingerprints in each bucket are chained one after another, then in turn all 300 | /// buckets are chained together. 301 | /// * `length` - The number of valid fingerprints inside the `CuckooFilter`. 302 | /// This value is used as a time saving method, otherwise all fingerprints 303 | /// would need to be checked for equivalence against the null pattern. 304 | fn from(exported: ExportedCuckooFilter) -> Self { 305 | // Assumes that the `BUCKET_SIZE` and `FINGERPRINT_SIZE` constants do not change. 306 | Self { 307 | buckets: exported 308 | .values 309 | .chunks(BUCKET_SIZE * FINGERPRINT_SIZE) 310 | .map(Bucket::from) 311 | .collect::>() 312 | .into_boxed_slice(), 313 | len: exported.length, 314 | rng: rand::rngs::SmallRng::from_entropy(), 315 | _hasher: PhantomData, 316 | } 317 | } 318 | } 319 | 320 | impl From<&CuckooFilter> for ExportedCuckooFilter 321 | where 322 | H: Hasher + Default, 323 | { 324 | /// Converts a `CuckooFilter` into a simplified version which can be serialized and stored 325 | /// for later use. 326 | fn from(cuckoo: &CuckooFilter) -> Self { 327 | Self { 328 | values: cuckoo.values(), 329 | length: cuckoo.len(), 330 | } 331 | } 332 | } 333 | -------------------------------------------------------------------------------- /statsrelay/src/cuckoofilter/util.rs: -------------------------------------------------------------------------------- 1 | use super::bucket::{Fingerprint, FINGERPRINT_SIZE}; 2 | 3 | use std::hash::{Hash, Hasher}; 4 | 5 | use byteorder::{BigEndian, WriteBytesExt}; 6 | 7 | // A struct combining *F*ingerprint *a*nd *I*ndexes, 8 | // to have a return type with named fields 9 | // instead of a tuple with unnamed fields. 
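// The two candidate bucket indexes are related by `i2 == i1 ^ hash(fp)` (see
// `get_alt_index` below). Since XOR is self-inverse, applying `get_alt_index`
// to (fp, i2) yields i1 again; this is how `CuckooFilter::add` recovers the
// alternate bucket of a displaced fingerprint without re-hashing the original
// item. The `test_fp_and_index` test at the bottom of this file checks exactly
// that round trip.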
10 | pub struct FaI { 11 | pub fp: Fingerprint, 12 | pub i1: usize, 13 | pub i2: usize, 14 | } 15 | 16 | fn get_hash(data: &T) -> (u32, u32) { 17 | let mut hasher = ::default(); 18 | data.hash(&mut hasher); 19 | let result = hasher.finish(); 20 | 21 | // split 64bit hash value in the upper and the lower 32bit parts, 22 | // one used for the fingerprint, the other used for the indexes. 23 | ((result >> 32) as u32, result as u32) 24 | } 25 | 26 | pub fn get_alt_index(fp: Fingerprint, i: usize) -> usize { 27 | let (_, index_hash) = get_hash::<_, H>(&fp.data); 28 | let alt_i = index_hash as usize; 29 | (i ^ alt_i) as usize 30 | } 31 | 32 | impl FaI { 33 | fn from_data(data: &T) -> Self { 34 | let (fp_hash, index_hash) = get_hash::<_, H>(data); 35 | 36 | let mut fp_hash_arr = [0; FINGERPRINT_SIZE]; 37 | let _ = (&mut fp_hash_arr[..]).write_u32::(fp_hash); 38 | let mut valid_fp_hash: [u8; FINGERPRINT_SIZE] = [0; FINGERPRINT_SIZE]; 39 | let mut n = 0; 40 | let fp; 41 | 42 | // increment every byte of the hash until we find one that is a valid fingerprint 43 | loop { 44 | for i in 0..FINGERPRINT_SIZE { 45 | valid_fp_hash[i] = fp_hash_arr[i] + n; 46 | } 47 | 48 | if let Some(val) = Fingerprint::from_data(valid_fp_hash) { 49 | fp = val; 50 | break; 51 | } 52 | n += 1; 53 | } 54 | 55 | let i1 = index_hash as usize; 56 | let i2 = get_alt_index::(fp, i1); 57 | Self { fp, i1, i2 } 58 | } 59 | 60 | pub fn random_index(&self, r: &mut R) -> usize { 61 | if r.gen() { 62 | self.i1 63 | } else { 64 | self.i2 65 | } 66 | } 67 | } 68 | 69 | pub fn get_fai(data: &T) -> FaI { 70 | FaI::from_data::<_, H>(data) 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use super::*; 76 | 77 | #[test] 78 | fn test_fp_and_index() { 79 | use std::collections::hash_map::DefaultHasher; 80 | let data = "seif"; 81 | let fai = get_fai::<_, DefaultHasher>(data); 82 | let FaI { fp, i1, i2 } = fai; 83 | let i11 = get_alt_index::(fp, i2); 84 | assert_eq!(i11, i1); 85 | 86 | let i22 = get_alt_index::(fp, i11); 87 | assert_eq!(i22, i2); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /statsrelay/src/discovery.rs: -------------------------------------------------------------------------------- 1 | use crate::config::{ 2 | Discovery, DiscoverySource, DiscoveryTransform, PathDiscoverySource, S3DiscoverySource, 3 | }; 4 | 5 | use std::sync::Arc; 6 | use std::time::Duration; 7 | use std::{fs::File, ops::Add}; 8 | use std::{io::BufReader, pin::Pin}; 9 | 10 | use async_stream::stream; 11 | use dashmap::DashMap; 12 | use futures::{stream::Stream, StreamExt}; 13 | use log::warn; 14 | use rusoto_s3::S3; 15 | use serde::{Deserialize, Serialize}; 16 | use tokio::io::AsyncReadExt; 17 | use tokio::time::Instant; 18 | use tokio_stream::StreamMap; 19 | 20 | // Transformer is a set of transformations to apply to a discovery set, for 21 | // example formatting output or repeating elements 22 | trait Transformer { 23 | fn transform(&self, input: &Update) -> Option; 24 | } 25 | 26 | /// Convert an update into another update based on a format string 27 | fn transform_format(format: &str, input: &Update) -> Option { 28 | if !format.contains("{}") { 29 | return None; 30 | } 31 | Some(Update { 32 | hosts: input 33 | .hosts 34 | .iter() 35 | .map(|input| String::from(format).replace("{}", input)) 36 | .collect(), 37 | }) 38 | } 39 | 40 | /// A transformer which repeats each element count times, e.g. 
a,b count =2 would produce a,a,b,b 41 | fn transform_repeat(count: u32, input: &Update) -> Option { 42 | match count { 43 | 0 => None, 44 | 1 => Some(input.clone()), 45 | n => Some(Update { 46 | hosts: input 47 | .hosts 48 | .iter() 49 | .flat_map(|input| std::iter::repeat(input.clone()).take(n as usize)) 50 | .collect(), 51 | }), 52 | } 53 | } 54 | 55 | impl Transformer for DiscoveryTransform { 56 | fn transform(&self, input: &Update) -> Option { 57 | match self { 58 | DiscoveryTransform::Format { pattern } => transform_format(pattern, input), 59 | DiscoveryTransform::Repeat { count } => transform_repeat(*count, input), 60 | } 61 | } 62 | } 63 | 64 | #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] 65 | pub struct Update { 66 | hosts: Vec, 67 | } 68 | 69 | impl Update { 70 | pub fn sources(&self) -> &Vec { 71 | &self.hosts 72 | } 73 | } 74 | 75 | #[derive(Debug, thiserror::Error)] 76 | pub enum Error { 77 | #[error("reading a discovery source had no data")] 78 | EmptyObjectError, 79 | } 80 | 81 | async fn poll_s3_source(config: S3DiscoverySource) -> anyhow::Result { 82 | let region = rusoto_core::Region::default(); 83 | let http_client = rusoto_core::HttpClient::new()?; 84 | let provider = rusoto_credential::AutoRefreshingProvider::new( 85 | rusoto_sts::WebIdentityProvider::from_k8s_env())?; 86 | let region = rusoto_core::Region::default(); 87 | let s3 = rusoto_s3::S3Client::new_with(http_client, provider, region); 88 | let req = rusoto_s3::GetObjectRequest { 89 | bucket: config.bucket.clone(), 90 | key: config.key.clone(), 91 | ..Default::default() 92 | }; 93 | let resp = s3.get_object(req).await?; 94 | let mut buffer = Vec::with_capacity(resp.content_length.unwrap_or(0_i64) as usize); 95 | let mut update = match resp.body { 96 | Some(contents) => { 97 | contents.into_async_read().read_to_end(&mut buffer).await?; 98 | let update: Update = serde_json::from_slice(buffer.as_ref())?; 99 | update 100 | } 101 | None => { 102 | warn!("no cluster state located at {:?}", config.key); 103 | return Err(Error::EmptyObjectError.into()); 104 | } 105 | }; 106 | 107 | for trans in config.transforms.unwrap_or_default().iter() { 108 | if let Some(new_update) = trans.transform(&update) { 109 | update = new_update; 110 | } 111 | } 112 | Ok(update) 113 | } 114 | 115 | async fn poll_file_source(config: PathDiscoverySource, path: String) -> anyhow::Result { 116 | let result = tokio::task::spawn_blocking(move || { 117 | let file = File::open(path)?; 118 | let reader = BufReader::new(file); 119 | let mut update: Update = serde_json::from_reader(reader)?; 120 | 121 | for trans in config.transforms.unwrap_or_default().iter() { 122 | if let Some(new_update) = trans.transform(&update) { 123 | update = new_update; 124 | } 125 | } 126 | Ok(update) 127 | }) 128 | .await?; 129 | result 130 | } 131 | 132 | /// A generic stream which takes a callable async function taking an 133 | /// update (or lack thereof), polling at the defined interval, emitting the 134 | /// output when changed as a stream. 135 | fn polled_stream(config: T, interval: u64, callable: C) -> impl Stream 136 | where 137 | T: Clone + Send + Sync, 138 | C: Fn(T) -> Pin> + Send>>, 139 | { 140 | let mut last_update = Update::default(); 141 | let duration = Duration::from_secs(interval as u64); 142 | let start = Instant::now().add(duration); 143 | stream! 
{ 144 | 145 | let mut ticker = tokio::time::interval_at(start, duration); 146 | loop { 147 | let new_update = match callable(config.clone()).await { 148 | Err(e) => { 149 | warn!("unable to fetch discovery source due to error {:?}", e); 150 | ticker.tick().await; 151 | continue; 152 | }, 153 | Ok(update) => update, 154 | }; 155 | if new_update != last_update { 156 | yield new_update.clone(); 157 | } 158 | last_update = new_update; 159 | ticker.tick().await; 160 | } 161 | } 162 | } 163 | 164 | pub fn as_stream(config: &Discovery) -> impl Stream { 165 | let mut streams: StreamMap + Send>>> = 166 | StreamMap::new(); 167 | 168 | for (name, source) in config.sources.iter() { 169 | match source { 170 | DiscoverySource::S3(source) => { 171 | let ns = Box::pin(polled_stream( 172 | source.clone(), 173 | source.interval as u64, 174 | move |s| Box::pin(poll_s3_source(s)), 175 | )); 176 | //let ns = Box::pin(s3_stream(source.clone())); 177 | streams.insert(name.clone(), ns); 178 | } 179 | DiscoverySource::StaticFile(source) => { 180 | let cs = source.clone(); 181 | let ns = Box::pin(polled_stream( 182 | source.path.clone(), 183 | source.interval as u64, 184 | move |s| Box::pin(poll_file_source(cs.clone(), s)), 185 | )); 186 | //let ns = Box::pin(static_file_stream(source.clone())); 187 | streams.insert(name.clone(), ns); 188 | } 189 | } 190 | } 191 | streams 192 | } 193 | 194 | #[derive(Clone)] 195 | pub struct Cache { 196 | cache: Arc>, 197 | } 198 | 199 | impl Cache { 200 | pub fn new() -> Self { 201 | Cache { 202 | cache: Arc::new(DashMap::new()), 203 | } 204 | } 205 | 206 | pub fn store(&self, event: &(String, Update)) { 207 | self.cache.insert(event.0.clone(), event.1.clone()); 208 | } 209 | 210 | pub fn get(&self, key: &str) -> Option { 211 | self.cache.get(key).map(|s| s.clone()) 212 | } 213 | } 214 | 215 | impl Default for Cache { 216 | fn default() -> Self { 217 | Cache::new() 218 | } 219 | } 220 | 221 | pub fn reflector(cache: Cache, stream: S) -> impl Stream 222 | where 223 | S: Stream, 224 | { 225 | stream.inspect(move |event| cache.store(event)) 226 | } 227 | 228 | #[cfg(test)] 229 | pub mod tests { 230 | use crate::config::DiscoveryTransform; 231 | 232 | use super::{Transformer, Update}; 233 | 234 | #[test] 235 | fn format() { 236 | let o1 = Update { 237 | hosts: vec!["a", "b"].iter().map(|s| (*s).into()).collect(), 238 | }; 239 | let transformer = DiscoveryTransform::Format { 240 | pattern: "{}hello".into(), 241 | }; 242 | let f = transformer.transform(&o1).unwrap(); 243 | assert_eq!(f.hosts[0], "ahello"); 244 | assert_eq!(f.hosts[1], "bhello"); 245 | 246 | let bad_transformer = DiscoveryTransform::Format { 247 | pattern: "foo".into(), 248 | }; 249 | 250 | assert!(bad_transformer.transform(&o1).is_none()); 251 | } 252 | 253 | #[test] 254 | fn repeat() { 255 | let o1 = Update { 256 | hosts: vec!["a", "b"].iter().map(|s| (*s).into()).collect(), 257 | }; 258 | let transformer = DiscoveryTransform::Repeat { count: 4 }; 259 | let f = transformer.transform(&o1).unwrap(); 260 | assert_eq!(f.hosts, vec!["a", "a", "a", "a", "b", "b", "b", "b"]); 261 | 262 | let bad_transformer = DiscoveryTransform::Repeat { count: 0 }; 263 | 264 | assert!(bad_transformer.transform(&o1).is_none()); 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /statsrelay/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod admin; 2 | pub mod backend; 3 | pub mod backend_client; 4 | pub mod backends; 5 | pub mod config; 6 
| pub mod cuckoofilter; 7 | pub mod discovery; 8 | pub mod processors; 9 | pub mod shard; 10 | pub mod stats; 11 | pub mod statsd_proto; 12 | pub mod statsd_server; 13 | pub mod built_info { 14 | // The file has been placed there by the build script. 15 | include!(concat!(env!("OUT_DIR"), "/built.rs")); 16 | } 17 | -------------------------------------------------------------------------------- /statsrelay/src/processors/cardinality.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryInto; 2 | use std::hash::{Hash, Hasher}; 3 | use std::time::{Duration, SystemTime}; 4 | 5 | use super::super::config; 6 | use super::super::statsd_proto::Event; 7 | use super::{Output, Processor}; 8 | use crate::stats::{Counter, Gauge, Scope}; 9 | use crate::{ 10 | backends::Backends, 11 | statsd_proto::{Owned, Parsed}, 12 | }; 13 | 14 | use crate::cuckoofilter::{self, CuckooFilter}; 15 | use ahash::AHasher; 16 | use parking_lot::Mutex; 17 | 18 | use log::warn; 19 | 20 | struct TimeBoundedCuckoo 21 | where 22 | H: Hasher + Default, 23 | { 24 | filter: CuckooFilter, 25 | valid_until: SystemTime, 26 | } 27 | 28 | impl TimeBoundedCuckoo 29 | where 30 | H: Hasher + Default, 31 | { 32 | fn new(valid_until: SystemTime) -> Self { 33 | TimeBoundedCuckoo { 34 | filter: CuckooFilter::with_capacity((1 << 22) - 1), 35 | valid_until, 36 | } 37 | } 38 | } 39 | 40 | struct MultiCuckoo 41 | where 42 | H: Hasher + Default, 43 | { 44 | buckets: usize, 45 | window: Duration, 46 | filters: Vec>, 47 | } 48 | 49 | impl MultiCuckoo 50 | where 51 | H: Hasher + Default, 52 | { 53 | fn new(buckets: usize, window: &Duration) -> Self { 54 | assert!(buckets > 0); 55 | let now = SystemTime::now(); 56 | let cuckoos: Vec<_> = (1..(buckets + 1)) 57 | .map(|bucket| TimeBoundedCuckoo::new(now + (*window * bucket as u32))) 58 | .collect(); 59 | MultiCuckoo { 60 | buckets, 61 | window: *window, 62 | filters: cuckoos, 63 | } 64 | } 65 | 66 | fn len(&self) -> usize { 67 | self.filters[0].filter.len() 68 | } 69 | 70 | fn contains(&self, data: &T) -> bool { 71 | self.filters[0].filter.contains(data) 72 | } 73 | 74 | fn add(&mut self, data: &T) -> Result<(), cuckoofilter::CuckooError> { 75 | let results: Result, _> = self 76 | .filters 77 | .iter_mut() 78 | .map(|filter| filter.filter.test_and_add(data)) 79 | .collect(); 80 | results.map(|_| ()) 81 | } 82 | 83 | fn rotate(&mut self, with_time: SystemTime) { 84 | if self.filters[0] 85 | .valid_until 86 | .duration_since(with_time) 87 | .is_err() 88 | { 89 | // duration_since returns err if the given is later then the valid_until time, aka expired 90 | self.filters.remove(0); 91 | self.filters.push(TimeBoundedCuckoo::new( 92 | with_time + (self.window * (self.buckets + 1) as u32), 93 | )); 94 | } 95 | } 96 | } 97 | 98 | pub struct Cardinality { 99 | route: Vec, 100 | filter: Mutex>, 101 | limit: usize, 102 | counter_flagged_metrics: Counter, 103 | gauge_metric_hwm: Gauge, 104 | } 105 | 106 | impl Cardinality { 107 | pub fn new(scope: Scope, from_config: &config::processor::Cardinality) -> Self { 108 | let window = Duration::from_secs(from_config.rotate_after_seconds); 109 | // Record a limit gauge for visibility 110 | let limit_gauge = scope.gauge("limit").unwrap(); 111 | limit_gauge.set(from_config.size_limit as f64); 112 | Cardinality { 113 | route: from_config.route.clone(), 114 | filter: Mutex::new(MultiCuckoo::new(from_config.buckets, &window)), 115 | limit: from_config.size_limit as usize, 116 | counter_flagged_metrics: 
scope.counter("flagged_metrics").unwrap(), 117 | gauge_metric_hwm: scope.gauge("count_hwm").unwrap(), 118 | } 119 | } 120 | 121 | fn rotate(&self) { 122 | self.filter.lock().rotate(SystemTime::now()) 123 | } 124 | } 125 | 126 | impl Processor for Cardinality { 127 | fn provide_statsd(&self, sample: &Event) -> Option { 128 | let mut filter = self.filter.lock(); 129 | let contains = filter.contains(sample); 130 | let len = filter.len(); 131 | self.gauge_metric_hwm.set(len as f64); 132 | 133 | if !contains && len > self.limit { 134 | if (self.counter_flagged_metrics.get() as u64) % 1000 == 0 { 135 | // Enforce parsing of the metric to give a clean debug log 136 | let owned: Owned = sample.try_into().ok()?; 137 | warn!("metric flagged for cardinality limits: {}", owned.id()); 138 | } 139 | self.counter_flagged_metrics.inc(); 140 | return None; 141 | } 142 | let _ = filter.add(sample); 143 | Some(Output { 144 | route: self.route.as_ref(), 145 | new_events: None, 146 | }) 147 | } 148 | 149 | fn tick(&self, _time: std::time::SystemTime, _backends: &Backends) { 150 | self.rotate(); 151 | } 152 | } 153 | 154 | #[cfg(test)] 155 | pub mod test { 156 | use std::vec; 157 | 158 | use crate::statsd_proto::{Id, Owned, Type}; 159 | 160 | use super::*; 161 | 162 | #[test] 163 | fn cuckoo_simple_contains() { 164 | let a = "a".to_string(); 165 | let b = "b".to_string(); 166 | 167 | let mut mc: MultiCuckoo = MultiCuckoo::new(2, &Duration::from_secs(60)); 168 | 169 | mc.add(&a).unwrap(); 170 | assert!(!mc.contains(&b)); 171 | assert!(mc.contains(&a)); 172 | mc.add(&b).unwrap(); 173 | assert!(mc.contains(&b)); 174 | } 175 | 176 | #[test] 177 | fn cuckoo_simple_rotate() { 178 | let a = "a".to_string(); 179 | let b = "b".to_string(); 180 | 181 | let now = SystemTime::now(); 182 | let mut mc: MultiCuckoo = MultiCuckoo::new(2, &Duration::from_secs(60)); 183 | 184 | mc.add(&a).unwrap(); 185 | assert!(!mc.contains(&b)); 186 | assert!(mc.contains(&a)); 187 | mc.add(&b).unwrap(); 188 | assert!(mc.contains(&b)); 189 | // Rotate once, add only a 190 | mc.rotate(now + Duration::from_secs(61)); 191 | assert!(mc.contains(&a)); 192 | assert!(mc.contains(&b)); 193 | assert!(mc.len() == 2); 194 | mc.add(&a).unwrap(); 195 | // Rotate again, b should drop out 196 | mc.rotate(now + Duration::from_secs(122)); 197 | assert!(mc.contains(&a)); 198 | assert!(!mc.contains(&b)); 199 | assert!(mc.len() == 1); 200 | } 201 | 202 | #[test] 203 | fn test_cardinality_limit() { 204 | let names: Vec = (0..400) 205 | .map(|val| { 206 | let id = Id { 207 | name: format!("metric.{}", val as u32).as_bytes().to_vec(), 208 | mtype: Type::Counter, 209 | tags: vec![], 210 | }; 211 | Event::Parsed(Owned::new(id, 1.0, None)) 212 | }) 213 | .collect(); 214 | 215 | let config = config::processor::Cardinality { 216 | size_limit: 100_usize, 217 | rotate_after_seconds: 10, 218 | buckets: 2, 219 | route: vec![], 220 | }; 221 | let scope = crate::stats::Collector::default().scope("test"); 222 | let filter = Cardinality::new(scope, &config); 223 | for name in &names[0..101] { 224 | assert!(filter.provide_statsd(name).is_some()); 225 | } 226 | let len = filter.filter.lock().len(); 227 | assert!(len == 101, "length isn't as expected {}", len); 228 | for name in &names[101..] 
{ 229 | assert!( 230 | filter.provide_statsd(name).is_none(), 231 | "sample {:?} was allowed", 232 | name 233 | ); 234 | } 235 | assert!( 236 | filter.gauge_metric_hwm.get() == 101_f64, 237 | "metric high water mark was set, hwm {}", 238 | filter.gauge_metric_hwm.get() 239 | ); 240 | assert!( 241 | filter.counter_flagged_metrics.get() > 298_f64, 242 | "flagged metric counter was increased, count {}", 243 | filter.counter_flagged_metrics.get() 244 | ); 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /statsrelay/src/processors/mod.rs: -------------------------------------------------------------------------------- 1 | use super::backends::Backends; 2 | use crate::config; 3 | use crate::statsd_proto::Event; 4 | use smallvec::SmallVec; 5 | 6 | pub mod cardinality; 7 | pub mod regex_filter; 8 | pub mod sampler; 9 | pub mod tag; 10 | 11 | pub struct Output<'a> { 12 | /// Lists of new events returned if the processor has modified the 13 | /// sample in any way. If this is none but a route is set, downstream 14 | /// processors will be called with the original reference to the Sample 15 | pub new_events: Option>, 16 | pub route: &'a [config::Route], 17 | } 18 | pub trait Processor { 19 | /// Tick is designed for processors to do any internal housekeeping. A copy 20 | /// of the called time is provided for mocking, and a reference to the 21 | /// Backends structure is provided to re-inject messages into processor 22 | /// framework if desired. 23 | fn tick(&self, _time: std::time::SystemTime, _backends: &Backends) {} 24 | 25 | /// Provides a signal that the processor should cease operating, 26 | /// specifically designed to allow processors which do buffering to flush 27 | /// internal state out to backends. The Drop trait should be used to handle 28 | /// actual cleanup of resources. 
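// Illustrative sketch of the trait contract (the `PassThrough` type is
// hypothetical, not part of this crate): a processor that forwards every event
// unchanged would leave `new_events` as `None`, so downstream processors keep
// working with the original sample reference, as described on `Output` above.
//
//     struct PassThrough {
//         route: Vec<config::Route>,
//     }
//
//     impl Processor for PassThrough {
//         fn provide_statsd(&self, _sample: &Event) -> Option<Output> {
//             Some(Output {
//                 new_events: None,
//                 route: self.route.as_ref(),
//             })
//         }
//     }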
29 | fn flush(&self, _backends: &Backends) {} 30 | 31 | fn provide_statsd(&self, sample: &Event) -> Option; 32 | } 33 | -------------------------------------------------------------------------------- /statsrelay/src/processors/regex_filter.rs: -------------------------------------------------------------------------------- 1 | use regex::RegexSet; 2 | 3 | use super::{Output, Processor}; 4 | use crate::stats; 5 | use crate::{config::processor, statsd_proto::Event}; 6 | use crate::{config::Route, statsd_proto::Parsed}; 7 | 8 | pub struct RegexFilter { 9 | allow: Option, 10 | remove: Option, 11 | route: Vec, 12 | 13 | counter_remove: stats::Counter, 14 | } 15 | 16 | impl RegexFilter { 17 | pub fn new( 18 | scope: stats::Scope, 19 | from_config: &processor::RegexFilter, 20 | ) -> Result { 21 | let allow = from_config.allow.as_ref().map(RegexSet::new).transpose()?; 22 | let remove = from_config.remove.as_ref().map(RegexSet::new).transpose()?; 23 | Ok(RegexFilter { 24 | allow, 25 | remove, 26 | route: from_config.route.clone(), 27 | counter_remove: scope.counter("removed").unwrap(), 28 | }) 29 | } 30 | } 31 | 32 | impl Processor for RegexFilter { 33 | fn provide_statsd(&self, event: &Event) -> Option { 34 | let name = std::str::from_utf8(match event { 35 | Event::Parsed(parsed) => parsed.id().name.as_ref(), 36 | Event::Pdu(pdu) => pdu.name(), 37 | }) 38 | .ok()?; 39 | if let Some(allow) = &self.allow { 40 | if !allow.is_match(name) { 41 | self.counter_remove.inc(); 42 | return None; 43 | } 44 | } 45 | if let Some(remove) = &self.remove { 46 | if remove.is_match(name) { 47 | self.counter_remove.inc(); 48 | return None; 49 | } 50 | } 51 | Some(Output { 52 | new_events: None, 53 | route: self.route.as_ref(), 54 | }) 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | pub mod test { 60 | 61 | use super::*; 62 | 63 | #[test] 64 | fn build_filter() { 65 | let c = processor::RegexFilter { 66 | route: vec![], 67 | remove: Some(vec![r"^hello.*".to_owned(), r"^goodbye.*".to_owned()]), 68 | allow: None, 69 | }; 70 | let sink = stats::Collector::default(); 71 | let scope = sink.scope("prefix"); 72 | let filter = RegexFilter::new(scope, &c).unwrap(); 73 | 74 | let event1 = Event::Pdu( 75 | crate::statsd_proto::Pdu::parse(bytes::Bytes::from_static(b"hello.world:c|1")).unwrap(), 76 | ); 77 | let event2 = Event::Pdu( 78 | crate::statsd_proto::Pdu::parse(bytes::Bytes::from_static(b"goodbye.world:c|1")) 79 | .unwrap(), 80 | ); 81 | let event3 = Event::Pdu( 82 | crate::statsd_proto::Pdu::parse(bytes::Bytes::from_static(b"pineapples:c|1")).unwrap(), 83 | ); 84 | 85 | assert!(filter.provide_statsd(&event1).is_none(), "should remove"); 86 | assert!(filter.provide_statsd(&event2).is_none(), "should remove"); 87 | assert!( 88 | filter.provide_statsd(&event3).is_some(), 89 | "should not remove" 90 | ); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /statsrelay/src/processors/sampler.rs: -------------------------------------------------------------------------------- 1 | use super::Output; 2 | use crate::backends::Backends; 3 | use crate::processors; 4 | use crate::statsd_proto::Id; 5 | use crate::statsd_proto::{Event, Owned, Type}; 6 | use crate::{config, statsd_proto::Parsed}; 7 | 8 | use ahash::RandomState; 9 | use parking_lot::Mutex; 10 | use std::cell::RefCell; 11 | use std::time::SystemTime; 12 | use thiserror::Error; 13 | 14 | use std::collections::HashMap; 15 | use std::convert::TryInto; 16 | 17 | const DEFAULT_RESERVOIR: u32 = 100; 18 | 19 | fn scale(value: f64, 
sample_rate: Option) -> (f64, f64) { 20 | match sample_rate { 21 | None => (value, 1_f64), 22 | Some(rate) => { 23 | let scale = 1_f64 / rate; 24 | if scale > 0_f64 && scale <= 1_f64 { 25 | (value * scale, scale) 26 | } else { 27 | (value, 1_f64) 28 | } 29 | } 30 | } 31 | } 32 | 33 | #[derive(Error, Debug)] 34 | pub enum Error { 35 | #[error("invalid sampler configuration")] 36 | InvalidConfig, 37 | } 38 | 39 | #[derive(Debug, Default)] 40 | struct Counter { 41 | value: f64, 42 | samples: f64, 43 | } 44 | 45 | impl Counter { 46 | fn to_event(&self, id: &Id) -> Event { 47 | let value = self.value / self.samples; 48 | let sample_rate = 1_f64 / self.samples; 49 | Event::Parsed(Owned::new(id.clone(), value, Some(sample_rate))) 50 | } 51 | } 52 | 53 | #[derive(Debug)] 54 | struct Timer { 55 | values: Vec, 56 | filled_count: u32, 57 | reservoir_size: u32, 58 | count: f64, 59 | sum: f64, 60 | } 61 | 62 | impl Timer { 63 | fn new(reservoir_size: u32) -> Self { 64 | Timer { 65 | values: Vec::with_capacity(reservoir_size as usize), 66 | filled_count: 0, 67 | reservoir_size, 68 | count: 0_f64, 69 | sum: 0_f64, 70 | } 71 | } 72 | 73 | fn add(&mut self, value: f64, sample_rate: Option) { 74 | // Do an initial fill if we haven't filled the full reservoir 75 | if self.values.len() < self.reservoir_size as usize { 76 | self.values.push(value); 77 | } else { 78 | match fastrand::u32(..) % self.filled_count { 79 | idx if idx < self.reservoir_size => self.values[idx as usize] = value, 80 | _ => (), 81 | } 82 | } 83 | let (sum, count) = scale(value, sample_rate); 84 | // Keep track of a sample rate scaled count independently from the 85 | // reservoir sample fill 86 | self.count += count; 87 | self.sum += sum; 88 | self.filled_count += 1; 89 | } 90 | } 91 | 92 | #[derive(Debug, Default)] 93 | struct Gauge { 94 | value: f64, 95 | } 96 | 97 | impl Gauge { 98 | fn to_event(&self, id: &Id) -> Event { 99 | Event::Parsed(Owned::new(id.clone(), self.value, None)) 100 | } 101 | } 102 | 103 | #[derive(Debug)] 104 | pub struct Sampler { 105 | config: config::processor::Sampler, 106 | counters: Mutex>>, 107 | timers: Mutex>>, 108 | gauges: Mutex>>, 109 | 110 | last_flush: Mutex>, 111 | 112 | route_to: Vec, 113 | } 114 | 115 | impl Sampler { 116 | pub fn new(config: &config::processor::Sampler) -> Result { 117 | let counters: RefCell> = RefCell::new(HashMap::default()); 118 | let timers: RefCell> = RefCell::new(HashMap::default()); 119 | let gauges: RefCell> = RefCell::new(HashMap::default()); 120 | Ok(Sampler { 121 | config: config.clone(), 122 | counters: Mutex::new(counters), 123 | timers: Mutex::new(timers), 124 | gauges: Mutex::new(gauges), 125 | route_to: config.route.clone(), 126 | last_flush: Mutex::new(RefCell::new(std::time::SystemTime::now())), 127 | }) 128 | } 129 | 130 | fn record_timer(&self, owned: &Owned) { 131 | let lock = self.timers.lock(); 132 | let mut hm = lock.borrow_mut(); 133 | 134 | match hm.get_mut(owned.id()) { 135 | Some(v) => { 136 | v.add(owned.value(), owned.sample_rate()); 137 | } 138 | None => { 139 | let mut timer = Timer::new( 140 | self.config 141 | .timer_reservoir_size 142 | .unwrap_or(DEFAULT_RESERVOIR), 143 | ); 144 | timer.add(owned.value(), owned.sample_rate()); 145 | hm.insert(owned.id().clone(), timer); 146 | } 147 | } 148 | } 149 | 150 | fn record_gauge(&self, owned: &Owned) { 151 | let lock = self.gauges.lock(); 152 | let mut hm = lock.borrow_mut(); 153 | // Note: Using the entry API would make logical sense to avoid 154 | // re-hashing the same Id on insert, however it 
costs more to 155 | // clone the Id as the entry API does not allow for trait Clone 156 | // key references and supporting lazy-cloning. 157 | match hm.get_mut(owned.id()) { 158 | Some(v) => v.value = owned.value(), 159 | None => { 160 | hm.insert( 161 | owned.id().clone(), 162 | Gauge { 163 | value: owned.value(), 164 | }, 165 | ); 166 | } 167 | }; 168 | } 169 | 170 | fn record_counter(&self, owned: &Owned) { 171 | // Adjust values based on sample rate. In the end, emission will 172 | // re-scale everything back to the sample rate. 173 | let (scaled, counts) = scale(owned.value(), owned.sample_rate()); 174 | 175 | let lock = self.counters.lock(); 176 | let mut hm = lock.borrow_mut(); 177 | 178 | match hm.get_mut(owned.id()) { 179 | Some(v) => { 180 | v.value += scaled; 181 | v.samples += counts; 182 | } 183 | None => { 184 | hm.insert( 185 | owned.id().clone(), 186 | Counter { 187 | value: scaled, 188 | samples: counts, 189 | }, 190 | ); 191 | } 192 | } 193 | } 194 | 195 | fn handle_flush(&self, backends: &Backends) { 196 | let mut gauges = self.gauges.lock().replace(HashMap::default()); 197 | for (id, gauge) in gauges.drain() { 198 | let pdu = gauge.to_event(&id); 199 | backends.provide_statsd(&pdu, self.route_to.as_ref()) 200 | } 201 | 202 | let mut counters = self.counters.lock().replace(HashMap::default()); 203 | for (id, counter) in counters.drain() { 204 | let pdu = counter.to_event(&id); 205 | backends.provide_statsd(&pdu, self.route_to.as_ref()); 206 | } 207 | 208 | let mut timers = self.timers.lock().replace(HashMap::default()); 209 | for (id, timer) in timers.drain() { 210 | let sample_rate = timer.values.len() as f64 / timer.count; 211 | for value in timer.values { 212 | let pdu = Event::Parsed(Owned::new(id.clone(), value, Some(sample_rate))); 213 | backends.provide_statsd(&pdu, self.route_to.as_ref()); 214 | } 215 | } 216 | } 217 | 218 | fn check_tick_passed(&self, earlier: SystemTime, time: SystemTime) -> bool { 219 | match time.duration_since(earlier) { 220 | Err(_) => false, 221 | Ok(duration) if duration.as_secs() < self.config.window as u64 => false, 222 | Ok(_) => true, 223 | } 224 | } 225 | } 226 | 227 | impl processors::Processor for Sampler { 228 | fn provide_statsd(&self, sample: &Event) -> Option { 229 | let owned: Result = sample.try_into(); 230 | match owned { 231 | Err(_) => None, 232 | Ok(owned) if owned.metric_type() == &Type::Timer => { 233 | self.record_timer(&owned); 234 | None 235 | } 236 | Ok(owned) if owned.metric_type() == &Type::Counter => { 237 | self.record_counter(&owned); 238 | None 239 | } 240 | Ok(owned) if owned.metric_type() == &Type::Gauge => { 241 | self.record_gauge(&owned); 242 | None 243 | } 244 | Ok(_) => Some(Output { 245 | route: &self.route_to, 246 | new_events: None, 247 | }), 248 | } 249 | } 250 | 251 | fn tick(&self, time: std::time::SystemTime, backends: &Backends) { 252 | // Take a lock on the last flush, which guards all other flushes. 
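// tick() only reaches handle_flush() once the configured window has elapsed
// (see check_tick_passed above); flush() below holds the same lock but skips
// the window check, so buffered counters, gauges and timer reservoirs are
// drained immediately when the processor is asked to shut down.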
253 | let flush_lock = self.last_flush.lock(); 254 | let earlier = *flush_lock.borrow(); 255 | if !self.check_tick_passed(earlier, time) { 256 | return; 257 | }; 258 | self.handle_flush(backends); 259 | flush_lock.replace(time); 260 | } 261 | 262 | fn flush(&self, backends: &Backends) { 263 | let _flush_lock = self.last_flush.lock(); 264 | self.handle_flush(backends); 265 | } 266 | } 267 | 268 | #[cfg(test)] 269 | pub mod test { 270 | use super::*; 271 | 272 | #[test] 273 | fn fill_timer() { 274 | let mut timer = Timer::new(100); 275 | for x in 0..200 { 276 | timer.add(x as f64, None); 277 | } 278 | assert_eq!(timer.filled_count, 200); 279 | assert_eq!(timer.count, 200_f64); 280 | assert_eq!(timer.sum, 19900_f64); 281 | assert_eq!(timer.values.len(), 100); 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /statsrelay/src/processors/tag.rs: -------------------------------------------------------------------------------- 1 | use crate::config; 2 | use crate::processors; 3 | use crate::statsd_proto; 4 | use crate::statsd_proto::Event; 5 | use std::convert::TryInto; 6 | 7 | use smallvec::smallvec; 8 | 9 | pub struct Normalizer { 10 | route: Vec, 11 | } 12 | 13 | impl Normalizer { 14 | pub fn new(route: &[config::Route]) -> Self { 15 | Normalizer { 16 | route: route.to_vec(), 17 | } 18 | } 19 | } 20 | 21 | impl processors::Processor for Normalizer { 22 | fn provide_statsd(&self, sample: &Event) -> Option { 23 | let owned: Result = sample.try_into(); 24 | owned 25 | .map(|inp| { 26 | let out = statsd_proto::convert::to_inline_tags(inp); 27 | processors::Output { 28 | new_events: Some(smallvec![Event::Parsed(out)]), 29 | route: self.route.as_ref(), 30 | } 31 | }) 32 | .ok() 33 | } 34 | } 35 | 36 | #[cfg(test)] 37 | pub mod test { 38 | use processors::Processor; 39 | use statsd_proto::Parsed; 40 | 41 | use super::*; 42 | 43 | #[test] 44 | fn make_normalizer() { 45 | let route = vec![config::Route { 46 | route_type: config::RouteType::Processor, 47 | route_to: "null".to_string(), 48 | }]; 49 | 50 | let tn = Normalizer::new(&route); 51 | let pdu = 52 | statsd_proto::Pdu::parse(bytes::Bytes::from_static(b"foo.bar:3|c|#tags:value|@1.0")) 53 | .unwrap(); 54 | let sample = Event::Pdu(pdu); 55 | let result = tn.provide_statsd(&sample).unwrap(); 56 | 57 | let first_sample = &result.new_events.as_ref().unwrap()[0]; 58 | let owned: statsd_proto::Owned = first_sample.try_into().unwrap(); 59 | assert_eq!(owned.name(), b"foo.bar.__tags=value"); 60 | assert_eq!(route, result.route); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /statsrelay/src/shard.rs: -------------------------------------------------------------------------------- 1 | use std::io::Cursor; 2 | 3 | use crate::statsd_proto::Pdu; 4 | 5 | // HASHLIB_SEED same as the legacy statsrelay code base 6 | const HASHLIB_SEED: u32 = 0xaccd3d34; 7 | 8 | pub fn statsrelay_compat_hash(pdu: &Pdu) -> u32 { 9 | murmur3::murmur3_32(&mut Cursor::new(pdu.name()), HASHLIB_SEED).unwrap_or(0) 10 | } 11 | 12 | pub struct Ring { 13 | members: Vec, 14 | } 15 | 16 | impl Ring { 17 | pub fn new() -> Self { 18 | Ring { 19 | members: Vec::new(), 20 | } 21 | } 22 | 23 | pub fn push(&mut self, c: C) { 24 | self.members.push(c); 25 | } 26 | 27 | pub fn len(&self) -> usize { 28 | self.members.len() 29 | } 30 | 31 | pub fn is_empty(&self) -> bool { 32 | self.len() == 0 33 | } 34 | 35 | pub fn pick_from(&self, code: u32) -> &C { 36 | let l = self.members.len(); 37 | 
self.members.get(code as usize % l).unwrap() 38 | } 39 | 40 | pub fn act_on(&mut self, code: u32, mut f: F) 41 | where 42 | F: FnMut(&mut C), 43 | { 44 | let len = self.members.len(); 45 | let c = &mut self.members[code as usize % len]; 46 | f(c); 47 | } 48 | 49 | pub fn swap(&mut self, other: Ring) { 50 | self.members = other.members; 51 | } 52 | } 53 | 54 | impl Default for Ring { 55 | fn default() -> Self { 56 | Ring::new() 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | pub mod test { 62 | use super::*; 63 | use bytes::Bytes; 64 | 65 | #[test] 66 | fn test_swap() { 67 | let mut ring = Ring::new(); 68 | ring.push(0); 69 | ring.push(1); 70 | assert_eq!(ring.len(), 2); 71 | let mut ring2 = Ring::new(); 72 | ring2.push(2); 73 | ring2.push(3); 74 | ring2.push(4); 75 | assert_eq!(ring2.len(), 3); 76 | ring.swap(ring2); 77 | assert_eq!(ring.len(), 3); 78 | } 79 | #[test] 80 | fn test_hash() { 81 | let mut ring = Ring::new(); 82 | ring.push(0); 83 | ring.push(1); 84 | ring.push(2); 85 | ring.push(3); 86 | 87 | assert_eq!( 88 | *ring.pick_from(statsrelay_compat_hash( 89 | &Pdu::parse(Bytes::copy_from_slice(b"apple:1|c")).unwrap() 90 | )), 91 | 2 92 | ); 93 | assert_eq!( 94 | *ring.pick_from(statsrelay_compat_hash( 95 | &Pdu::parse(Bytes::copy_from_slice(b"banana:1|c")).unwrap() 96 | )), 97 | 3 98 | ); 99 | assert_eq!( 100 | *ring.pick_from(statsrelay_compat_hash( 101 | &Pdu::parse(Bytes::copy_from_slice(b"orange:1|c")).unwrap() 102 | )), 103 | 0 104 | ); 105 | assert_eq!( 106 | *ring.pick_from(statsrelay_compat_hash( 107 | &Pdu::parse(Bytes::copy_from_slice(b"lemon:1|c")).unwrap() 108 | )), 109 | 1 110 | ); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /statsrelay/src/stats.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use dashmap::DashMap; 4 | use prometheus::{Encoder, Registry, TextEncoder}; 5 | 6 | pub const SEP: &str = ":"; 7 | /// A wrapped stats implementation, to allow multiple backends to be used 8 | /// instead of just prometheus, when required. Right now this implementation is 9 | /// extremely simple and only works with prometheus exporting, and will require 10 | /// some revisions to improve. 11 | /// 12 | /// All types are clone-able - for the Collector and all built metric types, 13 | /// they will continue to refer to the same set of names and values and do not 14 | /// create new values. Scopes can be cloned, but do not share lineage and allow 15 | /// sub-scopes to be made independently. Building a reference to the same 16 | /// counter name will return the same underlying counter atomic. 17 | 18 | #[derive(Clone, Debug)] 19 | pub struct Collector { 20 | // Registry is an Arc<> locked type and therefor is freely cloneable 21 | registry: Registry, 22 | counters: Arc>, 23 | gauges: Arc>, 24 | } 25 | 26 | impl Default for Collector { 27 | fn default() -> Self { 28 | Collector { 29 | registry: Registry::new(), 30 | counters: Arc::new(DashMap::new()), 31 | gauges: Arc::new(DashMap::new()), 32 | } 33 | } 34 | } 35 | 36 | impl Collector { 37 | pub fn scope(&self, prefix: &str) -> Scope { 38 | Scope { 39 | collector: self.clone(), 40 | scope: String::from(prefix), 41 | } 42 | } 43 | 44 | /// Generate and return a byte buffer containing a Prometheus formatted text 45 | /// output of the current contents of this collector. 
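///
/// A minimal usage sketch (illustrative; it relies only on the API defined in
/// this file):
///
/// ```
/// use statsrelay::stats::Collector;
///
/// let collector = Collector::default();
/// let requests = collector.scope("server").counter("requests").unwrap();
/// requests.inc();
/// let buffer = collector.prometheus_output().unwrap();
/// assert!(!buffer.is_empty());
/// ```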
46 | pub fn prometheus_output(&self) -> anyhow::Result> { 47 | let output = self.registry.gather(); 48 | let encoder = TextEncoder::new(); 49 | let mut buffer = vec![]; 50 | 51 | encoder.encode(&output, &mut buffer)?; 52 | Ok(buffer) 53 | } 54 | 55 | /// Attempt to register a new counter. If the counter already exists, it 56 | /// will return the previously registered counter instead of the one passed 57 | /// in. 58 | fn register_counter(&self, c: Counter) -> anyhow::Result { 59 | let counter = match self.counters.get(&c.name) { 60 | Some(counter) => counter.clone(), 61 | None => { 62 | self.registry.register(Box::new(c.clone().counter))?; 63 | self.counters.insert(c.name.clone(), c.clone()); 64 | c 65 | } 66 | }; 67 | 68 | Ok(counter) 69 | } 70 | 71 | fn register_gauge(&self, g: Gauge) -> anyhow::Result { 72 | let gauge = match self.gauges.get(&g.name) { 73 | Some(gauge) => gauge.clone(), 74 | None => { 75 | self.registry.register(Box::new(g.clone().gauge))?; 76 | self.gauges.insert(g.name.clone(), g.clone()); 77 | g 78 | } 79 | }; 80 | Ok(gauge) 81 | } 82 | } 83 | 84 | #[derive(Clone, Debug)] 85 | pub struct Scope { 86 | collector: Collector, 87 | scope: String, 88 | } 89 | 90 | impl Scope { 91 | pub fn scope(&self, extend: &str) -> Scope { 92 | Scope { 93 | scope: format!("{}{}{}", self.scope, SEP, extend), 94 | collector: self.collector.clone(), 95 | } 96 | } 97 | 98 | /// Create a new counter with the given scope, or return an existing 99 | /// underlying counter 100 | pub fn counter(&self, name: &str) -> anyhow::Result { 101 | let name = format!("{}{}{}", self.scope, SEP, name); 102 | let counter = Counter::new(name)?; 103 | self.collector.register_counter(counter) 104 | } 105 | 106 | /// Create a new gauge with the given scope, or return the existing gauge 107 | /// with the same name 108 | pub fn gauge(&self, name: &str) -> anyhow::Result { 109 | let name = format!("{}{}{}", self.scope, SEP, name); 110 | let gauge = Gauge::new(name.as_str())?; 111 | self.collector.register_gauge(gauge) 112 | } 113 | } 114 | 115 | #[derive(Clone, Debug)] 116 | pub struct Gauge { 117 | name: String, 118 | gauge: prometheus::Gauge, 119 | } 120 | 121 | impl Gauge { 122 | fn new(name: &str) -> anyhow::Result { 123 | let pg = prometheus::Gauge::new(name.to_owned(), "a gauge")?; 124 | Ok(Self { 125 | name: name.to_owned(), 126 | gauge: pg, 127 | }) 128 | } 129 | 130 | pub fn set(&self, value: f64) { 131 | self.gauge.set(value) 132 | } 133 | 134 | pub fn get(&self) -> f64 { 135 | self.gauge.get() 136 | } 137 | } 138 | 139 | #[derive(Clone, Debug)] 140 | pub struct Counter { 141 | name: String, 142 | counter: prometheus::Counter, 143 | } 144 | 145 | impl Counter { 146 | fn new(name: String) -> anyhow::Result { 147 | let pcounter = prometheus::Counter::new(name.clone(), "a counter")?; 148 | Ok(Self { 149 | name, 150 | counter: pcounter, 151 | }) 152 | } 153 | 154 | /// Increment a counter 155 | pub fn inc(&self) { 156 | self.counter.inc(); 157 | } 158 | 159 | pub fn inc_by(&self, value: f64) { 160 | self.counter.inc_by(value); 161 | } 162 | 163 | /// Return the current counter value 164 | pub fn get(&self) -> f64 { 165 | self.counter.get() 166 | } 167 | } 168 | 169 | #[cfg(test)] 170 | pub mod test { 171 | use super::*; 172 | 173 | #[test] 174 | pub fn test_counter() { 175 | let collector = Collector::default(); 176 | let scope = collector.scope("prefix"); 177 | let ctr1 = scope.counter("counter").unwrap(); 178 | ctr1.inc(); 179 | let ctr2 = scope.counter("counter").unwrap(); 180 | // Ensure we have the 
same counter object 181 | assert_eq!(ctr2.get(), 1_f64); 182 | ctr2.inc(); 183 | assert_eq!(ctr1.get(), 2_f64); 184 | } 185 | 186 | #[test] 187 | pub fn test_gauge() { 188 | let collector = Collector::default(); 189 | let scope = collector.scope("prefix"); 190 | let ctr1 = scope.gauge("gauge").unwrap(); 191 | ctr1.set(12_f64); 192 | let ctr2 = scope.gauge("gauge").unwrap(); 193 | // Ensure we have the same gauge object 194 | assert_eq!(ctr2.get(), 12_f64); 195 | ctr2.set(13_f64); 196 | assert_eq!(ctr1.get(), 13_f64); 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /statsrelay/src/statsd_server.rs: -------------------------------------------------------------------------------- 1 | use bytes::{BufMut, BytesMut}; 2 | use memchr::memchr; 3 | use stream_cancel::Tripwire; 4 | use tokio::io::{AsyncRead, AsyncWrite}; 5 | use tokio::io::{AsyncReadExt, AsyncWriteExt}; 6 | use tokio::net::unix; 7 | use tokio::net::{TcpListener, UnixListener, UnixStream}; 8 | use tokio::select; 9 | use tokio::time::timeout; 10 | 11 | use std::io::ErrorKind; 12 | use std::net::UdpSocket; 13 | use std::sync::atomic::AtomicBool; 14 | use std::sync::atomic::Ordering::Relaxed; 15 | use std::sync::Arc; 16 | use std::time::Duration; 17 | 18 | use log::{debug, info, warn}; 19 | 20 | use crate::backends::Backends; 21 | use crate::config; 22 | use crate::config::StatsdServerConfig; 23 | use crate::stats; 24 | use crate::statsd_proto::{Event, Pdu}; 25 | 26 | const TCP_READ_TIMEOUT: Duration = Duration::from_secs(62); 27 | const READ_BUFFER_SIZE: usize = 8192; 28 | 29 | struct UdpServer { 30 | shutdown_gate: Arc, 31 | } 32 | 33 | impl Drop for UdpServer { 34 | fn drop(&mut self) { 35 | self.shutdown_gate.store(true, Relaxed); 36 | } 37 | } 38 | 39 | impl UdpServer { 40 | fn new() -> Self { 41 | UdpServer { 42 | shutdown_gate: Arc::new(AtomicBool::new(false)), 43 | } 44 | } 45 | 46 | fn udp_worker( 47 | &mut self, 48 | stats: stats::Scope, 49 | bind: String, 50 | backends: Backends, 51 | route: Vec, 52 | ) -> std::thread::JoinHandle<()> { 53 | let socket = UdpSocket::bind(bind.as_str()).unwrap(); 54 | 55 | let processed_lines = stats.counter("processed_lines").unwrap(); 56 | let incoming_bytes = stats.counter("incoming_bytes").unwrap(); 57 | // We set a small timeout to allow aborting the UDP server if there is no 58 | // incoming traffic. 
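// Because the shutdown gate is re-checked at the top of every loop iteration
// below, a shutdown signalled by UdpServer::drop is observed within roughly a
// second even when no packets arrive; the WouldBlock errors produced by the
// timed-out recv_from are deliberately ignored.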
59 | socket 60 | .set_read_timeout(Some(Duration::from_secs(1))) 61 | .unwrap(); 62 | info!("statsd udp server running on {}", bind); 63 | let gate = self.shutdown_gate.clone(); 64 | std::thread::spawn(move || { 65 | info!("started udp reader thread"); 66 | let mut buf = BytesMut::with_capacity(65535); 67 | loop { 68 | if gate.load(Relaxed) { 69 | break; 70 | } 71 | buf.resize(65535, 0_u8); 72 | match socket.recv_from(buf.as_mut()) { 73 | Ok((size, _remote)) => { 74 | buf.truncate(size); 75 | incoming_bytes.inc_by(size as f64); 76 | let r = process_buffer_newlines(&mut buf); 77 | processed_lines.inc_by(r.len() as f64); 78 | backends.provide_statsd_slice(&r, &route); 79 | 80 | if let Ok(p) = Pdu::parse(buf.clone().freeze()) { 81 | backends.provide_statsd(&Event::Pdu(p), &route); 82 | } 83 | } 84 | Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => (), 85 | Err(e) => warn!("udp receiver error {:?}", e), 86 | } 87 | } 88 | info!("terminating statsd udp"); 89 | }) 90 | } 91 | } 92 | 93 | fn process_buffer_newlines(buf: &mut BytesMut) -> Vec { 94 | let mut ret: Vec = Vec::new(); 95 | loop { 96 | match memchr(b'\n', buf) { 97 | None => break, 98 | Some(newline) => { 99 | let mut incoming = buf.split_to(newline + 1); 100 | let length: usize = incoming.len(); 101 | if length < 3 { 102 | continue; // Not a real metric. Likely \n or \r\n. 103 | } else if incoming[length - 2] == b'\r' { 104 | incoming.truncate(length - 2); 105 | } else { 106 | incoming.truncate(length - 1); 107 | } 108 | let frozen = incoming.freeze(); 109 | if frozen == "status" { 110 | // Consume a line consisting of just the word status, and do not produce a PDU 111 | continue; 112 | } 113 | if let Ok(pdu) = Pdu::parse(frozen) { 114 | ret.push(Event::Pdu(pdu)); 115 | } 116 | } 117 | }; 118 | } 119 | ret 120 | } 121 | 122 | async fn client_handler( 123 | stats: stats::Scope, 124 | peer: String, 125 | mut tripwire: Tripwire, 126 | mut socket: T, 127 | backends: Backends, 128 | route: Vec, 129 | config: StatsdServerConfig, 130 | ) where 131 | T: AsyncRead + AsyncWrite + Unpin, 132 | { 133 | let incoming_bytes = stats.counter("incoming_bytes").unwrap(); 134 | let disconnects = stats.counter("disconnects").unwrap(); 135 | let processed_lines = stats.counter("lines").unwrap(); 136 | let read_timeouts = stats.counter("read_timeout").unwrap(); 137 | let socket_errors = stats.counter("socket_error").unwrap(); 138 | 139 | let read_buffer_size = config.read_buffer.unwrap_or(READ_BUFFER_SIZE); 140 | let mut buf = BytesMut::with_capacity(read_buffer_size); 141 | 142 | loop { 143 | if buf.remaining_mut() < read_buffer_size { 144 | buf.reserve(read_buffer_size); 145 | } 146 | let result = select! 
{ 147 | r = timeout( 148 | config.read_timeout_secs.map(|s| Duration::from_secs(s as u64)).unwrap_or(TCP_READ_TIMEOUT), 149 | //per socket.read_buf - If the timeout completes first it is guaranteed that no data was read 150 | socket.read_buf(&mut buf)) => 151 | { 152 | match r { 153 | Err(_e) => Err(std::io::Error::new(ErrorKind::TimedOut, "read timeout")), 154 | Ok(Err(e)) => Err(e), 155 | Ok(Ok(r)) => Ok(r), 156 | } 157 | }, 158 | _ = &mut tripwire => Err(std::io::Error::new(ErrorKind::Other, "shutting down")), 159 | }; 160 | 161 | match result { 162 | Ok(bytes) if buf.is_empty() && bytes == 0 => { 163 | debug!("closing reader (empty buffer, eof) {}", peer); 164 | break; 165 | } 166 | Ok(bytes) if bytes == 0 => { 167 | let r = process_buffer_newlines(&mut buf); 168 | processed_lines.inc_by(r.len() as f64); 169 | 170 | backends.provide_statsd_slice(&r, &route); 171 | let remaining = buf.clone().freeze(); 172 | if let Ok(p) = Pdu::parse(remaining) { 173 | backends.provide_statsd(&Event::Pdu(p), &route); 174 | }; 175 | debug!("remaining {:?}", buf); 176 | debug!("closing reader {}", peer); 177 | break; 178 | } 179 | Ok(bytes) => { 180 | incoming_bytes.inc_by(bytes as f64); 181 | 182 | let r = process_buffer_newlines(&mut buf); 183 | processed_lines.inc_by(r.len() as f64); 184 | backends.provide_statsd_slice(&r, &route); 185 | } 186 | Err(e) if e.kind() == ErrorKind::Other => { 187 | // Ignoring the results of the write call here 188 | let _ = timeout( 189 | Duration::from_secs(1), 190 | socket.write_all(b"server closing due to shutdown, goodbye\n"), 191 | ) 192 | .await; 193 | break; 194 | } 195 | Err(e) if e.kind() == ErrorKind::TimedOut => { 196 | read_timeouts.inc(); 197 | debug!("read timeout, closing {}", peer); 198 | break; 199 | } 200 | Err(e) => { 201 | socket_errors.inc(); 202 | debug!("socket error {:?} {}", e, peer); 203 | break; 204 | } 205 | } 206 | } 207 | disconnects.inc(); 208 | } 209 | 210 | /// Wrapper type to adapt an optional listener and either return an accept 211 | /// future, or a pending future which never returns. This wrapper is needed to 212 | /// work around that .accept() is an opaque impl Future type, so can't be 213 | /// readily mixed into a stream. 
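///
/// When no unix socket is configured, the returned future never resolves
/// (futures::future::pending()), so the corresponding select! arm in run()
/// below simply never fires and only the TCP accept and tripwire arms remain
/// live.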
214 | async fn optional_accept( 215 | listener: Option<&UnixListener>, 216 | ) -> std::io::Result<(UnixStream, unix::SocketAddr)> { 217 | if let Some(listener) = listener { 218 | listener.accept().await 219 | } else { 220 | futures::future::pending().await 221 | } 222 | } 223 | 224 | pub async fn run( 225 | stats: stats::Scope, 226 | tripwire: Tripwire, 227 | config: StatsdServerConfig, 228 | backends: Backends, 229 | ) { 230 | let tcp_listener = TcpListener::bind(config.bind.as_str()).await.unwrap(); 231 | info!("statsd tcp server running on {}", config.bind); 232 | 233 | let unix_listener = config.socket.as_ref().map(|socket| { 234 | let unix = UnixListener::bind(socket.as_str()).unwrap(); 235 | info!("statsd unix server running on {}", socket); 236 | unix 237 | }); 238 | 239 | // Spawn the threaded, non-async blocking UDP server 240 | let mut udp = UdpServer::new(); 241 | let udp_join = udp.udp_worker( 242 | stats.scope("udp"), 243 | config.bind.clone(), 244 | backends.clone(), 245 | config.route.clone(), 246 | ); 247 | 248 | let accept_connections = stats.counter("accepts").unwrap(); 249 | let accept_connections_unix = stats.counter("accepts_unix").unwrap(); 250 | let accept_failures = stats.counter("accept_failures").unwrap(); 251 | let accept_failures_unix = stats.counter("accept_failures_unix").unwrap(); 252 | 253 | let routes = config.route.clone(); 254 | let server_config = config.clone(); 255 | async move { 256 | loop { 257 | select! { 258 | _ = tripwire.clone() => { 259 | info!("stopped stream listener loop"); 260 | return 261 | } 262 | // Wrap the unix acceptor for different stats 263 | unix_res = optional_accept(unix_listener.as_ref()) => { 264 | match unix_res { 265 | Ok((socket,_)) => { 266 | let peer_addr = format!("{:?}", socket.peer_addr()); 267 | debug!("accepted unix connection from {:?}", socket.peer_addr()); 268 | accept_connections_unix.inc(); 269 | tokio::spawn(client_handler(stats.scope("connections_unix"), peer_addr, tripwire.clone(), socket, backends.clone(), routes.clone(), server_config.clone())); 270 | } 271 | Err(err) => { 272 | accept_failures_unix.inc(); 273 | info!("unix accept error = {:?}", err); 274 | } 275 | } 276 | } 277 | socket_res = tcp_listener.accept() => { 278 | 279 | match socket_res { 280 | Ok((socket,_)) => { 281 | let peer_addr = format!("{:?}", socket.peer_addr()); 282 | debug!("accepted connection from {:?}", socket.peer_addr()); 283 | accept_connections.inc(); 284 | tokio::spawn(client_handler(stats.scope("connections"), peer_addr, tripwire.clone(), socket, backends.clone(), routes.clone(), server_config.clone())); 285 | } 286 | Err(err) => { 287 | accept_failures.inc(); 288 | info!("accept error = {:?}", err); 289 | } 290 | } 291 | } 292 | } 293 | } 294 | } 295 | .await; 296 | drop(udp); 297 | // The socket file descriptor is not removed on teardown. Lets remove it if enabled. 
298 | if let Some(socket) = config.socket.as_ref() { 299 | let _ = std::fs::remove_file(socket); 300 | } 301 | tokio::task::spawn_blocking(move || { 302 | udp_join.join().unwrap(); 303 | }) 304 | .await 305 | .unwrap(); 306 | } 307 | 308 | #[cfg(test)] 309 | pub mod test { 310 | use super::*; 311 | #[test] 312 | fn test_process_buffer_no_newlines() { 313 | let mut b = BytesMut::new(); 314 | // Validate we don't consume non-newlines 315 | b.put_slice(b"hello"); 316 | let r = process_buffer_newlines(&mut b); 317 | assert!(r.is_empty()); 318 | assert!(b.split().as_ref() == b"hello"); 319 | } 320 | 321 | #[test] 322 | fn test_process_buffer_newlines() { 323 | let mut b = BytesMut::new(); 324 | // Validate we don't consume newlines, but not a remnant 325 | b.put_slice(b"hello:1|c\nhello:1|c\nhello2"); 326 | let r = process_buffer_newlines(&mut b); 327 | assert!(r.len() == 2); 328 | assert!(b.split().as_ref() == b"hello2"); 329 | } 330 | 331 | #[test] 332 | fn test_process_buffer_cr_newlines() { 333 | let mut found = 0; 334 | let mut b = BytesMut::new(); 335 | // Validate we don't consume newlines, but not a remnant 336 | b.put_slice(b"hello:1|c\r\nhello:1|c\nhello2"); 337 | let r = process_buffer_newlines(&mut b); 338 | for w in r { 339 | let pdu: Pdu = w.into(); 340 | assert!(pdu.pdu_type() == b"c"); 341 | assert!(pdu.name() == b"hello"); 342 | found += 1 343 | } 344 | assert_eq!(2, found); 345 | assert!(b.split().as_ref() == b"hello2"); 346 | } 347 | 348 | #[test] 349 | fn test_process_buffer_status() { 350 | let mut found = 0; 351 | let mut b = BytesMut::new(); 352 | // Validate we don't consume newlines, but not a remnant 353 | b.put_slice(b"status\r\nhello:1|c\nhello2"); 354 | let r = process_buffer_newlines(&mut b); 355 | for w in r { 356 | let pdu: Pdu = w.into(); 357 | assert!(pdu.pdu_type() == b"c"); 358 | assert!(pdu.name() == b"hello"); 359 | found += 1 360 | } 361 | assert_eq!(1, found); 362 | assert!(b.split().as_ref() == b"hello2"); 363 | } 364 | } 365 | --------------------------------------------------------------------------------