├── .dockerignore ├── .github ├── codeql │ └── codeql-config.yml └── workflows │ └── codeql-analysis.yml ├── .gitignore ├── .rustfmt.toml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── Gene-Cernan-1-578x485.jpg ├── LICENSE.txt ├── README.md ├── benches ├── buckets.rs ├── protocols_graphite.rs └── protocols_statsd.rs ├── build-container.sh ├── codecov.yml ├── examples ├── configs │ ├── basic.toml │ ├── counting-example.toml │ ├── quickstart-files.toml │ ├── quickstart-filters.toml │ ├── quickstart.toml │ ├── receiver-config.toml │ └── transmitter-config.toml └── scripts │ ├── collectd_scrub.lua │ ├── frau_im_mond.lua │ └── keep_count.lua ├── resources ├── protobufs │ ├── native.proto │ └── prometheus.proto └── tests │ ├── data │ ├── data-deflate.avro │ ├── data-null.avro │ ├── data-snappy.avro │ ├── users-deflate.avro │ ├── users-null.avro │ └── users-snappy.avro │ └── scripts │ ├── add_keys.lua │ ├── clear_logs.lua │ ├── clear_metrics.lua │ ├── collectd_scrub.lua │ ├── demonstrate_require.lua │ ├── field_from_path.lua │ ├── identity.lua │ ├── insufficient_args.lua │ ├── json_parse.lua │ ├── keep_count.lua │ ├── lib │ ├── demo.lua │ └── json.lua │ ├── lua_error.lua │ ├── missing_func.lua │ ├── remove_keys.lua │ └── set_value.lua ├── src ├── bin │ └── cernan.rs ├── buckets.rs ├── config.rs ├── constants.rs ├── filter │ ├── delay_filter.rs │ ├── flush_boundary_filter.rs │ ├── json_encode_filter.rs │ ├── mod.rs │ └── programmable_filter.rs ├── http.rs ├── lib.rs ├── matrix.rs ├── metric │ ├── ackbag.rs │ ├── event.rs │ ├── logline.rs │ ├── mod.rs │ └── telemetry.rs ├── protocols │ ├── graphite.rs │ ├── mod.rs │ ├── native.rs │ ├── prometheus.rs │ └── statsd.rs ├── sink │ ├── console.rs │ ├── elasticsearch.rs │ ├── influxdb.rs │ ├── kafka.rs │ ├── mod.rs │ ├── native.rs │ ├── null.rs │ ├── prometheus.rs │ └── wavefront.rs ├── source │ ├── avro.rs │ ├── file │ │ ├── file_server.rs │ │ ├── file_watcher.rs │ │ └── mod.rs │ ├── flush.rs │ ├── graphite.rs │ ├── internal.rs │ ├── mod.rs │ ├── native.rs │ ├── nonblocking.rs │ ├── statsd.rs │ └── tcp.rs ├── thread.rs ├── time.rs └── util.rs ├── tests └── programmable_filter.rs └── upload-artifact.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /.github/codeql/codeql-config.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL config" 2 | 3 | queries: 4 | - uses: security-extended 5 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "Code scanning - action" 2 | 3 | on: 4 | push: 5 | branches: [master, ] 6 | schedule: 7 | - cron: '0 8 * * 2' 8 | 9 | jobs: 10 | CodeQL-Build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v2 17 | with: 18 | # We must fetch at least the immediate parents so that if this is 19 | # a pull request then we can checkout the head. 20 | fetch-depth: 2 21 | 22 | # If this run was triggered by a pull request event, then checkout 23 | # the head of the pull request instead of the merge commit. 24 | - run: git checkout HEAD^2 25 | if: ${{ github.event_name == 'pull_request' }} 26 | 27 | # Initializes the CodeQL tools for scanning. 
28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@v1 30 | # Override language selection by uncommenting this and choosing your languages 31 | # with: 32 | # languages: go, javascript, csharp, python, cpp, java 33 | with: 34 | config-file: ./.github/codeql/codeql-config.yml 35 | 36 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 37 | # If this step fails, then you should remove it and run the build manually (see below) 38 | - name: Autobuild 39 | uses: github/codeql-action/autobuild@v1 40 | 41 | # ℹ️ Command-line programs to run using the OS shell. 42 | # 📚 https://git.io/JvXDl 43 | 44 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 45 | # and modify them (or add more) to build your code if your project 46 | # uses a compiled language 47 | 48 | #- run: | 49 | # make bootstrap 50 | # make release 51 | 52 | - name: Perform CodeQL Analysis 53 | uses: github/codeql-action/analyze@v1 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .criterion/ 2 | data/ 3 | target/ 4 | src/metrics/statsd.rs 5 | src/metrics/graphite.rs 6 | *.bk 7 | /tags 8 | /.vscode 9 | tmp/ 10 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 87 2 | format_strings = false 3 | wrap_comments = true 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | cache: cargo 3 | rust: 4 | - stable 5 | - beta 6 | - nightly 7 | 8 | before_script: 9 | - rustup component add rustfmt 10 | - rustup component add clippy 11 | script: 12 | - cargo fmt --all -- --check 13 | - cargo clippy -- -A clippy::redundant_field_names 14 | - cargo clean 15 | - cargo test 16 | 17 | matrix: 18 | allow_failures: 19 | - rust: nightly 20 | - rust: beta 21 | 22 | before_install: 23 | - sudo apt-get update 24 | 25 | install: 26 | - PATH=$PATH:/home/travis/.cargo/bin 27 | 28 | addons: 29 | apt: 30 | packages: 31 | - libcurl4-openssl-dev 32 | - libelf-dev 33 | - libdw-dev 34 | - cmake 35 | - gcc 36 | - binutils-dev 37 | - libiberty-dev 38 | 39 | after_success: | 40 | wget https://github.com/SimonKagstrom/kcov/archive/v34.tar.gz && 41 | tar xzf v34.tar.gz && 42 | cd kcov-34 && 43 | mkdir build && 44 | cd build && 45 | cmake .. && 46 | make && 47 | sudo make install && 48 | cd ../.. 
&& 49 | rm -rf kcov-34 && 50 | find target/debug -maxdepth 1 -name 'cernan-*' -type f | while read file; do 51 | [ -x $file ] || continue; 52 | mkdir -p "target/cov/$(basename $file)"; 53 | kcov --exclude-pattern=/.cargo,/usr/lib --include-path="$(pwd)" --verify "target/cov/$(basename $file)" "$file"; 54 | done && 55 | bash <(curl -s https://codecov.io/bash) && 56 | echo "Uploaded code coverage" 57 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at brian@troutwine.us. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to cernan 2 | 3 | Hey there, wow! Cernan is a collaborative effort and we're really excited to see 4 | that you're giving it a shot. Thank you! 5 | 6 | ## Feature Requests 7 | 8 | If you'd like to request we add a feature to cernan, go right ahead! Please 9 | create an issue in [our tracker](https://github.com/postmates/cernan/issues) and 10 | tag it as a "feature request". 11 | 12 | ## Bug Reports 13 | 14 | Cernan is intended to run on a variety of hosts but the developers, as of this 15 | writing, have access to a limited pool of systems. If you've caught a bug please 16 | do create an issue in [our tracker](http://github.com/postmates/cernan/issues). 17 | Here's a template that you can use to file a bug, though it's not necessary to 18 | use it exactly: 19 | 20 | 21 | 22 | I tried this: 23 | 24 | 25 | 26 | I expected to see this happen: 27 | 28 | Instead, this happened: 29 | 30 | ## Meta 31 | 32 | `rustc --version --verbose`: 33 | 34 | `cernan --version`: 35 | 36 | Backtrace: 37 | 38 | All three components are important: what you did, what you expected, what 39 | happened instead. Please include the output of `rustc --version --verbose`, 40 | which includes important information about what platform you're on and what 41 | version of Rust you're using to compile cernan. 42 | 43 | Sometimes, a backtrace is helpful, and so including that is nice. To get a 44 | backtrace, set the `RUST_BACKTRACE` environment variable to a value other than 45 | `0`. The easiest way to do this is to invoke cernan like this: 46 | 47 | ```bash 48 | $ RUST_BACKTRACE=1 cernan ... 49 | ``` 50 | 51 | ## Pull Requests 52 | 53 | Pull requests are the mechanism we use to incorporate changes to cernan. GitHub 54 | itself has [documentation](https://docs.github.com/en/pull-requests) on 55 | using the Pull Request feature. We use the 'fork and pull' model described 56 | there. 57 | 58 | Please make pull requests against the `master` branch. 59 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Brian L. Troutwine ", 3 | "John Koenig ", 4 | "Tom Santero "] 5 | description = "A telemetry and logging aggregation server."
6 | keywords = ["statsd", "graphite", "telemetry", "logging", "metrics"] 7 | license = "MIT" 8 | name = "cernan" 9 | readme = "README.md" 10 | repository = "https://github.com/postmates/cernan" 11 | version = "0.9.2-pre" 12 | edition = "2018" 13 | 14 | [[bin]] 15 | name = "cernan" 16 | doc = false 17 | 18 | [dependencies] 19 | base64 = "0.9.0" 20 | byteorder = "1.0" 21 | chan-signal = "0.3.1" 22 | chrono = "0.4" 23 | clap = "2.27" 24 | coco = "0.3" 25 | elastic = "0.20" 26 | elastic_types = "0.20" 27 | fern = "0.5" 28 | flate2 = "1.0" 29 | futures = "0.1" 30 | glob = "0.2.11" 31 | hopper = "0.4" 32 | lazy_static = "1.0" 33 | libc = "0.2" 34 | log = "0.4" 35 | mond = "0.1" 36 | mio = "0.6.11" 37 | openssl-probe = "0.1" 38 | protobuf = "1.7" 39 | quantiles = { version = "0.7", features = ["serde_support"] } 40 | rand = "0.5" 41 | rdkafka = "0.17.0" 42 | regex = "1.0" 43 | reqwest = "0.8" 44 | seahash = "3.0" 45 | serde = "1.0" 46 | serde-avro = "0.5.0" 47 | serde_derive = "1.0" 48 | serde_json = "1.0" 49 | slab = "0.4" 50 | tiny_http = "0.6" 51 | toml = "0.4" 52 | url = "1.6" 53 | uuid = {version = "0.6", features = ["v4", "serde"]} 54 | 55 | [dev-dependencies] 56 | tempdir = "0.3" 57 | quickcheck = "0.6" 58 | criterion = "0.2.9" 59 | 60 | [profile.dev] 61 | codegen-units = 4 62 | 63 | [profile.release] 64 | lto = true 65 | 66 | [[bench]] 67 | name = "buckets" 68 | harness = false 69 | 70 | [[bench]] 71 | name = "protocols_statsd" 72 | harness = false 73 | 74 | [[bench]] 75 | name = "protocols_graphite" 76 | harness = false 77 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ekidd/rust-musl-builder:1.32.0 as builder 2 | 3 | RUN VERS=1.2.11 && \ 4 | cd /home/rust/libs && \ 5 | curl -LO http://zlib.net/zlib-$VERS.tar.gz && \ 6 | tar xzf zlib-$VERS.tar.gz && cd zlib-$VERS && \ 7 | CC=musl-gcc CFLAGS=-fPIC ./configure --static --prefix=/usr/local/musl && \ 8 | make && sudo make install && \ 9 | cd .. && rm -rf zlib-$VERS.tar.gz zlib-$VERS 10 | 11 | RUN cd /home/rust/libs && \ 12 | curl -LO https://github.com/lz4/lz4/archive/master.tar.gz && \ 13 | tar xfz master.tar.gz && \ 14 | ls && \ 15 | cd lz4-master && \ 16 | CC=musl-gcc CFLAGS=-fPIC make prefix=/usr/local/musl && \ 17 | sudo make install prefix=/usr/local/musl && \ 18 | cd .. && \ 19 | rm -rf master.tar.gz lz4-master 20 | 21 | RUN sudo apt-get update && \ 22 | sudo apt-get install -y python2.7-minimal && \ 23 | sudo ln -sf /usr/bin/python2.7 /usr/bin/python 24 | 25 | ENV CC=musl-gcc \ 26 | CFLAGS=-I/usr/local/musl/include \ 27 | LDFLAGS=-L/usr/local/musl/lib 28 | 29 | COPY --chown=rust:rust . 
/source 30 | RUN cd /source && cargo build --release 31 | 32 | FROM alpine:3.8 33 | 34 | RUN apk update \ 35 | && apk upgrade --no-cache 36 | 37 | RUN apk add --no-cache --update \ 38 | ca-certificates \ 39 | llvm-libunwind \ 40 | openssl && \ 41 | update-ca-certificates && \ 42 | rm -rf /var/cache/apk/* && \ 43 | mkdir -p /etc/cernan/scripts 44 | 45 | COPY --from=builder /source/target/x86_64-unknown-linux-musl/release/cernan /usr/bin/cernan 46 | COPY examples/configs/quickstart.toml /etc/cernan/cernan.toml 47 | 48 | ENV STATSD_PORT 8125 49 | 50 | ENTRYPOINT ["/usr/bin/cernan"] 51 | CMD ["--config", "/etc/cernan/cernan.toml"] 52 | -------------------------------------------------------------------------------- /Gene-Cernan-1-578x485.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/Gene-Cernan-1-578x485.jpg -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Original work Copyright (c) 2016, Postmates, Inc. 4 | Modified work Copyright (c) 2015, Mark Story 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a 7 | copy of this software and associated documentation files (the "Software"), 8 | to deal in the Software without restriction, including without limitation 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | and/or sell copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Now Archived and Forked 2 | _cernan_ will not be maintained in this repository going forward. Please use, create issues on, and make PRs to the fork of _cernan_ located [here](https://github.com/blt/cernan). 3 | 4 | # cernan - telemetry aggregation and shipping, last up the ladder 5 | 6 | [![Build Status](https://travis-ci.com/postmates/cernan.svg?branch=master)](https://travis-ci.com/postmates/cernan) [![Codecov](https://img.shields.io/codecov/c/github/postmates/cernan.svg)](https://codecov.io/gh/postmates/cernan) 7 | 8 | ![Eugene Cernan, Apollo 17 EVA](Gene-Cernan-1-578x485.jpg) 9 | 10 | Cernan is a telemetry and logging aggregation server. It exposes multiple 11 | interfaces for ingestion and can emit to multiple aggregation sources while doing 12 | in-flight manipulation of data. Cernan has minimal CPU and memory requirements 13 | and is intended to service bursty telemetry _without_ load shedding.
Cernan aims 14 | to be _reliable_ and _convenient_ to use, both for application engineers and 15 | operations staff. 16 | 17 | Why you might choose to use cernan: 18 | 19 | * You need to ingest telemetry from multiple protocols. 20 | * You need to multiplex telemetry over aggregation services. 21 | * You want to convert log lines into telemetry. 22 | * You want to convert telemetry into log lines. 23 | * You want to transform telemetry or log lines in-flight. 24 | 25 | If you'd like to learn more, please do have a look in 26 | our [wiki](https://github.com/postmates/cernan/wiki/). 27 | 28 | # Quickstart 29 | 30 | To build cernan you will need to 31 | have [Rust](https://www.rust-lang.org/en-US/). This should be as simple as: 32 | 33 | > curl -sSf https://static.rust-lang.org/rustup.sh | sh 34 | 35 | Once Rust is installed, from the root of this project run: 36 | 37 | > cargo run -- --config examples/configs/quickstart.toml 38 | 39 | and you're good to go. Cernan will report to stdout what ports it is now 40 | listening on. If you would like to debug your service--to determine if the 41 | telemetry you intend is issued--run cernan like: 42 | 43 | > cargo run -- -vvvv --config examples/configs/quickstart.toml 44 | 45 | and full trace output will be reported to stdout. 46 | 47 | # Usage 48 | 49 | The cernan server has a few command-line toggles to control its behaviour: 50 | 51 | ``` 52 | -C, --config The config file to feed in. 53 | -v Turn on verbose output. 54 | ``` 55 | 56 | The verbose flag `-v` allows multiples, each addition cranking up the verbosity 57 | by one. So: 58 | 59 | * `-v` -- error, warning 60 | * `-vv` -- error, warning, info 61 | * `-vvv` -- error, warning, info, debug 62 | * `-vvvv` -- error, warning, info, debug, trace 63 | 64 | # License 65 | 66 | cernan is copyright © 2017-2018 Postmates, Inc and released to the public under the 67 | terms of the MIT license. 
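As an end-to-end check of the quickstart setup described above, you can lob a single statsd datagram at a running instance yourself. The sketch below is an editorial illustration, not part of the repository: it assumes cernan is running locally with the quickstart config (statsd source on its default port 8125), and the metric name `quickstart.test` is invented for the example.

```rust
// Fire one statsd counter increment at a locally running cernan.
// Assumptions: quickstart.toml is in use, so sources.statsd.primary
// listens on 127.0.0.1:8125; the metric name here is made up.
use std::net::UdpSocket;

fn main() -> std::io::Result<()> {
    // Bind an ephemeral local UDP port for the client side...
    let socket = UdpSocket::bind("127.0.0.1:0")?;
    // ...and send a single counter in statsd wire format: "<name>:<value>|c".
    socket.send_to(b"quickstart.test:1|c", "127.0.0.1:8125")?;
    Ok(())
}
```

Run cernan with `-vvvv` as described in the Quickstart section and the parsed telemetry should appear on stdout after the next flush interval.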
68 | -------------------------------------------------------------------------------- /benches/buckets.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | 6 | extern crate cernan; 7 | extern crate chrono; 8 | extern crate rand; 9 | 10 | use cernan::buckets; 11 | use cernan::metric::{AggregationMethod, Telemetry}; 12 | use chrono::{TimeZone, Utc}; 13 | use rand::distributions::Alphanumeric; 14 | use rand::{Rng, SeedableRng, XorShiftRng}; 15 | 16 | fn experiment(input: &ExperimentInput) { 17 | let total_adds = input.total_adds; 18 | let name_pool_size = input.name_pool_size; 19 | let mut rng: XorShiftRng = SeedableRng::from_seed([ 20 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 21 | ]); 22 | let aggregations = [ 23 | AggregationMethod::Histogram, 24 | AggregationMethod::Set, 25 | AggregationMethod::Sum, 26 | AggregationMethod::Summarize, 27 | ]; 28 | let times = [ 29 | Utc.ymd(1972, 12, 11) 30 | .and_hms_milli(11, 59, 49, 0) 31 | .timestamp(), 32 | Utc.ymd(1972, 12, 11) 33 | .and_hms_milli(11, 59, 50, 0) 34 | .timestamp(), 35 | Utc.ymd(1972, 12, 11) 36 | .and_hms_milli(11, 59, 51, 0) 37 | .timestamp(), 38 | Utc.ymd(1972, 12, 11) 39 | .and_hms_milli(11, 59, 52, 0) 40 | .timestamp(), 41 | Utc.ymd(1972, 12, 11) 42 | .and_hms_milli(11, 59, 52, 0) 43 | .timestamp(), 44 | ]; 45 | let mut pool: Vec<String> = Vec::with_capacity(name_pool_size); 46 | for _ in 0..name_pool_size { 47 | pool.push(rng.sample_iter(&Alphanumeric).take(10).collect()); 48 | } 49 | let mut bucket = buckets::Buckets::default(); 50 | 51 | for _ in 0..total_adds { 52 | bucket.add( 53 | Telemetry::new() 54 | .value(rng.gen::<f64>()) 55 | .name(rng.choose(&pool).unwrap().clone()) 56 | .kind(*rng.choose(&aggregations).unwrap()) 57 | .harden() 58 | .unwrap() 59 | .timestamp(*rng.choose(&times).unwrap()), 60 | ); 61 | } 62 | } 63 | 64 | #[derive(Debug)] 65 | struct ExperimentInput { 66 | total_adds: usize, 67 | name_pool_size: usize, 68 | } 69 | 70 | impl ::std::fmt::Display for ExperimentInput { 71 | fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { 72 | write!(f, "({}, {})", self.total_adds, self.name_pool_size) 73 | } 74 | } 75 | 76 | fn benchmark(c: &mut Criterion) { 77 | let mut inputs = Vec::with_capacity(32); 78 | for i in 6..8 { 79 | for j in 6..10 { 80 | inputs.push(ExperimentInput { 81 | total_adds: 2usize.pow(i), 82 | name_pool_size: 2usize.pow(j), 83 | }); 84 | } 85 | } 86 | 87 | c.bench_function_over_inputs( 88 | "bucket_add", 89 | |b, input| { 90 | b.iter(|| experiment(input)); 91 | }, 92 | inputs, 93 | ); 94 | } 95 | 96 | criterion_group!(benches, benchmark); 97 | criterion_main!(benches); 98 | -------------------------------------------------------------------------------- /benches/protocols_graphite.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | 6 | extern crate cernan; 7 | 8 | use cernan::metric::Telemetry; 9 | use cernan::protocols::graphite::parse_graphite; 10 | use std::sync; 11 | 12 | fn experiment() { 13 | let packet = "fst 1 101 14 | snd -2.0 202 15 | thr 3 303 16 | fth@fth 4 404 17 | fv%fv 5 505 18 | s-th 6 606"; 19 | 20 | let mut res = Vec::new(); 21 | let metric = sync::Arc::new(Some(Telemetry::default())); 22 | assert!(parse_graphite(packet, &mut res, &metric)); 23 | } 24 | 25 | fn benchmark(c: &mut Criterion) { 26 | c.bench_function("parse_graphite", |b| { 27
| b.iter(experiment); 28 | }); 29 | } 30 | 31 | criterion_group!(benches, benchmark); 32 | criterion_main!(benches); 33 | -------------------------------------------------------------------------------- /benches/protocols_statsd.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | 6 | extern crate cernan; 7 | 8 | use cernan::metric::Telemetry; 9 | use cernan::protocols::statsd::parse_statsd; 10 | use cernan::source::StatsdParseConfig; 11 | use std::sync; 12 | 13 | fn experiment() { 14 | let packet = "zrth:0|g 15 | fst:-1.1|ms 16 | snd:+2.2|g 17 | thd:3.3|h 18 | fth:4|c 19 | fvth:5.5|c|@0.1 20 | sxth:-6.6|g 21 | svth:+7.77|g"; 22 | 23 | let metric = sync::Arc::new(Some(Telemetry::default())); 24 | let config = sync::Arc::new(StatsdParseConfig::default()); 25 | let mut res = Vec::new(); 26 | assert!(parse_statsd(packet, &mut res, &metric, &config)); 27 | } 28 | 29 | fn benchmark(c: &mut Criterion) { 30 | c.bench_function("parse_statsd", |b| { 31 | b.iter(experiment); 32 | }); 33 | } 34 | 35 | criterion_group!(benches, benchmark); 36 | criterion_main!(benches); 37 | -------------------------------------------------------------------------------- /build-container.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | set -o xtrace 7 | 8 | function usage() { 9 | echo "$0 " 10 | } 11 | 12 | if [ -z "$1" ]; then 13 | usage 14 | exit 1 15 | fi 16 | 17 | cargo clean 18 | 19 | VERSION="${1}" 20 | 21 | docker build -t "quay.io/postmates/cernan:$VERSION" . 22 | docker push "quay.io/postmates/cernan:$VERSION" 23 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | precision: 2 3 | round: down 4 | range: 70...100 5 | 6 | status: 7 | project: true 8 | changes: false 9 | 10 | patch: 11 | default: 12 | target: 90% 13 | 14 | project: 15 | default: 16 | target: 91% 17 | threshold: 1% 18 | 19 | comment: 20 | layout: "header, diff" 21 | behavior: default # update if exists else create new 22 | -------------------------------------------------------------------------------- /examples/configs/basic.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data/" 2 | scripts-directory = "examples/scripts/" 3 | 4 | flush-interval = 5 5 | 6 | [tags] 7 | source = "cernan" 8 | 9 | [sources] 10 | [sources.internal] 11 | forwards = ["sinks.console", "sinks.null"] 12 | 13 | [sources.statsd.primary] 14 | enabled = true 15 | port = 8125 16 | forwards = ["sinks.console", "filters.delay.two_seconds"] 17 | 18 | [sources.native.primary] 19 | ip = "127.0.0.1" 20 | port = 1972 21 | forwards = ["filters.delay.two_seconds"] 22 | 23 | [sources.graphite.primary] 24 | enabled = true 25 | port = 2004 26 | forwards = ["filters.programmable.collectd_scrub"] 27 | 28 | [sources.files] 29 | [sources.files.example_log] 30 | path = "example.log" 31 | forwards = ["sinks.firehose.stream_two"] 32 | 33 | [sources.files.foo_log] 34 | path = "foo.log" 35 | forwards = ["sinks.firehose.stream_two"] 36 | 37 | [filters] 38 | [filters.programmable.collectd_scrub] 39 | script = "collectd_scrub.lua" 40 | forwards = ["filters.delay.two_seconds"] 41 | 42 | [filters.delay.two_seconds] 43 | tolerance = 2 44 | forwards = 
["filters.flush_boundary.two_seconds"] 45 | 46 | [filters.flush_boundary.two_seconds] 47 | tolerance = 2 48 | forwards = ["sinks.console", "sinks.null", "sinks.influxdb", "sinks.prometheus"] 49 | 50 | [sinks] 51 | [sinks.console] 52 | bin_width = 1 53 | 54 | [sinks.null] 55 | 56 | # [sinks.wavefront] 57 | # port = 2878 58 | # host = "127.0.0.1" 59 | # bin_width = 1 60 | 61 | [sinks.influxdb] 62 | port = 8086 63 | host = "127.0.0.1" 64 | bin_width = 1 65 | 66 | [sinks.prometheus] 67 | port = 8080 68 | host = "127.0.0.1" 69 | bin_width = 1 70 | 71 | [sinks.firehose.stream_two] 72 | delivery_stream = "stream_two" 73 | batch_size = 800 74 | region = "us-east-1" 75 | -------------------------------------------------------------------------------- /examples/configs/counting-example.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data/" 2 | scripts-directory = "examples/scripts/" 3 | 4 | flush-interval = 1 5 | 6 | [tags] 7 | source = "cernan" 8 | 9 | [sources] 10 | [sources.statsd.primary] 11 | enabled = true 12 | port = 8125 13 | forwards = ["filters.keep_count"] 14 | 15 | [filters] 16 | [filters.keep_count] 17 | script = "keep_count.lua" 18 | forwards = ["sinks.console"] 19 | 20 | [sinks] 21 | [sinks.console] 22 | bin_width = 1 23 | flush_interval = 2 24 | -------------------------------------------------------------------------------- /examples/configs/quickstart-files.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data/" 2 | scripts-directory = "examples/scripts/" 3 | 4 | flush-interval = 5 5 | 6 | [tags] 7 | source = "cernan" 8 | 9 | [sources] 10 | [sources.files] 11 | [sources.files.tmp_logs] 12 | path = "/tmp/log/*.log" 13 | forwards = ["sinks.firehose.stream_two"] 14 | 15 | [sinks] 16 | [sinks.firehose.stream_two] 17 | delivery_stream = "stream_two" 18 | batch_size = 800 19 | region = "us-east-1" 20 | -------------------------------------------------------------------------------- /examples/configs/quickstart-filters.toml: -------------------------------------------------------------------------------- 1 | scripts-directory = "examples/scripts/" 2 | flush-interval = 10 3 | 4 | [sources] 5 | [sources.statsd.primary] 6 | port = 8125 7 | forwards = ["filters.frau_im_mond"] 8 | 9 | [filters] 10 | [filters.frau_im_mond] 11 | script = "frau_im_mond.lua" 12 | forwards = ["sinks.console"] 13 | 14 | [sinks] 15 | [sinks.console] 16 | -------------------------------------------------------------------------------- /examples/configs/quickstart.toml: -------------------------------------------------------------------------------- 1 | flush-interval = 10 2 | 3 | [sources] 4 | [sources.statsd.primary] 5 | port = 8125 6 | forwards = ["sinks.console"] 7 | 8 | [sinks] 9 | [sinks.console] 10 | -------------------------------------------------------------------------------- /examples/configs/receiver-config.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data-receiver/" 2 | 3 | [statsd] 4 | enabled = false 5 | 6 | [graphite] 7 | enabled = false 8 | 9 | flush-interval = 10 10 | 11 | [tags] 12 | service = "receiver" 13 | uhf_band = "s" 14 | source = "cernan" 15 | 16 | [federation_receiver] 17 | port = 1972 18 | ip = "0.0.0.0" 19 | 20 | [wavefront] 21 | port = 2878 22 | host = "127.0.0.1" 23 | 24 | [console] 25 | -------------------------------------------------------------------------------- 
/examples/configs/transmitter-config.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data-transmitter/" 2 | 3 | [statsd] 4 | port = 8125 5 | 6 | [graphite] 7 | port = 2003 8 | 9 | flush-interval = 10 10 | 11 | [tags] 12 | service = "transmitter" 13 | source = "cernan" 14 | 15 | [federation_transmitter] 16 | port = 1972 17 | host = "127.0.0.1" 18 | -------------------------------------------------------------------------------- /examples/scripts/collectd_scrub.lua: -------------------------------------------------------------------------------- 1 | count_per_tick = 0 2 | 3 | function process_metric(pyld) 4 | count_per_tick = count_per_tick + 1 5 | 6 | local old_name = payload.metric_name(pyld, 1) 7 | local collectd, rest = string.match(old_name, "^(collectd)[%.@][%w_]+(.*)") 8 | if collectd ~= nil then 9 | local new_name = string.format("%s%s", collectd, rest) 10 | payload.set_metric_name(pyld, 1, new_name) 11 | end 12 | end 13 | 14 | function process_log(pyld) 15 | count_per_tick = count_per_tick + 1 16 | end 17 | 18 | function tick(pyld) 19 | payload.push_metric(pyld, "cernan_bridge.count_per_tick", count_per_tick) 20 | payload.push_log(pyld, string.format("count_per_tick: %s", count_per_tick)) 21 | count_per_tick = 0 22 | end 23 | -------------------------------------------------------------------------------- /examples/scripts/frau_im_mond.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.set_metric_name(pyld, 1, "frau_im_mond") 3 | end 4 | 5 | function process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /examples/scripts/keep_count.lua: -------------------------------------------------------------------------------- 1 | count_per_tick = 0 2 | 3 | function process_metric(pyld) 4 | count_per_tick = count_per_tick + 1 5 | end 6 | 7 | function process_log(pyld) 8 | count_per_tick = count_per_tick + 1 9 | end 10 | 11 | function tick(pyld) 12 | payload.push_metric(pyld, "count_per_tick", count_per_tick) 13 | payload.push_log(pyld, string.format("count_per_tick: %s", count_per_tick)) 14 | count_per_tick = 0 15 | end 16 | -------------------------------------------------------------------------------- /resources/protobufs/native.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Postmates Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | // SOFTWARE. 20 | 21 | // 22 | // Welcome! 23 | // 24 | // This file defines the protocol that cernan speaks natively. We hope that it's 25 | // a relatively straightforward protocol to implement. Cernan's native transport 26 | // is TCP. We require that all on-wire payloads have the following form: 27 | // 28 | // [--------------------------------|~~~~~~~~~~ . . . ~~~~~~~~~~~~] 29 | // ^ ^ 30 | // u32, payload length in bytes protobuf payload, of prefix len 31 | // 32 | // The protobuf payload conforms to the following definition. 33 | syntax = "proto2"; 34 | 35 | package com.postmates.cernan; 36 | option java_package = "com.postmates.cernan"; 37 | 38 | // 'Payload' - the top-level structure in each on-wire payload 39 | // 40 | // Payload is a container for repeated Telemetry and LogLines. There's not much 41 | // more to it than that. 42 | message Payload { 43 | repeated Telemetry points = 2; 44 | repeated LogLine lines = 3; 45 | } 46 | 47 | // 'LogLine' - a bit of unstructured text 48 | // 49 | // One of cernan's gigs is picking up logs from disk and transforming them 50 | // in-flight, shipping them off. This structure allows you to ship lines 51 | // directly via the native protocol without having to round-trip through disk 52 | // first. 53 | message LogLine { 54 | optional string path = 1; // unique 'location' of the log line 55 | optional string value = 2; // the line itself 56 | map<string, string> metadata = 3; // associated key/value metadata 57 | optional int64 timestamp_ms = 4; // milliseconds since the Unix epoch 58 | } 59 | 60 | // 'Telemetry' - a numeric measure of a thing 61 | // 62 | // Cernan's slightly more complicated gig is its 'telemetry' 63 | // subsystem. Telemetry is defined as a name and time associated collection of 64 | // measurements. In the structure we refer to these measurements as 65 | // 'samples'. The Telemetry structure makes it possible to associate multiple 66 | // samples in a single millisecond time window. Cernan will build a quantile 67 | // structure over these samples but you may further choose aggregation 68 | // interpretations by setting AggregationMethod. 69 | message Telemetry { 70 | optional string name = 1; // the unique name of the telemetry 71 | repeated double samples = 2 [ packed = true ]; // telemetry samples present in timestamp_ms 72 | optional bool persisted = 3 [ default = false ]; // persist metric across time windows 73 | optional AggregationMethod method = 4 [ default = SUMMARIZE ]; // see below 74 | map<string, string> metadata = 5; // associated key/value metadata 75 | optional int64 timestamp_ms = 6; // milliseconds since the Unix epoch 76 | repeated double bin_bounds = 7; // BIN inclusive upper bounds 77 | } 78 | 79 | // 'AggregationMethod' - an interpretation signal 80 | // 81 | // Cernan maintains quantile summaries for all Telemetry samples. Not all sinks 82 | // are capable of interpreting summaries natively. Cernan allows the client to 83 | // set preferred aggregations over the summaries for reporting to 'flat' 84 | // sinks. Sinks are allowed to ignore AggregationMethod at their 85 | // convenience. Additionally, aggregation time windows may be configured 86 | // per-sink and are not controllable through the protocol.
87 | enum AggregationMethod { 88 | // SUM keeps a sum of samples. This is often interpreted as a 89 | // per-window counter. 90 | SUM = 1; 91 | // SET preserves the last sample set into the Telemetry per time 92 | // window. 93 | SET = 2; 94 | // SUMMARIZE produces a quantile summary of the input samples per time 95 | // window. This is the default behaviour. 96 | SUMMARIZE = 3; 97 | // BIN produces a histogram summary of the input samples per time window. The 98 | // user will specify the bins' upper bounds. 99 | BIN = 4; 100 | } -------------------------------------------------------------------------------- /resources/protobufs/prometheus.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Prometheus Team 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | syntax = "proto2"; 15 | 16 | package io.prometheus.client; 17 | option java_package = "io.prometheus.client"; 18 | 19 | message LabelPair { 20 | optional string name = 1; 21 | optional string value = 2; 22 | } 23 | 24 | enum MetricType { 25 | COUNTER = 0; 26 | GAUGE = 1; 27 | SUMMARY = 2; 28 | UNTYPED = 3; 29 | HISTOGRAM = 4; 30 | } 31 | 32 | message Gauge { 33 | optional double value = 1; 34 | } 35 | 36 | message Counter { 37 | optional double value = 1; 38 | } 39 | 40 | message Quantile { 41 | optional double quantile = 1; 42 | optional double value = 2; 43 | } 44 | 45 | message Summary { 46 | optional uint64 sample_count = 1; 47 | optional double sample_sum = 2; 48 | repeated Quantile quantile = 3; 49 | } 50 | 51 | message Untyped { 52 | optional double value = 1; 53 | } 54 | 55 | message Histogram { 56 | optional uint64 sample_count = 1; 57 | optional double sample_sum = 2; 58 | repeated Bucket bucket = 3; // Ordered in increasing order of upper_bound, +Inf bucket is optional. 59 | } 60 | 61 | message Bucket { 62 | optional uint64 cumulative_count = 1; // Cumulative in increasing order. 63 | optional double upper_bound = 2; // Inclusive. 
64 | } 65 | 66 | message Metric { 67 | repeated LabelPair label = 1; 68 | optional Gauge gauge = 2; 69 | optional Counter counter = 3; 70 | optional Summary summary = 4; 71 | optional Untyped untyped = 5; 72 | optional Histogram histogram = 7; 73 | optional int64 timestamp_ms = 6; 74 | } 75 | 76 | message MetricFamily { 77 | optional string name = 1; 78 | optional string help = 2; 79 | optional MetricType type = 3; 80 | repeated Metric metric = 4; 81 | } 82 | -------------------------------------------------------------------------------- /resources/tests/data/data-deflate.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/data-deflate.avro -------------------------------------------------------------------------------- /resources/tests/data/data-null.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/data-null.avro -------------------------------------------------------------------------------- /resources/tests/data/data-snappy.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/data-snappy.avro -------------------------------------------------------------------------------- /resources/tests/data/users-deflate.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/users-deflate.avro -------------------------------------------------------------------------------- /resources/tests/data/users-null.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/users-null.avro -------------------------------------------------------------------------------- /resources/tests/data/users-snappy.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/users-snappy.avro -------------------------------------------------------------------------------- /resources/tests/scripts/add_keys.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.metric_set_tag(pyld, 1, "bizz", "bazz") 3 | end 4 | 5 | function process_log(pyld) 6 | payload.log_set_tag(pyld, 1, "bizz", "bazz") 7 | end 8 | 9 | function tick(pyld) 10 | end 11 | -------------------------------------------------------------------------------- /resources/tests/scripts/clear_logs.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | payload.clear_logs(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/clear_metrics.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.clear_metrics(pyld) 3 | end 4 | 5 | function 
process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/collectd_scrub.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | local old_name = payload.metric_name(pyld, 1) 3 | local collectd, rest = string.match(old_name, "^(collectd)[%.@][%w_-]+(.*)") 4 | if collectd ~= nil then 5 | local new_name = string.format("%s%s", collectd, rest) 6 | payload.set_metric_name(pyld, 1, new_name) 7 | end 8 | end 9 | 10 | function process_log(pyld) 11 | end 12 | 13 | function tick(pyld) 14 | end 15 | -------------------------------------------------------------------------------- /resources/tests/scripts/demonstrate_require.lua: -------------------------------------------------------------------------------- 1 | local demo = require "lib/demo" 2 | 3 | function process_metric(pyld) 4 | end 5 | 6 | function process_log(pyld) 7 | payload.log_set_tag(pyld, 1, "bizz", demo.demo()) 8 | end 9 | 10 | function tick(pyld) 11 | end 12 | -------------------------------------------------------------------------------- /resources/tests/scripts/field_from_path.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | local path = payload.log_path(pyld, 1) 6 | payload.log_set_field(pyld, 1, "foo", path) 7 | end 8 | 9 | function tick(pyld) 10 | end 11 | -------------------------------------------------------------------------------- /resources/tests/scripts/identity.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | end 6 | 7 | function tick(pyld) 8 | end 9 | -------------------------------------------------------------------------------- /resources/tests/scripts/insufficient_args.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.metric_remove_tag(pyld, 1) 3 | end 4 | 5 | function process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/json_parse.lua: -------------------------------------------------------------------------------- 1 | local json = require "lib/json" 2 | 3 | function process_metric(pyld) 4 | end 5 | 6 | function process_log(pyld) 7 | local line = payload.log_value(pyld, 1) 8 | local json_pyld = json.decode(line) 9 | payload.log_set_field(pyld, 1, "foo", json_pyld["foo"]) 10 | end 11 | 12 | function tick(pyld) 13 | end 14 | -------------------------------------------------------------------------------- /resources/tests/scripts/keep_count.lua: -------------------------------------------------------------------------------- 1 | count_per_tick = 0 2 | 3 | function process_metric(pyld) 4 | count_per_tick = count_per_tick + 1 5 | end 6 | 7 | function process_log(pyld) 8 | count_per_tick = count_per_tick + 1 9 | end 10 | 11 | function tick(pyld) 12 | payload.push_metric(pyld, "count_per_tick", count_per_tick) 13 | payload.push_log(pyld, string.format("count_per_tick: %s", count_per_tick)) 14 | count_per_tick = 0 15 | end 16 | -------------------------------------------------------------------------------- /resources/tests/scripts/lib/demo.lua: 
-------------------------------------------------------------------------------- 1 | local demo = {} 2 | 3 | function demo.demo() 4 | return "bazz" 5 | end 6 | 7 | return demo 8 | -------------------------------------------------------------------------------- /resources/tests/scripts/lua_error.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | error("boom") 3 | end 4 | 5 | function process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/missing_func.lua: -------------------------------------------------------------------------------- 1 | function process_log(pyld) 2 | end 3 | 4 | function tick(pyld) 5 | end 6 | -------------------------------------------------------------------------------- /resources/tests/scripts/remove_keys.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.metric_remove_tag(pyld, 1, "bizz") 3 | end 4 | 5 | function process_log(pyld) 6 | payload.log_remove_tag(pyld, 1, "bizz") 7 | end 8 | 9 | function tick(pyld) 10 | end 11 | -------------------------------------------------------------------------------- /resources/tests/scripts/set_value.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | payload.log_set_value(pyld, 1, "foo") 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | //! Library level constants 2 | use mio; 3 | 4 | /// MIO token used to distinguish system events 5 | /// from other event sources. 6 | /// 7 | /// Note - It is assumed that sources will not hold 8 | /// more than 2048 addressable streams, 0 indexed. 9 | pub const SYSTEM: mio::Token = mio::Token(2048); 10 | -------------------------------------------------------------------------------- /src/filter/delay_filter.rs: -------------------------------------------------------------------------------- 1 | //! Filter streams to within a bounded interval of current time. 2 | //! 3 | //! This filter is intended to remove items from the stream which are too old, 4 | //! as defined by the current time and the configured `tolerance`. That is, if 5 | //! for some time `T`, `(T - time::now()).abs() > tolerance` the item associated 6 | //! with `T` will be rejected. 7 | 8 | use crate::filter; 9 | use crate::metric; 10 | use crate::time; 11 | use std::sync::atomic::{AtomicUsize, Ordering}; 12 | 13 | /// Total number of telemetry rejected for age 14 | pub static DELAY_TELEM_REJECT: AtomicUsize = AtomicUsize::new(0); 15 | /// Total number of telemetry accepted for age 16 | pub static DELAY_TELEM_ACCEPT: AtomicUsize = AtomicUsize::new(0); 17 | /// Total number of logline rejected for age 18 | pub static DELAY_LOG_REJECT: AtomicUsize = AtomicUsize::new(0); 19 | /// Total number of logline accepted for age 20 | pub static DELAY_LOG_ACCEPT: AtomicUsize = AtomicUsize::new(0); 21 | 22 | /// Filter streams to within a bounded interval of current time. 23 | /// 24 | /// This filter is intended to remove items from the stream which are too old, 25 | /// as defined by the current time and the configured `tolerance`. 
That is, if 26 | /// for some time `T`, `(T - time::now()).abs() > tolerance` the item associated 27 | /// with `T` will be rejected. 28 | pub struct DelayFilter { 29 | tolerance: i64, 30 | } 31 | 32 | /// Configuration for `DelayFilter` 33 | #[derive(Clone, Debug)] 34 | pub struct DelayFilterConfig { 35 | /// The filter's unique name in the routing topology. 36 | pub config_path: Option<String>, 37 | /// The forwards along which the filter will emit its `metric::Event`s. 38 | pub forwards: Vec<String>, 39 | /// The delay tolerance of the filter, measured in seconds. 40 | pub tolerance: i64, 41 | } 42 | 43 | impl DelayFilter { 44 | /// Create a new DelayFilter 45 | pub fn new(config: &DelayFilterConfig) -> DelayFilter { 46 | DelayFilter { 47 | tolerance: config.tolerance, 48 | } 49 | } 50 | } 51 | 52 | impl filter::Filter for DelayFilter { 53 | fn process( 54 | &mut self, 55 | event: metric::Event, 56 | res: &mut Vec<metric::Event>, 57 | ) -> Result<(), filter::FilterError> { 58 | match event { 59 | metric::Event::Telemetry(telem) => { 60 | if (telem.timestamp - time::now()).abs() < self.tolerance { 61 | DELAY_TELEM_ACCEPT.fetch_add(1, Ordering::Relaxed); 62 | res.push(metric::Event::Telemetry(telem)); 63 | } else { 64 | DELAY_TELEM_REJECT.fetch_add(1, Ordering::Relaxed); 65 | } 66 | } 67 | metric::Event::Log(log) => { 68 | if (log.time - time::now()).abs() < self.tolerance { 69 | DELAY_LOG_ACCEPT.fetch_add(1, Ordering::Relaxed); 70 | res.push(metric::Event::Log(log)); 71 | } else { 72 | DELAY_LOG_REJECT.fetch_add(1, Ordering::Relaxed); 73 | } 74 | } 75 | ev => { 76 | res.push(ev); 77 | } 78 | } 79 | Ok(()) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/filter/flush_boundary_filter.rs: -------------------------------------------------------------------------------- 1 | use crate::filter; 2 | use crate::metric; 3 | use std::mem; 4 | 5 | /// Buffer events for a set period of flushes 6 | /// 7 | /// This filter is intended to hold events for a set number of flushes. This 8 | /// delays the events for the duration of those flushes but reduces the 9 | /// likelihood of cross-flush splits of timestamps. 10 | pub struct FlushBoundaryFilter { 11 | tolerance: usize, 12 | holds: Vec<Hold>, 13 | } 14 | 15 | /// Configuration for `FlushBoundaryFilter` 16 | #[derive(Clone, Debug)] 17 | pub struct FlushBoundaryFilterConfig { 18 | /// The filter's unique name in the routing topology 19 | pub config_path: Option<String>, 20 | /// The forwards along which the filter will emit its `metric::Event`s 21 | /// stream. 22 | pub forwards: Vec<String>, 23 | /// The flush boundary tolerance, measured in flushes.
24 | pub tolerance: usize, 25 | } 26 | 27 | struct Hold { 28 | timestamp: i64, 29 | age: usize, 30 | events: Vec<metric::Event>, 31 | } 32 | 33 | impl Hold { 34 | pub fn new(event: metric::Event) -> Hold { 35 | let ts = event.timestamp().unwrap(); 36 | let mut events = Vec::new(); 37 | events.push(event); 38 | Hold { 39 | timestamp: ts, 40 | age: 0, 41 | events: events, 42 | } 43 | } 44 | } 45 | 46 | impl FlushBoundaryFilter { 47 | /// Create a new FlushBoundaryFilter 48 | pub fn new(config: &FlushBoundaryFilterConfig) -> FlushBoundaryFilter { 49 | FlushBoundaryFilter { 50 | tolerance: config.tolerance, 51 | holds: Vec::new(), 52 | } 53 | } 54 | } 55 | 56 | impl filter::Filter for FlushBoundaryFilter { 57 | fn process( 58 | &mut self, 59 | event: metric::Event, 60 | res: &mut Vec<metric::Event>, 61 | ) -> Result<(), filter::FilterError> { 62 | if event.is_timer_flush() { 63 | for hold in &mut self.holds { 64 | hold.age += 1; 65 | } 66 | let holds = mem::replace(&mut self.holds, Vec::new()); 67 | let mut too_new = Vec::new(); 68 | for mut hold in holds { 69 | if hold.age > self.tolerance { 70 | res.append(&mut hold.events); 71 | } else { 72 | too_new.push(hold); 73 | } 74 | } 75 | res.push(event); 76 | self.holds = too_new; 77 | } else { 78 | let opt_ts = event.timestamp(); 79 | if let Some(ts) = opt_ts { 80 | match self.holds.binary_search_by(|hold| hold.timestamp.cmp(&ts)) { 81 | Ok(idx) => self.holds[idx].events.push(event), 82 | Err(idx) => { 83 | let hold = Hold::new(event); 84 | self.holds.insert(idx, hold) 85 | } 86 | } 87 | } 88 | } 89 | Ok(()) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/filter/json_encode_filter.rs: -------------------------------------------------------------------------------- 1 | //! Convert `LogLine` events into Raw events encoded as JSON. 2 | //! 3 | //! This filter takes `LogLines` and encodes them into JSON, emitting the 4 | //! encoded event as a Raw event. This allows further filters or sinks to 5 | //! operate on the JSON without needing to understand a `LogLine` event in 6 | //! particular. If the `LogLine` value is a valid JSON object and 7 | //! `parse_line` config option is true, then the JSON will be merged with 8 | //! `LogLine` metadata. Otherwise, the original line will be included simply as 9 | //! a string. 10 | 11 | use crate::filter; 12 | use crate::metric; 13 | use chrono::naive::NaiveDateTime; 14 | use chrono::offset::Utc; 15 | use chrono::DateTime; 16 | use rand::random; 17 | use serde_json; 18 | use serde_json::map::Map; 19 | use serde_json::Value; 20 | use std::iter::FromIterator; 21 | use std::sync::atomic::{AtomicUsize, Ordering}; 22 | 23 | /// Total number of logline processed 24 | pub static JSON_ENCODE_LOG_PROCESSED: AtomicUsize = AtomicUsize::new(0); 25 | /// Total number of logline with JSON value successfully parsed 26 | pub static JSON_ENCODE_LOG_PARSED: AtomicUsize = AtomicUsize::new(0); 27 | 28 | /// Convert `LogLine` events into Raw events encoded as JSON. 29 | /// 30 | /// This filter takes `LogLines` and encodes them into JSON, emitting the 31 | /// encoded event as a Raw event. This allows further filters or sinks to 32 | /// operate on the JSON without needing to understand a `LogLine` event in 33 | /// particular. If the `LogLine` value is a valid JSON object and `parse_line` 34 | /// config option is true, then the JSON will be merged with `LogLine` 35 | /// metadata. Otherwise, the original line will be included simply as a string.
36 | pub struct JSONEncodeFilter { 37 | parse_line: bool, 38 | tags: metric::TagMap, 39 | } 40 | 41 | /// Configuration for `JSONEncodeFilter` 42 | #[derive(Clone, Debug)] 43 | pub struct JSONEncodeFilterConfig { 44 | /// The filter's unique name in the routing topology. 45 | pub config_path: Option<String>, 46 | /// The forwards along which the filter will emit its `metric::Event`s. 47 | pub forwards: Vec<String>, 48 | /// Whether the filter should attempt to parse `LogLine` values that are 49 | /// valid JSON objects. 50 | pub parse_line: bool, 51 | /// The tags to be applied to all `metric::Event`s streaming through this 52 | /// filter. These tags will overwrite any tags carried by the 53 | /// `metric::Event` itself. 54 | pub tags: metric::TagMap, 55 | } 56 | 57 | impl JSONEncodeFilter { 58 | /// Create a new JSONEncodeFilter 59 | pub fn new(config: JSONEncodeFilterConfig) -> JSONEncodeFilter { 60 | JSONEncodeFilter { 61 | parse_line: config.parse_line, 62 | tags: config.tags, 63 | } 64 | } 65 | } 66 | 67 | impl filter::Filter for JSONEncodeFilter { 68 | fn process( 69 | &mut self, 70 | event: metric::Event, 71 | res: &mut Vec<metric::Event>, 72 | ) -> Result<(), filter::FilterError> { 73 | match event { 74 | metric::Event::Log(log) => { 75 | let naive_time = NaiveDateTime::from_timestamp(log.time, 0); 76 | let utc_time: DateTime<Utc> = DateTime::from_utc(naive_time, Utc); 77 | let metadata = json_to_object(json!({ 78 | "time": utc_time.to_rfc3339(), 79 | "path": log.path.clone(), 80 | "tags": Map::from_iter(log.tags(&self.tags).map(|(k, v): (&String, &String)| (k.to_string(), v.to_string().into()))), 81 | })); 82 | // If parse_line is true, and line is parsable as a JSON object, parse 83 | // it. Otherwise get an object containing the original 84 | // line. 85 | let value = (if self.parse_line { Some(()) } else { None }) 86 | .and_then(|_| serde_json::from_str::<Value>(&log.value).ok()) 87 | .and_then(|v| { 88 | if let Value::Object(obj) = v { 89 | Some(obj) 90 | } else { 91 | None 92 | } 93 | }) 94 | .map(|v| { 95 | JSON_ENCODE_LOG_PARSED.fetch_add(1, Ordering::Relaxed); 96 | v 97 | }) 98 | .unwrap_or_else(|| { 99 | json_to_object(json!({"message": log.value.clone()})) 100 | }); 101 | // Combine our various sources of data. 102 | // Data that is more likely to be correct (more specific to the 103 | // source) overrides other data. So the parsed value 104 | // is authoritative, followed by any fields we could 105 | // parse by filters, then finally the metadata we were able to work 106 | // out on our own. 107 | let value = merge_objects(vec![ 108 | value, 109 | Map::from_iter(log.fields.into_iter().map(|(k, v)| (k, v.into()))), 110 | metadata, 111 | ]); 112 | res.push(metric::Event::Raw { 113 | order_by: random(), 114 | encoding: metric::Encoding::JSON, 115 | bytes: serde_json::to_string(&value).unwrap().into(), /* serde_json::Value 116 | * will never fail to 117 | * encode */ 118 | metadata: None, 119 | connection_id: None, 120 | }); 121 | JSON_ENCODE_LOG_PROCESSED.fetch_add(1, Ordering::Relaxed); 122 | } 123 | // All other event types are passed through. 124 | event => { 125 | res.push(event); 126 | } 127 | } 128 | Ok(()) 129 | } 130 | } 131 | 132 | /// Convenience helper to take a json!() macro you know is an object and get 133 | /// back a `Map<String, Value>` instead of a generic `Value`.
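/// A minimal sketch:
///
/// ```ignore
/// let obj = json_to_object(json!({"a": 1}));
/// assert_eq!(obj["a"], 1);
/// ```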
134 | fn json_to_object(v: Value) -> Map<String, Value> { 135 | if let Value::Object(obj) = v { 136 | obj 137 | } else { 138 | unreachable!() 139 | } 140 | } 141 | 142 | /// Merge JSON objects, with values from earlier objects in the list overriding 143 | /// later ones. Note this is not a recursive merge - if the same key is in many 144 | /// objects, we simply take the value from the earliest one. 145 | fn merge_objects(objs: Vec<Map<String, Value>>) -> Map<String, Value> { 146 | let mut result = Map::new(); 147 | for obj in objs { 148 | for (key, value) in obj { 149 | if !result.contains_key(&key) { 150 | result.insert(key, value); 151 | } 152 | } 153 | } 154 | result 155 | } 156 | 157 | // Tests 158 | // 159 | #[cfg(test)] 160 | mod test { 161 | use super::*; 162 | use crate::filter::Filter; 163 | use crate::metric; 164 | use quickcheck::QuickCheck; 165 | use serde_json::map::Map; 166 | use serde_json::Value; 167 | 168 | fn process_event(parse_line: bool, event: metric::Event) -> Value { 169 | let mut filter = JSONEncodeFilter { 170 | parse_line, 171 | tags: metric::TagMap::default(), 172 | }; 173 | let mut results = Vec::new(); 174 | filter.process(event, &mut results).unwrap(); 175 | // fail if results empty, else return processed event's payload 176 | if let metric::Event::Raw { ref bytes, .. } = results[0] { 177 | return serde_json::from_slice(bytes).unwrap(); 178 | } 179 | panic!("Processed event was not Raw") 180 | } 181 | 182 | #[test] 183 | fn parsable_line_parsing_off() { 184 | // Test we don't parse a line if parsing is off 185 | assert_eq!( 186 | process_event( 187 | false, 188 | metric::Event::new_log(metric::LogLine { 189 | path: "testpath".to_string(), 190 | value: "{\"bad\": \"do not parse\"}".to_string(), 191 | time: 946684800, 192 | tags: Default::default(), 193 | fields: Default::default(), 194 | }) 195 | ), 196 | json!({ 197 | "path": "testpath", 198 | "message": "{\"bad\": \"do not parse\"}", 199 | "time": "2000-01-01T00:00:00+00:00", 200 | "tags": {}, 201 | }) 202 | ); 203 | } 204 | 205 | #[test] 206 | fn parsable_line_parsing_on() { 207 | // Test we do parse a line if parsing is on 208 | assert_eq!( 209 | process_event( 210 | true, 211 | metric::Event::new_log(metric::LogLine { 212 | path: "testpath".to_string(), 213 | value: "{\"good\": \"do parse\"}".to_string(), 214 | time: 946684800, 215 | tags: Default::default(), 216 | fields: Default::default(), 217 | }) 218 | ), 219 | json!({ 220 | "path": "testpath", 221 | "good": "do parse", 222 | "time": "2000-01-01T00:00:00+00:00", 223 | "tags": {}, 224 | }) 225 | ); 226 | } 227 | 228 | #[test] 229 | fn unparsable_line() { 230 | // Test we don't parse a line if it's not JSON 231 | assert_eq!( 232 | process_event( 233 | true, 234 | metric::Event::new_log(metric::LogLine { 235 | path: "testpath".to_string(), 236 | value: "this is not json".to_string(), 237 | time: 946684800, 238 | tags: Default::default(), 239 | fields: Default::default(), 240 | }) 241 | ), 242 | json!({ 243 | "path": "testpath", 244 | "message": "this is not json", 245 | "time": "2000-01-01T00:00:00+00:00", 246 | "tags": {}, 247 | }) 248 | ); 249 | } 250 | 251 | #[test] 252 | fn non_object_line() { 253 | // Test we don't parse a line if it's not a JSON object but is valid JSON 254 | assert_eq!( 255 | process_event( 256 | true, 257 | metric::Event::new_log(metric::LogLine { 258 | path: "testpath".to_string(), 259 | value: "[123, \"not an object\"]".to_string(), 260 | time: 946684800, 261 | tags: Default::default(), 262 | fields: Default::default(), 263 | }) 264 | ), 265 | json!({ 266 |
"path": "testpath", 267 | "message": "[123, \"not an object\"]", 268 | "time": "2000-01-01T00:00:00+00:00", 269 | "tags": {}, 270 | }) 271 | ); 272 | } 273 | 274 | // quickcheck and serde_json::map::Map aren't compatible, so we ask quickcheck 275 | // for many Vec<(String, String)>s that we turn into maps. 276 | fn vecs_to_objs(vecs: &Vec>) -> Vec> { 277 | vecs.iter() 278 | .map(|vec| { 279 | Map::from_iter( 280 | vec.iter() 281 | .map(|&(ref k, ref v)| (k.clone(), v.clone().into())), 282 | ) 283 | }) 284 | .collect() 285 | } 286 | 287 | #[test] 288 | fn merged_objects_contain_all_keys() { 289 | fn inner(vecs: Vec>) -> bool { 290 | let result = merge_objects(vecs_to_objs(&vecs)); 291 | for obj in vecs { 292 | for (k, _v) in obj { 293 | if !result.contains_key(&k) { 294 | return false; 295 | } 296 | } 297 | } 298 | true 299 | } 300 | QuickCheck::new().quickcheck(inner as fn(Vec>) -> bool); 301 | } 302 | 303 | #[test] 304 | fn merged_objects_takes_first_value() { 305 | fn inner(vecs: Vec>) -> bool { 306 | let objs = vecs_to_objs(&vecs); 307 | let result = merge_objects(objs.clone()); 308 | for (key, result_value) in result { 309 | match objs.iter().find(|obj| obj.contains_key(&key)) { 310 | Some(obj) => { 311 | if obj[&key] != result_value { 312 | return false; // result value did not match first obj containing key 313 | } 314 | } 315 | None => return false, // key in result was not in any input objs 316 | } 317 | } 318 | true 319 | } 320 | QuickCheck::new().quickcheck(inner as fn(Vec>) -> bool); 321 | } 322 | } 323 | -------------------------------------------------------------------------------- /src/filter/mod.rs: -------------------------------------------------------------------------------- 1 | //! Transform or create `metric::Event` from a stream of `metric::Event`. 2 | //! 3 | //! cernan filters are intended to input `metric::Event` and then adapt that 4 | //! stream, either by injecting new `metric::Event` or by transforming the 5 | //! stream members as they come through. Exact behaviour varies by filters. The 6 | //! filter receives on an input channel and outputs over its forwards. 
7 | 8 | use crate::metric; 9 | use crate::time; 10 | use crate::util; 11 | use hopper; 12 | 13 | pub mod delay_filter; 14 | mod flush_boundary_filter; 15 | pub mod json_encode_filter; 16 | mod programmable_filter; 17 | 18 | pub use self::delay_filter::{DelayFilter, DelayFilterConfig}; 19 | pub use self::flush_boundary_filter::{ 20 | FlushBoundaryFilter, FlushBoundaryFilterConfig, 21 | }; 22 | pub use self::json_encode_filter::{JSONEncodeFilter, JSONEncodeFilterConfig}; 23 | pub use self::programmable_filter::{ProgrammableFilter, ProgrammableFilterConfig}; 24 | 25 | /// Errors that can strike a Filter 26 | #[derive(Debug)] 27 | pub enum FilterError { 28 | /// Specific to a ProgrammableFilter, means no function is available as 29 | /// called in the script 30 | NoSuchFunction(&'static str, metric::Event), 31 | /// Specific to a ProgrammableFilter, means the lua code errored 32 | LuaError(String, metric::Event), 33 | } 34 | 35 | fn msg_in_fe(fe: &FilterError) -> &str { 36 | match fe { 37 | FilterError::NoSuchFunction(n, _) => n, 38 | FilterError::LuaError(ref n, _) => n, 39 | } 40 | } 41 | 42 | fn event_in_fe(fe: FilterError) -> metric::Event { 43 | match fe { 44 | FilterError::NoSuchFunction(_, m) | FilterError::LuaError(_, m) => m, 45 | } 46 | } 47 | 48 | /// The Filter trait 49 | /// 50 | /// All filters take as input a stream of `metric::Event` and produce as output 51 | /// another `metric::Event` stream. That's it. The exact method by which each 52 | /// stream works depends on the implementation of the Filter. 53 | pub trait Filter { 54 | /// Process a single `metric::Event` 55 | /// 56 | /// Individual Filters will implement this function depending on their 57 | /// mechanism. See individual filters for details. 58 | fn process( 59 | &mut self, 60 | event: metric::Event, 61 | res: &mut Vec<metric::Event>, 62 | ) -> Result<(), FilterError>; 63 | 64 | /// Run the Filter 65 | /// 66 | /// It is not expected that most Filters will re-implement this. If this is 67 | /// done, take care to obey overload signals and interpret errors from 68 | /// `Filter::process`. 69 | fn run( 70 | &mut self, 71 | recv: hopper::Receiver<metric::Event>, 72 | sources: Vec<String>, 73 | mut chans: util::Channel, 74 | ) { 75 | let mut attempts = 0; 76 | let mut events = Vec::with_capacity(64); 77 | let mut recv = recv.into_iter(); 78 | let mut total_shutdowns = 0; 79 | loop { 80 | time::delay(attempts); 81 | match recv.next() { 82 | None => attempts += 1, 83 | Some(metric::Event::Shutdown) => { 84 | util::send(&mut chans, metric::Event::Shutdown); 85 | total_shutdowns += 1; 86 | if total_shutdowns >= sources.len() { 87 | trace!( 88 | "Received shutdown from every configured source: {:?}", 89 | sources 90 | ); 91 | return; 92 | } 93 | } 94 | Some(event) => { 95 | attempts = 0; 96 | match self.process(event, &mut events) { 97 | Ok(()) => { 98 | for ev in events.drain(..) { 99 | util::send(&mut chans, ev) 100 | } 101 | } 102 | Err(fe) => { 103 | error!( 104 | "Failed to run filter with error: {:?}", 105 | msg_in_fe(&fe) 106 | ); 107 | let event = event_in_fe(fe); 108 | util::send(&mut chans, event); 109 | } 110 | } 111 | } 112 | } 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/http.rs: -------------------------------------------------------------------------------- 1 | //! Tiny, unassuming HTTP Server 2 | 3 | extern crate tiny_http; 4 | 5 | use crate::thread; 6 | use std; 7 | 8 | /// HTTP request. Alias of `tiny_http::Request`.
9 | pub type Request = tiny_http::Request; 10 | /// HTTP response. Alias of `tiny_http::Response`. 11 | pub type Response<'a> = tiny_http::Response<&'a [u8]>; 12 | /// HTTP header. Alias of `tiny_http::Header`. 13 | pub type Header = tiny_http::Header; 14 | /// HTTP header field. Alias of `tiny_http::HeaderField`. 15 | pub type HeaderField = tiny_http::HeaderField; 16 | /// HTTP status code. Alias of `tiny_http::StatusCode`. 17 | pub type StatusCode = tiny_http::StatusCode; 18 | 19 | /// Simple single threaded HTTP request handler. 20 | pub trait Handler: Sync + Send { 21 | /// Handler for a single HTTP request. 22 | fn handle(&self, request: Request); 23 | } 24 | 25 | /// Single threaded HTTP server. 26 | pub struct Server { 27 | /// Thread handle for the operating HTTP server. 28 | thread: thread::ThreadHandle, 29 | } 30 | 31 | fn http_server<H>( 32 | poller: &thread::Poll, 33 | tiny_http_server: &tiny_http::Server, 34 | handler: &H, 35 | ) where 36 | H: Handler, 37 | { 38 | loop { 39 | let mut events = thread::Events::with_capacity(1024); 40 | match poller.poll(&mut events, Some(std::time::Duration::from_millis(5))) { 41 | Ok(num_events) if num_events > 0 => { 42 | break; 43 | } 44 | 45 | Ok(_) => match tiny_http_server 46 | .recv_timeout(std::time::Duration::from_millis(1000)) 47 | { 48 | Ok(maybe_a_request) => { 49 | if let Some(request) = maybe_a_request { 50 | handler.handle(request); 51 | } 52 | } 53 | 54 | Err(e) => { 55 | panic!("Failed during recv_timeout {:?}", e); 56 | } 57 | }, 58 | 59 | Err(e) => { 60 | panic!("Failed during poll {:?}", e); 61 | } 62 | }; 63 | } 64 | } 65 | 66 | /// Single threaded HTTP server implementation. 67 | impl Server { 68 | /// Create and start an HTTP server on the given host and port. 69 | pub fn new<H: Handler + 'static>(host_port: String, handler: H) -> Self { 70 | Server { 71 | thread: thread::spawn(move |poller| { 72 | let tiny_http_server = tiny_http::Server::http(host_port).unwrap(); 73 | http_server(&poller, &tiny_http_server, &handler) 74 | }), 75 | } 76 | } 77 | } 78 | 79 | /// Graceful shutdown support for Server. 80 | impl thread::Stoppable for Server { 81 | fn join(self) { 82 | self.thread.join(); 83 | } 84 | 85 | fn shutdown(self) { 86 | self.thread.shutdown(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Cernan is a telemetry and logging aggregation server. It exposes multiple 2 | //! interfaces for ingestion and can emit to multiple aggregation sources while 3 | //! doing in-flight manipulation of data. Cernan has minimal CPU and memory 4 | //! requirements and is intended to service bursty telemetry _without_ load 5 | //! shedding. Cernan aims to be _reliable_ and _convenient_ to use, both for 6 | //! application engineers and operations staff. 7 | //! 8 | //! Why you might choose to use cernan: 9 | //! 10 | //! * You need to ingest telemetry from multiple protocols. 11 | //! * You need to multiplex telemetry over aggregation services. 12 | //! * You want to convert log lines into telemetry. 13 | //! * You want to convert telemetry into log lines. 14 | //! * You want to transform telemetry or log lines in-flight. 15 | //! 16 | //! If you'd like to learn more, please do have a look in 17 | //! our [wiki](https://github.com/postmates/cernan/wiki/).
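//!
//! Internally, data flows along `source -> filter -> ... -> sink` chains,
//! with each hop consuming and producing `metric::Event` values.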
18 | #![allow(unknown_lints)] 19 | #![deny( 20 | trivial_numeric_casts, 21 | missing_docs, 22 | unstable_features, 23 | unused_import_braces 24 | )] 25 | extern crate base64; 26 | extern crate byteorder; 27 | extern crate chrono; 28 | extern crate clap; 29 | extern crate coco; 30 | extern crate elastic; 31 | extern crate flate2; 32 | extern crate futures; 33 | extern crate glob; 34 | extern crate hopper; 35 | extern crate libc; 36 | extern crate mio; 37 | extern crate mond; 38 | extern crate protobuf; 39 | extern crate quantiles; 40 | extern crate rand; 41 | extern crate rdkafka; 42 | extern crate regex; 43 | extern crate reqwest; 44 | extern crate seahash; 45 | extern crate serde_avro; 46 | #[macro_use] 47 | extern crate serde_json; 48 | extern crate slab; 49 | extern crate toml; 50 | extern crate url; 51 | extern crate uuid; 52 | 53 | #[macro_use] 54 | extern crate log; 55 | 56 | #[macro_use] 57 | extern crate lazy_static; 58 | 59 | #[macro_use] 60 | extern crate serde_derive; 61 | 62 | #[cfg(test)] 63 | extern crate quickcheck; 64 | 65 | pub mod buckets; 66 | pub mod config; 67 | pub mod constants; 68 | pub mod filter; 69 | pub mod http; 70 | pub mod matrix; 71 | pub mod metric; 72 | pub mod protocols; 73 | pub mod sink; 74 | pub mod source; 75 | pub mod thread; 76 | pub mod time; 77 | pub mod util; 78 | -------------------------------------------------------------------------------- /src/matrix.rs: -------------------------------------------------------------------------------- 1 | //! Collection of matrix implementations. 2 | 3 | use crate::util; 4 | use std; 5 | use std::fmt::Debug; 6 | use std::str::FromStr; 7 | 8 | type AdjacencyMap = util::HashMap>; 9 | 10 | type AdjacencyMatrix = util::HashMap>; 11 | 12 | /// Adjacency matrix struct. 13 | pub struct Adjacency { 14 | edges: AdjacencyMatrix, 15 | } 16 | 17 | impl Default for Adjacency { 18 | fn default() -> Adjacency { 19 | Adjacency { 20 | edges: Default::default(), 21 | } 22 | } 23 | } 24 | 25 | /// Poor man's adjacency matrix biased towards incident edge queries. 26 | /// 27 | /// Edges are not symmetric. Two values are symmetrically adjacent when 28 | /// edges originate from each value to the other value. 29 | impl Adjacency { 30 | /// Construct a new adjacency matrix. 31 | pub fn new() -> Self { 32 | Adjacency { 33 | edges: Default::default(), 34 | } 35 | } 36 | 37 | /// Adds an outbound edge from a node to another. 38 | pub fn add_asymmetric_edge( 39 | &mut self, 40 | from_str: &str, 41 | to_str: &str, 42 | metadata: Option, 43 | ) { 44 | let to = String::from_str(to_str).unwrap(); 45 | let from = String::from_str(from_str).unwrap(); 46 | let vec = self.edges.entry(from).or_insert_with(Default::default); 47 | vec.insert(to, metadata); 48 | } 49 | 50 | /// Adds symmetric edges between the given node and a set of other nodes. 51 | pub fn add_edges( 52 | &mut self, 53 | from_str: &str, 54 | to_strs: Vec, 55 | metadata: Option, 56 | ) { 57 | for to_str in to_strs { 58 | self.add_asymmetric_edge(from_str, &to_str, metadata.clone()); 59 | self.add_asymmetric_edge(&to_str, from_str, metadata.clone()) 60 | } 61 | 62 | drop(metadata); 63 | } 64 | 65 | /// Returns the number of incident edges to the given node. 66 | pub fn num_edges(&mut self, id: &str) -> usize { 67 | match self.edges.get(id) { 68 | Some(value) => value.keys().len(), 69 | 70 | None => 0, 71 | } 72 | } 73 | 74 | /// Returns true iff relations exist for the given node id. 
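/// A minimal sketch (type parameters are elided throughout this listing;
/// any clonable metadata type works):
///
/// ```ignore
/// let mut adj = Adjacency::new();
/// adj.add_asymmetric_edge("a", "b", None::<String>);
/// assert!(adj.contains_node("a"));
/// assert!(!adj.contains_node("b")); // no edge originates at "b"
/// ```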
75 | pub fn contains_node(&self, id: &str) -> bool { 76 | self.edges.contains_key(id) 77 | } 78 | 79 | /// Filters and returns edges satisfying the given constraint. 80 | pub fn filter_nodes(&self, id: &str, f: F) -> Vec 81 | where 82 | for<'r> F: FnMut(&'r (&String, &Option)) -> bool, 83 | { 84 | self.edges[id] 85 | .iter() 86 | .filter(f) 87 | .map(|(k, _v)| k.clone()) 88 | .collect() 89 | } 90 | 91 | /// Iterates over edge relations in the matrix. 92 | pub fn iter(&self) -> std::collections::hash_map::Iter> { 93 | self.edges.iter() 94 | } 95 | 96 | /// Pops adjacency metadata for the given node. 97 | pub fn pop(&mut self, id: &str) -> Option> { 98 | self.edges.remove(id) 99 | } 100 | 101 | /// As pop, but returns a vec of node identifiers connected to the given 102 | /// node. 103 | pub fn pop_nodes(&mut self, id: &str) -> Vec { 104 | match self.pop(id) { 105 | Some(map) => map.into_iter().map(|(k, _v)| k).collect(), 106 | 107 | None => Vec::new(), 108 | } 109 | } 110 | 111 | /// As pop, but returns a vec of edge metadata. 112 | /// Option values will be unwrapped and None values filtered. 113 | pub fn pop_metadata(&mut self, id: &str) -> Vec { 114 | match self.pop(id) { 115 | Some(map) => map 116 | .into_iter() 117 | .filter(|&(ref _k, ref option_v)| option_v.is_some()) 118 | .map(|(_, some_v)| some_v.unwrap()) 119 | .collect(), 120 | 121 | None => Vec::new(), 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/metric/ackbag.rs: -------------------------------------------------------------------------------- 1 | use crate::util; 2 | use std::cmp::min; 3 | use std::sync::{Arc, Mutex}; 4 | use std::{thread, time}; 5 | use uuid::Uuid; 6 | 7 | #[derive(Clone)] 8 | pub struct SyncAckProps { 9 | acks_received: usize, 10 | } 11 | 12 | impl Default for SyncAckProps { 13 | fn default() -> SyncAckProps { 14 | SyncAckProps { acks_received: 0 } 15 | } 16 | } 17 | 18 | impl SyncAckProps { 19 | pub fn ack(&mut self) { 20 | self.acks_received = self.acks_received.saturating_add(1); 21 | } 22 | 23 | pub fn acks_received(&self) -> usize { 24 | self.acks_received 25 | } 26 | } 27 | 28 | type SyncAckBagMap = util::HashMap; 29 | 30 | pub struct SyncAckBag { 31 | waiting_syncs: Arc>, 32 | } 33 | 34 | impl Default for SyncAckBag { 35 | fn default() -> SyncAckBag { 36 | SyncAckBag { 37 | waiting_syncs: Arc::new(Mutex::new(SyncAckBagMap::default())), 38 | } 39 | } 40 | } 41 | 42 | impl SyncAckBag { 43 | /// Return a clone of the SyncAckProps for the given key 44 | pub fn props_for(&self, key: Uuid) -> Option { 45 | let bag = self.waiting_syncs.lock().unwrap(); 46 | match bag.get(&key) { 47 | Some(v) => Some((*v).clone()), 48 | None => None, 49 | } 50 | } 51 | 52 | /// Insert an empty-initialized SyncAckProps for the given key 53 | pub fn prepare_wait(&self, key: Uuid) { 54 | let mut bag = self.waiting_syncs.lock().unwrap(); 55 | bag.insert(key, SyncAckProps::default()); 56 | } 57 | 58 | /// Remove the given key from the internal bag 59 | pub fn remove(&self, key: Uuid) { 60 | let mut bag = self.waiting_syncs.lock().unwrap(); 61 | bag.remove(&key); 62 | } 63 | 64 | /// Retrieve a mutable reference to the SyncAckProps for the key and invoke 65 | /// a callback if it exists. If the key is not present, then the 66 | /// callback is not called. 
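/// A minimal sketch (mirrors the unit tests below):
///
/// ```ignore
/// let bag = SyncAckBag::default();
/// let key = Uuid::new_v4();
/// bag.prepare_wait(key);
/// bag.with_props(key, |props| props.ack());
/// assert_eq!(bag.props_for(key).unwrap().acks_received(), 1);
/// ```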
67 | pub fn with_props(&self, key: Uuid, callback: F) { 68 | let mut bag = self.waiting_syncs.lock().unwrap(); 69 | if let Some(v) = bag.get_mut(&key) { 70 | callback(v); 71 | } 72 | } 73 | 74 | /// Wait until the number of acks in the specified key becomes non-zero. 75 | pub fn wait_for(&self, key: Uuid) { 76 | self.wait_for_callback(key, thread::sleep); 77 | } 78 | 79 | /// Testability driver for wait_for 80 | pub fn wait_for_callback(&self, key: Uuid, cb: F) { 81 | let max_nap_time = time::Duration::from_millis(250); 82 | let mut current_nap_time = time::Duration::from_millis(5); 83 | let mut wait = true; 84 | while wait { 85 | match self.props_for(key) { 86 | Some(props) => wait = props.acks_received() == 0, 87 | _ => wait = false, 88 | } 89 | if wait { 90 | cb(current_nap_time); 91 | current_nap_time = min(max_nap_time, current_nap_time * 2); 92 | } 93 | } 94 | } 95 | } 96 | 97 | lazy_static! { 98 | static ref SINGLETON: SyncAckBag = SyncAckBag::default(); 99 | } 100 | 101 | /// Returns the singleton ack bag. 102 | /// The ack bag is necessary because we need to protect concurrent access of raw 103 | /// events' data, but we can't Arc that enum due to serialization needs. 104 | /// Instead, we keep (and Arc) a global bag of properties and only serialize a 105 | /// key into the bag. 106 | pub fn global_ack_bag() -> &'static SyncAckBag { 107 | &SINGLETON 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use super::*; 113 | use std::cell::RefCell; 114 | 115 | #[test] 116 | fn test_prepare_wait_adds_default_entry() { 117 | let ack_bag = SyncAckBag::default(); 118 | let key = Uuid::new_v4(); 119 | ack_bag.prepare_wait(key); 120 | 121 | let value = ack_bag.props_for(key).unwrap(); 122 | assert_eq!(value.acks_received(), 0); 123 | } 124 | 125 | #[test] 126 | fn test_remove_works() { 127 | let ack_bag = SyncAckBag::default(); 128 | let key = Uuid::new_v4(); 129 | ack_bag.prepare_wait(key); 130 | ack_bag.remove(key); 131 | 132 | assert_eq!(ack_bag.props_for(key).is_some(), false); 133 | 134 | // Removing a non-existent key is fine. 
135 | let other_key = Uuid::new_v4(); 136 | ack_bag.remove(other_key); 137 | } 138 | 139 | #[test] 140 | fn test_ack_adds_one_to_tally() { 141 | let ack_bag = SyncAckBag::default(); 142 | let key = Uuid::new_v4(); 143 | ack_bag.prepare_wait(key); 144 | ack_bag.with_props(key, |props| { 145 | props.ack(); 146 | }); 147 | 148 | assert_eq!(ack_bag.props_for(key).unwrap().acks_received(), 1); 149 | } 150 | 151 | #[test] 152 | fn test_wait_for_returns_when_there_are_acks() { 153 | let ack_bag = SyncAckBag::default(); 154 | let key = Uuid::new_v4(); 155 | ack_bag.prepare_wait(key); 156 | ack_bag.with_props(key, |props| { 157 | props.ack(); 158 | }); 159 | 160 | ack_bag.wait_for(key); 161 | } 162 | 163 | #[test] 164 | fn test_wait_for_sleeps_when_there_are_no_acks() { 165 | let callback_count = RefCell::new(0); 166 | let ack_bag = SyncAckBag::default(); 167 | let key = Uuid::new_v4(); 168 | ack_bag.prepare_wait(key); 169 | ack_bag.wait_for_callback(key, |_| { 170 | *callback_count.borrow_mut() += 1; 171 | ack_bag.with_props(key, |props| { 172 | props.ack(); 173 | }) 174 | }); 175 | assert_eq!(*callback_count.borrow(), 1); 176 | } 177 | 178 | #[test] 179 | fn test_waiting_for_nonexistent_key_returns() { 180 | let ack_bag = SyncAckBag::default(); 181 | ack_bag.wait_for(Uuid::new_v4()); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/metric/event.rs: -------------------------------------------------------------------------------- 1 | use crate::metric::{LogLine, Telemetry}; 2 | use crate::util::HashMap; 3 | use uuid::Uuid; 4 | 5 | /// Supported event encodings. 6 | #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] 7 | pub enum Encoding { 8 | /// Raw bytes, no encoding. 9 | Raw, 10 | /// Avro 11 | Avro, 12 | /// JSON 13 | JSON, 14 | } 15 | 16 | /// Metadata: additional data attached to an event 17 | pub type Metadata = HashMap, Vec>; 18 | 19 | /// Event: the central cernan datastructure 20 | /// 21 | /// Event is the heart of cernan, the enumeration that cernan works on in all 22 | /// cases. The enumeration fields drive sink / source / filter operations 23 | /// depending on their implementation. 24 | #[allow(clippy::large_enum_variant)] 25 | #[derive(PartialEq, Debug, Serialize, Deserialize, Clone)] 26 | pub enum Event { 27 | /// A wrapper for `metric::Telemetry`. See its documentation for more 28 | /// detail. 29 | Telemetry(Telemetry), 30 | /// A wrapper for `metric::LogLine`. See its documentation for more 31 | /// detail. 32 | Log(LogLine), 33 | /// A flush pulse signal. The `TimerFlush` keeps a counter of the total 34 | /// flushes made in this cernan's run. See `source::Flush` for the origin of 35 | /// these pulses in cernan operation. 36 | TimerFlush(u64), 37 | /// Shutdown event which marks the location in the queue after which no 38 | /// more events will appear. It is expected that after receiving this 39 | /// marker the given source will exit cleanly. 40 | Shutdown, 41 | /// Raw, encoded bytes. 42 | Raw { 43 | /// Ordering value used by some sinks accepting Raw events. 44 | order_by: u64, 45 | /// Encoding for the included bytes. 46 | encoding: Encoding, 47 | /// Encoded payload. 48 | bytes: Vec, 49 | /// Metadata used by some sinks 50 | metadata: Option, 51 | /// Connection ID of the source on which this raw event was received 52 | connection_id: Option, 53 | }, 54 | } 55 | 56 | impl Event { 57 | /// Determine if an event is a `TimerFlush`. 
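/// A minimal sketch:
///
/// ```
/// use cernan::metric::Event;
///
/// assert!(Event::TimerFlush(0).is_timer_flush());
/// assert!(!Event::Shutdown.is_timer_flush());
/// ```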
58 | pub fn is_timer_flush(&self) -> bool { 59 | match *self { 60 | Event::TimerFlush(_) => true, 61 | _ => false, 62 | } 63 | } 64 | 65 | /// Retrieve the timestamp from an `Event` if such exists. `TimerFlush` has 66 | /// no sensible timestamp -- being itself a mechanism _of_ time, not inside 67 | /// time -- and these `Event`s will always return None. 68 | pub fn timestamp(&self) -> Option { 69 | match *self { 70 | Event::Telemetry(ref telem) => Some(telem.timestamp), 71 | Event::Log(ref log) => Some(log.time), 72 | Event::TimerFlush(_) | Event::Shutdown | Event::Raw { .. } => None, 73 | } 74 | } 75 | } 76 | 77 | impl Event { 78 | /// Create a new `Event::Telemetry` from an existing `metric::Telemetry`. 79 | #[inline] 80 | pub fn new_telemetry(metric: Telemetry) -> Event { 81 | Event::Telemetry(metric) 82 | } 83 | 84 | /// Create a new `Event::Log` from an existing `metric::LogLine`. 85 | #[inline] 86 | pub fn new_log(log: LogLine) -> Event { 87 | Event::Log(log) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/metric/logline.rs: -------------------------------------------------------------------------------- 1 | use crate::metric::{TagIter, TagMap}; 2 | use crate::time; 3 | use std::collections::HashSet; 4 | 5 | /// An unstructured piece of text, plus associated metadata 6 | #[derive(PartialEq, Debug, Serialize, Deserialize, Clone)] 7 | pub struct LogLine { 8 | /// The time that this `LogLine` occupies, in the units of time::now() 9 | pub time: i64, 10 | /// The path that this `LogLine` originated from. May be a unix path or not, 11 | /// depending on origin. 12 | pub path: String, 13 | /// The line read from the `LogLine` path 14 | pub value: String, 15 | /// Fields that may have been parsed out of the value, a key/value structure 16 | pub fields: TagMap, 17 | /// Cernan tags for this LogLine 18 | pub tags: Option, 19 | } 20 | 21 | /// `LogLine` - a structure that represents a bit of text 22 | /// 23 | /// A `LogLine` is intended to hold a bit of text in its 'value' that may or may 24 | /// not be structured. The field 'fields' is available for 25 | impl LogLine { 26 | /// Create a new `LogLine` 27 | /// 28 | /// Please see `LogLine` struct documentation for more details. 29 | pub fn new(path: S, value: S) -> LogLine 30 | where 31 | S: Into, 32 | { 33 | LogLine { 34 | path: path.into(), 35 | value: value.into(), 36 | time: time::now(), 37 | tags: Default::default(), 38 | fields: Default::default(), 39 | } 40 | } 41 | 42 | /// Set the time of the Logline 43 | /// 44 | /// # Examples 45 | /// ``` 46 | /// use cernan::metric::LogLine; 47 | /// 48 | /// let time = 101; 49 | /// let mut l = LogLine::new("some_path", "value"); 50 | /// assert!(l.time != time); 51 | /// 52 | /// l = l.time(time); 53 | /// assert_eq!(l.time, time); 54 | /// ``` 55 | pub fn time(mut self, time: i64) -> LogLine { 56 | self.time = time; 57 | self 58 | } 59 | 60 | /// Insert a new field into LogLine 61 | /// 62 | /// Fields are distinct from tags. A 'field' is related to data that has 63 | /// been parsed out of LogLine.value and will be treated specially by 64 | /// supporting sinks. For instance, the firehose sink will put the field 65 | /// _into_ the payload where tags will be associated metadata that define 66 | /// groups of related LogLines. 
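/// # Examples
/// ```
/// use cernan::metric::LogLine;
///
/// let l = LogLine::new("some_path", "value").insert_field("status", "200");
/// assert_eq!(l.fields.get("status"), Some(&"200".to_string()));
/// ```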
67 | pub fn insert_field(mut self, key: S, val: S) -> LogLine 68 | where 69 | S: Into, 70 | { 71 | self.fields.insert(key.into(), val.into()); 72 | self 73 | } 74 | 75 | /// Insert a tag into the LogLine 76 | /// 77 | /// This inserts a key/value pair into the LogLine, returning the previous 78 | /// value if the key already existed. 79 | pub fn insert_tag(&mut self, key: S, val: S) -> Option 80 | where 81 | S: Into, 82 | { 83 | if let Some(ref mut tags) = self.tags { 84 | tags.insert(key.into(), val.into()) 85 | } else { 86 | let mut tags = TagMap::default(); 87 | let res = tags.insert(key.into(), val.into()); 88 | self.tags = Some(tags); 89 | res 90 | } 91 | } 92 | 93 | /// Remove a tag from the Telemetry 94 | /// 95 | /// This removes a key/value pair from the Telemetry, returning the previous 96 | /// value if the key existed. 97 | pub fn remove_tag(&mut self, key: &str) -> Option { 98 | if let Some(ref mut tags) = self.tags { 99 | tags.remove(key) 100 | } else { 101 | None 102 | } 103 | } 104 | 105 | /// Overlay a tag into the LogLine 106 | /// 107 | /// This function inserts a new key and value into the LogLine's tags. If 108 | /// the key was already present the old value is replaced. 109 | pub fn overlay_tag(mut self, key: S, val: S) -> LogLine 110 | where 111 | S: Into, 112 | { 113 | let _ = self.insert_tag(key, val); 114 | self 115 | } 116 | 117 | /// Overlay a TagMap on the LogLine's tags 118 | /// 119 | /// This function overlays a TagMap onto the LogLine's existing tags. If a 120 | /// key is present in both TagMaps the one from 'map' will be preferred. 121 | pub fn overlay_tags_from_map(mut self, map: &TagMap) -> LogLine { 122 | if let Some(ref mut tags) = self.tags { 123 | for (k, v) in map.iter() { 124 | tags.insert(k.clone(), v.clone()); 125 | } 126 | } else if !map.is_empty() { 127 | self.tags = Some(map.clone()); 128 | } 129 | self 130 | } 131 | 132 | /// Get a value from tags, either interior or default 133 | pub fn get_from_tags<'a>( 134 | &'a mut self, 135 | key: &'a str, 136 | defaults: &'a TagMap, 137 | ) -> Option<&'a String> { 138 | if let Some(ref mut tags) = self.tags { 139 | match tags.get(key) { 140 | Some(v) => Some(v), 141 | None => defaults.get(key), 142 | } 143 | } else { 144 | defaults.get(key) 145 | } 146 | } 147 | 148 | /// Iterate tags, layering in defaults when needed 149 | /// 150 | /// The defaults serves to fill 'holes' in the Telemetry's view of the 151 | /// tags. We avoid shipping tags through the whole system at the expense of 152 | /// slightly more complicated call-sites in sinks. 153 | pub fn tags<'a>(&'a self, defaults: &'a TagMap) -> TagIter<'a> { 154 | if let Some(ref tags) = self.tags { 155 | TagIter::Double { 156 | iters_exhausted: false, 157 | seen_keys: HashSet::new(), 158 | defaults: defaults.iter(), 159 | iters: tags.iter(), 160 | } 161 | } else { 162 | TagIter::Single { 163 | defaults: defaults.iter(), 164 | } 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/metric/mod.rs: -------------------------------------------------------------------------------- 1 | //! `metric` is a collection of the abstract datatypes that cernan operates 2 | //! over, plus related metadata. The main show here is 3 | //! `metric::Event`. Everything branches down from that. 
4 | mod ackbag; 5 | mod event; 6 | mod logline; 7 | mod telemetry; 8 | 9 | pub use self::ackbag::global_ack_bag; 10 | pub use self::event::{Encoding, Event, Metadata}; 11 | pub use self::logline::LogLine; 12 | #[cfg(test)] 13 | pub use self::telemetry::Value; 14 | pub use self::telemetry::{AggregationMethod, Telemetry}; 15 | use crate::util; 16 | use std::cmp; 17 | use std::collections::{hash_map, HashSet}; 18 | 19 | /// A common type in cernan, a map from string to string 20 | pub type TagMap = util::HashMap; 21 | 22 | /// Compare two tagmaps 23 | /// 24 | /// K/Vs are compared lexographically unless the maps are of different length, 25 | /// in which case length is the comparator. 26 | pub fn cmp_tagmap( 27 | lhs: &Option, 28 | rhs: &Option, 29 | ) -> Option { 30 | match (lhs, rhs) { 31 | (&Some(ref l), &Some(ref r)) => { 32 | if l.len() != r.len() { 33 | l.len().partial_cmp(&r.len()) 34 | } else { 35 | l.iter().partial_cmp(r) 36 | } 37 | } 38 | _ => Some(cmp::Ordering::Equal), 39 | } 40 | } 41 | 42 | #[allow(missing_docs)] 43 | pub enum TagIter<'a> { 44 | Single { 45 | defaults: hash_map::Iter<'a, String, String>, 46 | }, 47 | Double { 48 | seen_keys: HashSet, 49 | iters: hash_map::Iter<'a, String, String>, 50 | defaults: hash_map::Iter<'a, String, String>, 51 | iters_exhausted: bool, 52 | }, 53 | } 54 | 55 | impl<'a> Iterator for TagIter<'a> { 56 | type Item = (&'a String, &'a String); 57 | 58 | fn next(&mut self) -> Option { 59 | match *self { 60 | TagIter::Single { ref mut defaults } => defaults.next(), 61 | TagIter::Double { 62 | ref mut seen_keys, 63 | ref mut iters, 64 | ref mut defaults, 65 | ref mut iters_exhausted, 66 | } => loop { 67 | if *iters_exhausted { 68 | if let Some((k, v)) = defaults.next() { 69 | if seen_keys.insert(k.to_string()) { 70 | return Some((k, v)); 71 | } 72 | } else { 73 | return None; 74 | } 75 | } else if let Some((k, v)) = iters.next() { 76 | seen_keys.insert(k.to_string()); 77 | return Some((k, v)); 78 | } else { 79 | *iters_exhausted = true; 80 | continue; 81 | } 82 | }, 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/protocols/graphite.rs: -------------------------------------------------------------------------------- 1 | //! Handle the graphite protocol. Graphite is a timestamped, simple text-based 2 | //! protocol for telemetry. 3 | 4 | use crate::metric::{AggregationMethod, Telemetry}; 5 | use std::str::FromStr; 6 | use std::sync; 7 | 8 | /// Parse a string for graphite data into a `metric::Telemetry` if possible. 
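/// A minimal sketch (mirrors the unit test below):
///
/// ```
/// use cernan::metric::Telemetry;
/// use cernan::protocols::graphite::parse_graphite;
/// use std::sync;
///
/// let mut res = Vec::new();
/// let template = sync::Arc::new(Some(Telemetry::default()));
/// assert!(parse_graphite("fst 1 101\n", &mut res, &template));
/// assert_eq!(res[0].name, "fst");
/// assert_eq!(res[0].value(), Some(1.0));
/// ```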
9 | pub fn parse_graphite( 10 | source: &str, 11 | res: &mut Vec, 12 | metric: &sync::Arc>, 13 | ) -> bool { 14 | let mut iter = source.split_whitespace(); 15 | while let Some(name) = iter.next() { 16 | match iter.next() { 17 | Some(val) => match iter.next() { 18 | Some(time) => { 19 | let parsed_val = match f64::from_str(val) { 20 | Ok(f) => f, 21 | Err(_) => return false, 22 | }; 23 | let parsed_time = match i64::from_str(time) { 24 | Ok(t) => t, 25 | Err(_) => return false, 26 | }; 27 | let metric = sync::Arc::make_mut(&mut sync::Arc::clone(metric)) 28 | .take() 29 | .unwrap(); 30 | res.push( 31 | metric 32 | .thaw() 33 | .name(name) 34 | .value(parsed_val) 35 | .kind(AggregationMethod::Set) 36 | .timestamp(parsed_time) 37 | .harden() 38 | .unwrap(), 39 | ); 40 | } 41 | None => return false, 42 | }, 43 | None => return false, 44 | } 45 | } 46 | !res.is_empty() 47 | } 48 | 49 | #[cfg(test)] 50 | mod tests { 51 | use super::*; 52 | use crate::metric::{AggregationMethod, Telemetry}; 53 | use chrono::{TimeZone, Utc}; 54 | use std::sync; 55 | 56 | #[test] 57 | fn test_parse_graphite() { 58 | let pyld = "fst 1 101\nsnd -2.0 202\nthr 3 303\nfth@fth 4 404\nfv%fv 5 505\ns-th 6 606\n"; 59 | let mut res = Vec::new(); 60 | let metric = sync::Arc::new(Some(Telemetry::default())); 61 | assert!(parse_graphite(pyld, &mut res, &metric)); 62 | 63 | assert_eq!(res[0].kind(), AggregationMethod::Set); 64 | assert_eq!(res[0].name, "fst"); 65 | assert_eq!(res[0].value(), Some(1.0)); 66 | assert_eq!(res[0].timestamp, Utc.timestamp(101, 0).timestamp()); 67 | 68 | assert_eq!(res[1].kind(), AggregationMethod::Set); 69 | assert_eq!(res[1].name, "snd"); 70 | assert_eq!(res[1].value(), Some(-2.0)); 71 | assert_eq!(res[1].timestamp, Utc.timestamp(202, 0).timestamp()); 72 | 73 | assert_eq!(res[2].kind(), AggregationMethod::Set); 74 | assert_eq!(res[2].name, "thr"); 75 | assert_eq!(res[2].value(), Some(3.0)); 76 | assert_eq!(res[2].timestamp, Utc.timestamp(303, 0).timestamp()); 77 | 78 | assert_eq!(res[3].kind(), AggregationMethod::Set); 79 | assert_eq!(res[3].name, "fth@fth"); 80 | assert_eq!(res[3].value(), Some(4.0)); 81 | assert_eq!(res[3].timestamp, Utc.timestamp(404, 0).timestamp()); 82 | 83 | assert_eq!(res[4].kind(), AggregationMethod::Set); 84 | assert_eq!(res[4].name, "fv%fv"); 85 | assert_eq!(res[4].value(), Some(5.0)); 86 | assert_eq!(res[4].timestamp, Utc.timestamp(505, 0).timestamp()); 87 | 88 | assert_eq!(res[5].kind(), AggregationMethod::Set); 89 | assert_eq!(res[5].name, "s-th"); 90 | assert_eq!(res[5].value(), Some(6.0)); 91 | assert_eq!(res[5].timestamp, Utc.timestamp(606, 0).timestamp()); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/protocols/mod.rs: -------------------------------------------------------------------------------- 1 | //! The input protocols that cernan must parse. Not all sources are reflected 2 | //! here. These modules are used by the sources to do their work. 3 | 4 | #![allow(renamed_and_removed_lints)] 5 | 6 | pub mod graphite; 7 | pub mod native; 8 | pub mod prometheus; 9 | pub mod statsd; 10 | -------------------------------------------------------------------------------- /src/sink/console.rs: -------------------------------------------------------------------------------- 1 | //! Console Event logger. 
2 | 3 | use crate::buckets::Buckets; 4 | use crate::metric::{AggregationMethod, LogLine, TagMap, Telemetry}; 5 | use crate::sink::{Sink, Valve}; 6 | use crate::source::flushes_per_second; 7 | use chrono::naive::NaiveDateTime; 8 | use chrono::offset::Utc; 9 | use chrono::DateTime; 10 | 11 | /// The 'console' sink exists for development convenience. The sink will 12 | /// aggregate according to [buckets](../buckets/struct.Buckets.html) method and 13 | /// print each `flush-interval` to stdout. 14 | pub struct Console { 15 | aggrs: Buckets, 16 | buffer: Vec, 17 | flush_interval: u64, 18 | tags: TagMap, 19 | } 20 | 21 | /// The configuration struct for Console. There's not a whole lot to configure 22 | /// here, independent of other sinks, but Console does do aggregations and that 23 | /// requires knowing what the user wants for `bin_width`. 24 | #[derive(Clone, Debug, Deserialize)] 25 | pub struct ConsoleConfig { 26 | /// The sink's unique name in the routing topology. 27 | pub config_path: Option, 28 | /// Sets the bin width for Console's underlying 29 | /// [bucket](../buckets/struct.Bucket.html). 30 | pub bin_width: i64, 31 | /// The tags to be applied to all `metric::Event`s streaming through this 32 | /// sink. These tags will overwrite any tags carried by the `metric::Event` 33 | /// itself. 34 | pub tags: TagMap, 35 | /// The sink specific `flush_interval`. 36 | pub flush_interval: u64, 37 | } 38 | 39 | impl Default for ConsoleConfig { 40 | fn default() -> ConsoleConfig { 41 | ConsoleConfig { 42 | bin_width: 1, 43 | flush_interval: 60 * flushes_per_second(), 44 | config_path: None, 45 | tags: TagMap::default(), 46 | } 47 | } 48 | } 49 | 50 | impl ConsoleConfig { 51 | /// Convenience method to create a ConsoleConfig with `bin_width` equal to 52 | /// 1. 
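/// A minimal sketch:
///
/// ```
/// use cernan::metric::TagMap;
/// use cernan::sink::ConsoleConfig;
///
/// let config = ConsoleConfig::new("sinks.console".to_string(), 60, TagMap::default());
/// assert_eq!(config.bin_width, 1);
/// ```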
53 | pub fn new( 54 | config_path: String, 55 | flush_interval: u64, 56 | tags: TagMap, 57 | ) -> ConsoleConfig { 58 | ConsoleConfig { 59 | config_path: Some(config_path), 60 | bin_width: 1, 61 | flush_interval, 62 | tags, 63 | } 64 | } 65 | } 66 | 67 | impl Sink for Console { 68 | fn init(config: ConsoleConfig) -> Self { 69 | Console { 70 | aggrs: Buckets::new(config.bin_width), 71 | buffer: Vec::new(), 72 | flush_interval: config.flush_interval, 73 | tags: config.tags, 74 | } 75 | } 76 | 77 | fn valve_state(&self) -> Valve { 78 | Valve::Open 79 | } 80 | 81 | fn deliver(&mut self, point: Telemetry) { 82 | self.aggrs.add(point); 83 | } 84 | 85 | fn deliver_line(&mut self, line: LogLine) { 86 | self.buffer.append(&mut vec![line]); 87 | } 88 | 89 | fn flush_interval(&self) -> Option { 90 | Some(self.flush_interval) 91 | } 92 | 93 | fn flush(&mut self) { 94 | println!("Flushing lines: {}", Utc::now().to_rfc3339()); 95 | for line in &self.buffer { 96 | print!("{} {}: {}", format_time(line.time), line.path, line.value); 97 | for (k, v) in line.tags(&self.tags) { 98 | print!(" {}={}", k, v); 99 | } 100 | println!(); 101 | } 102 | self.buffer.clear(); 103 | 104 | println!("Flushing metrics: {}", Utc::now().to_rfc3339()); 105 | 106 | let mut sums = String::new(); 107 | let mut sets = String::new(); 108 | let mut summaries = String::new(); 109 | let mut histograms = String::new(); 110 | 111 | for telem in self.aggrs.iter() { 112 | match telem.kind() { 113 | AggregationMethod::Histogram => { 114 | use quantiles::histogram::Bound; 115 | let tgt = &mut histograms; 116 | if let Some(bin_iter) = telem.bins() { 117 | for &(bound, val) in bin_iter { 118 | tgt.push_str(" "); 119 | tgt.push_str(&telem.name); 120 | tgt.push_str("_"); 121 | match bound { 122 | Bound::Finite(bnd) => { 123 | tgt.push_str(&bnd.to_string()); 124 | } 125 | Bound::PosInf => { 126 | tgt.push_str("pos_inf"); 127 | } 128 | }; 129 | tgt.push_str("("); 130 | tgt.push_str(&telem.timestamp.to_string()); 131 | tgt.push_str("): "); 132 | tgt.push_str(&val.to_string()); 133 | tgt.push_str("\n"); 134 | } 135 | } 136 | } 137 | AggregationMethod::Sum => { 138 | let tgt = &mut sums; 139 | if let Some(f) = telem.sum() { 140 | tgt.push_str(" "); 141 | tgt.push_str(&telem.name); 142 | tgt.push_str("("); 143 | tgt.push_str(&telem.timestamp.to_string()); 144 | tgt.push_str("): "); 145 | tgt.push_str(&f.to_string()); 146 | tgt.push_str("\n"); 147 | } 148 | } 149 | AggregationMethod::Set => { 150 | let tgt = &mut sets; 151 | if let Some(f) = telem.set() { 152 | tgt.push_str(" "); 153 | tgt.push_str(&telem.name); 154 | tgt.push_str("("); 155 | tgt.push_str(&telem.timestamp.to_string()); 156 | tgt.push_str("): "); 157 | tgt.push_str(&f.to_string()); 158 | tgt.push_str("\n"); 159 | } 160 | } 161 | AggregationMethod::Summarize => { 162 | let tgt = &mut summaries; 163 | for tup in &[ 164 | ("min", 0.0), 165 | ("max", 1.0), 166 | ("50", 0.5), 167 | ("90", 0.90), 168 | ("99", 0.99), 169 | ("999", 0.999), 170 | ] { 171 | let stat: &str = tup.0; 172 | let quant: f64 = tup.1; 173 | if let Some(f) = telem.query(quant) { 174 | tgt.push_str(" "); 175 | tgt.push_str(&telem.name); 176 | tgt.push_str(": "); 177 | tgt.push_str(stat); 178 | tgt.push_str(" "); 179 | tgt.push_str(&f.to_string()); 180 | tgt.push_str("\n"); 181 | } 182 | } 183 | } 184 | } 185 | } 186 | println!(" sums:"); 187 | print!("{}", sums); 188 | println!(" sets:"); 189 | print!("{}", sets); 190 | println!(" summaries:"); 191 | print!("{}", summaries); 192 | println!(" histograms:"); 193 | 
print!("{}", histograms); 194 | 195 | self.aggrs.reset(); 196 | } 197 | 198 | fn shutdown(mut self) { 199 | self.flush(); 200 | } 201 | } 202 | 203 | #[inline] 204 | fn format_time(time: i64) -> String { 205 | let naive_time = NaiveDateTime::from_timestamp(time, 0); 206 | let utc_time: DateTime = DateTime::from_utc(naive_time, Utc); 207 | format!("{}", utc_time.format("%Y-%m-%dT%H:%M:%S%.3fZ")) 208 | } 209 | -------------------------------------------------------------------------------- /src/sink/elasticsearch.rs: -------------------------------------------------------------------------------- 1 | //! `ElasticSearch` is a documentation indexing engine. 2 | 3 | use crate::metric::{LogLine, TagMap}; 4 | use crate::sink::{Sink, Valve}; 5 | use crate::source::flushes_per_second; 6 | use chrono::naive::NaiveDateTime; 7 | use chrono::offset::Utc; 8 | use chrono::DateTime; 9 | use elastic::client::responses::bulk; 10 | use elastic::error; 11 | use elastic::error::Result; 12 | use elastic::prelude::*; 13 | use std::cmp; 14 | use std::error::Error; 15 | use std::sync::atomic::{AtomicUsize, Ordering}; 16 | use uuid; 17 | 18 | /// Total deliveries made 19 | pub static ELASTIC_RECORDS_DELIVERY: AtomicUsize = AtomicUsize::new(0); 20 | /// Total internal buffer entries 21 | pub static ELASTIC_INTERNAL_BUFFER_LEN: AtomicUsize = AtomicUsize::new(0); 22 | /// Total records delivered in the last delivery 23 | pub static ELASTIC_RECORDS_TOTAL_DELIVERED: AtomicUsize = AtomicUsize::new(0); 24 | /// Total records that failed to be delivered due to error 25 | pub static ELASTIC_RECORDS_TOTAL_FAILED: AtomicUsize = AtomicUsize::new(0); 26 | /// Unknown error occurred during attempted flush 27 | pub static ELASTIC_ERROR_UNKNOWN: AtomicUsize = AtomicUsize::new(0); 28 | /// Total number of index bulk action errors 29 | pub static ELASTIC_BULK_ACTION_INDEX_ERR: AtomicUsize = AtomicUsize::new(0); 30 | /// Total number of create bulk action errors 31 | pub static ELASTIC_BULK_ACTION_CREATE_ERR: AtomicUsize = AtomicUsize::new(0); 32 | /// Total number of update bulk action errors 33 | pub static ELASTIC_BULK_ACTION_UPDATE_ERR: AtomicUsize = AtomicUsize::new(0); 34 | /// Total number of delete bulk action errors 35 | pub static ELASTIC_BULK_ACTION_DELETE_ERR: AtomicUsize = AtomicUsize::new(0); 36 | 37 | /// Total number of api errors due to index not found 38 | pub static ELASTIC_ERROR_API_INDEX_NOT_FOUND: AtomicUsize = AtomicUsize::new(0); 39 | /// Total number of api errors due to parsing 40 | pub static ELASTIC_ERROR_API_PARSING: AtomicUsize = AtomicUsize::new(0); 41 | /// Total number of api errors due to mapper parsing 42 | pub static ELASTIC_ERROR_API_MAPPER_PARSING: AtomicUsize = AtomicUsize::new(0); 43 | /// Total number of api errors due to action request validation 44 | pub static ELASTIC_ERROR_API_ACTION_REQUEST_VALIDATION: AtomicUsize = 45 | AtomicUsize::new(0); 46 | /// Total number of api errors due to missing document 47 | pub static ELASTIC_ERROR_API_DOCUMENT_MISSING: AtomicUsize = AtomicUsize::new(0); 48 | /// Total number of api errors due to index already existing 49 | pub static ELASTIC_ERROR_API_INDEX_ALREADY_EXISTS: AtomicUsize = AtomicUsize::new(0); 50 | /// Total number of api errors due to unknown reasons 51 | pub static ELASTIC_ERROR_API_UNKNOWN: AtomicUsize = AtomicUsize::new(0); 52 | /// Total number of client errors, no specific reasons 53 | pub static ELASTIC_ERROR_CLIENT: AtomicUsize = AtomicUsize::new(0); 54 | 55 | /// Configuration for the Elasticsearch sink 56 | /// 57 | /// 
Elasticsearch is an open-source document indexing engine. It can be used for 58 | /// performing searches over corpus, which for cernan's use is 59 | /// `metric::LogLine`. 60 | #[derive(Debug, Clone)] 61 | pub struct ElasticsearchConfig { 62 | /// The unique name of the sink in the routing topology 63 | pub config_path: Option, 64 | /// The Elasticsearch index prefix. This prefix will be added to the 65 | /// automatically created date-based index of this sink. 66 | pub index_prefix: Option, 67 | /// The _type of the Elasticsearch index 68 | pub index_type: String, 69 | /// Determines whether to use HTTP or HTTPS when publishing to 70 | /// Elasticsearch. 71 | pub secure: bool, 72 | /// Determine how many times to attempt the delivery of a log line before 73 | /// dropping it from the buffer. Failures of a global bulk request does not 74 | /// count against this limit. 75 | pub delivery_attempt_limit: u8, 76 | /// The Elasticsearch host. May be an IP address or DNS hostname. 77 | pub host: String, 78 | /// The Elasticsearch port. 79 | pub port: usize, 80 | /// The sink's specific flush interval. 81 | pub flush_interval: u64, 82 | /// The tags to be applied to all `metric::Event`s streaming through this 83 | /// sink. These tags will overwrite any tags carried by the `metric::Event` 84 | /// itself. 85 | pub tags: TagMap, 86 | } 87 | 88 | impl Default for ElasticsearchConfig { 89 | fn default() -> Self { 90 | ElasticsearchConfig { 91 | config_path: Some("sinks.elasticsearch".to_string()), 92 | secure: false, 93 | host: "127.0.0.1".to_string(), 94 | index_prefix: None, 95 | index_type: "payload".to_string(), 96 | delivery_attempt_limit: 10, 97 | port: 9200, 98 | flush_interval: flushes_per_second(), 99 | tags: TagMap::default(), 100 | } 101 | } 102 | } 103 | 104 | struct Line { 105 | attempts: u8, 106 | uuid: uuid::Uuid, 107 | line: LogLine, 108 | } 109 | 110 | /// The elasticsearch sink struct. 111 | /// 112 | /// Refer to the documentation on `ElasticsearchConfig` for more details. 
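/// Each flush serializes the internal buffer into a single `_bulk`
/// request: for every buffered `LogLine`, an `index` action header line
/// followed by the JSON document payload.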
113 | pub struct Elasticsearch { 114 | buffer: Vec, 115 | secure: bool, 116 | host: String, 117 | port: usize, 118 | delivery_attempt_limit: u8, 119 | index_prefix: Option, 120 | index_type: String, 121 | flush_interval: u64, 122 | tags: TagMap, 123 | } 124 | 125 | impl Elasticsearch { 126 | fn bulk_body(&self, buffer: &mut String) { 127 | assert!(!self.buffer.is_empty()); 128 | use serde_json::{to_string, Value}; 129 | for m in &self.buffer { 130 | let uuid = m.uuid.hyphenated().to_string(); 131 | let line = &m.line; 132 | let header: Value = json!({ 133 | "index": { 134 | "_index" : idx(&self.index_prefix, line.time), 135 | "_type" : self.index_type.clone(), 136 | "_id" : uuid.clone(), 137 | } 138 | }); 139 | buffer.push_str(&to_string(&header).unwrap()); 140 | buffer.push('\n'); 141 | let mut payload: Value = json!({ 142 | "uuid": uuid, 143 | "path": line.path.clone(), 144 | "payload": line.value.clone(), 145 | "timestamp": format_time(line.time), 146 | }); 147 | let obj = payload.as_object_mut().unwrap(); 148 | for (k, v) in line.tags(&self.tags) { 149 | obj.insert(k.clone(), Value::String(v.clone())); 150 | } 151 | for (k, v) in &line.fields { 152 | obj.insert(k.clone(), Value::String(v.clone())); 153 | } 154 | buffer.push_str(&to_string(&obj).unwrap()); 155 | buffer.push('\n'); 156 | } 157 | } 158 | } 159 | 160 | impl Sink for Elasticsearch { 161 | fn init(config: ElasticsearchConfig) -> Self { 162 | Elasticsearch { 163 | buffer: Vec::new(), 164 | secure: config.secure, 165 | host: config.host, 166 | port: config.port, 167 | index_prefix: config.index_prefix, 168 | index_type: config.index_type, 169 | delivery_attempt_limit: config.delivery_attempt_limit, 170 | flush_interval: config.flush_interval, 171 | tags: config.tags, 172 | } 173 | } 174 | 175 | fn flush_interval(&self) -> Option { 176 | Some(self.flush_interval) 177 | } 178 | 179 | #[allow(clippy::cyclomatic_complexity)] 180 | fn flush(&mut self) { 181 | if self.buffer.is_empty() { 182 | return; 183 | } 184 | 185 | let proto = if self.secure { "https" } else { "http" }; 186 | let params = 187 | RequestParams::new(format!("{}://{}:{}", proto, self.host, self.port)); 188 | let client = SyncClientBuilder::from_params(params).build().unwrap(); 189 | 190 | let mut buffer = String::with_capacity(4048); 191 | self.bulk_body(&mut buffer); 192 | if let Ok(snd) = client.request(BulkRequest::new(buffer)).send() { 193 | let bulk_resp: Result = snd.into_response::(); 194 | ELASTIC_INTERNAL_BUFFER_LEN.store(self.buffer.len(), Ordering::Relaxed); 195 | match bulk_resp { 196 | Ok(bulk) => { 197 | ELASTIC_RECORDS_DELIVERY.fetch_add(1, Ordering::Relaxed); 198 | for item in bulk.iter() { 199 | match item { 200 | Ok(item) => { 201 | let uuid = uuid::Uuid::parse_str(item.id()) 202 | .expect("catastrophic error, TID not a UUID"); 203 | let mut idx = 0; 204 | for i in 0..self.buffer.len() { 205 | match self.buffer[i].uuid.cmp(&uuid) { 206 | cmp::Ordering::Equal => { 207 | break; 208 | } 209 | _ => idx += 1, 210 | } 211 | } 212 | self.buffer.remove(idx); 213 | ELASTIC_RECORDS_TOTAL_DELIVERED 214 | .fetch_add(1, Ordering::Relaxed); 215 | } 216 | Err(item) => { 217 | let uuid = uuid::Uuid::parse_str(item.id()) 218 | .expect("catastrophic error, TID not a UUID"); 219 | let mut idx = 0; 220 | for i in 0..self.buffer.len() { 221 | match self.buffer[i].uuid.cmp(&uuid) { 222 | cmp::Ordering::Equal => { 223 | break; 224 | } 225 | _ => idx += 1, 226 | } 227 | } 228 | self.buffer[idx].attempts += 1; 229 | if self.buffer[idx].attempts 230 | > 
self.delivery_attempt_limit 231 | { 232 | self.buffer.remove(idx); 233 | } 234 | ELASTIC_RECORDS_TOTAL_FAILED 235 | .fetch_add(1, Ordering::Relaxed); 236 | if let Some(source) = item.source() { 237 | debug!( 238 | "Failed to write item with error {}, source {}", 239 | item.description(), 240 | source 241 | ); 242 | } else { 243 | debug!( 244 | "Failed to write item with error {}", 245 | item.description() 246 | ); 247 | } 248 | match item.action() { 249 | bulk::Action::Index => { 250 | ELASTIC_BULK_ACTION_INDEX_ERR 251 | .fetch_add(1, Ordering::Relaxed) 252 | } 253 | bulk::Action::Create => { 254 | ELASTIC_BULK_ACTION_CREATE_ERR 255 | .fetch_add(1, Ordering::Relaxed) 256 | } 257 | bulk::Action::Update => { 258 | ELASTIC_BULK_ACTION_UPDATE_ERR 259 | .fetch_add(1, Ordering::Relaxed) 260 | } 261 | bulk::Action::Delete => { 262 | ELASTIC_BULK_ACTION_DELETE_ERR 263 | .fetch_add(1, Ordering::Relaxed) 264 | } 265 | }; 266 | } 267 | } 268 | } 269 | } 270 | Err(err) => { 271 | match err { 272 | error::Error::Api(ref api_error) => { 273 | use elastic::error::ApiError; 274 | match *api_error { 275 | ApiError::IndexNotFound { ref index } => { 276 | ELASTIC_ERROR_API_INDEX_NOT_FOUND 277 | .fetch_add(1, Ordering::Relaxed); 278 | debug!("Unable to write, API Error (Index Not Found): {}", index); 279 | } 280 | ApiError::Parsing { ref reason, .. } => { 281 | ELASTIC_ERROR_API_PARSING 282 | .fetch_add(1, Ordering::Relaxed); 283 | debug!( 284 | "Unable to write, API Error (Parsing): {}", 285 | reason 286 | ); 287 | } 288 | ApiError::MapperParsing { ref reason, .. } => { 289 | ELASTIC_ERROR_API_MAPPER_PARSING 290 | .fetch_add(1, Ordering::Relaxed); 291 | debug!("Unable to write, API Error (Mapper Parsing): {}", reason); 292 | } 293 | ApiError::ActionRequestValidation { 294 | ref reason, 295 | .. 296 | } => { 297 | ELASTIC_ERROR_API_ACTION_REQUEST_VALIDATION 298 | .fetch_add(1, Ordering::Relaxed); 299 | debug!( 300 | "Unable to write, API Error (Action Request Validation): {}", 301 | reason 302 | ); 303 | } 304 | ApiError::DocumentMissing { ref index, .. } => { 305 | ELASTIC_ERROR_API_DOCUMENT_MISSING 306 | .fetch_add(1, Ordering::Relaxed); 307 | debug!("Unable to write, API Error (Document Missing): {}", index); 308 | } 309 | ApiError::IndexAlreadyExists { ref index, .. 
} => { 310 | ELASTIC_ERROR_API_INDEX_ALREADY_EXISTS 311 | .fetch_add(1, Ordering::Relaxed); 312 | debug!( 313 | "Unable to write, API Error (Index Already Exists): {}", 314 | index 315 | ); 316 | } 317 | _ => { 318 | ELASTIC_ERROR_API_UNKNOWN 319 | .fetch_add(1, Ordering::Relaxed); 320 | debug!("Unable to write, API Error (Unknown)"); 321 | } 322 | } 323 | } 324 | error::Error::Client(ref client_error) => { 325 | ELASTIC_ERROR_CLIENT.fetch_add(1, Ordering::Relaxed); 326 | debug!( 327 | "Unable to write, client error: {}", 328 | client_error.description() 329 | ); 330 | } 331 | } 332 | } 333 | } 334 | } 335 | } 336 | 337 | fn shutdown(mut self) { 338 | self.flush(); 339 | } 340 | 341 | fn deliver_line(&mut self, line: LogLine) { 342 | let uuid = uuid::Uuid::new_v4(); 343 | self.buffer.push(Line { 344 | uuid: uuid, 345 | line: line, 346 | attempts: 0, 347 | }); 348 | } 349 | 350 | fn valve_state(&self) -> Valve { 351 | if self.buffer.len() > 10_000 { 352 | Valve::Closed 353 | } else { 354 | Valve::Open 355 | } 356 | } 357 | } 358 | 359 | #[inline] 360 | fn format_time(time: i64) -> String { 361 | let naive_time = NaiveDateTime::from_timestamp(time, 0); 362 | let utc_time: DateTime = DateTime::from_utc(naive_time, Utc); 363 | format!("{}", utc_time.format("%+")) 364 | } 365 | 366 | #[inline] 367 | fn idx(prefix: &Option, time: i64) -> String { 368 | let naive_time = NaiveDateTime::from_timestamp(time, 0); 369 | let utc_time: DateTime = DateTime::from_utc(naive_time, Utc); 370 | match *prefix { 371 | Some(ref p) => format!("{}-{}", p, utc_time.format("%Y-%m-%d")), 372 | None => format!("{}", utc_time.format("%Y-%m-%d")), 373 | } 374 | } 375 | -------------------------------------------------------------------------------- /src/sink/mod.rs: -------------------------------------------------------------------------------- 1 | //! A 'sink' is a final destination for telemetry and log lines. That is, a 2 | //! 'sink' is that which is at the end of a `source -> filter -> filter -> 3 | //! ... -> sink` chain. The sink has no obligations with regard to the telemetry 4 | //! and log lines it receives, other than to receive them. Individual sinks make 5 | //! different choices. 6 | 7 | use crate::metric::{Encoding, Event, LogLine, Metadata, Telemetry}; 8 | use crate::thread; 9 | use crate::time; 10 | use crate::util::Valve; 11 | use hopper; 12 | use std::marker::PhantomData; 13 | use uuid::Uuid; 14 | 15 | mod console; 16 | pub mod elasticsearch; 17 | pub mod influxdb; 18 | pub mod kafka; 19 | mod native; 20 | mod null; 21 | pub mod prometheus; 22 | pub mod wavefront; 23 | 24 | pub use self::console::{Console, ConsoleConfig}; 25 | pub use self::elasticsearch::{Elasticsearch, ElasticsearchConfig}; 26 | pub use self::influxdb::{InfluxDB, InfluxDBConfig}; 27 | pub use self::kafka::{Kafka, KafkaConfig}; 28 | pub use self::native::{Native, NativeConfig}; 29 | pub use self::null::{Null, NullConfig}; 30 | pub use self::prometheus::{Prometheus, PrometheusConfig}; 31 | pub use self::wavefront::{Wavefront, WavefrontConfig}; 32 | 33 | /// Generic interface used to capture global sink configuration 34 | /// parameters as well as sink specific parameters. 35 | /// 36 | /// Stored configuration is consumed when the sink is spawned, 37 | /// resulting in a new thread executing the given sink. 
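/// A minimal sketch (hypothetical channel and names) of the intended
/// lifecycle, assuming a `hopper` receiver has already been built by the
/// routing topology:
///
/// ```ignore
/// let runnable = RunnableSink::<Null, NullConfig>::new(
///     recv,                                // hopper receiver of metric::Event
///     vec!["sources.statsd".to_string()],  // upstream source names (hypothetical)
///     NullConfig::new("sinks.null".to_string()),
/// );
/// let _handle = runnable.run(); // consumes self, spawns the sink thread
/// ```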
38 | pub struct RunnableSink 39 | where 40 | S: Send + Sink, 41 | SConfig: 'static + Send + Clone, 42 | { 43 | recv: hopper::Receiver, 44 | sources: Vec, 45 | state: S, 46 | 47 | // Yes, compiler, we know that we aren't storing 48 | // anything of type SConfig. 49 | config: PhantomData, 50 | } 51 | 52 | impl RunnableSink 53 | where 54 | S: 'static + Send + Sink, 55 | SConfig: 'static + Clone + Send, 56 | { 57 | /// Generic constructor for RunnableSink - execution wrapper around objects 58 | /// implementing Sink. 59 | pub fn new( 60 | recv: hopper::Receiver, 61 | sources: Vec, 62 | config: SConfig, 63 | ) -> RunnableSink { 64 | RunnableSink { 65 | recv: recv, 66 | sources: sources, 67 | state: S::init(config), 68 | config: PhantomData, 69 | } 70 | } 71 | 72 | /// Spawns / consumes the given stateful sink, returning the corresponding 73 | /// thread. 74 | pub fn run(self) -> thread::ThreadHandle { 75 | thread::spawn(move |_poll| { 76 | self.consume(); 77 | }) 78 | } 79 | 80 | fn consume(mut self) { 81 | let mut attempts = 0; 82 | let mut recv = self.recv.into_iter(); 83 | let mut last_flush_idx = 0; 84 | let mut total_shutdowns = 0; 85 | // The run-loop of a sink is two nested loops. The outer loop pulls a 86 | // value from the hopper queue. If that value is Some the inner loop 87 | // tries to do something with it, only discarding it at such time as 88 | // it's been delivered to the Sink. 89 | loop { 90 | let nxt = recv.next(); 91 | if nxt.is_none() { 92 | time::delay(attempts); 93 | attempts += 1; 94 | continue; 95 | } 96 | attempts = 0; 97 | let event = nxt.unwrap(); 98 | loop { 99 | // We have to be careful here not to dump a value until it's 100 | // already been delivered _and_ be sure we at least attempt to 101 | // make progress on delivery. There are two conditions we have 102 | // to look out for most carefully: 103 | // 104 | // 1. Is the valve_state closed? 105 | // 2. Does the flush_interval match our flush index? 106 | // 107 | // If the valve state is closed we attempt to flush the sink to 108 | // clear the valve, hold on to the value and loop around again 109 | // after a delay. If the flush_interval is Some and DOES match 110 | // then we flush. If the flush_interval is Some and DOES NOT 111 | // match then we do not flush. If the flush_interval is NONE 112 | // then we never flush. 113 | match self.state.valve_state() { 114 | Valve::Open => match event { 115 | Event::TimerFlush(idx) => { 116 | // Flush timers are interesting. The timer thread 117 | // sends a TimerFlush pulse once a second and it's 118 | // possible that a sink will have multiple Sources / 119 | // Filters pushing down into it. That means multiple 120 | // TimerFlush values for the same time index. 121 | // 122 | // What we do to avoid duplicating time pulses is 123 | // keep track of a 'last_flush_idx', our current 124 | // time and only reset to a new time when the idx in 125 | // the pulse is greater than the last one we've 126 | // seen. If it's not, we ignore it. 127 | if idx > last_flush_idx { 128 | // Now, because sinks will not want to flush 129 | // every timer pulse we query the flush_interval 130 | // of the sink. If the interval and the idx 131 | // match up, we flush. Else, not. 
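                                // For example (illustrative numbers): with
                                // flush_interval == 600, only pulses whose idx
                                // is a multiple of 600 trigger a flush below; a
                                // flush_interval of 0 flushes on every pulse,
                                // and a None from flush_interval() means pulses
                                // never trigger a flush at all.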
132 | if let Some(flush_interval) = 133 | self.state.flush_interval() 134 | { 135 | if flush_interval == 0 || idx % flush_interval == 0 136 | { 137 | self.state.flush(); 138 | } 139 | } 140 | last_flush_idx = idx; 141 | } 142 | break; 143 | } 144 | Event::Telemetry(metric) => { 145 | self.state.deliver(metric); 146 | break; 147 | } 148 | Event::Log(line) => { 149 | self.state.deliver_line(line); 150 | break; 151 | } 152 | Event::Raw { 153 | order_by, 154 | encoding, 155 | bytes, 156 | metadata, 157 | connection_id, 158 | } => { 159 | self.state.deliver_raw( 160 | order_by, 161 | encoding, 162 | bytes, 163 | metadata, 164 | connection_id, 165 | ); 166 | break; 167 | } 168 | Event::Shutdown => { 169 | // Invariant - In order to ensure at least once delivery 170 | // at the sink level, the following properties must hold: 171 | // 172 | // 1) An upstream source injects a Shutdown event after 173 | // all of its events have been processed. 174 | // 175 | // 2) Sources shutdown only after receiving Shutdown 176 | // from each of its 177 | // upstream sources/filters. 178 | total_shutdowns += 1; 179 | if total_shutdowns >= self.sources.len() { 180 | trace!( 181 | "Received shutdown from every configured source: {:?}", 182 | self.sources 183 | ); 184 | self.state.shutdown(); 185 | return; 186 | } 187 | } 188 | }, 189 | Valve::Closed => { 190 | self.state.flush(); 191 | continue; 192 | } 193 | } 194 | } 195 | } 196 | } 197 | } 198 | 199 | /// A 'sink' is a sink for metrics. 200 | pub trait Sink 201 | where 202 | Self: 'static + Send + Sized, 203 | SConfig: 'static + Send + Clone, 204 | { 205 | /// Generic constructor for sinks implementing this trait. 206 | fn new( 207 | recv: hopper::Receiver, 208 | sources: Vec, 209 | config: SConfig, 210 | ) -> RunnableSink { 211 | RunnableSink::::new(recv, sources, config) 212 | } 213 | 214 | /// Constructs a new sink. 215 | fn init(config: SConfig) -> Self; 216 | 217 | /// Lookup the `Sink`'s specific flush interval. This determines how often a 218 | /// sink will obey the periodic flush pulse. 219 | /// 220 | /// If the value is `None` this is a signal that the sink will NEVER flush 221 | /// EXCEPT in the case where the sink's valve_state is Closed. 222 | fn flush_interval(&self) -> Option; 223 | /// Perform the `Sink` specific flush. The rate at which this occurs is 224 | /// determined by the global `flush_interval` or the sink specific flush 225 | /// interval. Pulses occur at a rate of once per second, subject to 226 | /// communication delays in the routing topology. 227 | fn flush(&mut self) -> (); 228 | /// Lookup the `Sink` valve state. See `Valve` documentation for more 229 | /// information. 230 | fn valve_state(&self) -> Valve { 231 | // never close up shop 232 | Valve::Open 233 | } 234 | /// Deliver a `Telemetry` to the `Sink`. Exact behaviour varies by 235 | /// implementation. 236 | fn deliver(&mut self, _telem: Telemetry) { 237 | // nothing, intentionally 238 | } 239 | /// Deliver a `LogLine` to the `Sink`. Exact behaviour varies by 240 | /// implementation. 241 | fn deliver_line(&mut self, _line: LogLine) { 242 | // nothing, intentionally 243 | } 244 | /// Deliver a 'Raw' series of encoded bytes to the sink. 245 | fn deliver_raw( 246 | &mut self, 247 | _order_by: u64, 248 | _encoding: Encoding, 249 | _bytes: Vec, 250 | _metadata: Option, 251 | _connection_id: Option, 252 | ) { 253 | // Not all sinks accept raw events. By default, we do nothing. 254 | } 255 | /// Provide a hook to shutdown a sink. 
This is necessary for sinks which 256 | /// have their own long-running threads. 257 | fn shutdown(self) -> (); 258 | } 259 | -------------------------------------------------------------------------------- /src/sink/native.rs: -------------------------------------------------------------------------------- 1 | //! Sink for Cernan's native protocol. 2 | 3 | use crate::metric; 4 | use crate::protocols::native::{AggregationMethod, LogLine, Payload, Telemetry}; 5 | use crate::sink::Sink; 6 | use crate::source::flushes_per_second; 7 | use crate::time; 8 | use byteorder::{BigEndian, ByteOrder}; 9 | use protobuf::repeated::RepeatedField; 10 | use protobuf::stream::CodedOutputStream; 11 | use protobuf::Message; 12 | use std::collections::HashMap; 13 | use std::io::BufWriter; 14 | use std::mem::replace; 15 | use std::net::{TcpStream, ToSocketAddrs}; 16 | 17 | /// The native sink 18 | /// 19 | /// This sink is the pair to the native source. The native source/sink use or 20 | /// consume cernan's native protocol, defined 21 | /// `resources/protobufs/native.proto`. Clients may use the native protocol 22 | /// without having to obey the translation required in other sources or 23 | /// operators may set up cernan to cernan communication. 24 | pub struct Native { 25 | port: u16, 26 | host: String, 27 | buffer: Vec, 28 | flush_interval: u64, 29 | delivery_attempts: u32, 30 | stream: Option, 31 | tags: metric::TagMap, 32 | } 33 | 34 | /// Configuration for the native sink 35 | #[derive(Clone, Debug, Deserialize)] 36 | pub struct NativeConfig { 37 | /// The port to communicate with the native host 38 | pub port: u16, 39 | /// The native cernan host to communicate with. May be an IP address or DNS 40 | /// hostname. 41 | pub host: String, 42 | /// The sink's unique name in the routing topology. 43 | pub config_path: Option, 44 | /// The sink's specific flush interval. 45 | pub flush_interval: u64, 46 | /// The tags to be applied to all `metric::Event`s streaming through this 47 | /// sink. These tags will overwrite any tags carried by the `metric::Event` 48 | /// itself. 
49 | pub tags: metric::TagMap, 50 | } 51 | 52 | impl Default for NativeConfig { 53 | fn default() -> Self { 54 | NativeConfig { 55 | port: 1972, 56 | host: "localhost".to_string(), 57 | config_path: None, 58 | flush_interval: 60 * flushes_per_second(), 59 | tags: metric::TagMap::default(), 60 | } 61 | } 62 | } 63 | 64 | fn connect(host: &str, port: u16) -> Option { 65 | let addrs = (host, port).to_socket_addrs(); 66 | match addrs { 67 | Ok(srv) => { 68 | let ips: Vec<_> = srv.collect(); 69 | for ip in ips { 70 | match TcpStream::connect(ip) { 71 | Ok(stream) => return Some(stream), 72 | Err(e) => info!( 73 | "Unable to connect to proxy at {} using addr {} with error \ 74 | {}", 75 | host, ip, e 76 | ), 77 | } 78 | } 79 | None 80 | } 81 | Err(e) => { 82 | info!( 83 | "Unable to perform DNS lookup on host {} with error {}", 84 | host, e 85 | ); 86 | None 87 | } 88 | } 89 | } 90 | 91 | impl Sink for Native { 92 | fn init(config: NativeConfig) -> Self { 93 | let stream = connect(&config.host, config.port); 94 | Native { 95 | port: config.port, 96 | host: config.host, 97 | buffer: Vec::new(), 98 | flush_interval: config.flush_interval, 99 | delivery_attempts: 0, 100 | stream, 101 | tags: config.tags, 102 | } 103 | } 104 | 105 | fn deliver(&mut self, telemetry: metric::Telemetry) { 106 | self.buffer.push(metric::Event::Telemetry(telemetry)); 107 | } 108 | 109 | fn deliver_line(&mut self, line: metric::LogLine) { 110 | self.buffer.push(metric::Event::Log(line)); 111 | } 112 | 113 | fn flush_interval(&self) -> Option { 114 | Some(self.flush_interval) 115 | } 116 | 117 | fn flush(&mut self) { 118 | let mut points = Vec::with_capacity(1024); 119 | let mut lines = Vec::with_capacity(1024); 120 | 121 | for ev in self.buffer.drain(..) { 122 | match ev { 123 | metric::Event::Telemetry(mut m) => { 124 | let mut telem = Telemetry::new(); 125 | telem.set_name(replace(&mut m.name, Default::default())); 126 | let method = match m.kind() { 127 | metric::AggregationMethod::Histogram => AggregationMethod::BIN, 128 | metric::AggregationMethod::Sum => AggregationMethod::SUM, 129 | metric::AggregationMethod::Set => AggregationMethod::SET, 130 | metric::AggregationMethod::Summarize => { 131 | AggregationMethod::SUMMARIZE 132 | } 133 | }; 134 | let persist = m.persist; 135 | telem.set_persisted(persist); 136 | telem.set_method(method); 137 | let mut meta = HashMap::new(); 138 | // TODO 139 | // 140 | // Learn how to consume bits of the metric without having to 141 | // clone like crazy 142 | for (k, v) in m.tags(&self.tags) { 143 | meta.insert(k.to_string(), v.to_string()); 144 | } 145 | telem.set_metadata(meta); 146 | telem.set_timestamp_ms(m.timestamp * 1000); // FIXME #166 147 | telem.set_samples(m.samples()); 148 | // TODO set bin_bounds. 
What we do is set the counts for the 149 | // bins as set_samples above, then bin_bounds comes from 150 | // elsewhere 151 | points.push(telem); 152 | } 153 | metric::Event::Log(l) => { 154 | let mut ll = LogLine::new(); 155 | let mut meta = HashMap::new(); 156 | // TODO 157 | // 158 | // Learn how to consume bits of the metric without having to 159 | // clone like crazy 160 | for (k, v) in l.tags(&self.tags) { 161 | meta.insert(k.clone(), v.clone()); 162 | } 163 | ll.set_path(l.path); 164 | ll.set_value(l.value); 165 | ll.set_metadata(meta); 166 | ll.set_timestamp_ms(l.time * 1000); // FIXME #166 167 | 168 | lines.push(ll); 169 | } 170 | _ => {} 171 | } 172 | } 173 | 174 | let mut pyld = Payload::new(); 175 | pyld.set_points(RepeatedField::from_vec(points)); 176 | pyld.set_lines(RepeatedField::from_vec(lines)); 177 | 178 | loop { 179 | let mut delivery_failure = false; 180 | if let Some(ref mut stream) = self.stream { 181 | let mut bufwrite = BufWriter::new(stream); 182 | let mut stream = CodedOutputStream::new(&mut bufwrite); 183 | let mut sz_buf = [0; 4]; 184 | let pyld_len = pyld.compute_size(); 185 | BigEndian::write_u32(&mut sz_buf, pyld_len); 186 | stream.write_raw_bytes(&sz_buf).unwrap(); 187 | let res = pyld.write_to_with_cached_sizes(&mut stream); 188 | if res.is_ok() { 189 | self.buffer.clear(); 190 | return; 191 | } else { 192 | self.delivery_attempts = self.delivery_attempts.saturating_add(1); 193 | delivery_failure = true; 194 | } 195 | } else { 196 | time::delay(self.delivery_attempts); 197 | self.stream = connect(&self.host, self.port); 198 | } 199 | if delivery_failure { 200 | self.stream = None 201 | } 202 | } 203 | } 204 | 205 | fn shutdown(mut self) { 206 | self.flush(); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/sink/null.rs: -------------------------------------------------------------------------------- 1 | //! Sink equivalent of /dev/null. 2 | use crate::sink::{Sink, Valve}; 3 | 4 | /// Null sink 5 | /// 6 | /// This sink is intended for testing and demonstration. Every `metric::Event` 7 | /// it receives will be deallocated. 8 | pub struct Null {} 9 | 10 | /// Configuration for the `Null` sink 11 | #[derive(Clone, Debug, Deserialize)] 12 | pub struct NullConfig { 13 | /// The sink's unique name in the routing topology. 
14 | pub config_path: String, 15 | } 16 | 17 | impl NullConfig { 18 | /// Create a new `NullConfig` 19 | pub fn new(config_path: String) -> NullConfig { 20 | NullConfig { config_path } 21 | } 22 | } 23 | 24 | impl Sink<NullConfig> for Null { 25 | fn init(_config: NullConfig) -> Self { 26 | Null {} 27 | } 28 | 29 | fn valve_state(&self) -> Valve { 30 | Valve::Open 31 | } 32 | 33 | fn flush_interval(&self) -> Option<u64> { 34 | Some(1) 35 | } 36 | 37 | fn flush(&mut self) { 38 | // do nothing 39 | } 40 | 41 | fn shutdown(mut self) { 42 | self.flush(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/source/file/file_server.rs: -------------------------------------------------------------------------------- 1 | use crate::metric; 2 | use crate::source; 3 | use crate::source::file::file_watcher::FileWatcher; 4 | use crate::source::internal::report_full_telemetry; 5 | use crate::util; 6 | use crate::util::send; 7 | use glob::glob; 8 | use mio; 9 | use std::mem; 10 | use std::path::PathBuf; 11 | use std::str; 12 | use std::time; 13 | 14 | /// `FileServer` is a Source which cooperatively schedules reads over files, 15 | /// converting the lines of said files into `LogLine` structures. As 16 | /// `FileServer` is intended to be useful across multiple operating systems with 17 | /// POSIX filesystem semantics `FileServer` must poll for changes. That is, no 18 | /// event notification is used by `FileServer`. 19 | /// 20 | /// `FileServer` is configured on a path to watch. The files do _not_ need to 21 | /// exist at cernan startup. `FileServer` will discover new files which match 22 | /// its path in at most 60 seconds. 23 | pub struct FileServer { 24 | pattern: PathBuf, 25 | max_read_bytes: usize, 26 | } 27 | 28 | /// The configuration struct for `FileServer`. 29 | #[derive(Clone, Debug, Deserialize)] 30 | pub struct FileServerConfig { 31 | /// The path that `FileServer` will watch. Globs are allowed and 32 | /// `FileServer` will watch multiple files. 33 | pub path: Option<PathBuf>, 34 | /// The maximum number of bytes to read from a file before switching to a 35 | /// new file. 36 | pub max_read_bytes: usize, 37 | /// The forwards which `FileServer` will obey. 38 | pub forwards: Vec<String>, 39 | /// The configured name of FileServer. 40 | pub config_path: Option<String>, 41 | } 42 | 43 | impl Default for FileServerConfig { 44 | fn default() -> Self { 45 | FileServerConfig { 46 | path: None, 47 | max_read_bytes: 2048, 48 | forwards: Vec::default(), 49 | config_path: None, 50 | } 51 | } 52 | } 53 | 54 | /// `FileServer` as Source 55 | /// 56 | /// The 'run' of `FileServer` performs the cooperative scheduling of reads over 57 | /// `FileServer`'s configured files. Much care has been taken to make this 58 | /// scheduling 'fair', meaning busy files do not drown out quiet files or vice 59 | /// versa, but there is no one perfect approach. Very fast files _will_ be lost if 60 | /// your system aggressively rolls log files. `FileServer` will keep a file 61 | /// handle open but should your system move so quickly that a file disappears 62 | /// before cernan is able to open it the contents will be lost. This should be a 63 | /// rare occurrence. 64 | /// 65 | /// Specific operating systems support evented interfaces that correct this 66 | /// problem but your intrepid authors know of no generic solution.
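/// A minimal sketch (hypothetical glob) of standing the source up by hand,
/// assuming the surrounding routing machinery supplies `chans` and `poller`:
///
/// ```ignore
/// use cernan::source::Source;
///
/// let config = FileServerConfig {
///     path: Some(std::path::PathBuf::from("/var/log/*.log")), // glob pattern
///     ..FileServerConfig::default()
/// };
/// FileServer::init(config).run(chans, poller);
/// ```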
67 | impl source::Source<FileServerConfig> for FileServer { 68 | /// Make a FileServer 69 | fn init(config: FileServerConfig) -> Self { 70 | let pattern = config.path.expect("must specify a 'path' for FileServer"); 71 | FileServer { 72 | pattern: pattern, 73 | max_read_bytes: config.max_read_bytes, 74 | } 75 | } 76 | 77 | fn run(self, mut chans: util::Channel, poller: mio::Poll) { 78 | let mut buffer = String::new(); 79 | 80 | let mut fp_map: util::HashMap<PathBuf, FileWatcher> = Default::default(); 81 | let mut fp_map_alt: util::HashMap<PathBuf, FileWatcher> = Default::default(); 82 | 83 | let mut backoff_cap: usize = 1; 84 | let mut lines = Vec::new(); 85 | // Alright friends, how does this work? 86 | // 87 | // We want to avoid burning up users' CPUs. To do this we sleep after 88 | // reading lines out of files. But! We want to be responsive as well. We 89 | // keep track of a 'backoff_cap' to decide how long we'll wait in any 90 | // given loop. This cap grows each time we fail to read lines in an 91 | // exponential fashion to some hard-coded cap. 92 | loop { 93 | let mut global_bytes_read: usize = 0; 94 | // glob poll 95 | for entry in glob(self.pattern.to_str().expect("no ability to glob")) 96 | .expect("Failed to read glob pattern") 97 | { 98 | if let Ok(path) = entry { 99 | let entry = fp_map.entry(path.clone()); 100 | if let Ok(fw) = FileWatcher::new(&path) { 101 | entry.or_insert(fw); 102 | }; 103 | } 104 | } 105 | // line polling 106 | for (path, mut watcher) in fp_map.drain() { 107 | let mut bytes_read: usize = 0; 108 | while let Ok(sz) = watcher.read_line(&mut buffer) { 109 | if sz > 0 { 110 | bytes_read += sz; 111 | lines.push(metric::LogLine::new( 112 | path.to_str().expect("not a valid path"), 113 | &buffer, 114 | )); 115 | buffer.clear(); 116 | } else { 117 | break; 118 | } 119 | if bytes_read > self.max_read_bytes { 120 | break; 121 | } 122 | } 123 | report_full_telemetry( 124 | "cernan.sources.file.bytes_read", 125 | bytes_read as f64, 126 | Some(vec![( 127 | "file_path", 128 | path.to_str().expect("not a valid path"), 129 | )]), 130 | ); 131 | // A FileWatcher is dead when the underlying file has 132 | // disappeared. If the FileWatcher is dead we don't move it into 133 | // fp_map_alt, which deallocates it. 134 | if !watcher.dead() { 135 | fp_map_alt.insert(path, watcher); 136 | } 137 | global_bytes_read = global_bytes_read.saturating_add(bytes_read); 138 | } 139 | for l in lines.drain(..) { 140 | send(&mut chans, metric::Event::new_log(l)); 141 | } 142 | // We've drained the live FileWatchers into fp_map_alt in the line 143 | // polling loop. Now we swap them back into fp_map so next time we 144 | // loop through we'll read from the live FileWatchers. 145 | mem::swap(&mut fp_map, &mut fp_map_alt); 146 | // When no bytes have been read we double the backoff_cap, 147 | // limited by the hard-coded cap. Else, we reset the backoff_cap to its 148 | // minimum on the assumption that next time through there will be 149 | // more lines to read promptly.
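            // For example (illustrative values): consecutive idle loops grow
            // the cap 1 -> 2 -> 4 -> ... -> 2_048, where it pins; a single
            // productive read snaps it back to 1.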
150 | if global_bytes_read == 0 { 151 | let lim = backoff_cap.saturating_mul(2); 152 | if lim > 2_048 { 153 | backoff_cap = 2_048; 154 | } else { 155 | backoff_cap = lim; 156 | } 157 | } else { 158 | backoff_cap = 1; 159 | } 160 | let backoff = backoff_cap.saturating_sub(global_bytes_read); 161 | let mut events = mio::Events::with_capacity(1024); 162 | match poller.poll( 163 | &mut events, 164 | Some(time::Duration::from_millis(backoff as u64)), 165 | ) { 166 | Err(e) => panic!(format!("Failed during poll {:?}", e)), 167 | Ok(0) => {} 168 | Ok(_num_events) => { 169 | // File server doesn't poll for anything other than SYSTEM events. 170 | // As currently there are no system events other than SHUTDOWN, 171 | // we immediately exit. 172 | send(&mut chans, metric::Event::Shutdown); 173 | return; 174 | } 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/source/file/file_watcher.rs: -------------------------------------------------------------------------------- 1 | use crate::source::internal::report_full_telemetry; 2 | use std::fs; 3 | use std::io; 4 | use std::io::BufRead; 5 | use std::io::Seek; 6 | use std::os::unix::fs::MetadataExt; 7 | use std::path::PathBuf; 8 | 9 | /// The `FileWatcher` struct defines the polling-based state machine which reads 10 | /// from a file path, transparently updating the underlying file descriptor when 11 | /// the file has been rolled over, as is common for logs. 12 | /// 13 | /// The `FileWatcher` is expected to live for the lifetime of the file 14 | /// path. `FileServer` is responsible for clearing away `FileWatchers` which no 15 | /// longer exist. 16 | pub struct FileWatcher { 17 | pub path: PathBuf, 18 | reader: Option<io::BufReader<fs::File>>, 19 | file_id: Option<(u64, u64)>, 20 | previous_size: u64, 21 | reopen: bool, 22 | } 23 | 24 | type Devno = u64; 25 | type Ino = u64; 26 | type FileId = (Devno, Ino); 27 | 28 | #[inline] 29 | fn file_id(path: &PathBuf) -> Option<FileId> { 30 | if let Ok(metadata) = fs::metadata(path) { 31 | let dev = metadata.dev(); 32 | let ino = metadata.ino(); 33 | Some((dev, ino)) 34 | } else { 35 | None 36 | } 37 | } 38 | 39 | impl FileWatcher { 40 | /// Create a new `FileWatcher` 41 | /// 42 | /// The input path will be used by `FileWatcher` to prime its state 43 | /// machine. A `FileWatcher` tracks _only one_ file. If the path does not yet 44 | /// exist the watcher starts in a pending state and begins reading once the file appears; any other IO error is returned to the caller.
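    /// A minimal sketch (hypothetical path) of the expected call pattern:
    ///
    /// ```ignore
    /// let path = std::path::PathBuf::from("/var/log/syslog");
    /// let mut watcher = FileWatcher::new(&path)?;
    /// let mut buffer = String::new();
    /// // The watcher seeks to the end of the file on open, so this reads 0
    /// // bytes until new writes arrive.
    /// let sz = watcher.read_line(&mut buffer)?;
    /// ```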
45 | pub fn new(path: &PathBuf) -> io::Result { 46 | match fs::File::open(&path) { 47 | Ok(f) => { 48 | let metadata = f.metadata()?; 49 | let dev = metadata.dev(); 50 | let ino = metadata.ino(); 51 | let mut rdr = io::BufReader::new(f); 52 | assert!(rdr.seek(io::SeekFrom::End(0)).is_ok()); 53 | Ok(FileWatcher { 54 | path: path.clone(), 55 | reader: Some(rdr), 56 | file_id: Some((dev, ino)), 57 | previous_size: 0, 58 | reopen: false, 59 | }) 60 | } 61 | Err(e) => match e.kind() { 62 | io::ErrorKind::NotFound => { 63 | let fw = { 64 | FileWatcher { 65 | path: path.clone(), 66 | reader: None, 67 | file_id: None, 68 | previous_size: 0, 69 | reopen: false, 70 | } 71 | }; 72 | Ok(fw) 73 | } 74 | _ => Err(e), 75 | }, 76 | } 77 | } 78 | 79 | fn open_at_start(&mut self) { 80 | if let Ok(f) = fs::File::open(&self.path) { 81 | let metadata = f.metadata().unwrap(); // we _must_ be able to read the metadata 82 | let dev = metadata.dev(); 83 | let ino = metadata.ino(); 84 | self.file_id = Some((dev, ino)); 85 | self.previous_size = metadata.size(); 86 | self.reader = Some(io::BufReader::new(f)); 87 | if self.file_id.is_none() { 88 | // It's possible that between opening the file and reading its 89 | // ID the file will have been deleted. This is that branch. 90 | self.file_id = None; 91 | self.reader = None; 92 | } else { 93 | report_full_telemetry( 94 | "cernan.sources.file.switch", 95 | 1.0, 96 | Some(vec![( 97 | "file_path", 98 | self.path.to_str().expect("could not make path"), 99 | )]), 100 | ); 101 | } 102 | } else { 103 | self.reader = None; 104 | self.file_id = None; 105 | } 106 | self.reopen = false; 107 | } 108 | 109 | pub fn dead(&self) -> bool { 110 | self.reader.is_none() && self.file_id.is_none() 111 | } 112 | 113 | /// Read a single line from the underlying file 114 | /// 115 | /// This function will attempt to read a new line from its file, blocking, 116 | /// up to some maximum but unspecified amount of time. `read_line` will open 117 | /// a new file handler at need, transparently to the caller. 118 | pub fn read_line(&mut self, mut buffer: &mut String) -> io::Result { 119 | if self.reopen { 120 | self.open_at_start(); 121 | } 122 | if let Some(ref mut reader) = self.reader { 123 | // Every read we detect the current_size of the file and compare 124 | // against the previous_size. There are three cases to consider: 125 | // 126 | // * current_size > previous_size 127 | // * current_size == previous_size 128 | // * current_size < previous_size 129 | // 130 | // In the last case we must consider that the file has been 131 | // truncated and we can no longer trust our seek position 132 | // in-file. We MUST seek back to position 0. This is the _simplest_ 133 | // case to handle. 134 | // 135 | // Consider the equality case. It's possible that NO WRITES have 136 | // come into the file _or_ that the file has been truncated and 137 | // coincidentally the new writes exactly match the byte size of the 138 | // previous writes. THESE WRITES WILL BE LOST. 139 | // 140 | // Now the greater than inequality. All of the equality 141 | // considerations hold for this case. Also, consider if a write 142 | // straddles the line between previous_size and current_size. Then 143 | // we will be UNABLE to determine the proper start index of this 144 | // write and we WILL return a partial write of length 145 | // absolute_write_idx - previous_size. 
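            // A concrete illustration (hypothetical sizes): previous_size ==
            // 100 and the file is truncated then rewritten with 40 bytes.
            // current_size (40) < previous_size (100), so below we seek back
            // to 0 and report the 60 missing bytes as
            // cernan.sources.file.truncation telemetry.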
146 | let current_size = reader.get_ref().metadata().unwrap().size(); 147 | if self.previous_size > current_size { 148 | assert!(reader.seek(io::SeekFrom::Start(0)).is_ok()); 149 | report_full_telemetry( 150 | "cernan.sources.file.truncation", 151 | (self.previous_size - current_size) as f64, 152 | Some(vec![( 153 | "file_path", 154 | self.path.to_str().expect("could not make path"), 155 | )]), 156 | ); 157 | } 158 | self.previous_size = current_size; 159 | // match here on error, if metadata doesn't match up open_at_start 160 | // new reader and let it catch on the next looparound 161 | match reader.read_line(&mut buffer) { 162 | Ok(0) => { 163 | if file_id(&self.path) != self.file_id { 164 | self.reopen = true; 165 | } 166 | Ok(0) 167 | } 168 | Ok(sz) => { 169 | assert_eq!(sz, buffer.len()); 170 | buffer.pop(); 171 | Ok(buffer.len()) 172 | } 173 | Err(e) => { 174 | if let io::ErrorKind::NotFound = e.kind() { 175 | self.reopen = true; 176 | } 177 | Err(e) 178 | } 179 | } 180 | } else { 181 | self.open_at_start(); 182 | Ok(0) 183 | } 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/source/flush.rs: -------------------------------------------------------------------------------- 1 | use crate::metric; 2 | use crate::source; 3 | use crate::util; 4 | use crate::util::send; 5 | use mio; 6 | use std::thread::sleep; 7 | use std::time::Duration; 8 | 9 | /// The source of all flush pulses. 10 | pub struct FlushTimer; 11 | 12 | /// Nil config for `FlushTimer`. 13 | #[derive(Clone, Debug, Deserialize)] 14 | pub struct FlushTimerConfig; 15 | 16 | /// Returns the number of discrete flushes per second. 17 | pub fn flushes_per_second() -> u64 { 18 | // With 100 flushes per second, we have a maximum precision of 10ms. 19 | // Anything more than this is probably asking for the hopper queues to be 20 | // filled more by flushes than metrics. 21 | 100 22 | } 23 | 24 | impl source::Source for FlushTimer { 25 | /// Create a new FlushTimer. This will not produce a new thread, that must 26 | /// be managed by the end-user. 27 | fn init(_config: FlushTimerConfig) -> Self { 28 | FlushTimer {} 29 | } 30 | 31 | fn run(self, mut chans: util::Channel, _poller: mio::Poll) { 32 | let flush_duration = Duration::from_millis(1000 / flushes_per_second()); 33 | // idx will _always_ increase. If it's kept at u64 or greater it will 34 | // overflow long past the collapse of our industrial civilization even 35 | // if the flush interval is set to a millisecond. 36 | // 37 | // Point being, there's a theoretical overflow problem here but it's not 38 | // going to be hit in practice. 39 | let mut idx: u64 = 0; 40 | loop { 41 | // We start with TimerFlush(1) as receivers start with 42 | // TimerFlush(0). This will update their last_flush_idx seen at 43 | // system boot. 
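            // Illustratively: with flushes_per_second() == 100 the sleep
            // below is 10ms, so receivers see TimerFlush(1), TimerFlush(2),
            // ... at roughly 10ms intervals and gate their own flushes off
            // that idx.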
44 | idx += 1; 45 | sleep(flush_duration); 46 | send(&mut chans, metric::Event::TimerFlush(idx)); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/source/graphite.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | use crate::metric; 3 | use crate::protocols::graphite::parse_graphite; 4 | use crate::source::{TCPConfig, TCPStreamHandler, TCP}; 5 | use crate::util; 6 | use crate::util::send; 7 | use mio; 8 | use std::io::prelude::*; 9 | use std::io::BufReader; 10 | use std::str; 11 | use std::sync; 12 | use std::sync::atomic::{AtomicUsize, Ordering}; 13 | 14 | pub static GRAPHITE_NEW_PEER: AtomicUsize = AtomicUsize::new(0); 15 | pub static GRAPHITE_GOOD_PACKET: AtomicUsize = AtomicUsize::new(0); 16 | pub static GRAPHITE_TELEM: AtomicUsize = AtomicUsize::new(0); 17 | pub static GRAPHITE_BAD_PACKET: AtomicUsize = AtomicUsize::new(0); 18 | 19 | /// Configured for the `metric::Telemetry` source. 20 | #[derive(Debug, Deserialize, Clone)] 21 | pub struct GraphiteConfig { 22 | /// The host that the source will listen on. May be an IP address or a DNS 23 | /// hostname. 24 | pub host: String, 25 | /// The port that the source will listen on. 26 | pub port: u16, 27 | /// The forwards that the source will send all its Telemetry. 28 | pub forwards: Vec, 29 | /// The unique name of the source in the routing topology. 30 | pub config_path: Option, 31 | } 32 | 33 | impl Default for GraphiteConfig { 34 | fn default() -> GraphiteConfig { 35 | GraphiteConfig { 36 | host: "localhost".to_string(), 37 | port: 2003, 38 | forwards: Vec::new(), 39 | config_path: Some("sources.graphite".to_string()), 40 | } 41 | } 42 | } 43 | 44 | impl From for TCPConfig { 45 | fn from(item: GraphiteConfig) -> Self { 46 | TCPConfig { 47 | host: item.host, 48 | port: item.port, 49 | forwards: item.forwards, 50 | config_path: item.config_path, 51 | } 52 | } 53 | } 54 | 55 | #[derive(Default, Debug, Clone, Deserialize)] 56 | pub struct GraphiteStreamHandler; 57 | 58 | impl TCPStreamHandler for GraphiteStreamHandler { 59 | fn handle_stream( 60 | &mut self, 61 | mut chans: util::Channel, 62 | poller: &mio::Poll, 63 | stream: mio::net::TcpStream, 64 | ) { 65 | let mut line = String::new(); 66 | let mut res = Vec::new(); 67 | let mut line_reader = BufReader::new(stream); 68 | let basic_metric = sync::Arc::new(Some(metric::Telemetry::default())); 69 | 70 | loop { 71 | let mut events = mio::Events::with_capacity(1024); 72 | match poller.poll(&mut events, None) { 73 | Err(e) => panic!("Failed during poll {:?}", e), 74 | Ok(_num_events) => { 75 | for event in events { 76 | match event.token() { 77 | constants::SYSTEM => return, 78 | _stream_token => { 79 | while let Ok(len) = line_reader.read_line(&mut line) { 80 | if len > 0 { 81 | if parse_graphite( 82 | &line, 83 | &mut res, 84 | &basic_metric, 85 | ) { 86 | assert!(!res.is_empty()); 87 | GRAPHITE_GOOD_PACKET 88 | .fetch_add(1, Ordering::Relaxed); 89 | GRAPHITE_TELEM 90 | .fetch_add(1, Ordering::Relaxed); 91 | for m in res.drain(..) 
{ 92 | send( 93 | &mut chans, 94 | metric::Event::Telemetry(m), 95 | ); 96 | } 97 | line.clear(); 98 | } else { 99 | GRAPHITE_BAD_PACKET 100 | .fetch_add(1, Ordering::Relaxed); 101 | error!("bad packet: {:?}", line); 102 | line.clear(); 103 | } 104 | } else { 105 | break; 106 | } 107 | } 108 | } 109 | } 110 | } 111 | } 112 | } 113 | } 114 | } 115 | } 116 | 117 | /// Graphite protocol source 118 | /// 119 | /// This source produces `metric::Telemetry` from the graphite protocol. 120 | pub type Graphite = TCP; 121 | -------------------------------------------------------------------------------- /src/source/mod.rs: -------------------------------------------------------------------------------- 1 | //! Staging ground for all sources 2 | //! 3 | //! In cernan a `Source` is a place where all `metric::Event` come from, feeding 4 | //! down into the source's forwards for further processing. Statsd is a source 5 | //! that creates `Telemetry`, `FileServer` is a source that creates `LogLine`s. 6 | use crate::thread; 7 | use crate::util; 8 | use mio; 9 | use std::marker::PhantomData; 10 | 11 | mod avro; 12 | mod file; 13 | mod flush; 14 | mod graphite; 15 | mod internal; 16 | mod native; 17 | mod nonblocking; 18 | mod statsd; 19 | mod tcp; 20 | 21 | pub use self::avro::Avro; 22 | pub use self::file::{FileServer, FileServerConfig}; 23 | pub use self::flush::{flushes_per_second, FlushTimer, FlushTimerConfig}; 24 | pub use self::graphite::{Graphite, GraphiteConfig}; 25 | pub use self::internal::{report_full_telemetry, Internal, InternalConfig}; 26 | pub use self::native::{NativeServer, NativeServerConfig}; 27 | use self::nonblocking::{BufferedPayload, PayloadErr}; 28 | pub use self::statsd::{Statsd, StatsdConfig, StatsdParseConfig}; 29 | pub use self::tcp::{TCPConfig, TCPStreamHandler, TCP}; 30 | 31 | /// Generic interface used to capture global source configuration 32 | /// parameters as well as source specific parameters. 33 | /// 34 | /// Stored configuration is consumed when the source is spawned, 35 | /// resulting in a new thread which executes the given source. 36 | pub struct RunnableSource 37 | where 38 | S: Send + Source, 39 | SConfig: 'static + Send + Clone, 40 | { 41 | chans: util::Channel, 42 | source: S, 43 | 44 | // Yes, compiler, we know that we aren't storing 45 | // anything of type SConfig. 46 | config: PhantomData, 47 | } 48 | 49 | impl RunnableSource 50 | where 51 | S: Send + Source, 52 | SConfig: 'static + Send + Clone, 53 | { 54 | /// Constructs a new RunnableSource. 55 | pub fn new(chans: util::Channel, config: SConfig) -> Self { 56 | RunnableSource { 57 | chans: chans, 58 | config: PhantomData, 59 | source: S::init(config), 60 | } 61 | } 62 | 63 | /// Spawns a thread corresponding to the given RunnableSource, consuming 64 | /// the given RunnableSource in the process. 65 | pub fn run(self) -> thread::ThreadHandle { 66 | thread::spawn(move |poller| self.source.run(self.chans, poller)) 67 | } 68 | } 69 | 70 | /// cernan Source, the originator of all `metric::Event`. 71 | /// 72 | /// A cernan Source creates all `metric::Event`, doing so by listening to 73 | /// network IO, reading from files, etc etc. All sources push into the routing 74 | /// topology. 75 | pub trait Source 76 | where 77 | Self: 'static + Send + Sized, 78 | SConfig: 'static + Send + Clone, 79 | { 80 | /// Constructs a so-called runnable source for the given Source and 81 | /// config.` See RunnableSource. 
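    /// A minimal sketch (hypothetical channel) of standing a concrete source
    /// up through this constructor:
    ///
    /// ```ignore
    /// let runnable = Statsd::new(chans, StatsdConfig::default());
    /// let _handle = runnable.run(); // spawns the source thread
    /// ```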
82 | fn new(chans: util::Channel, config: SConfig) -> RunnableSource { 83 | RunnableSource::::new(chans, config) 84 | } 85 | 86 | /// Initializes state for the given Source. 87 | fn init(config: SConfig) -> Self; 88 | 89 | /// Run method invoked by RunnableSource. 90 | /// It is from this method that Sources produce metric::Events. 91 | fn run(self, chans: util::Channel, poller: mio::Poll); 92 | } 93 | -------------------------------------------------------------------------------- /src/source/native.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | use crate::metric; 3 | use crate::protocols::native::{AggregationMethod, Payload}; 4 | use crate::source::{BufferedPayload, PayloadErr, TCPConfig, TCPStreamHandler, TCP}; 5 | use crate::util; 6 | use mio; 7 | use protobuf; 8 | use std::net; 9 | use std::str; 10 | use std::sync::atomic::{AtomicUsize, Ordering}; 11 | 12 | /// Total payloads processed. 13 | pub static NATIVE_PAYLOAD_SUCCESS_SUM: AtomicUsize = AtomicUsize::new(0); 14 | /// Total fatal parse failures. 15 | pub static NATIVE_PAYLOAD_PARSE_FAILURE_SUM: AtomicUsize = AtomicUsize::new(0); 16 | 17 | /// The native source 18 | /// 19 | /// This source is the pair to the native sink. The native source/sink use or 20 | /// consume cernan's native protocol, defined 21 | /// `resources/protobufs/native.proto`. Clients may use the native protocol 22 | /// without having to obey the translation required in other sources or 23 | /// operators may set up cernan to cernan communication. 24 | 25 | /// Configuration for the native source 26 | #[derive(Debug, Clone, Deserialize)] 27 | pub struct NativeServerConfig { 28 | /// The IP address the native source will bind to. 29 | pub ip: String, 30 | /// The port the source will listen on. 31 | pub port: u16, 32 | /// The forwards for the native source to send its Telemetry along. 33 | pub forwards: Vec, 34 | /// The unique name for the source in the routing topology. 
35 | pub config_path: Option, 36 | } 37 | 38 | impl Default for NativeServerConfig { 39 | fn default() -> Self { 40 | NativeServerConfig { 41 | ip: "0.0.0.0".to_string(), 42 | port: 1972, 43 | forwards: Vec::default(), 44 | config_path: None, 45 | } 46 | } 47 | } 48 | 49 | impl From for TCPConfig { 50 | fn from(item: NativeServerConfig) -> Self { 51 | TCPConfig { 52 | host: item.ip, 53 | port: item.port, 54 | forwards: item.forwards, 55 | config_path: item.config_path, 56 | } 57 | } 58 | } 59 | 60 | #[derive(Default, Debug, Clone, Deserialize)] 61 | pub struct NativeStreamHandler; 62 | 63 | impl TCPStreamHandler for NativeStreamHandler { 64 | fn handle_stream( 65 | &mut self, 66 | chans: util::Channel, 67 | poller: &mio::Poll, 68 | stream: mio::net::TcpStream, 69 | ) { 70 | let mut streaming = true; 71 | let mut reader = BufferedPayload::new(stream.try_clone().unwrap(), 1_048_576); 72 | while streaming { 73 | let mut events = mio::Events::with_capacity(1024); 74 | match poller.poll(&mut events, None) { 75 | Err(e) => panic!("Failed during poll {:?}", e), 76 | Ok(_num_events) => { 77 | for event in events { 78 | match event.token() { 79 | constants::SYSTEM => { 80 | streaming = false; 81 | break; 82 | } 83 | _stream_token => { 84 | while streaming { 85 | match reader.read() { 86 | Ok(mut raw) => { 87 | let handle_res = self 88 | .handle_stream_payload( 89 | chans.clone(), 90 | &mut raw, 91 | ); 92 | if handle_res.is_err() { 93 | NATIVE_PAYLOAD_PARSE_FAILURE_SUM 94 | .fetch_add(1, Ordering::Relaxed); 95 | streaming = false; 96 | break; 97 | } 98 | NATIVE_PAYLOAD_SUCCESS_SUM 99 | .fetch_add(1, Ordering::Relaxed); 100 | } 101 | Err(PayloadErr::WouldBlock) => { 102 | // Not enough data yet. Try again. 103 | break; 104 | } 105 | Err(PayloadErr::EOF) => { 106 | // Client went away. Shut it down 107 | // (gracefully). 108 | trace!("TCP stream closed."); 109 | streaming = false; 110 | break; 111 | } 112 | Err(e) => { 113 | error!("Failed to process native payload! {:?}", e); 114 | streaming = false; 115 | break; 116 | } 117 | } 118 | } 119 | } 120 | } 121 | } 122 | } // events processing 123 | } // poll 124 | } // while connected 125 | 126 | // On some systems shutting down an already closed connection (client or 127 | // otherwise) results in an Err. See - 128 | // https://doc.rust-lang.org/beta/std/net/struct.TcpStream.html#platform-specific-behavior 129 | let _shutdown_result = stream.shutdown(net::Shutdown::Both); 130 | } // handle_stream 131 | } 132 | 133 | impl NativeStreamHandler { 134 | fn handle_stream_payload( 135 | &mut self, 136 | mut chans: util::Channel, 137 | buf: &mut Vec, 138 | ) -> Result<(), protobuf::ProtobufError> { 139 | match protobuf::parse_from_bytes::(buf) { 140 | // TODO we have to handle bin_bounds. We'll use samples to get 141 | // the values of each bounds' counter. 
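            // Note: `buf` here holds exactly one length-delimited frame, i.e.
            // the protobuf body that followed a 4-byte big-endian length
            // header (see BufferedPayload in source/nonblocking.rs).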
142 | Ok(mut pyld) => { 143 | for mut point in pyld.take_points().into_iter() { 144 | let name: String = point.take_name(); 145 | let smpls: Vec = point.take_samples(); 146 | let aggr_type: AggregationMethod = point.get_method(); 147 | let mut meta = point.take_metadata(); 148 | // FIXME #166 149 | let ts: i64 = (point.get_timestamp_ms() as f64 * 0.001) as i64; 150 | 151 | if smpls.is_empty() { 152 | continue; 153 | } 154 | let mut metric = metric::Telemetry::new().name(name); 155 | metric = metric.value(smpls[0]); 156 | metric = match aggr_type { 157 | AggregationMethod::SET => { 158 | metric.kind(metric::AggregationMethod::Set) 159 | } 160 | AggregationMethod::SUM => { 161 | metric.kind(metric::AggregationMethod::Sum) 162 | } 163 | AggregationMethod::SUMMARIZE => { 164 | metric.kind(metric::AggregationMethod::Summarize) 165 | } 166 | AggregationMethod::BIN => { 167 | metric.kind(metric::AggregationMethod::Histogram) 168 | } 169 | }; 170 | metric = metric.persist(point.get_persisted()); 171 | metric = metric.timestamp(ts); 172 | let mut metric = metric.harden().unwrap(); // todo don't unwrap 173 | for (key, value) in meta.drain() { 174 | metric = metric.overlay_tag(key, value); 175 | } 176 | for smpl in &smpls[1..] { 177 | metric = metric.insert(*smpl); 178 | } 179 | util::send(&mut chans, metric::Event::new_telemetry(metric)); 180 | } 181 | for mut line in pyld.take_lines().into_iter() { 182 | let path: String = line.take_path(); 183 | let value: String = line.take_value(); 184 | let mut meta = line.take_metadata(); 185 | // FIXME #166 186 | let ts: i64 = (line.get_timestamp_ms() as f64 * 0.001) as i64; 187 | 188 | let mut logline = metric::LogLine::new(path, value); 189 | logline = logline.time(ts); 190 | for (key, value) in meta.drain() { 191 | logline = logline.overlay_tag(key, value); 192 | } 193 | util::send(&mut chans, metric::Event::new_log(logline)); 194 | } 195 | Ok(()) 196 | } 197 | Err(err) => { 198 | trace!("Unable to read payload: {:?}", err); 199 | Err(err) 200 | } 201 | } 202 | } 203 | } 204 | 205 | /// Source for Cernan's native protocol. 206 | pub type NativeServer = TCP; 207 | -------------------------------------------------------------------------------- /src/source/nonblocking.rs: -------------------------------------------------------------------------------- 1 | //! Handy interfaces for nonblocking streams. 2 | 3 | use byteorder::{BigEndian, ReadBytesExt}; 4 | use mio; 5 | use std::io::{Read, Write}; 6 | use std::{io, mem}; 7 | 8 | /// Like `std::net::TcpStream::write_all`, except it handles `WouldBlock` too. 9 | pub fn write_all( 10 | mut stream: &mio::net::TcpStream, 11 | bytes: &[u8], 12 | ) -> Result<(), io::Error> { 13 | let mut written = 0; 14 | 15 | while written < bytes.len() { 16 | match stream.write(&bytes[written..]) { 17 | Ok(bytes_written) => { 18 | written += bytes_written; 19 | } 20 | 21 | Err(e) => match e.kind() { 22 | io::ErrorKind::WouldBlock | io::ErrorKind::Interrupted => { 23 | continue; 24 | } 25 | 26 | _ => { 27 | error!("Failed to write bytes onto stream! {:?}", e); 28 | return Err(e); 29 | } 30 | }, 31 | } 32 | } 33 | Ok(()) 34 | } 35 | 36 | /// Handler error types returned by `handle_avro_payload`. 37 | #[derive(Debug)] 38 | pub enum PayloadErr { 39 | /// End of stream has been reached. 40 | EOF, 41 | /// Not enough data present to construct the payload. 42 | /// Try again later. 43 | WouldBlock, 44 | /// An IO error occured. 45 | IO(io::Error), 46 | /// Payload parsing failure. 
47 | Protocol(String), 48 | /// The length prefix is too large to be allocated 49 | LengthTooLarge, 50 | } 51 | 52 | impl From<io::Error> for PayloadErr { 53 | fn from(e: io::Error) -> PayloadErr { 54 | if e.kind() == io::ErrorKind::WouldBlock { 55 | PayloadErr::WouldBlock 56 | } else if e.kind() == io::ErrorKind::UnexpectedEof { 57 | PayloadErr::EOF 58 | } else { 59 | PayloadErr::IO(e) 60 | } 61 | } 62 | } 63 | 64 | impl From<String> for PayloadErr { 65 | fn from(s: String) -> PayloadErr { 66 | PayloadErr::Protocol(s) 67 | } 68 | } 69 | 70 | /// Buffered length-prefixed payload. 71 | /// 72 | /// For use on blocking or non-blocking streams. 73 | pub struct BufferedPayload { 74 | /// Size of the expected payload in bytes. When None, this value is read 75 | /// off the underlying stream as a big-endian u32. 76 | payload_size: Option<usize>, 77 | 78 | /// The maximum allowable payload size. If a payload_size comes in over the 79 | /// wire that is greater than this limit we close the connection. 80 | max_payload_size: usize, 81 | 82 | /// Position in the payload byte vector we are receiving into. 83 | payload_pos: usize, 84 | 85 | /// Bytes comprising the payload. 86 | payload: Vec<u8>, 87 | 88 | /// Inner buffer where bytes from the underlying stream are staged. 89 | buffer: io::BufReader<mio::net::TcpStream>, 90 | } 91 | 92 | impl BufferedPayload { 93 | /// Constructs a new BufferedPayload. 94 | pub fn new(stream: mio::net::TcpStream, max_payload_size: usize) -> Self { 95 | BufferedPayload { 96 | payload_size: None, 97 | max_payload_size: max_payload_size, 98 | payload_pos: 0, 99 | payload: Vec::new(), 100 | buffer: io::BufReader::new(stream), 101 | } 102 | } 103 | 104 | /// Reads buffered data from the underlying stream. If enough data is 105 | /// present, a single payload 106 | /// is constructed and returned. 107 | /// 108 | /// On non-blocking streams, it is up to the user to call 109 | /// this method repeatedly until PayloadErr::WouldBlock 110 | /// is returned. 111 | pub fn read(&mut self) -> Result<Vec<u8>, PayloadErr> { 112 | // Are we actively reading a payload already? 113 | if self.payload_size.is_none() { 114 | self.read_length()?; 115 | } 116 | if self.payload_size.unwrap() > self.max_payload_size { 117 | return Err(PayloadErr::LengthTooLarge); 118 | } 119 | 120 | self.read_payload()?; 121 | 122 | // By this point we assert that we have read exactly 123 | // 1 payload off the buffer. We may have read partial 124 | // or entire other payloads off the wire. Additional bytes 125 | // will persist in buffer for later parsing. 126 | Ok(mem::replace(&mut self.payload, Vec::new())) 127 | } 128 | 129 | /// Reads the payload's length from the wire, caching the result. 130 | /// 131 | /// If a cached value already exists, this function is a no-op. 132 | fn read_length(&mut self) -> Result<(), PayloadErr> { 133 | if self.payload_size.is_none() { 134 | self.payload_size = Some(self.buffer.read_u32::<BigEndian>()? as usize); 135 | }; 136 | Ok(()) 137 | } 138 | 139 | /// Attempts to read at least one payload worth of data. If there 140 | /// isn't enough data between the inner buffer and the underlying stream, 141 | /// then PayloadErr::WouldBlock is returned. 142 | fn read_payload(&mut self) -> Result<(), PayloadErr> { 143 | // At this point we can assume that we have successfully 144 | // read the length off the wire.
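        // Illustrative trace (hypothetical sizes): a 300-byte payload that
        // arrives in reads of 200 then 100 bytes leaves payload_pos at 200
        // after the first pass, then completes and resets
        // payload_size/payload_pos on the second.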
145 | let payload_size = self.payload_size.unwrap(); 146 | 147 | if self.payload.len() != payload_size { 148 | trace!("Resizing internal buffer to {:?}", payload_size); 149 | self.payload.resize(payload_size, 0); 150 | } 151 | 152 | loop { 153 | match self 154 | .buffer 155 | .read(&mut self.payload[self.payload_pos..payload_size]) 156 | { 157 | Ok(0) => return Err(PayloadErr::EOF), 158 | 159 | Ok(bytes_read) if (self.payload_pos + bytes_read) == payload_size => { 160 | // We successfully pulled a payload off the wire. 161 | // Reset bytes remaining for the next payload. 162 | self.payload_size = None; 163 | self.payload_pos = 0; 164 | return Ok(()); 165 | } 166 | 167 | Ok(bytes_read) => { 168 | // We read some data, but not yet enough. 169 | // Store the difference and try again later. 170 | self.payload_pos += bytes_read; 171 | continue; 172 | } 173 | 174 | Err(e) => return Err(e.into()), 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/source/statsd.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | use crate::metric; 3 | use crate::protocols::statsd::parse_statsd; 4 | use crate::source; 5 | use crate::util; 6 | use crate::util::send; 7 | use mio; 8 | use regex::Regex; 9 | use std::io::ErrorKind; 10 | use std::net::ToSocketAddrs; 11 | use std::str; 12 | use std::sync; 13 | use std::sync::atomic::{AtomicUsize, Ordering}; 14 | 15 | pub static STATSD_GOOD_PACKET: AtomicUsize = AtomicUsize::new(0); 16 | pub static STATSD_BAD_PACKET: AtomicUsize = AtomicUsize::new(0); 17 | 18 | /// The statsd source 19 | /// 20 | /// Statsd is a collection of protocols, originally spawned by the telemetering 21 | /// work done out of Etsy. Cernan tries to support a cow-path subset of the 22 | /// statsd protocol family. 23 | pub struct Statsd { 24 | conns: util::TokenSlab, 25 | parse_config: sync::Arc, 26 | } 27 | 28 | /// The mask type for metrics in `StatsdParseConfig`. 29 | pub type Mask = Regex; 30 | 31 | /// The bound type for metrics in `StatsdParseConfig`. 32 | pub type Bounds = Vec; 33 | 34 | /// Configuration for the statsd parser 35 | #[derive(Debug, Clone)] 36 | pub struct StatsdParseConfig { 37 | /// Set specific bin masks for timeseries according to their name. The name 38 | /// may be a [regex](https://crates.io/crates/regex) match, such like 39 | /// 'foo.*'. In this case all metrics prefixed by 'foo.' which are timer or 40 | /// histogram will be interpreted as a histogram. 41 | pub histogram_masks: Vec<(Mask, Bounds)>, 42 | /// Configure the error bound for a statsd timer or histogram. Cernan does 43 | /// not compute precise quantiles but approximations with a guaranteed upper 44 | /// bound on the error of approximation. This allows the end-user to set 45 | /// that. 46 | pub summarize_error_bound: f64, 47 | } 48 | 49 | impl Default for StatsdParseConfig { 50 | fn default() -> StatsdParseConfig { 51 | StatsdParseConfig { 52 | histogram_masks: vec![], 53 | summarize_error_bound: 0.01, 54 | } 55 | } 56 | } 57 | 58 | /// Configuration for the statsd source. 59 | #[derive(Debug, Clone)] 60 | pub struct StatsdConfig { 61 | /// The host for the statsd protocol to bind to. 62 | pub host: String, 63 | /// The port for the statsd source to listen on. 64 | pub port: u16, 65 | /// The forwards that statsd will send its telemetry on to. 66 | pub forwards: Vec, 67 | /// The unique name for the source in the routing topology. 
68 | pub config_path: Option, 69 | /// Configuration for the parsing of statsd lines 70 | pub parse_config: StatsdParseConfig, 71 | } 72 | 73 | impl Default for StatsdConfig { 74 | fn default() -> StatsdConfig { 75 | StatsdConfig { 76 | host: "localhost".to_string(), 77 | port: 8125, 78 | forwards: Vec::new(), 79 | config_path: None, 80 | parse_config: StatsdParseConfig::default(), 81 | } 82 | } 83 | } 84 | 85 | enum StatsdHandlerErr { 86 | Fatal, 87 | } 88 | 89 | impl Statsd { 90 | fn handle_datagrams( 91 | &self, 92 | mut chans: &mut util::Channel, 93 | socket: &mio::net::UdpSocket, 94 | mut buf: &mut Vec, 95 | ) -> Result<(), StatsdHandlerErr> { 96 | let mut metrics = Vec::new(); 97 | let basic_metric = sync::Arc::new(Some(metric::Telemetry::default())); 98 | loop { 99 | match socket.recv_from(&mut buf) { 100 | Ok((len, _)) => match str::from_utf8(&buf[..len]) { 101 | Ok(val) => { 102 | if parse_statsd( 103 | val, 104 | &mut metrics, 105 | &basic_metric, 106 | &self.parse_config, 107 | ) { 108 | for m in metrics.drain(..) { 109 | send(&mut chans, metric::Event::new_telemetry(m)); 110 | } 111 | STATSD_GOOD_PACKET.fetch_add(1, Ordering::Relaxed); 112 | } else { 113 | STATSD_BAD_PACKET.fetch_add(1, Ordering::Relaxed); 114 | error!("BAD PACKET: {:?}", val); 115 | } 116 | } 117 | Err(e) => { 118 | error!("Payload not valid UTF-8: {:?}", e); 119 | } 120 | }, 121 | Err(e) => match e.kind() { 122 | ErrorKind::WouldBlock => { 123 | break; 124 | } 125 | _ => { 126 | error!("Could not read UDP socket with error {:?}", e); 127 | return Err(StatsdHandlerErr::Fatal); 128 | } 129 | }, 130 | } 131 | } 132 | Ok(()) 133 | } 134 | } 135 | 136 | impl source::Source for Statsd { 137 | /// Create and spawn a new statsd source 138 | fn init(config: StatsdConfig) -> Self { 139 | let mut conns = util::TokenSlab::::new(); 140 | let addrs = (config.host.as_str(), config.port).to_socket_addrs(); 141 | match addrs { 142 | Ok(ips) => { 143 | for addr in ips { 144 | let socket = mio::net::UdpSocket::bind(&addr) 145 | .expect("Unable to bind to UDP socket"); 146 | conns.insert(socket); 147 | } 148 | } 149 | Err(e) => { 150 | info!( 151 | "Unable to perform DNS lookup on host {} with error {}", 152 | config.host, e 153 | ); 154 | } 155 | }; 156 | 157 | Statsd { 158 | conns: conns, 159 | parse_config: sync::Arc::new(config.parse_config), 160 | } 161 | } 162 | 163 | fn run(self, mut chans: util::Channel, poller: mio::Poll) { 164 | for (idx, socket) in self.conns.iter() { 165 | if let Err(e) = poller.register( 166 | socket, 167 | mio::Token::from(idx), 168 | mio::Ready::readable(), 169 | mio::PollOpt::edge(), 170 | ) { 171 | error!("Failed to register {:?} - {:?}!", socket, e); 172 | } 173 | } 174 | 175 | let mut buf = vec![0; 16_250]; 176 | loop { 177 | let mut events = mio::Events::with_capacity(1024); 178 | match poller.poll(&mut events, None) { 179 | Ok(_num_events) => { 180 | for event in events { 181 | match event.token() { 182 | constants::SYSTEM => { 183 | send(&mut chans, metric::Event::Shutdown); 184 | return; 185 | } 186 | 187 | token => { 188 | let socket = &self.conns[token]; 189 | if let Err(_e) = 190 | self.handle_datagrams(&mut chans, socket, &mut buf) 191 | { 192 | error!( 193 | "Deregistering {:?} due to unrecoverable error!", 194 | *socket 195 | ); 196 | } 197 | } 198 | } 199 | } 200 | } 201 | Err(e) => panic!(format!("Failed during poll {:?}", e)), 202 | } 203 | } // loop 204 | } // run 205 | } 206 | -------------------------------------------------------------------------------- /src/source/tcp.rs: 
--------------------------------------------------------------------------------
/src/source/tcp.rs:
--------------------------------------------------------------------------------
1 | use crate::constants;
2 | use crate::metric;
3 | use crate::source::Source;
4 | use crate::thread;
5 | use crate::util;
6 | use hopper;
7 | use mio;
8 | use std;
9 | use std::io::ErrorKind;
10 | use std::marker::PhantomData;
11 | use std::net::ToSocketAddrs;
12 | 
13 | /// Configuration for the TCP source.
14 | #[derive(Debug, Deserialize, Clone)]
15 | pub struct TCPConfig {
16 |     /// The unique name of the source in the routing topology.
17 |     pub config_path: Option<String>,
18 |     /// The host that the source will listen on. May be an IP address or a DNS
19 |     /// hostname.
20 |     pub host: String,
21 |     /// The port that the source will listen on.
22 |     pub port: u16,
23 |     /// The forwards to which the source will send all of its Telemetry.
24 |     pub forwards: Vec<String>,
25 | }
26 | 
27 | impl Default for TCPConfig {
28 |     fn default() -> TCPConfig {
29 |         TCPConfig {
30 |             host: "localhost".to_string(),
31 |             port: 8080,
32 |             forwards: Vec::new(),
33 |             config_path: Some("sources.tcp".to_string()),
34 |         }
35 |     }
36 | }
37 | 
38 | /// Simple single threaded TCP Stream handler.
39 | pub trait TCPStreamHandler: 'static + Default + Clone + Sync + Send {
40 |     /// Constructs a new handler for `mio::net::TcpStream`s.
41 |     fn new() -> Self {
42 |         Default::default()
43 |     }
44 | 
45 |     /// Handles a single TCP stream.
46 |     fn handle_stream(
47 |         &mut self,
48 |         _: util::Channel,
49 |         _: &mio::Poll,
50 |         _: mio::net::TcpStream,
51 |     ) -> ();
52 | }
53 | 
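// Editor's note: the handler below is an illustrative sketch, not part of the
// original tree. It is the smallest possible implementation of the
// `TCPStreamHandler` contract above; a real handler would register `stream`
// with `poller` and service reads until the SYSTEM token fires.
#[derive(Clone, Default)]
pub struct NullStreamHandler;

impl TCPStreamHandler for NullStreamHandler {
    fn handle_stream(
        &mut self,
        _chans: util::Channel,
        _poller: &mio::Poll,
        _stream: mio::net::TcpStream,
    ) {
        // Dropping the stream here closes the connection immediately.
    }
}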
54 | /// State for a TCP backed source.
55 | pub struct TCP<H: TCPStreamHandler> {
56 |     listeners: util::TokenSlab<mio::net::TcpListener>,
57 |     stream_events: mio::Registration,
58 |     stream_events_token: mio::Token,
59 |     handlers: thread::ThreadPool,
60 |     phantom: PhantomData<H>,
61 | }
62 | 
63 | impl<H> Source<TCPConfig> for TCP<H>
64 | where
65 |     H: TCPStreamHandler,
66 | {
67 |     /// Constructs and starts a new TCP source.
68 |     fn init(config: TCPConfig) -> Self {
69 |         // Create registrations for all TCP interfaces and stream handlers.
70 |         //
71 |         // Note - Due to restrictions in mio, we must construct these registrations
72 |         // here as we are assuming this function is called directly from the main
73 |         // process. Registrations must be bound to a mio poller by the subordinate
74 |         // thread.
75 |         let addrs = (config.host.as_str(), config.port).to_socket_addrs();
76 |         let mut listeners = util::TokenSlab::<mio::net::TcpListener>::new();
77 |         match addrs {
78 |             Ok(ips) => {
79 |                 let ips: Vec<_> = ips.collect();
80 |                 for addr in ips {
81 |                     let listener = mio::net::TcpListener::bind(&addr)
82 |                         .expect("Unable to bind to TCP socket");
83 |                     info!("Registering listener for {:?}", addr);
84 |                     listeners.insert(listener);
85 |                 }
86 |             }
87 | 
88 |             Err(e) => {
89 |                 panic!(
90 |                     "Unable to perform DNS lookup on {:?}:{:?} with error {}",
91 |                     config.host.as_str(),
92 |                     config.port,
93 |                     e
94 |                 );
95 |             }
96 |         };
97 | 
98 |         let (stream_events, stream_events_readiness) = mio::Registration::new2();
99 |         let stream_events_token = mio::Token::from(listeners.count());
100 |         let thread_pool = thread::ThreadPool::new(Some(stream_events_readiness));
101 |         TCP {
102 |             listeners: listeners,
103 |             stream_events: stream_events,
104 |             stream_events_token: stream_events_token,
105 |             handlers: thread_pool,
106 |             phantom: PhantomData,
107 |         }
108 |     }
109 | 
110 |     /// Starts the accept loop.
111 |     fn run(self, chans: util::Channel, poller: mio::Poll) {
112 |         for (idx, listener) in self.listeners.iter() {
113 |             if let Err(e) = poller.register(
114 |                 listener,
115 |                 mio::Token::from(idx),
116 |                 mio::Ready::readable(),
117 |                 mio::PollOpt::edge(),
118 |             ) {
119 |                 error!("Failed to register {:?} - {:?}!", listener, e);
120 |             }
121 |         }
122 | 
123 |         if let Err(e) = poller.register(
124 |             &self.stream_events,
125 |             self.stream_events_token,
126 |             mio::Ready::readable(),
127 |             mio::PollOpt::edge(),
128 |         ) {
129 |             error!("Failed to register stream events - {:?}!", e);
130 |         };
131 | 
132 |         self.accept_loop(chans, &poller)
133 |     }
134 | }
135 | 
136 | impl<H> TCP<H>
137 | where
138 |     H: TCPStreamHandler,
139 | {
140 |     fn accept_loop(mut self, mut chans: util::Channel, poll: &mio::Poll) {
141 |         loop {
142 |             let mut events = mio::Events::with_capacity(1024);
143 |             match poll.poll(&mut events, None) {
144 |                 Err(e) => panic!("Failed during poll {:?}", e),
145 |                 Ok(_num_events) => {
146 |                     for event in events {
147 |                         match event.token() {
148 |                             constants::SYSTEM => {
149 |                                 self.handlers.shutdown();
150 |                                 util::send(&mut chans, metric::Event::Shutdown);
151 |                                 return;
152 |                             }
153 |                             listener_token => {
154 |                                 if listener_token == self.stream_events_token {
155 |                                     // Mio event corresponding to a StreamHandler.
156 |                                     // Currently, the only StreamHandler event flags
157 |                                     // the StreamHandler as terminated. Cleanup state.
158 |                                     let ready = self.handlers.join_ready();
159 |                                     trace!(
160 |                                         "Removed {:?} terminated stream handlers.",
161 |                                         ready.len()
162 |                                     );
163 |                                 } else if let Err(e) =
164 |                                     self.spawn_stream_handlers(&chans, listener_token)
165 |                                 {
166 |                                     let listener = &self.listeners[listener_token];
167 |                                     error!("Failed to spawn stream handlers! {:?}", e);
168 |                                     error!("Deregistering listener for {:?} due to unrecoverable error!", *listener);
169 |                                     let _ = poll.deregister(listener);
170 |                                 }
171 |                             }
172 |                         }
173 |                     }
174 |                 }
175 |             }
176 |         }
177 |     }
178 | 
179 |     fn spawn_stream_handlers(
180 |         &mut self,
181 |         chans: &[hopper::Sender<metric::Event>],
182 |         listener_token: mio::Token,
183 |     ) -> Result<(), std::io::Error> {
184 |         let listener = &self.listeners[listener_token];
185 |         loop {
186 |             match listener.accept() {
187 |                 Ok((stream, _addr)) => {
188 |                     // Actually spawn the stream handler
189 |                     let rchans = chans.to_owned();
190 |                     self.handlers.spawn(move |poller| {
191 |                         // Note - Stream handlers are allowed to crash without
192 |                         // compromising Cernan's ability to gracefully shutdown.
193 |                         poller
194 |                             .register(
195 |                                 &stream,
196 |                                 mio::Token(0),
197 |                                 mio::Ready::readable(),
198 |                                 mio::PollOpt::edge(),
199 |                             )
200 |                             .unwrap();
201 | 
202 |                         let mut handler = H::new();
203 |                         handler.handle_stream(rchans, &poller, stream);
204 |                     });
205 |                 }
206 |                 Err(e) => match e.kind() {
207 |                     ErrorKind::ConnectionAborted
208 |                     | ErrorKind::Interrupted
209 |                     | ErrorKind::TimedOut => {
210 |                         // Connection was closed before we could accept or
211 |                         // we were interrupted. Press on.
212 |                         continue;
213 |                     }
214 |                     ErrorKind::WouldBlock => {
215 |                         // Out of connections to accept. Wrap it up.
216 |                         return Ok(());
217 |                     }
218 |                     _ => return Err(e),
219 |                 },
220 |             };
221 |         }
222 |     }
223 | }
224 | 
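// Editor's note: the test below is an illustrative sketch, not part of the
// original tree. It wires the source to a poller-owning thread in the way
// cernan's binary does; `NullStreamHandler` is the editor's example handler
// above, and port 0 asks the OS for an ephemeral port.
#[cfg(test)]
mod wiring_example {
    use super::*;
    use crate::thread::Stoppable;

    #[test]
    fn starts_and_stops() {
        let config = TCPConfig {
            host: "127.0.0.1".to_string(),
            port: 0,
            ..TCPConfig::default()
        };
        let source = TCP::<NullStreamHandler>::init(config);
        // `thread::spawn` pre-registers the SYSTEM shutdown token on the
        // child's poller; `run` watches for it in its accept loop.
        let handle = thread::spawn(move |poller| source.run(Vec::new(), poller));
        handle.shutdown();
    }
}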
--------------------------------------------------------------------------------
/src/thread.rs:
--------------------------------------------------------------------------------
1 | //! Mio enabled threading library.
2 | use crate::constants;
3 | use crate::util;
4 | use mio;
5 | use std::option;
6 | use std::sync;
7 | use std::thread;
8 | 
9 | /// Event polling structure. Alias of `mio::Poll`.
10 | pub type Poll = mio::Poll;
11 | /// Events buffer type. Alias of `mio::Events`.
12 | pub type Events = mio::Events;
13 | 
14 | /// Mio enabled thread state.
15 | pub struct ThreadHandle {
16 |     /// JoinHandle for the executing thread.
17 |     pub handle: thread::JoinHandle<()>,
18 | 
19 |     /// Readiness signal used to notify the given thread when an event is ready
20 |     /// to be consumed on the SYSTEM channel.
21 |     shutdown_event: mio::SetReadiness,
22 | }
23 | 
24 | /// Trait for stoppable processes.
25 | pub trait Stoppable {
26 |     /// Join the given process, blocking until it exits.
27 |     fn join(self) -> ();
28 | 
29 |     /// Gracefully shutdown the process, blocking until exit.
30 |     fn shutdown(self) -> ();
31 | }
32 | 
33 | impl Stoppable for ThreadHandle {
34 |     /// Join the given Thread, blocking until it exits.
35 |     fn join(self) {
36 |         self.handle.join().expect("Failed to join child thread!");
37 |     }
38 | 
39 |     /// Gracefully shutdown the given Thread, blocking until it exits.
40 |     ///
41 |     /// Note - It is the responsibility of the developer to ensure
42 |     /// that thread logic polls for events occurring on the SYSTEM token.
43 |     fn shutdown(self) {
44 |         self.shutdown_event
45 |             .set_readiness(mio::Ready::readable())
46 |             .expect("Failed to notify child thread of shutdown!");
47 |         self.join();
48 |     }
49 | }
50 | 
51 | /// Spawns a new thread executing the provided closure.
52 | pub fn spawn<F>(f: F) -> ThreadHandle
53 | where
54 |     F: Send + 'static + FnOnce(mio::Poll) -> (),
55 | {
56 |     let child_poller = mio::Poll::new().unwrap();
57 |     let (shutdown_event_registration, shutdown_event) = mio::Registration::new2();
58 |     ThreadHandle {
59 |         shutdown_event: shutdown_event,
60 |         handle: thread::spawn(move || {
61 |             child_poller
62 |                 .register(
63 |                     &shutdown_event_registration,
64 |                     constants::SYSTEM,
65 |                     mio::Ready::readable(),
66 |                     mio::PollOpt::edge(),
67 |                 )
68 |                 .expect("Failed to register system pipe");
69 | 
70 |             f(child_poller);
71 |         }),
72 |     }
73 | }
74 | 
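// Editor's note: the test below is an illustrative sketch, not part of the
// original tree. It demonstrates the contract documented on `Stoppable`: the
// child owns the poller and must watch for `constants::SYSTEM`, which
// `shutdown` flags readable before joining.
#[cfg(test)]
mod spawn_example {
    use super::*;

    #[test]
    fn child_observes_shutdown() {
        let handle = spawn(|poller| {
            let mut events = Events::with_capacity(16);
            loop {
                poller.poll(&mut events, None).unwrap();
                if events.iter().any(|ev| ev.token() == constants::SYSTEM) {
                    // Graceful exit: the parent signalled shutdown.
                    return;
                }
            }
        });
        handle.shutdown();
    }
}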
75 | /// mio Eventable ThreadPool.
76 | pub struct ThreadPool {
77 |     /// thread_id counter.
78 |     thread_id: usize,
79 |     /// Listing of all the joinable threads in the pool.
80 |     joinable: sync::Arc<sync::Mutex<Vec<usize>>>,
81 |     /// Mapping of thread_id to ThreadHandle.
82 |     threads: util::HashMap<usize, ThreadHandle>,
83 |     /// Mio readiness flagging when threads finish execution.
84 |     thread_event_readiness: option::Option<mio::SetReadiness>,
85 | }
86 | 
87 | impl ThreadPool {
88 |     /// Construct a new ThreadPool.
89 |     pub fn new(thread_events_readiness: option::Option<mio::SetReadiness>) -> Self {
90 |         ThreadPool {
91 |             thread_id: 0,
92 |             joinable: sync::Arc::new(sync::Mutex::new(Vec::new())),
93 |             thread_event_readiness: thread_events_readiness,
94 |             threads: util::HashMap::default(),
95 |         }
96 |     }
97 | 
98 |     /// Spawn a new thread and assign it to the pool.
99 |     pub fn spawn<F>(&mut self, f: F) -> usize
100 |     where
101 |         F: Send + 'static + FnOnce(mio::Poll) -> (),
102 |     {
103 |         let id = self.next_thread_id();
104 |         let joinable_arc = self.joinable.clone();
105 |         let thread_event_readiness = self.thread_event_readiness.clone();
106 |         let handler = spawn(move |poller| {
107 |             f(poller);
108 | 
109 |             let mut joinable = joinable_arc.lock().unwrap();
110 |             joinable.push(id);
111 | 
112 |             if let Some(readiness) = thread_event_readiness {
113 |                 readiness
114 |                     .set_readiness(mio::Ready::readable())
115 |                     .expect("Failed to flag readiness for ThreadPool event!");
116 |             }
117 |         });
118 |         self.threads.insert(id, handler);
119 |         id
120 |     }
121 | 
122 |     fn next_thread_id(&mut self) -> usize {
123 |         let thread_id = self.thread_id;
124 |         self.thread_id += 1;
125 |         thread_id
126 |     }
127 | 
128 |     /// Block on completion of all executing threads.
129 |     pub fn join(mut self) -> Vec<usize> {
130 |         self.threads.drain().for_each(|(_, h)| h.join());
131 |         self.join_ready()
132 |     }
133 | 
134 |     /// Join all completed threads.
135 |     pub fn join_ready(&mut self) -> Vec<usize> {
136 |         let mut joinable = self.joinable.lock().unwrap();
137 |         let mut joined = Vec::new();
138 |         while let Some(id) = joinable.pop() {
139 |             if let Some(handle) = self.threads.remove(&id) {
140 |                 handle.join();
141 |             }
142 |             joined.push(id);
143 |         }
144 |         joined
145 |     }
146 | 
147 |     /// Serially signal shutdown and block for completion of all threads.
148 |     pub fn shutdown(mut self) -> Vec<usize> {
149 |         self.threads.drain().for_each(|(_, h)| h.shutdown());
150 |         self.join_ready()
151 |     }
152 | }
153 | 
--------------------------------------------------------------------------------
/src/time.rs:
--------------------------------------------------------------------------------
1 | //! Collection of time utilities for cernan
2 | //!
3 | //! Time in cernan is not based strictly on wall-clock. We keep a global clock
4 | //! for cernan and update it ourselves periodically. See `update_time` in this
5 | //! module for more details.
6 | 
7 | use chrono::offset::Utc;
8 | use std::sync::atomic::{AtomicUsize, Ordering};
9 | use std::{thread, time};
10 | 
11 | lazy_static! {
12 |     static ref NOW: AtomicUsize = AtomicUsize::new(Utc::now().timestamp() as usize);
13 | }
14 | 
15 | /// Return the current time in epoch seconds
16 | pub fn now() -> i64 {
17 |     NOW.load(Ordering::Relaxed) as i64
18 | }
19 | 
20 | /// Update cernan's view of time every 500ms. Time is in UTC.
21 | pub fn update_time() {
22 |     let dur = time::Duration::from_millis(500);
23 |     loop {
24 |         thread::sleep(dur);
25 |         let now = Utc::now().timestamp() as usize;
26 |         let order = Ordering::Relaxed;
27 |         NOW.store(now, order);
28 |     }
29 | }
30 | 
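// Editor's note: the test below is an illustrative sketch, not part of the
// original tree. The cached clock only advances while some thread runs
// `update_time`, so embedders dedicate a thread to it at startup, as here.
#[cfg(test)]
mod clock_example {
    use super::*;

    #[test]
    fn clock_is_driven_by_update_time() {
        // `update_time` never returns; detach it as cernan's binary does.
        thread::spawn(update_time);
        let t0 = now();
        thread::sleep(time::Duration::from_millis(1100));
        // Reads are cheap atomic loads and never move backward.
        assert!(now() >= t0);
    }
}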
31 | /// Pause a thread of execution
32 | ///
33 | /// This function pauses the thread of execution for a length of time derived
34 | /// from the number of attempts made so far. The attempts input exponentially
35 | /// increases the length of the delay, from 0 milliseconds up to a cap of 512.
36 | /// A delay of attempt x will pause the thread of execution for:
37 | ///
38 | /// - x = 0 -> 0 ms
39 | /// - x = 1 -> 1 ms
40 | /// - 2 <= x < 9 -> 2**x ms; x >= 9 -> 512 ms
41 | #[inline]
42 | pub fn delay(attempts: u32) {
43 |     let delay = match attempts {
44 |         0 => return,
45 |         1 => 1,
46 |         2 => 4,
47 |         3 => 8,
48 |         4 => 16,
49 |         5 => 32,
50 |         6 => 64,
51 |         7 => 128,
52 |         8 => 256,
53 |         _ => 512,
54 |     };
55 |     let sleep_time = time::Duration::from_millis(delay as u64);
56 |     thread::sleep(sleep_time);
57 | }
58 | 
--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
1 | //! Utility module, a grab-bag of functionality
2 | use crate::constants;
3 | use crate::metric;
4 | use hopper;
5 | use mio;
6 | use seahash::SeaHasher;
7 | use slab;
8 | use std::collections;
9 | use std::hash;
10 | use std::ops::{Index, IndexMut};
11 | use std::sync::atomic::{AtomicUsize, Ordering};
12 | 
13 | /// Number of dropped events due to channel being totally full
14 | pub static UTIL_SEND_HOPPER_ERROR_FULL: AtomicUsize = AtomicUsize::new(0);
15 | 
16 | /// Cernan hashmap
17 | ///
18 | /// In most cases where cernan needs a hashmap we've got smallish inputs as keys
19 | /// and, more, have a smallish number of total elements (< 100k) to store in the
20 | /// map. This hashmap is specialized to address that common use-case.
21 | pub type HashMap<K, V> =
22 |     collections::HashMap<K, V, hash::BuildHasherDefault<SeaHasher>>;
23 | 
24 | /// A vector of `hopper::Sender`s.
25 | pub type Channel = Vec<hopper::Sender<metric::Event>>;
26 | 
27 | /// Send a `metric::Event` into a `Channel`.
28 | pub fn send(chans: &mut Channel, mut event: metric::Event) {
29 |     if chans.is_empty() {
30 |         // Nothing to send to.
31 |         return;
32 |     }
33 | 
34 |     let max: usize = chans.len().saturating_sub(1);
35 |     if max != 0 {
36 |         for chan in &mut chans[1..] {
37 |             let mut snd_event = event.clone();
38 |             while let Err(res) = chan.send(snd_event) {
39 |                 // There are a variety of errors that hopper will signal back up
40 |                 // when we do a send. The only one we care about is
41 |                 // `Error::Full`, meaning that all disk and memory buffer space
42 |                 // is consumed. We drop the event on the floor in that case.
43 |                 match res.1 {
44 |                     hopper::Error::Full => {
45 |                         UTIL_SEND_HOPPER_ERROR_FULL.fetch_add(1, Ordering::Relaxed);
46 |                         break;
47 |                     }
48 |                     _ => {
49 |                         snd_event = res.0;
50 |                         continue;
51 |                     }
52 |                 }
53 |             }
54 |         }
55 |     }
56 |     while let Err(res) = chans[0].send(event) {
57 |         match res.1 {
58 |             hopper::Error::Full => {
59 |                 UTIL_SEND_HOPPER_ERROR_FULL.fetch_add(1, Ordering::Relaxed);
60 |                 break;
61 |             }
62 |             _ => {
63 |                 event = res.0;
64 |                 continue;
65 |             }
66 |         }
67 |     }
68 | }
69 | 
70 | /// Determine the state of a buffering queue, whether open or closed.
71 | ///
72 | /// Cernan is architected to be a push-based system. It copes with demand rushes
73 | /// by buffering to disk -- via the hopper queues -- and rejecting memory-based
74 | /// storage with overload signals. This signal, in particular, limits the amount
75 | /// of information delivered to a filter / sink by declaring that said filter /
76 | /// sink's input 'valve' is closed. Exactly how and why a filter / sink declares
77 | /// its valve state is left to the implementation.
78 | #[derive(Debug, PartialEq)]
79 | pub enum Valve {
80 |     /// In the `Open` state a filter / sink will accept new inputs
81 |     Open,
82 |     /// In the `Closed` state a filter / sink will reject new inputs, backing
83 |     /// them up in the communication queue.
84 |     Closed,
85 | }
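// Editor's note: the function below is an illustrative sketch, not part of the
// original tree, of how a filter / sink might compute its valve state. The
// high-water mark is a made-up parameter; real implementations choose their
// own criteria.
#[allow(dead_code)]
fn example_valve_state(buffered_events: usize, high_water: usize) -> Valve {
    if buffered_events >= high_water {
        // Reject new inputs; upstream hopper queues absorb the backlog.
        Valve::Closed
    } else {
        Valve::Open
    }
}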
84 | Closed, 85 | } 86 | 87 | #[inline] 88 | fn token_to_idx(token: mio::Token) -> usize { 89 | match token { 90 | mio::Token(idx) => idx, 91 | } 92 | } 93 | 94 | /// Wrapper around Slab 95 | pub struct TokenSlab { 96 | token_count: usize, 97 | tokens: slab::Slab, 98 | } 99 | 100 | impl Default for TokenSlab { 101 | fn default() -> Self { 102 | Self::new() 103 | } 104 | } 105 | 106 | impl Index for TokenSlab { 107 | type Output = E; 108 | 109 | /// Returns Evented object corresponding to Token. 110 | fn index(&self, token: mio::Token) -> &E { 111 | &self.tokens[token_to_idx(token)] 112 | } 113 | } 114 | 115 | impl IndexMut for TokenSlab { 116 | fn index_mut(&mut self, token: mio::Token) -> &mut E { 117 | &mut self.tokens[token_to_idx(token)] 118 | } 119 | } 120 | 121 | /// Interface wrapping a subset of Slab such 122 | /// that we can magically translate indices to 123 | /// `mio::token`. 124 | impl TokenSlab { 125 | /// Constructs a new TokenSlab with a capacity derived from the value 126 | /// of constants::SYSTEM. 127 | pub fn new() -> TokenSlab { 128 | TokenSlab { 129 | token_count: 0, 130 | tokens: slab::Slab::with_capacity(token_to_idx(constants::SYSTEM)), 131 | } 132 | } 133 | 134 | /// Iterates over the underlying slab mapping index to mio::Evented. 135 | pub fn iter(&self) -> slab::Iter { 136 | self.tokens.iter() 137 | } 138 | 139 | /// Return the number of tokens stored in the TokenSlab. 140 | pub fn count(&self) -> usize { 141 | self.token_count 142 | } 143 | 144 | /// Inserts a new Evented into the slab, returning a mio::Token 145 | /// corresponding to the index of the newly inserted type. 146 | pub fn insert(&mut self, thing: E) -> mio::Token { 147 | let idx = self.tokens.insert(thing); 148 | self.token_count += 1; 149 | mio::Token::from(idx) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /upload-artifact.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | 6 | function usage() { 7 | echo "$0 " 8 | } 9 | 10 | if [ -z "$1" ]; then 11 | usage 12 | exit 1 13 | fi 14 | 15 | VERSION="${1}" 16 | 17 | mkdir -p target/artifact 18 | TARGET="target/artifact/cernan" 19 | 20 | container_id=$(docker create quay.io/postmates/cernan:$VERSION) 21 | # Output of docker cp is always a tar archive regardless of source 22 | docker cp $container_id:/usr/bin/cernan - | tar x -C target/artifact 23 | docker rm -v $container_id 24 | 25 | DEST="s3://artifacts.postmates.com/binaries/cernan/cernan-$VERSION" 26 | 27 | aws s3 cp $TARGET $DEST 28 | --------------------------------------------------------------------------------