├── .dockerignore ├── .github ├── codeql │ └── codeql-config.yml └── workflows │ └── codeql-analysis.yml ├── .gitignore ├── .rustfmt.toml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── Gene-Cernan-1-578x485.jpg ├── LICENSE.txt ├── README.md ├── benches ├── buckets.rs ├── protocols_graphite.rs └── protocols_statsd.rs ├── build-container.sh ├── codecov.yml ├── examples ├── configs │ ├── basic.toml │ ├── counting-example.toml │ ├── quickstart-files.toml │ ├── quickstart-filters.toml │ ├── quickstart.toml │ ├── receiver-config.toml │ └── transmitter-config.toml └── scripts │ ├── collectd_scrub.lua │ ├── frau_im_mond.lua │ └── keep_count.lua ├── resources ├── protobufs │ ├── native.proto │ └── prometheus.proto └── tests │ ├── data │ ├── data-deflate.avro │ ├── data-null.avro │ ├── data-snappy.avro │ ├── users-deflate.avro │ ├── users-null.avro │ └── users-snappy.avro │ └── scripts │ ├── add_keys.lua │ ├── clear_logs.lua │ ├── clear_metrics.lua │ ├── collectd_scrub.lua │ ├── demonstrate_require.lua │ ├── field_from_path.lua │ ├── identity.lua │ ├── insufficient_args.lua │ ├── json_parse.lua │ ├── keep_count.lua │ ├── lib │ ├── demo.lua │ └── json.lua │ ├── lua_error.lua │ ├── missing_func.lua │ ├── remove_keys.lua │ └── set_value.lua ├── src ├── bin │ └── cernan.rs ├── buckets.rs ├── config.rs ├── constants.rs ├── filter │ ├── delay_filter.rs │ ├── flush_boundary_filter.rs │ ├── json_encode_filter.rs │ ├── mod.rs │ └── programmable_filter.rs ├── http.rs ├── lib.rs ├── matrix.rs ├── metric │ ├── ackbag.rs │ ├── event.rs │ ├── logline.rs │ ├── mod.rs │ └── telemetry.rs ├── protocols │ ├── graphite.rs │ ├── mod.rs │ ├── native.rs │ ├── prometheus.rs │ └── statsd.rs ├── sink │ ├── console.rs │ ├── elasticsearch.rs │ ├── influxdb.rs │ ├── kafka.rs │ ├── mod.rs │ ├── native.rs │ ├── null.rs │ ├── prometheus.rs │ └── wavefront.rs ├── source │ ├── avro.rs │ ├── file │ │ ├── file_server.rs │ │ ├── file_watcher.rs │ │ └── mod.rs │ ├── flush.rs │ ├── graphite.rs │ ├── internal.rs │ ├── mod.rs │ ├── native.rs │ ├── nonblocking.rs │ ├── statsd.rs │ └── tcp.rs ├── thread.rs ├── time.rs └── util.rs ├── tests └── programmable_filter.rs └── upload-artifact.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /.github/codeql/codeql-config.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL config" 2 | 3 | queries: 4 | - uses: security-extended 5 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "Code scanning - action" 2 | 3 | on: 4 | push: 5 | branches: [master, ] 6 | schedule: 7 | - cron: '0 8 * * 2' 8 | 9 | jobs: 10 | CodeQL-Build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v2 17 | with: 18 | # We must fetch at least the immediate parents so that if this is 19 | # a pull request then we can checkout the head. 20 | fetch-depth: 2 21 | 22 | # If this run was triggered by a pull request event, then checkout 23 | # the head of the pull request instead of the merge commit. 24 | - run: git checkout HEAD^2 25 | if: ${{ github.event_name == 'pull_request' }} 26 | 27 | # Initializes the CodeQL tools for scanning. 
28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@v1 30 | # Override language selection by uncommenting this and choosing your languages 31 | # with: 32 | # languages: go, javascript, csharp, python, cpp, java 33 | with: 34 | config-file: ./.github/codeql/codeql-config.yml 35 | 36 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 37 | # If this step fails, then you should remove it and run the build manually (see below) 38 | - name: Autobuild 39 | uses: github/codeql-action/autobuild@v1 40 | 41 | # ℹ️ Command-line programs to run using the OS shell. 42 | # 📚 https://git.io/JvXDl 43 | 44 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 45 | # and modify them (or add more) to build your code if your project 46 | # uses a compiled language 47 | 48 | #- run: | 49 | # make bootstrap 50 | # make release 51 | 52 | - name: Perform CodeQL Analysis 53 | uses: github/codeql-action/analyze@v1 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .criterion/ 2 | data/ 3 | target/ 4 | src/metrics/statsd.rs 5 | src/metrics/graphite.rs 6 | *.bk 7 | /tags 8 | /.vscode 9 | tmp/ 10 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 87 2 | format_strings = false 3 | wrap_comments = true 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | cache: cargo 3 | rust: 4 | - stable 5 | - beta 6 | - nightly 7 | 8 | before_script: 9 | - rustup component add rustfmt 10 | - rustup component add clippy 11 | script: 12 | - cargo fmt --all -- --check 13 | - cargo clippy -- -A clippy::redundant_field_names 14 | - cargo clean 15 | - cargo test 16 | 17 | matrix: 18 | allow_failures: 19 | - rust: nightly 20 | - rust: beta 21 | 22 | before_install: 23 | - sudo apt-get update 24 | 25 | install: 26 | - PATH=$PATH:/home/travis/.cargo/bin 27 | 28 | addons: 29 | apt: 30 | packages: 31 | - libcurl4-openssl-dev 32 | - libelf-dev 33 | - libdw-dev 34 | - cmake 35 | - gcc 36 | - binutils-dev 37 | - libiberty-dev 38 | 39 | after_success: | 40 | wget https://github.com/SimonKagstrom/kcov/archive/v34.tar.gz && 41 | tar xzf v34.tar.gz && 42 | cd kcov-34 && 43 | mkdir build && 44 | cd build && 45 | cmake .. && 46 | make && 47 | sudo make install && 48 | cd ../.. 
&& 49 | rm -rf kcov-34 && 50 | find target/debug -maxdepth 1 -name 'cernan-*' -type f | while read file; do 51 | [ -x $file ] || continue; 52 | mkdir -p "target/cov/$(basename $file)"; 53 | kcov --exclude-pattern=/.cargo,/usr/lib --include-path="$(pwd)" --verify "target/cov/$(basename $file)" "$file"; 54 | done && 55 | bash <(curl -s https://codecov.io/bash) && 56 | echo "Uploaded code coverage" 57 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at brian@troutwine.us. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to cernan 2 | 3 | Hey there, wow! Cernan is a collaborative effort and we're really excited to see 4 | that you're giving it a shot. Thank you! 5 | 6 | ## Feature Requests 7 | 8 | If you'd like to request we add a feature to cernan, go right ahead! Please 9 | create an issue in [our tracker](https://github.com/postmates/cernan/issues) and 10 | tag it as a "feature request". 11 | 12 | ## Bug Reports 13 | 14 | Cernan is intended to run on a variety of hosts but the developers, as of this 15 | writing, have access to a limited pool of systems. If you've caught a bug please 16 | do create an issue in [our tracker](http://github.com/postmates/cernan/issues). 17 | Here's a template that you can use to file a bug, though it's not necessary to 18 | use it exactly: 19 | 20 | 21 | 22 | I tried this: 23 | 24 | 25 | 26 | I expected to see this happen: 27 | 28 | Instead, this happened: 29 | 30 | ## Meta 31 | 32 | `rustc --version --verbose`: 33 | 34 | `cernan --version`: 35 | 36 | Backtrace: 37 | 38 | All three components are important: what you did, what you expected, what 39 | happened instead. Please include the output of `rustc --version --verbose`, 40 | which includes important information about what platform you're on and what 41 | version of Rust you're using to compile cernan. 42 | 43 | Sometimes, a backtrace is helpful, and so including that is nice. To get a 44 | backtrace, set the `RUST_BACKTRACE` environment variable to a value other than 45 | `0`. The easiest way to do this is to invoke cernan like this: 46 | 47 | ```bash 48 | $ RUST_BACKTRACE=1 cernan ... 49 | ``` 50 | 51 | ## Pull Requests 52 | 53 | Pull requests are the mechanism we use to incorporate changes to cernan. GitHub 54 | itself has [documentation](https://docs.github.com/en/pull-requests) on 55 | using the Pull Request feature. We use the 'fork and pull' model described 56 | there. 57 | 58 | Please make pull requests against the `master` branch. 59 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Brian L. Troutwine ", 3 | "John Koenig ", 4 | "Tom Santero "] 5 | description = "A telemetry and logging aggregation server."
6 | keywords = ["statsd", "graphite", "telemetry", "logging", "metrics"] 7 | license = "MIT" 8 | name = "cernan" 9 | readme = "README.md" 10 | repository = "https://github.com/postmates/cernan" 11 | version = "0.9.2-pre" 12 | edition = "2018" 13 | 14 | [[bin]] 15 | name = "cernan" 16 | doc = false 17 | 18 | [dependencies] 19 | base64 = "0.9.0" 20 | byteorder = "1.0" 21 | chan-signal = "0.3.1" 22 | chrono = "0.4" 23 | clap = "2.27" 24 | coco = "0.3" 25 | elastic = "0.20" 26 | elastic_types = "0.20" 27 | fern = "0.5" 28 | flate2 = "1.0" 29 | futures = "0.1" 30 | glob = "0.2.11" 31 | hopper = "0.4" 32 | lazy_static = "1.0" 33 | libc = "0.2" 34 | log = "0.4" 35 | mond = "0.1" 36 | mio = "0.6.11" 37 | openssl-probe = "0.1" 38 | protobuf = "1.7" 39 | quantiles = { version = "0.7", features = ["serde_support"] } 40 | rand = "0.5" 41 | rdkafka = "0.17.0" 42 | regex = "1.0" 43 | reqwest = "0.8" 44 | seahash = "3.0" 45 | serde = "1.0" 46 | serde-avro = "0.5.0" 47 | serde_derive = "1.0" 48 | serde_json = "1.0" 49 | slab = "0.4" 50 | tiny_http = "0.6" 51 | toml = "0.4" 52 | url = "1.6" 53 | uuid = {version = "0.6", features = ["v4", "serde"]} 54 | 55 | [dev-dependencies] 56 | tempdir = "0.3" 57 | quickcheck = "0.6" 58 | criterion = "0.2.9" 59 | 60 | [profile.dev] 61 | codegen-units = 4 62 | 63 | [profile.release] 64 | lto = true 65 | 66 | [[bench]] 67 | name = "buckets" 68 | harness = false 69 | 70 | [[bench]] 71 | name = "protocols_statsd" 72 | harness = false 73 | 74 | [[bench]] 75 | name = "protocols_graphite" 76 | harness = false 77 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ekidd/rust-musl-builder:1.32.0 as builder 2 | 3 | RUN VERS=1.2.11 && \ 4 | cd /home/rust/libs && \ 5 | curl -LO http://zlib.net/zlib-$VERS.tar.gz && \ 6 | tar xzf zlib-$VERS.tar.gz && cd zlib-$VERS && \ 7 | CC=musl-gcc CFLAGS=-fPIC ./configure --static --prefix=/usr/local/musl && \ 8 | make && sudo make install && \ 9 | cd .. && rm -rf zlib-$VERS.tar.gz zlib-$VERS 10 | 11 | RUN cd /home/rust/libs && \ 12 | curl -LO https://github.com/lz4/lz4/archive/master.tar.gz && \ 13 | tar xfz master.tar.gz && \ 14 | ls && \ 15 | cd lz4-master && \ 16 | CC=musl-gcc CFLAGS=-fPIC make prefix=/usr/local/musl && \ 17 | sudo make install prefix=/usr/local/musl && \ 18 | cd .. && \ 19 | rm -rf master.tar.gz lz4-master 20 | 21 | RUN sudo apt-get update && \ 22 | sudo apt-get install -y python2.7-minimal && \ 23 | sudo ln -sf /usr/bin/python2.7 /usr/bin/python 24 | 25 | ENV CC=musl-gcc \ 26 | CFLAGS=-I/usr/local/musl/include \ 27 | LDFLAGS=-L/usr/local/musl/lib 28 | 29 | COPY --chown=rust:rust . 
/source 30 | RUN cd /source && cargo build --release 31 | 32 | FROM alpine:3.8 33 | 34 | RUN apk update \ 35 | && apk upgrade --no-cache 36 | 37 | RUN apk add --no-cache --update \ 38 | ca-certificates \ 39 | llvm-libunwind \ 40 | openssl && \ 41 | update-ca-certificates && \ 42 | rm -rf /var/cache/apk/* && \ 43 | mkdir -p /etc/cernan/scripts 44 | 45 | COPY --from=builder /source/target/x86_64-unknown-linux-musl/release/cernan /usr/bin/cernan 46 | COPY examples/configs/quickstart.toml /etc/cernan/cernan.toml 47 | 48 | ENV STATSD_PORT 8125 49 | 50 | ENTRYPOINT ["/usr/bin/cernan"] 51 | CMD ["--config", "/etc/cernan/cernan.toml"] 52 | -------------------------------------------------------------------------------- /Gene-Cernan-1-578x485.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/Gene-Cernan-1-578x485.jpg -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Original work Copyright (c) 2016, Postmates, Inc. 4 | Modified work Copyright (c) 2015, Mark Story 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a 7 | copy of this software and associated documentation files (the "Software"), 8 | to deal in the Software without restriction, including without limitation 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | and/or sell copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Now Archived and Forked 2 | _cernan_ will not be maintained in this repository going forward. Please use, create issues on, and make PRs to the fork of _cernan_ located [here](https://github.com/blt/cernan). 3 | 4 | # cernan - telemetry aggregation and shipping, last up the ladder 5 | 6 | [![Build Status](https://travis-ci.com/postmates/cernan.svg?branch=master)](https://travis-ci.com/postmates/cernan) [![Codecov](https://img.shields.io/codecov/c/github/postmates/cernan.svg)](https://codecov.io/gh/postmates/cernan) 7 | 8 | ![Eugene Cernan, Apollo 17 EVA](Gene-Cernan-1-578x485.jpg) 9 | 10 | Cernan is a telemetry and logging aggregation server. It exposes multiple 11 | interfaces for ingestion and can emit to multiple aggregation sources while doing 12 | in-flight manipulation of data. Cernan has minimal CPU and memory requirements 13 | and is intended to service bursty telemetry _without_ load shedding.
Cernan aims 14 | to be _reliable_ and _convenient_ to use, both for application engineers and 15 | operations staff. 16 | 17 | Why you might choose to use cernan: 18 | 19 | * You need to ingest telemetry from multiple protocols. 20 | * You need to multiplex telemetry over aggregation services. 21 | * You want to convert log lines into telemetry. 22 | * You want to convert telemetry into log lines. 23 | * You want to transform telemetry or log lines in-flight. 24 | 25 | If you'd like to learn more, please do have a look in 26 | our [wiki](https://github.com/postmates/cernan/wiki/). 27 | 28 | # Quickstart 29 | 30 | To build cernan you will need to 31 | have [Rust](https://www.rust-lang.org/en-US/). This should be as simple as: 32 | 33 | > curl -sSf https://static.rust-lang.org/rustup.sh | sh 34 | 35 | Once Rust is installed, from the root of this project run: 36 | 37 | > cargo run -- --config examples/configs/quickstart.toml 38 | 39 | and you're good to go. Cernan will report to stdout what ports it is now 40 | listening on. If you would like to debug your service--to determine if the 41 | telemetry you intend is issued--run cernan like: 42 | 43 | > cargo run -- -vvvv --config examples/configs/quickstart.toml 44 | 45 | and full trace output will be reported to stdout. 46 | 47 | # Usage 48 | 49 | The cernan server has a few command-line toggles to control its behaviour: 50 | 51 | ``` 52 | -C, --config The config file to feed in. 53 | -v Turn on verbose output. 54 | ``` 55 | 56 | The verbose flag `-v` allows multiples, each addition cranking up the verbosity 57 | by one. So: 58 | 59 | * `-v` -- error, warning 60 | * `-vv` -- error, warning, info 61 | * `-vvv` -- error, warning, info, debug 62 | * `-vvvv` -- error, warning, info, debug, trace 63 | 64 | # License 65 | 66 | cernan is copyright © 2017-2018 Postmates, Inc and released to the public under the 67 | terms of the MIT license. 
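As an end-to-end check of the quickstart setup described above, you can lob a single statsd datagram at a running instance yourself. The sketch below is an editorial illustration, not part of the repository: it assumes cernan is running locally with the quickstart config (statsd source on its default port 8125), and the metric name `quickstart.test` is invented for the example.

```rust
// Fire one statsd counter increment at a locally running cernan.
// Assumptions: quickstart.toml is in use, so sources.statsd.primary
// listens on 127.0.0.1:8125; the metric name here is made up.
use std::net::UdpSocket;

fn main() -> std::io::Result<()> {
    // Bind an ephemeral local UDP port for the client side...
    let socket = UdpSocket::bind("127.0.0.1:0")?;
    // ...and send a single counter in statsd wire format: "<name>:<value>|c".
    socket.send_to(b"quickstart.test:1|c", "127.0.0.1:8125")?;
    Ok(())
}
```

Run cernan with `-vvvv` as described in the Quickstart section and the parsed telemetry should appear on stdout after the next flush interval.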
68 | -------------------------------------------------------------------------------- /benches/buckets.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | 6 | extern crate cernan; 7 | extern crate chrono; 8 | extern crate rand; 9 | 10 | use cernan::buckets; 11 | use cernan::metric::{AggregationMethod, Telemetry}; 12 | use chrono::{TimeZone, Utc}; 13 | use rand::distributions::Alphanumeric; 14 | use rand::{Rng, SeedableRng, XorShiftRng}; 15 | 16 | fn experiment(input: &ExperimentInput) { 17 | let total_adds = input.total_adds; 18 | let name_pool_size = input.name_pool_size; 19 | let mut rng: XorShiftRng = SeedableRng::from_seed([ 20 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 21 | ]); 22 | let aggregations = [ 23 | AggregationMethod::Histogram, 24 | AggregationMethod::Set, 25 | AggregationMethod::Sum, 26 | AggregationMethod::Summarize, 27 | ]; 28 | let times = [ 29 | Utc.ymd(1972, 12, 11) 30 | .and_hms_milli(11, 59, 49, 0) 31 | .timestamp(), 32 | Utc.ymd(1972, 12, 11) 33 | .and_hms_milli(11, 59, 50, 0) 34 | .timestamp(), 35 | Utc.ymd(1972, 12, 11) 36 | .and_hms_milli(11, 59, 51, 0) 37 | .timestamp(), 38 | Utc.ymd(1972, 12, 11) 39 | .and_hms_milli(11, 59, 52, 0) 40 | .timestamp(), 41 | Utc.ymd(1972, 12, 11) 42 | .and_hms_milli(11, 59, 52, 0) 43 | .timestamp(), 44 | ]; 45 | let mut pool: Vec<String> = Vec::with_capacity(name_pool_size); 46 | for _ in 0..name_pool_size { 47 | pool.push(rng.sample_iter(&Alphanumeric).take(10).collect()); 48 | } 49 | let mut bucket = buckets::Buckets::default(); 50 | 51 | for _ in 0..total_adds { 52 | bucket.add( 53 | Telemetry::new() 54 | .value(rng.gen::<f64>()) 55 | .name(rng.choose(&pool).unwrap().clone()) 56 | .kind(*rng.choose(&aggregations).unwrap()) 57 | .harden() 58 | .unwrap() 59 | .timestamp(*rng.choose(&times).unwrap()), 60 | ); 61 | } 62 | } 63 | 64 | #[derive(Debug)] 65 | struct ExperimentInput { 66 | total_adds: usize, 67 | name_pool_size: usize, 68 | } 69 | 70 | impl ::std::fmt::Display for ExperimentInput { 71 | fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { 72 | write!(f, "({}, {})", self.total_adds, self.name_pool_size) 73 | } 74 | } 75 | 76 | fn benchmark(c: &mut Criterion) { 77 | let mut inputs = Vec::with_capacity(32); 78 | for i in 6..8 { 79 | for j in 6..10 { 80 | inputs.push(ExperimentInput { 81 | total_adds: 2usize.pow(i), 82 | name_pool_size: 2usize.pow(j), 83 | }); 84 | } 85 | } 86 | 87 | c.bench_function_over_inputs( 88 | "bucket_add", 89 | |b, input| { 90 | b.iter(|| experiment(input)); 91 | }, 92 | inputs, 93 | ); 94 | } 95 | 96 | criterion_group!(benches, benchmark); 97 | criterion_main!(benches); 98 | -------------------------------------------------------------------------------- /benches/protocols_graphite.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | 6 | extern crate cernan; 7 | 8 | use cernan::metric::Telemetry; 9 | use cernan::protocols::graphite::parse_graphite; 10 | use std::sync; 11 | 12 | fn experiment() { 13 | let packet = "fst 1 101 14 | snd -2.0 202 15 | thr 3 303 16 | fth@fth 4 404 17 | fv%fv 5 505 18 | s-th 6 606"; 19 | 20 | let mut res = Vec::new(); 21 | let metric = sync::Arc::new(Some(Telemetry::default())); 22 | assert!(parse_graphite(packet, &mut res, &metric)); 23 | } 24 | 25 | fn benchmark(c: &mut Criterion) { 26 | c.bench_function("parse_graphite", |b| { 27
| b.iter(experiment); 28 | }); 29 | } 30 | 31 | criterion_group!(benches, benchmark); 32 | criterion_main!(benches); 33 | -------------------------------------------------------------------------------- /benches/protocols_statsd.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | 6 | extern crate cernan; 7 | 8 | use cernan::metric::Telemetry; 9 | use cernan::protocols::statsd::parse_statsd; 10 | use cernan::source::StatsdParseConfig; 11 | use std::sync; 12 | 13 | fn experiment() { 14 | let packet = "zrth:0|g 15 | fst:-1.1|ms 16 | snd:+2.2|g 17 | thd:3.3|h 18 | fth:4|c 19 | fvth:5.5|c|@0.1 20 | sxth:-6.6|g 21 | svth:+7.77|g"; 22 | 23 | let metric = sync::Arc::new(Some(Telemetry::default())); 24 | let config = sync::Arc::new(StatsdParseConfig::default()); 25 | let mut res = Vec::new(); 26 | assert!(parse_statsd(packet, &mut res, &metric, &config)); 27 | } 28 | 29 | fn benchmark(c: &mut Criterion) { 30 | c.bench_function("parse_statsd", |b| { 31 | b.iter(experiment); 32 | }); 33 | } 34 | 35 | criterion_group!(benches, benchmark); 36 | criterion_main!(benches); 37 | -------------------------------------------------------------------------------- /build-container.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | set -o xtrace 7 | 8 | function usage() { 9 | echo "$0 " 10 | } 11 | 12 | if [ -z "$1" ]; then 13 | usage 14 | exit 1 15 | fi 16 | 17 | cargo clean 18 | 19 | VERSION="${1}" 20 | 21 | docker build -t "quay.io/postmates/cernan:$VERSION" . 22 | docker push "quay.io/postmates/cernan:$VERSION" 23 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | precision: 2 3 | round: down 4 | range: 70...100 5 | 6 | status: 7 | project: true 8 | changes: false 9 | 10 | patch: 11 | default: 12 | target: 90% 13 | 14 | project: 15 | default: 16 | target: 91% 17 | threshold: 1% 18 | 19 | comment: 20 | layout: "header, diff" 21 | behavior: default # update if exists else create new 22 | -------------------------------------------------------------------------------- /examples/configs/basic.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data/" 2 | scripts-directory = "examples/scripts/" 3 | 4 | flush-interval = 5 5 | 6 | [tags] 7 | source = "cernan" 8 | 9 | [sources] 10 | [sources.internal] 11 | forwards = ["sinks.console", "sinks.null"] 12 | 13 | [sources.statsd.primary] 14 | enabled = true 15 | port = 8125 16 | forwards = ["sinks.console", "filters.delay.two_seconds"] 17 | 18 | [sources.native.primary] 19 | ip = "127.0.0.1" 20 | port = 1972 21 | forwards = ["filters.delay.two_seconds"] 22 | 23 | [sources.graphite.primary] 24 | enabled = true 25 | port = 2004 26 | forwards = ["filters.programmable.collectd_scrub"] 27 | 28 | [sources.files] 29 | [sources.files.example_log] 30 | path = "example.log" 31 | forwards = ["sinks.firehose.stream_two"] 32 | 33 | [sources.files.foo_log] 34 | path = "foo.log" 35 | forwards = ["sinks.firehose.stream_two"] 36 | 37 | [filters] 38 | [filters.programmable.collectd_scrub] 39 | script = "collectd_scrub.lua" 40 | forwards = ["filters.delay.two_seconds"] 41 | 42 | [filters.delay.two_seconds] 43 | tolerance = 2 44 | forwards = 
["filters.flush_boundary.two_seconds"] 45 | 46 | [filters.flush_boundary.two_seconds] 47 | tolerance = 2 48 | forwards = ["sinks.console", "sinks.null", "sinks.influxdb", "sinks.prometheus"] 49 | 50 | [sinks] 51 | [sinks.console] 52 | bin_width = 1 53 | 54 | [sinks.null] 55 | 56 | # [sinks.wavefront] 57 | # port = 2878 58 | # host = "127.0.0.1" 59 | # bin_width = 1 60 | 61 | [sinks.influxdb] 62 | port = 8086 63 | host = "127.0.0.1" 64 | bin_width = 1 65 | 66 | [sinks.prometheus] 67 | port = 8080 68 | host = "127.0.0.1" 69 | bin_width = 1 70 | 71 | [sinks.firehose.stream_two] 72 | delivery_stream = "stream_two" 73 | batch_size = 800 74 | region = "us-east-1" 75 | -------------------------------------------------------------------------------- /examples/configs/counting-example.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data/" 2 | scripts-directory = "examples/scripts/" 3 | 4 | flush-interval = 1 5 | 6 | [tags] 7 | source = "cernan" 8 | 9 | [sources] 10 | [sources.statsd.primary] 11 | enabled = true 12 | port = 8125 13 | forwards = ["filters.keep_count"] 14 | 15 | [filters] 16 | [filters.keep_count] 17 | script = "keep_count.lua" 18 | forwards = ["sinks.console"] 19 | 20 | [sinks] 21 | [sinks.console] 22 | bin_width = 1 23 | flush_interval = 2 24 | -------------------------------------------------------------------------------- /examples/configs/quickstart-files.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data/" 2 | scripts-directory = "examples/scripts/" 3 | 4 | flush-interval = 5 5 | 6 | [tags] 7 | source = "cernan" 8 | 9 | [sources] 10 | [sources.files] 11 | [sources.files.tmp_logs] 12 | path = "/tmp/log/*.log" 13 | forwards = ["sinks.firehose.stream_two"] 14 | 15 | [sinks] 16 | [sinks.firehose.stream_two] 17 | delivery_stream = "stream_two" 18 | batch_size = 800 19 | region = "us-east-1" 20 | -------------------------------------------------------------------------------- /examples/configs/quickstart-filters.toml: -------------------------------------------------------------------------------- 1 | scripts-directory = "examples/scripts/" 2 | flush-interval = 10 3 | 4 | [sources] 5 | [sources.statsd.primary] 6 | port = 8125 7 | forwards = ["filters.frau_im_mond"] 8 | 9 | [filters] 10 | [filters.frau_im_mond] 11 | script = "frau_im_mond.lua" 12 | forwards = ["sinks.console"] 13 | 14 | [sinks] 15 | [sinks.console] 16 | -------------------------------------------------------------------------------- /examples/configs/quickstart.toml: -------------------------------------------------------------------------------- 1 | flush-interval = 10 2 | 3 | [sources] 4 | [sources.statsd.primary] 5 | port = 8125 6 | forwards = ["sinks.console"] 7 | 8 | [sinks] 9 | [sinks.console] 10 | -------------------------------------------------------------------------------- /examples/configs/receiver-config.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data-receiver/" 2 | 3 | [statsd] 4 | enabled = false 5 | 6 | [graphite] 7 | enabled = false 8 | 9 | flush-interval = 10 10 | 11 | [tags] 12 | service = "receiver" 13 | uhf_band = "s" 14 | source = "cernan" 15 | 16 | [federation_receiver] 17 | port = 1972 18 | ip = "0.0.0.0" 19 | 20 | [wavefront] 21 | port = 2878 22 | host = "127.0.0.1" 23 | 24 | [console] 25 | -------------------------------------------------------------------------------- 
/examples/configs/transmitter-config.toml: -------------------------------------------------------------------------------- 1 | data-directory = "data-transmitter/" 2 | 3 | [statsd] 4 | port = 8125 5 | 6 | [graphite] 7 | port = 2003 8 | 9 | flush-interval = 10 10 | 11 | [tags] 12 | service = "transmitter" 13 | source = "cernan" 14 | 15 | [federation_transmitter] 16 | port = 1972 17 | host = "127.0.0.1" 18 | -------------------------------------------------------------------------------- /examples/scripts/collectd_scrub.lua: -------------------------------------------------------------------------------- 1 | count_per_tick = 0 2 | 3 | function process_metric(pyld) 4 | count_per_tick = count_per_tick + 1 5 | 6 | local old_name = payload.metric_name(pyld, 1) 7 | local collectd, rest = string.match(old_name, "^(collectd)[%.@][%w_]+(.*)") 8 | if collectd ~= nil then 9 | local new_name = string.format("%s%s", collectd, rest) 10 | payload.set_metric_name(pyld, 1, new_name) 11 | end 12 | end 13 | 14 | function process_log(pyld) 15 | count_per_tick = count_per_tick + 1 16 | end 17 | 18 | function tick(pyld) 19 | payload.push_metric(pyld, "cernan_bridge.count_per_tick", count_per_tick) 20 | payload.push_log(pyld, string.format("count_per_tick: %s", count_per_tick)) 21 | count_per_tick = 0 22 | end 23 | -------------------------------------------------------------------------------- /examples/scripts/frau_im_mond.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.set_metric_name(pyld, 1, "frau_im_mond") 3 | end 4 | 5 | function process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /examples/scripts/keep_count.lua: -------------------------------------------------------------------------------- 1 | count_per_tick = 0 2 | 3 | function process_metric(pyld) 4 | count_per_tick = count_per_tick + 1 5 | end 6 | 7 | function process_log(pyld) 8 | count_per_tick = count_per_tick + 1 9 | end 10 | 11 | function tick(pyld) 12 | payload.push_metric(pyld, "count_per_tick", count_per_tick) 13 | payload.push_log(pyld, string.format("count_per_tick: %s", count_per_tick)) 14 | count_per_tick = 0 15 | end 16 | -------------------------------------------------------------------------------- /resources/protobufs/native.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Postmates Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | // SOFTWARE. 20 | 21 | // 22 | // Welcome! 23 | // 24 | // This file defines the protocol that cernan speaks natively. We hope that it's 25 | // a relatively straightforward protocol to implement. Cernan's native transport 26 | // is TCP. We require that all on-wire payloads have the following form: 27 | // 28 | // [--------------------------------|~~~~~~~~~~ . . . ~~~~~~~~~~~~] 29 | // ^ ^ 30 | // u32, payload length in bytes protobuf payload, of prefix len 31 | // 32 | // The protobuf payload conforms to the following definition. 33 | syntax = "proto2"; 34 | 35 | package com.postmates.cernan; 36 | option java_package = "com.postmates.cernan"; 37 | 38 | // 'Payload' - the top-level structure in each on-wire payload 39 | // 40 | // Payload is a container for repeated Telemetry and LogLines. There's not much 41 | // more to it than that. 42 | message Payload { 43 | repeated Telemetry points = 2; 44 | repeated LogLine lines = 3; 45 | } 46 | 47 | // 'LogLine' - a bit of unstructured text 48 | // 49 | // One of cernan's gigs is picking up logs from disk and transforming them 50 | // in-flight, shipping them off. This structure allows you to ship lines 51 | // directly via the native protocol without having to round-trip through disk 52 | // first. 53 | message LogLine { 54 | optional string path = 1; // unique 'location' of the log line 55 | optional string value = 2; // the line itself 56 | map<string, string> metadata = 3; // associated key/value metadata 57 | optional int64 timestamp_ms = 4; // milliseconds since the Unix epoch 58 | } 59 | 60 | // 'Telemetry' - a numeric measure of a thing 61 | // 62 | // Cernan's slightly more complicated gig is its 'telemetry' 63 | // subsystem. Telemetry is defined as a name and time associated collection of 64 | // measurements. In the structure we refer to these measurements as 65 | // 'samples'. The Telemetry structure makes it possible to associate multiple 66 | // samples in a single millisecond time window. Cernan will build a quantile 67 | // structure over these samples but you may further choose aggregation 68 | // interpretations by setting AggregationMethod. 69 | message Telemetry { 70 | optional string name = 1; // the unique name of the telemetry 71 | repeated double samples = 2 [ packed = true ]; // telemetry samples present in timestamp_ms 72 | optional bool persisted = 3 [ default = false ]; // persist metric across time windows 73 | optional AggregationMethod method = 4 [ default = SUMMARIZE ]; // see below 74 | map<string, string> metadata = 5; // associated key/value metadata 75 | optional int64 timestamp_ms = 6; // milliseconds since the Unix epoch 76 | repeated double bin_bounds = 7; // BIN inclusive upper bounds 77 | } 78 | 79 | // 'AggregationMethod' - an interpretation signal 80 | // 81 | // Cernan maintains quantile summaries for all Telemetry samples. Not all sinks 82 | // are capable of interpreting summaries natively. Cernan allows the client to 83 | // set preferred aggregations over the summaries for reporting to 'flat' 84 | // sinks. Sinks are allowed to ignore AggregationMethod at their 85 | // convenience. Additionally, aggregation time windows may be configured 86 | // per-sink and are not controllable through the protocol.
87 | enum AggregationMethod { 88 | // SUM keeps a sum of samples. This is often interpreted as a 89 | // per-window counter. 90 | SUM = 1; 91 | // SET preserves the last sample set into the Telemetry per time 92 | // window. 93 | SET = 2; 94 | // SUMMARIZE produces a quantile summary of the input samples per time 95 | // window. This is the default behaviour. 96 | SUMMARIZE = 3; 97 | // BIN produces a histogram summary of the input samples per time window. The 98 | // user will specify the bins' upper bounds. 99 | BIN = 4; 100 | } -------------------------------------------------------------------------------- /resources/protobufs/prometheus.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Prometheus Team 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | syntax = "proto2"; 15 | 16 | package io.prometheus.client; 17 | option java_package = "io.prometheus.client"; 18 | 19 | message LabelPair { 20 | optional string name = 1; 21 | optional string value = 2; 22 | } 23 | 24 | enum MetricType { 25 | COUNTER = 0; 26 | GAUGE = 1; 27 | SUMMARY = 2; 28 | UNTYPED = 3; 29 | HISTOGRAM = 4; 30 | } 31 | 32 | message Gauge { 33 | optional double value = 1; 34 | } 35 | 36 | message Counter { 37 | optional double value = 1; 38 | } 39 | 40 | message Quantile { 41 | optional double quantile = 1; 42 | optional double value = 2; 43 | } 44 | 45 | message Summary { 46 | optional uint64 sample_count = 1; 47 | optional double sample_sum = 2; 48 | repeated Quantile quantile = 3; 49 | } 50 | 51 | message Untyped { 52 | optional double value = 1; 53 | } 54 | 55 | message Histogram { 56 | optional uint64 sample_count = 1; 57 | optional double sample_sum = 2; 58 | repeated Bucket bucket = 3; // Ordered in increasing order of upper_bound, +Inf bucket is optional. 59 | } 60 | 61 | message Bucket { 62 | optional uint64 cumulative_count = 1; // Cumulative in increasing order. 63 | optional double upper_bound = 2; // Inclusive. 
64 | } 65 | 66 | message Metric { 67 | repeated LabelPair label = 1; 68 | optional Gauge gauge = 2; 69 | optional Counter counter = 3; 70 | optional Summary summary = 4; 71 | optional Untyped untyped = 5; 72 | optional Histogram histogram = 7; 73 | optional int64 timestamp_ms = 6; 74 | } 75 | 76 | message MetricFamily { 77 | optional string name = 1; 78 | optional string help = 2; 79 | optional MetricType type = 3; 80 | repeated Metric metric = 4; 81 | } 82 | -------------------------------------------------------------------------------- /resources/tests/data/data-deflate.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/data-deflate.avro -------------------------------------------------------------------------------- /resources/tests/data/data-null.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/data-null.avro -------------------------------------------------------------------------------- /resources/tests/data/data-snappy.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/data-snappy.avro -------------------------------------------------------------------------------- /resources/tests/data/users-deflate.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/users-deflate.avro -------------------------------------------------------------------------------- /resources/tests/data/users-null.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/users-null.avro -------------------------------------------------------------------------------- /resources/tests/data/users-snappy.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postmates/cernan/7f3f72ab0c785b260d9df5367f9658287d6891d0/resources/tests/data/users-snappy.avro -------------------------------------------------------------------------------- /resources/tests/scripts/add_keys.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.metric_set_tag(pyld, 1, "bizz", "bazz") 3 | end 4 | 5 | function process_log(pyld) 6 | payload.log_set_tag(pyld, 1, "bizz", "bazz") 7 | end 8 | 9 | function tick(pyld) 10 | end 11 | -------------------------------------------------------------------------------- /resources/tests/scripts/clear_logs.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | payload.clear_logs(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/clear_metrics.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.clear_metrics(pyld) 3 | end 4 | 5 | function 
process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/collectd_scrub.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | local old_name = payload.metric_name(pyld, 1) 3 | local collectd, rest = string.match(old_name, "^(collectd)[%.@][%w_-]+(.*)") 4 | if collectd ~= nil then 5 | local new_name = string.format("%s%s", collectd, rest) 6 | payload.set_metric_name(pyld, 1, new_name) 7 | end 8 | end 9 | 10 | function process_log(pyld) 11 | end 12 | 13 | function tick(pyld) 14 | end 15 | -------------------------------------------------------------------------------- /resources/tests/scripts/demonstrate_require.lua: -------------------------------------------------------------------------------- 1 | local demo = require "lib/demo" 2 | 3 | function process_metric(pyld) 4 | end 5 | 6 | function process_log(pyld) 7 | payload.log_set_tag(pyld, 1, "bizz", demo.demo()) 8 | end 9 | 10 | function tick(pyld) 11 | end 12 | -------------------------------------------------------------------------------- /resources/tests/scripts/field_from_path.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | local path = payload.log_path(pyld, 1) 6 | payload.log_set_field(pyld, 1, "foo", path) 7 | end 8 | 9 | function tick(pyld) 10 | end 11 | -------------------------------------------------------------------------------- /resources/tests/scripts/identity.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | end 6 | 7 | function tick(pyld) 8 | end 9 | -------------------------------------------------------------------------------- /resources/tests/scripts/insufficient_args.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.metric_remove_tag(pyld, 1) 3 | end 4 | 5 | function process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/json_parse.lua: -------------------------------------------------------------------------------- 1 | local json = require "lib/json" 2 | 3 | function process_metric(pyld) 4 | end 5 | 6 | function process_log(pyld) 7 | local line = payload.log_value(pyld, 1) 8 | local json_pyld = json.decode(line) 9 | payload.log_set_field(pyld, 1, "foo", json_pyld["foo"]) 10 | end 11 | 12 | function tick(pyld) 13 | end 14 | -------------------------------------------------------------------------------- /resources/tests/scripts/keep_count.lua: -------------------------------------------------------------------------------- 1 | count_per_tick = 0 2 | 3 | function process_metric(pyld) 4 | count_per_tick = count_per_tick + 1 5 | end 6 | 7 | function process_log(pyld) 8 | count_per_tick = count_per_tick + 1 9 | end 10 | 11 | function tick(pyld) 12 | payload.push_metric(pyld, "count_per_tick", count_per_tick) 13 | payload.push_log(pyld, string.format("count_per_tick: %s", count_per_tick)) 14 | count_per_tick = 0 15 | end 16 | -------------------------------------------------------------------------------- /resources/tests/scripts/lib/demo.lua: 
-------------------------------------------------------------------------------- 1 | local demo = {} 2 | 3 | function demo.demo() 4 | return "bazz" 5 | end 6 | 7 | return demo 8 | -------------------------------------------------------------------------------- /resources/tests/scripts/lua_error.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | error("boom") 3 | end 4 | 5 | function process_log(pyld) 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /resources/tests/scripts/missing_func.lua: -------------------------------------------------------------------------------- 1 | function process_log(pyld) 2 | end 3 | 4 | function tick(pyld) 5 | end 6 | -------------------------------------------------------------------------------- /resources/tests/scripts/remove_keys.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | payload.metric_remove_tag(pyld, 1, "bizz") 3 | end 4 | 5 | function process_log(pyld) 6 | payload.log_remove_tag(pyld, 1, "bizz") 7 | end 8 | 9 | function tick(pyld) 10 | end 11 | -------------------------------------------------------------------------------- /resources/tests/scripts/set_value.lua: -------------------------------------------------------------------------------- 1 | function process_metric(pyld) 2 | end 3 | 4 | function process_log(pyld) 5 | payload.log_set_value(pyld, 1, "foo") 6 | end 7 | 8 | function tick(pyld) 9 | end 10 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | //! Library level constants 2 | use mio; 3 | 4 | /// MIO token used to distinguish system events 5 | /// from other event sources. 6 | /// 7 | /// Note - It is assumed that sources will not hold 8 | /// more than 2048 addressable streams, 0 indexed. 9 | pub const SYSTEM: mio::Token = mio::Token(2048); 10 | -------------------------------------------------------------------------------- /src/filter/delay_filter.rs: -------------------------------------------------------------------------------- 1 | //! Filter streams to within a bounded interval of current time. 2 | //! 3 | //! This filter is intended to remove items from the stream which are too old, 4 | //! as defined by the current time and the configured `tolerance`. That is, if 5 | //! for some time `T`, `(T - time::now()).abs() > tolerance` the item associated 6 | //! with `T` will be rejected. 7 | 8 | use crate::filter; 9 | use crate::metric; 10 | use crate::time; 11 | use std::sync::atomic::{AtomicUsize, Ordering}; 12 | 13 | /// Total number of telemetry rejected for age 14 | pub static DELAY_TELEM_REJECT: AtomicUsize = AtomicUsize::new(0); 15 | /// Total number of telemetry accepted for age 16 | pub static DELAY_TELEM_ACCEPT: AtomicUsize = AtomicUsize::new(0); 17 | /// Total number of logline rejected for age 18 | pub static DELAY_LOG_REJECT: AtomicUsize = AtomicUsize::new(0); 19 | /// Total number of logline accepted for age 20 | pub static DELAY_LOG_ACCEPT: AtomicUsize = AtomicUsize::new(0); 21 | 22 | /// Filter streams to within a bounded interval of current time. 23 | /// 24 | /// This filter is intended to remove items from the stream which are too old, 25 | /// as defined by the current time and the configured `tolerance`. 
That is, if 26 | /// for some time `T`, `(T - time::now()).abs() > tolerance` the item associated 27 | /// with `T` will be rejected. 28 | pub struct DelayFilter { 29 | tolerance: i64, 30 | } 31 | 32 | /// Configuration for `DelayFilter` 33 | #[derive(Clone, Debug)] 34 | pub struct DelayFilterConfig { 35 | /// The filter's unique name in the routing topology. 36 | pub config_path: Option<String>, 37 | /// The forwards along which the filter will emit its `metric::Event`s. 38 | pub forwards: Vec<String>, 39 | /// The delay tolerance of the filter, measured in seconds. 40 | pub tolerance: i64, 41 | } 42 | 43 | impl DelayFilter { 44 | /// Create a new DelayFilter 45 | pub fn new(config: &DelayFilterConfig) -> DelayFilter { 46 | DelayFilter { 47 | tolerance: config.tolerance, 48 | } 49 | } 50 | } 51 | 52 | impl filter::Filter for DelayFilter { 53 | fn process( 54 | &mut self, 55 | event: metric::Event, 56 | res: &mut Vec<metric::Event>, 57 | ) -> Result<(), filter::FilterError> { 58 | match event { 59 | metric::Event::Telemetry(telem) => { 60 | if (telem.timestamp - time::now()).abs() < self.tolerance { 61 | DELAY_TELEM_ACCEPT.fetch_add(1, Ordering::Relaxed); 62 | res.push(metric::Event::Telemetry(telem)); 63 | } else { 64 | DELAY_TELEM_REJECT.fetch_add(1, Ordering::Relaxed); 65 | } 66 | } 67 | metric::Event::Log(log) => { 68 | if (log.time - time::now()).abs() < self.tolerance { 69 | DELAY_LOG_ACCEPT.fetch_add(1, Ordering::Relaxed); 70 | res.push(metric::Event::Log(log)); 71 | } else { 72 | DELAY_LOG_REJECT.fetch_add(1, Ordering::Relaxed); 73 | } 74 | } 75 | ev => { 76 | res.push(ev); 77 | } 78 | } 79 | Ok(()) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/filter/flush_boundary_filter.rs: -------------------------------------------------------------------------------- 1 | use crate::filter; 2 | use crate::metric; 3 | use std::mem; 4 | 5 | /// Buffer events for a set period of flushes 6 | /// 7 | /// This filter is intended to hold events for a set number of flushes. This 8 | /// delays the events for the duration of those flushes but reduces the 9 | /// likelihood of cross-flush splits of timestamps. 10 | pub struct FlushBoundaryFilter { 11 | tolerance: usize, 12 | holds: Vec<Hold>, 13 | } 14 | 15 | /// Configuration for `FlushBoundaryFilter` 16 | #[derive(Clone, Debug)] 17 | pub struct FlushBoundaryFilterConfig { 18 | /// The filter's unique name in the routing topology 19 | pub config_path: Option<String>, 20 | /// The forwards along which the filter will emit its `metric::Event`s 21 | /// stream. 22 | pub forwards: Vec<String>, 23 | /// The flush boundary tolerance, measured in flushes.
24 | pub tolerance: usize, 25 | } 26 | 27 | struct Hold { 28 | timestamp: i64, 29 | age: usize, 30 | events: Vec<metric::Event>, 31 | } 32 | 33 | impl Hold { 34 | pub fn new(event: metric::Event) -> Hold { 35 | let ts = event.timestamp().unwrap(); 36 | let mut events = Vec::new(); 37 | events.push(event); 38 | Hold { 39 | timestamp: ts, 40 | age: 0, 41 | events: events, 42 | } 43 | } 44 | } 45 | 46 | impl FlushBoundaryFilter { 47 | /// Create a new FlushBoundaryFilter 48 | pub fn new(config: &FlushBoundaryFilterConfig) -> FlushBoundaryFilter { 49 | FlushBoundaryFilter { 50 | tolerance: config.tolerance, 51 | holds: Vec::new(), 52 | } 53 | } 54 | } 55 | 56 | impl filter::Filter for FlushBoundaryFilter { 57 | fn process( 58 | &mut self, 59 | event: metric::Event, 60 | res: &mut Vec<metric::Event>, 61 | ) -> Result<(), filter::FilterError> { 62 | if event.is_timer_flush() { 63 | for hold in &mut self.holds { 64 | hold.age += 1; 65 | } 66 | let holds = mem::replace(&mut self.holds, Vec::new()); 67 | let mut too_new = Vec::new(); 68 | for mut hold in holds { 69 | if hold.age > self.tolerance { 70 | res.append(&mut hold.events); 71 | } else { 72 | too_new.push(hold); 73 | } 74 | } 75 | res.push(event); 76 | self.holds = too_new; 77 | } else { 78 | let opt_ts = event.timestamp(); 79 | if let Some(ts) = opt_ts { 80 | match self.holds.binary_search_by(|hold| hold.timestamp.cmp(&ts)) { 81 | Ok(idx) => self.holds[idx].events.push(event), 82 | Err(idx) => { 83 | let hold = Hold::new(event); 84 | self.holds.insert(idx, hold) 85 | } 86 | } 87 | } 88 | } 89 | Ok(()) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/filter/json_encode_filter.rs: -------------------------------------------------------------------------------- 1 | //! Convert `LogLine` events into Raw events encoded as JSON. 2 | //! 3 | //! This filter takes `LogLines` and encodes them into JSON, emitting the 4 | //! encoded event as a Raw event. This allows further filters or sinks to 5 | //! operate on the JSON without needing to understand a `LogLine` event in 6 | //! particular. If the `LogLine` value is a valid JSON object and 7 | //! `parse_line` config option is true, then the JSON will be merged with 8 | //! `LogLine` metadata. Otherwise, the original line will be included simply as 9 | //! a string. 10 | 11 | use crate::filter; 12 | use crate::metric; 13 | use chrono::naive::NaiveDateTime; 14 | use chrono::offset::Utc; 15 | use chrono::DateTime; 16 | use rand::random; 17 | use serde_json; 18 | use serde_json::map::Map; 19 | use serde_json::Value; 20 | use std::iter::FromIterator; 21 | use std::sync::atomic::{AtomicUsize, Ordering}; 22 | 23 | /// Total number of logline processed 24 | pub static JSON_ENCODE_LOG_PROCESSED: AtomicUsize = AtomicUsize::new(0); 25 | /// Total number of logline with JSON value successfully parsed 26 | pub static JSON_ENCODE_LOG_PARSED: AtomicUsize = AtomicUsize::new(0); 27 | 28 | /// Convert `LogLine` events into Raw events encoded as JSON. 29 | /// 30 | /// This filter takes `LogLines` and encodes them into JSON, emitting the 31 | /// encoded event as a Raw event. This allows further filters or sinks to 32 | /// operate on the JSON without needing to understand a `LogLine` event in 33 | /// particular. If the `LogLine` value is a valid JSON object and `parse_line` 34 | /// config option is true, then the JSON will be merged with `LogLine` 35 | /// metadata. Otherwise, the original line will be included simply as a string.
36 | pub struct JSONEncodeFilter { 37 | parse_line: bool, 38 | tags: metric::TagMap, 39 | } 40 | 41 | /// Configuration for `JSONEncodeFilter` 42 | #[derive(Clone, Debug)] 43 | pub struct JSONEncodeFilterConfig { 44 | /// The filter's unique name in the routing topology. 45 | pub config_path: Option<String>, 46 | /// The forwards along which the filter will emit its `metric::Event`s. 47 | pub forwards: Vec<String>, 48 | /// Whether the filter should attempt to parse `LogLine` values that are 49 | /// valid JSON objects. 50 | pub parse_line: bool, 51 | /// The tags to be applied to all `metric::Event`s streaming through this 52 | /// filter. These tags will overwrite any tags carried by the 53 | /// `metric::Event` itself. 54 | pub tags: metric::TagMap, 55 | } 56 | 57 | impl JSONEncodeFilter { 58 | /// Create a new JSONEncodeFilter 59 | pub fn new(config: JSONEncodeFilterConfig) -> JSONEncodeFilter { 60 | JSONEncodeFilter { 61 | parse_line: config.parse_line, 62 | tags: config.tags, 63 | } 64 | } 65 | } 66 | 67 | impl filter::Filter for JSONEncodeFilter { 68 | fn process( 69 | &mut self, 70 | event: metric::Event, 71 | res: &mut Vec<metric::Event>, 72 | ) -> Result<(), filter::FilterError> { 73 | match event { 74 | metric::Event::Log(log) => { 75 | let naive_time = NaiveDateTime::from_timestamp(log.time, 0); 76 | let utc_time: DateTime<Utc> = DateTime::from_utc(naive_time, Utc); 77 | let metadata = json_to_object(json!({ 78 | "time": utc_time.to_rfc3339(), 79 | "path": log.path.clone(), 80 | "tags": Map::from_iter(log.tags(&self.tags).map(|(k, v): (&String, &String)| (k.to_string(), v.to_string().into()))), 81 | })); 82 | // If parse_line is true, and line is parsable as a JSON object, parse 83 | // it. Otherwise get an object containing the original 84 | // line. 85 | let value = (if self.parse_line { Some(()) } else { None }) 86 | .and_then(|_| serde_json::from_str::<Value>(&log.value).ok()) 87 | .and_then(|v| { 88 | if let Value::Object(obj) = v { 89 | Some(obj) 90 | } else { 91 | None 92 | } 93 | }) 94 | .map(|v| { 95 | JSON_ENCODE_LOG_PARSED.fetch_add(1, Ordering::Relaxed); 96 | v 97 | }) 98 | .unwrap_or_else(|| { 99 | json_to_object(json!({"message": log.value.clone()})) 100 | }); 101 | // Combine our various sources of data. 102 | // Data that is more likely to be correct (more specific to the 103 | // source) overrides other data. So the parsed value 104 | // is authoritative, followed by any fields we could 105 | // parse by filters, then finally the metadata we were able to work 106 | // out on our own. 107 | let value = merge_objects(vec![ 108 | value, 109 | Map::from_iter(log.fields.into_iter().map(|(k, v)| (k, v.into()))), 110 | metadata, 111 | ]); 112 | res.push(metric::Event::Raw { 113 | order_by: random(), 114 | encoding: metric::Encoding::JSON, 115 | bytes: serde_json::to_string(&value).unwrap().into(), /* serde_json::Value 116 | * will never fail to 117 | * encode */ 118 | metadata: None, 119 | connection_id: None, 120 | }); 121 | JSON_ENCODE_LOG_PROCESSED.fetch_add(1, Ordering::Relaxed); 122 | } 123 | // All other event types are passed through. 124 | event => { 125 | res.push(event); 126 | } 127 | } 128 | Ok(()) 129 | } 130 | } 131 | 132 | /// Convenience helper to take a json!() macro you know is an object and get 133 | /// back a `Map<String, Value>` instead of a generic `Value`.
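/// A minimal sketch:
///
/// ```ignore
/// let obj = json_to_object(json!({"a": 1}));
/// assert_eq!(obj["a"], 1);
/// ```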
134 | fn json_to_object(v: Value) -> Map<String, Value> { 135 | if let Value::Object(obj) = v { 136 | obj 137 | } else { 138 | unreachable!() 139 | } 140 | } 141 | 142 | /// Merge JSON objects, with values from earlier objects in the list overriding 143 | /// later ones. Note this is not a recursive merge - if the same key is in many 144 | /// objects, we simply take the value from the earliest one. 145 | fn merge_objects(objs: Vec<Map<String, Value>>) -> Map<String, Value> { 146 | let mut result = Map::new(); 147 | for obj in objs { 148 | for (key, value) in obj { 149 | if !result.contains_key(&key) { 150 | result.insert(key, value); 151 | } 152 | } 153 | } 154 | result 155 | } 156 | 157 | // Tests 158 | // 159 | #[cfg(test)] 160 | mod test { 161 | use super::*; 162 | use crate::filter::Filter; 163 | use crate::metric; 164 | use quickcheck::QuickCheck; 165 | use serde_json::map::Map; 166 | use serde_json::Value; 167 | 168 | fn process_event(parse_line: bool, event: metric::Event) -> Value { 169 | let mut filter = JSONEncodeFilter { 170 | parse_line, 171 | tags: metric::TagMap::default(), 172 | }; 173 | let mut results = Vec::new(); 174 | filter.process(event, &mut results).unwrap(); 175 | // fail if results empty, else return processed event's payload 176 | if let metric::Event::Raw { ref bytes, .. } = results[0] { 177 | return serde_json::from_slice(bytes).unwrap(); 178 | } 179 | panic!("Processed event was not Raw") 180 | } 181 | 182 | #[test] 183 | fn parsable_line_parsing_off() { 184 | // Test we don't parse a line if parsing is off 185 | assert_eq!( 186 | process_event( 187 | false, 188 | metric::Event::new_log(metric::LogLine { 189 | path: "testpath".to_string(), 190 | value: "{\"bad\": \"do not parse\"}".to_string(), 191 | time: 946684800, 192 | tags: Default::default(), 193 | fields: Default::default(), 194 | }) 195 | ), 196 | json!({ 197 | "path": "testpath", 198 | "message": "{\"bad\": \"do not parse\"}", 199 | "time": "2000-01-01T00:00:00+00:00", 200 | "tags": {}, 201 | }) 202 | ); 203 | } 204 | 205 | #[test] 206 | fn parsable_line_parsing_on() { 207 | // Test we do parse a line if parsing is on 208 | assert_eq!( 209 | process_event( 210 | true, 211 | metric::Event::new_log(metric::LogLine { 212 | path: "testpath".to_string(), 213 | value: "{\"good\": \"do parse\"}".to_string(), 214 | time: 946684800, 215 | tags: Default::default(), 216 | fields: Default::default(), 217 | }) 218 | ), 219 | json!({ 220 | "path": "testpath", 221 | "good": "do parse", 222 | "time": "2000-01-01T00:00:00+00:00", 223 | "tags": {}, 224 | }) 225 | ); 226 | } 227 | 228 | #[test] 229 | fn unparsable_line() { 230 | // Test we don't parse a line if it's not JSON 231 | assert_eq!( 232 | process_event( 233 | true, 234 | metric::Event::new_log(metric::LogLine { 235 | path: "testpath".to_string(), 236 | value: "this is not json".to_string(), 237 | time: 946684800, 238 | tags: Default::default(), 239 | fields: Default::default(), 240 | }) 241 | ), 242 | json!({ 243 | "path": "testpath", 244 | "message": "this is not json", 245 | "time": "2000-01-01T00:00:00+00:00", 246 | "tags": {}, 247 | }) 248 | ); 249 | } 250 | 251 | #[test] 252 | fn non_object_line() { 253 | // Test we don't parse a line if it's not a JSON object but is valid JSON 254 | assert_eq!( 255 | process_event( 256 | true, 257 | metric::Event::new_log(metric::LogLine { 258 | path: "testpath".to_string(), 259 | value: "[123, \"not an object\"]".to_string(), 260 | time: 946684800, 261 | tags: Default::default(), 262 | fields: Default::default(), 263 | }) 264 | ), 265 | json!({ 266 |
"path": "testpath", 267 | "message": "[123, \"not an object\"]", 268 | "time": "2000-01-01T00:00:00+00:00", 269 | "tags": {}, 270 | }) 271 | ); 272 | } 273 | 274 | // quickcheck and serde_json::map::Map aren't compatible, so we ask quickcheck 275 | // for many Vec<(String, String)>s that we turn into maps. 276 | fn vecs_to_objs(vecs: &Vec>) -> Vec> { 277 | vecs.iter() 278 | .map(|vec| { 279 | Map::from_iter( 280 | vec.iter() 281 | .map(|&(ref k, ref v)| (k.clone(), v.clone().into())), 282 | ) 283 | }) 284 | .collect() 285 | } 286 | 287 | #[test] 288 | fn merged_objects_contain_all_keys() { 289 | fn inner(vecs: Vec>) -> bool { 290 | let result = merge_objects(vecs_to_objs(&vecs)); 291 | for obj in vecs { 292 | for (k, _v) in obj { 293 | if !result.contains_key(&k) { 294 | return false; 295 | } 296 | } 297 | } 298 | true 299 | } 300 | QuickCheck::new().quickcheck(inner as fn(Vec>) -> bool); 301 | } 302 | 303 | #[test] 304 | fn merged_objects_takes_first_value() { 305 | fn inner(vecs: Vec>) -> bool { 306 | let objs = vecs_to_objs(&vecs); 307 | let result = merge_objects(objs.clone()); 308 | for (key, result_value) in result { 309 | match objs.iter().find(|obj| obj.contains_key(&key)) { 310 | Some(obj) => { 311 | if obj[&key] != result_value { 312 | return false; // result value did not match first obj containing key 313 | } 314 | } 315 | None => return false, // key in result was not in any input objs 316 | } 317 | } 318 | true 319 | } 320 | QuickCheck::new().quickcheck(inner as fn(Vec>) -> bool); 321 | } 322 | } 323 | -------------------------------------------------------------------------------- /src/filter/mod.rs: -------------------------------------------------------------------------------- 1 | //! Transform or create `metric::Event` from a stream of `metric::Event`. 2 | //! 3 | //! cernan filters are intended to input `metric::Event` and then adapt that 4 | //! stream, either by injecting new `metric::Event` or by transforming the 5 | //! stream members as they come through. Exact behaviour varies by filters. The 6 | //! filter receives on an input channel and outputs over its forwards. 
7 | 8 | use crate::metric; 9 | use crate::time; 10 | use crate::util; 11 | use hopper; 12 | 13 | pub mod delay_filter; 14 | mod flush_boundary_filter; 15 | pub mod json_encode_filter; 16 | mod programmable_filter; 17 | 18 | pub use self::delay_filter::{DelayFilter, DelayFilterConfig}; 19 | pub use self::flush_boundary_filter::{ 20 | FlushBoundaryFilter, FlushBoundaryFilterConfig, 21 | }; 22 | pub use self::json_encode_filter::{JSONEncodeFilter, JSONEncodeFilterConfig}; 23 | pub use self::programmable_filter::{ProgrammableFilter, ProgrammableFilterConfig}; 24 | 25 | /// Errors that can strike a Filter 26 | #[derive(Debug)] 27 | pub enum FilterError { 28 | /// Specific to a ProgrammableFilter, means no function is available as 29 | /// called in the script 30 | NoSuchFunction(&'static str, metric::Event), 31 | /// Specific to a ProgrammableFilter, means the lua code errored 32 | LuaError(String, metric::Event), 33 | } 34 | 35 | fn msg_in_fe(fe: &FilterError) -> &str { 36 | match fe { 37 | FilterError::NoSuchFunction(n, _) => n, 38 | FilterError::LuaError(ref n, _) => n, 39 | } 40 | } 41 | 42 | fn event_in_fe(fe: FilterError) -> metric::Event { 43 | match fe { 44 | FilterError::NoSuchFunction(_, m) | FilterError::LuaError(_, m) => m, 45 | } 46 | } 47 | 48 | /// The Filter trait 49 | /// 50 | /// All filters take as input a stream of `metric::Event` and produce as output 51 | /// another `metric::Event` stream. That's it. The exact method by which each 52 | /// stream works depends on the implementation of the Filter. 53 | pub trait Filter { 54 | /// Process a single `metric::Event` 55 | /// 56 | /// Individual Filters will implement this function depending on their 57 | /// mechanism. See individual filters for details. 58 | fn process( 59 | &mut self, 60 | event: metric::Event, 61 | res: &mut Vec<metric::Event>, 62 | ) -> Result<(), FilterError>; 63 | 64 | /// Run the Filter 65 | /// 66 | /// It is not expected that most Filters will re-implement this. If this is 67 | /// done, take care to obey overload signals and interpret errors from 68 | /// `Filter::process`. 69 | fn run( 70 | &mut self, 71 | recv: hopper::Receiver<metric::Event>, 72 | sources: Vec<String>, 73 | mut chans: util::Channel, 74 | ) { 75 | let mut attempts = 0; 76 | let mut events = Vec::with_capacity(64); 77 | let mut recv = recv.into_iter(); 78 | let mut total_shutdowns = 0; 79 | loop { 80 | time::delay(attempts); 81 | match recv.next() { 82 | None => attempts += 1, 83 | Some(metric::Event::Shutdown) => { 84 | util::send(&mut chans, metric::Event::Shutdown); 85 | total_shutdowns += 1; 86 | if total_shutdowns >= sources.len() { 87 | trace!( 88 | "Received shutdown from every configured source: {:?}", 89 | sources 90 | ); 91 | return; 92 | } 93 | } 94 | Some(event) => { 95 | attempts = 0; 96 | match self.process(event, &mut events) { 97 | Ok(()) => { 98 | for ev in events.drain(..) { 99 | util::send(&mut chans, ev) 100 | } 101 | } 102 | Err(fe) => { 103 | error!( 104 | "Failed to run filter with error: {:?}", 105 | msg_in_fe(&fe) 106 | ); 107 | let event = event_in_fe(fe); 108 | util::send(&mut chans, event); 109 | } 110 | } 111 | } 112 | } 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/http.rs: -------------------------------------------------------------------------------- 1 | //! Tiny, unassuming HTTP Server 2 | 3 | extern crate tiny_http; 4 | 5 | use crate::thread; 6 | use std; 7 | 8 | /// HTTP request. Alias of `tiny_http::Request`.
9 | pub type Request = tiny_http::Request; 10 | /// HTTP response. Alias of `tiny_http::Response`. 11 | pub type Response<'a> = tiny_http::Response<&'a [u8]>; 12 | /// HTTP header. Alias of `tiny_http::Header`. 13 | pub type Header = tiny_http::Header; 14 | /// HTTP header field. Alias of `tiny_http::HeaderField`. 15 | pub type HeaderField = tiny_http::HeaderField; 16 | /// HTTP status code. Alias of `tiny_http::StatusCode`. 17 | pub type StatusCode = tiny_http::StatusCode; 18 | 19 | /// Simple single threaded HTTP request handler. 20 | pub trait Handler: Sync + Send { 21 | /// Handler for a single HTTP request. 22 | fn handle(&self, request: Request); 23 | } 24 | 25 | /// Single threaded HTTP server. 26 | pub struct Server { 27 | /// Thread handle for the operating HTTP server. 28 | thread: thread::ThreadHandle, 29 | } 30 | 31 | fn http_server<H>( 32 | poller: &thread::Poll, 33 | tiny_http_server: &tiny_http::Server, 34 | handler: &H, 35 | ) where 36 | H: Handler, 37 | { 38 | loop { 39 | let mut events = thread::Events::with_capacity(1024); 40 | match poller.poll(&mut events, Some(std::time::Duration::from_millis(5))) { 41 | Ok(num_events) if num_events > 0 => { 42 | break; 43 | } 44 | 45 | Ok(_) => match tiny_http_server 46 | .recv_timeout(std::time::Duration::from_millis(1000)) 47 | { 48 | Ok(maybe_a_request) => { 49 | if let Some(request) = maybe_a_request { 50 | handler.handle(request); 51 | } 52 | } 53 | 54 | Err(e) => { 55 | panic!("Failed during recv_timeout {:?}", e); 56 | } 57 | }, 58 | 59 | Err(e) => { 60 | panic!("Failed during poll {:?}", e); 61 | } 62 | }; 63 | } 64 | } 65 | 66 | /// Single threaded HTTP server implementation. 67 | impl Server { 68 | /// Create and start an HTTP server on the given host and port. 69 | pub fn new<H: Handler + 'static>(host_port: String, handler: H) -> Self { 70 | Server { 71 | thread: thread::spawn(move |poller| { 72 | let tiny_http_server = tiny_http::Server::http(host_port).unwrap(); 73 | http_server(&poller, &tiny_http_server, &handler) 74 | }), 75 | } 76 | } 77 | } 78 | 79 | /// Graceful shutdown support for Server. 80 | impl thread::Stoppable for Server { 81 | fn join(self) { 82 | self.thread.join(); 83 | } 84 | 85 | fn shutdown(self) { 86 | self.thread.shutdown(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Cernan is a telemetry and logging aggregation server. It exposes multiple 2 | //! interfaces for ingestion and can emit to multiple aggregation sources while 3 | //! doing in-flight manipulation of data. Cernan has minimal CPU and memory 4 | //! requirements and is intended to service bursty telemetry _without_ load 5 | //! shedding. Cernan aims to be _reliable_ and _convenient_ to use, both for 6 | //! application engineers and operations staff. 7 | //! 8 | //! Why you might choose to use cernan: 9 | //! 10 | //! * You need to ingest telemetry from multiple protocols. 11 | //! * You need to multiplex telemetry over aggregation services. 12 | //! * You want to convert log lines into telemetry. 13 | //! * You want to convert telemetry into log lines. 14 | //! * You want to transform telemetry or log lines in-flight. 15 | //! 16 | //! If you'd like to learn more, please do have a look in 17 | //! our [wiki](https://github.com/postmates/cernan/wiki/).
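//!
//! Internally, data flows along `source -> filter -> ... -> sink` chains,
//! with each hop consuming and producing `metric::Event` values.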
18 | #![allow(unknown_lints)] 19 | #![deny( 20 | trivial_numeric_casts, 21 | missing_docs, 22 | unstable_features, 23 | unused_import_braces 24 | )] 25 | extern crate base64; 26 | extern crate byteorder; 27 | extern crate chrono; 28 | extern crate clap; 29 | extern crate coco; 30 | extern crate elastic; 31 | extern crate flate2; 32 | extern crate futures; 33 | extern crate glob; 34 | extern crate hopper; 35 | extern crate libc; 36 | extern crate mio; 37 | extern crate mond; 38 | extern crate protobuf; 39 | extern crate quantiles; 40 | extern crate rand; 41 | extern crate rdkafka; 42 | extern crate regex; 43 | extern crate reqwest; 44 | extern crate seahash; 45 | extern crate serde_avro; 46 | #[macro_use] 47 | extern crate serde_json; 48 | extern crate slab; 49 | extern crate toml; 50 | extern crate url; 51 | extern crate uuid; 52 | 53 | #[macro_use] 54 | extern crate log; 55 | 56 | #[macro_use] 57 | extern crate lazy_static; 58 | 59 | #[macro_use] 60 | extern crate serde_derive; 61 | 62 | #[cfg(test)] 63 | extern crate quickcheck; 64 | 65 | pub mod buckets; 66 | pub mod config; 67 | pub mod constants; 68 | pub mod filter; 69 | pub mod http; 70 | pub mod matrix; 71 | pub mod metric; 72 | pub mod protocols; 73 | pub mod sink; 74 | pub mod source; 75 | pub mod thread; 76 | pub mod time; 77 | pub mod util; 78 | -------------------------------------------------------------------------------- /src/matrix.rs: -------------------------------------------------------------------------------- 1 | //! Collection of matrix implementations. 2 | 3 | use crate::util; 4 | use std; 5 | use std::fmt::Debug; 6 | use std::str::FromStr; 7 | 8 | type AdjacencyMap = util::HashMap>; 9 | 10 | type AdjacencyMatrix = util::HashMap>; 11 | 12 | /// Adjacency matrix struct. 13 | pub struct Adjacency { 14 | edges: AdjacencyMatrix, 15 | } 16 | 17 | impl Default for Adjacency { 18 | fn default() -> Adjacency { 19 | Adjacency { 20 | edges: Default::default(), 21 | } 22 | } 23 | } 24 | 25 | /// Poor man's adjacency matrix biased towards incident edge queries. 26 | /// 27 | /// Edges are not symmetric. Two values are symmetrically adjacent when 28 | /// edges originate from each value to the other value. 29 | impl Adjacency { 30 | /// Construct a new adjacency matrix. 31 | pub fn new() -> Self { 32 | Adjacency { 33 | edges: Default::default(), 34 | } 35 | } 36 | 37 | /// Adds an outbound edge from a node to another. 38 | pub fn add_asymmetric_edge( 39 | &mut self, 40 | from_str: &str, 41 | to_str: &str, 42 | metadata: Option, 43 | ) { 44 | let to = String::from_str(to_str).unwrap(); 45 | let from = String::from_str(from_str).unwrap(); 46 | let vec = self.edges.entry(from).or_insert_with(Default::default); 47 | vec.insert(to, metadata); 48 | } 49 | 50 | /// Adds symmetric edges between the given node and a set of other nodes. 51 | pub fn add_edges( 52 | &mut self, 53 | from_str: &str, 54 | to_strs: Vec, 55 | metadata: Option, 56 | ) { 57 | for to_str in to_strs { 58 | self.add_asymmetric_edge(from_str, &to_str, metadata.clone()); 59 | self.add_asymmetric_edge(&to_str, from_str, metadata.clone()) 60 | } 61 | 62 | drop(metadata); 63 | } 64 | 65 | /// Returns the number of incident edges to the given node. 66 | pub fn num_edges(&mut self, id: &str) -> usize { 67 | match self.edges.get(id) { 68 | Some(value) => value.keys().len(), 69 | 70 | None => 0, 71 | } 72 | } 73 | 74 | /// Returns true iff relations exist for the given node id. 
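/// A minimal sketch (type parameters are elided throughout this listing;
/// any clonable metadata type works):
///
/// ```ignore
/// let mut adj = Adjacency::new();
/// adj.add_asymmetric_edge("a", "b", None::<String>);
/// assert!(adj.contains_node("a"));
/// assert!(!adj.contains_node("b")); // no edge originates at "b"
/// ```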
75 | pub fn contains_node(&self, id: &str) -> bool { 76 | self.edges.contains_key(id) 77 | } 78 | 79 | /// Filters and returns edges satisfying the given constraint. 80 | pub fn filter_nodes(&self, id: &str, f: F) -> Vec 81 | where 82 | for<'r> F: FnMut(&'r (&String, &Option)) -> bool, 83 | { 84 | self.edges[id] 85 | .iter() 86 | .filter(f) 87 | .map(|(k, _v)| k.clone()) 88 | .collect() 89 | } 90 | 91 | /// Iterates over edge relations in the matrix. 92 | pub fn iter(&self) -> std::collections::hash_map::Iter> { 93 | self.edges.iter() 94 | } 95 | 96 | /// Pops adjacency metadata for the given node. 97 | pub fn pop(&mut self, id: &str) -> Option> { 98 | self.edges.remove(id) 99 | } 100 | 101 | /// As pop, but returns a vec of node identifiers connected to the given 102 | /// node. 103 | pub fn pop_nodes(&mut self, id: &str) -> Vec { 104 | match self.pop(id) { 105 | Some(map) => map.into_iter().map(|(k, _v)| k).collect(), 106 | 107 | None => Vec::new(), 108 | } 109 | } 110 | 111 | /// As pop, but returns a vec of edge metadata. 112 | /// Option values will be unwrapped and None values filtered. 113 | pub fn pop_metadata(&mut self, id: &str) -> Vec { 114 | match self.pop(id) { 115 | Some(map) => map 116 | .into_iter() 117 | .filter(|&(ref _k, ref option_v)| option_v.is_some()) 118 | .map(|(_, some_v)| some_v.unwrap()) 119 | .collect(), 120 | 121 | None => Vec::new(), 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/metric/ackbag.rs: -------------------------------------------------------------------------------- 1 | use crate::util; 2 | use std::cmp::min; 3 | use std::sync::{Arc, Mutex}; 4 | use std::{thread, time}; 5 | use uuid::Uuid; 6 | 7 | #[derive(Clone)] 8 | pub struct SyncAckProps { 9 | acks_received: usize, 10 | } 11 | 12 | impl Default for SyncAckProps { 13 | fn default() -> SyncAckProps { 14 | SyncAckProps { acks_received: 0 } 15 | } 16 | } 17 | 18 | impl SyncAckProps { 19 | pub fn ack(&mut self) { 20 | self.acks_received = self.acks_received.saturating_add(1); 21 | } 22 | 23 | pub fn acks_received(&self) -> usize { 24 | self.acks_received 25 | } 26 | } 27 | 28 | type SyncAckBagMap = util::HashMap; 29 | 30 | pub struct SyncAckBag { 31 | waiting_syncs: Arc>, 32 | } 33 | 34 | impl Default for SyncAckBag { 35 | fn default() -> SyncAckBag { 36 | SyncAckBag { 37 | waiting_syncs: Arc::new(Mutex::new(SyncAckBagMap::default())), 38 | } 39 | } 40 | } 41 | 42 | impl SyncAckBag { 43 | /// Return a clone of the SyncAckProps for the given key 44 | pub fn props_for(&self, key: Uuid) -> Option { 45 | let bag = self.waiting_syncs.lock().unwrap(); 46 | match bag.get(&key) { 47 | Some(v) => Some((*v).clone()), 48 | None => None, 49 | } 50 | } 51 | 52 | /// Insert an empty-initialized SyncAckProps for the given key 53 | pub fn prepare_wait(&self, key: Uuid) { 54 | let mut bag = self.waiting_syncs.lock().unwrap(); 55 | bag.insert(key, SyncAckProps::default()); 56 | } 57 | 58 | /// Remove the given key from the internal bag 59 | pub fn remove(&self, key: Uuid) { 60 | let mut bag = self.waiting_syncs.lock().unwrap(); 61 | bag.remove(&key); 62 | } 63 | 64 | /// Retrieve a mutable reference to the SyncAckProps for the key and invoke 65 | /// a callback if it exists. If the key is not present, then the 66 | /// callback is not called. 
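/// A minimal sketch (mirrors the unit tests below):
///
/// ```ignore
/// let bag = SyncAckBag::default();
/// let key = Uuid::new_v4();
/// bag.prepare_wait(key);
/// bag.with_props(key, |props| props.ack());
/// assert_eq!(bag.props_for(key).unwrap().acks_received(), 1);
/// ```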
67 | pub fn with_props(&self, key: Uuid, callback: F) { 68 | let mut bag = self.waiting_syncs.lock().unwrap(); 69 | if let Some(v) = bag.get_mut(&key) { 70 | callback(v); 71 | } 72 | } 73 | 74 | /// Wait until the number of acks in the specified key becomes non-zero. 75 | pub fn wait_for(&self, key: Uuid) { 76 | self.wait_for_callback(key, thread::sleep); 77 | } 78 | 79 | /// Testability driver for wait_for 80 | pub fn wait_for_callback(&self, key: Uuid, cb: F) { 81 | let max_nap_time = time::Duration::from_millis(250); 82 | let mut current_nap_time = time::Duration::from_millis(5); 83 | let mut wait = true; 84 | while wait { 85 | match self.props_for(key) { 86 | Some(props) => wait = props.acks_received() == 0, 87 | _ => wait = false, 88 | } 89 | if wait { 90 | cb(current_nap_time); 91 | current_nap_time = min(max_nap_time, current_nap_time * 2); 92 | } 93 | } 94 | } 95 | } 96 | 97 | lazy_static! { 98 | static ref SINGLETON: SyncAckBag = SyncAckBag::default(); 99 | } 100 | 101 | /// Returns the singleton ack bag. 102 | /// The ack bag is necessary because we need to protect concurrent access of raw 103 | /// events' data, but we can't Arc that enum due to serialization needs. 104 | /// Instead, we keep (and Arc) a global bag of properties and only serialize a 105 | /// key into the bag. 106 | pub fn global_ack_bag() -> &'static SyncAckBag { 107 | &SINGLETON 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use super::*; 113 | use std::cell::RefCell; 114 | 115 | #[test] 116 | fn test_prepare_wait_adds_default_entry() { 117 | let ack_bag = SyncAckBag::default(); 118 | let key = Uuid::new_v4(); 119 | ack_bag.prepare_wait(key); 120 | 121 | let value = ack_bag.props_for(key).unwrap(); 122 | assert_eq!(value.acks_received(), 0); 123 | } 124 | 125 | #[test] 126 | fn test_remove_works() { 127 | let ack_bag = SyncAckBag::default(); 128 | let key = Uuid::new_v4(); 129 | ack_bag.prepare_wait(key); 130 | ack_bag.remove(key); 131 | 132 | assert_eq!(ack_bag.props_for(key).is_some(), false); 133 | 134 | // Removing a non-existent key is fine. 
135 | let other_key = Uuid::new_v4(); 136 | ack_bag.remove(other_key); 137 | } 138 | 139 | #[test] 140 | fn test_ack_adds_one_to_tally() { 141 | let ack_bag = SyncAckBag::default(); 142 | let key = Uuid::new_v4(); 143 | ack_bag.prepare_wait(key); 144 | ack_bag.with_props(key, |props| { 145 | props.ack(); 146 | }); 147 | 148 | assert_eq!(ack_bag.props_for(key).unwrap().acks_received(), 1); 149 | } 150 | 151 | #[test] 152 | fn test_wait_for_returns_when_there_are_acks() { 153 | let ack_bag = SyncAckBag::default(); 154 | let key = Uuid::new_v4(); 155 | ack_bag.prepare_wait(key); 156 | ack_bag.with_props(key, |props| { 157 | props.ack(); 158 | }); 159 | 160 | ack_bag.wait_for(key); 161 | } 162 | 163 | #[test] 164 | fn test_wait_for_sleeps_when_there_are_no_acks() { 165 | let callback_count = RefCell::new(0); 166 | let ack_bag = SyncAckBag::default(); 167 | let key = Uuid::new_v4(); 168 | ack_bag.prepare_wait(key); 169 | ack_bag.wait_for_callback(key, |_| { 170 | *callback_count.borrow_mut() += 1; 171 | ack_bag.with_props(key, |props| { 172 | props.ack(); 173 | }) 174 | }); 175 | assert_eq!(*callback_count.borrow(), 1); 176 | } 177 | 178 | #[test] 179 | fn test_waiting_for_nonexistent_key_returns() { 180 | let ack_bag = SyncAckBag::default(); 181 | ack_bag.wait_for(Uuid::new_v4()); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/metric/event.rs: -------------------------------------------------------------------------------- 1 | use crate::metric::{LogLine, Telemetry}; 2 | use crate::util::HashMap; 3 | use uuid::Uuid; 4 | 5 | /// Supported event encodings. 6 | #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] 7 | pub enum Encoding { 8 | /// Raw bytes, no encoding. 9 | Raw, 10 | /// Avro 11 | Avro, 12 | /// JSON 13 | JSON, 14 | } 15 | 16 | /// Metadata: additional data attached to an event 17 | pub type Metadata = HashMap, Vec>; 18 | 19 | /// Event: the central cernan datastructure 20 | /// 21 | /// Event is the heart of cernan, the enumeration that cernan works on in all 22 | /// cases. The enumeration fields drive sink / source / filter operations 23 | /// depending on their implementation. 24 | #[allow(clippy::large_enum_variant)] 25 | #[derive(PartialEq, Debug, Serialize, Deserialize, Clone)] 26 | pub enum Event { 27 | /// A wrapper for `metric::Telemetry`. See its documentation for more 28 | /// detail. 29 | Telemetry(Telemetry), 30 | /// A wrapper for `metric::LogLine`. See its documentation for more 31 | /// detail. 32 | Log(LogLine), 33 | /// A flush pulse signal. The `TimerFlush` keeps a counter of the total 34 | /// flushes made in this cernan's run. See `source::Flush` for the origin of 35 | /// these pulses in cernan operation. 36 | TimerFlush(u64), 37 | /// Shutdown event which marks the location in the queue after which no 38 | /// more events will appear. It is expected that after receiving this 39 | /// marker the given source will exit cleanly. 40 | Shutdown, 41 | /// Raw, encoded bytes. 42 | Raw { 43 | /// Ordering value used by some sinks accepting Raw events. 44 | order_by: u64, 45 | /// Encoding for the included bytes. 46 | encoding: Encoding, 47 | /// Encoded payload. 48 | bytes: Vec, 49 | /// Metadata used by some sinks 50 | metadata: Option, 51 | /// Connection ID of the source on which this raw event was received 52 | connection_id: Option, 53 | }, 54 | } 55 | 56 | impl Event { 57 | /// Determine if an event is a `TimerFlush`. 
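/// A minimal sketch:
///
/// ```
/// use cernan::metric::Event;
///
/// assert!(Event::TimerFlush(0).is_timer_flush());
/// assert!(!Event::Shutdown.is_timer_flush());
/// ```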
58 | pub fn is_timer_flush(&self) -> bool { 59 | match *self { 60 | Event::TimerFlush(_) => true, 61 | _ => false, 62 | } 63 | } 64 | 65 | /// Retrieve the timestamp from an `Event` if such exists. `TimerFlush` has 66 | /// no sensible timestamp -- being itself a mechanism _of_ time, not inside 67 | /// time -- and these `Event`s will always return None. 68 | pub fn timestamp(&self) -> Option { 69 | match *self { 70 | Event::Telemetry(ref telem) => Some(telem.timestamp), 71 | Event::Log(ref log) => Some(log.time), 72 | Event::TimerFlush(_) | Event::Shutdown | Event::Raw { .. } => None, 73 | } 74 | } 75 | } 76 | 77 | impl Event { 78 | /// Create a new `Event::Telemetry` from an existing `metric::Telemetry`. 79 | #[inline] 80 | pub fn new_telemetry(metric: Telemetry) -> Event { 81 | Event::Telemetry(metric) 82 | } 83 | 84 | /// Create a new `Event::Log` from an existing `metric::LogLine`. 85 | #[inline] 86 | pub fn new_log(log: LogLine) -> Event { 87 | Event::Log(log) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/metric/logline.rs: -------------------------------------------------------------------------------- 1 | use crate::metric::{TagIter, TagMap}; 2 | use crate::time; 3 | use std::collections::HashSet; 4 | 5 | /// An unstructured piece of text, plus associated metadata 6 | #[derive(PartialEq, Debug, Serialize, Deserialize, Clone)] 7 | pub struct LogLine { 8 | /// The time that this `LogLine` occupies, in the units of time::now() 9 | pub time: i64, 10 | /// The path that this `LogLine` originated from. May be a unix path or not, 11 | /// depending on origin. 12 | pub path: String, 13 | /// The line read from the `LogLine` path 14 | pub value: String, 15 | /// Fields that may have been parsed out of the value, a key/value structure 16 | pub fields: TagMap, 17 | /// Cernan tags for this LogLine 18 | pub tags: Option, 19 | } 20 | 21 | /// `LogLine` - a structure that represents a bit of text 22 | /// 23 | /// A `LogLine` is intended to hold a bit of text in its 'value' that may or may 24 | /// not be structured. The field 'fields' is available for 25 | impl LogLine { 26 | /// Create a new `LogLine` 27 | /// 28 | /// Please see `LogLine` struct documentation for more details. 29 | pub fn new(path: S, value: S) -> LogLine 30 | where 31 | S: Into, 32 | { 33 | LogLine { 34 | path: path.into(), 35 | value: value.into(), 36 | time: time::now(), 37 | tags: Default::default(), 38 | fields: Default::default(), 39 | } 40 | } 41 | 42 | /// Set the time of the Logline 43 | /// 44 | /// # Examples 45 | /// ``` 46 | /// use cernan::metric::LogLine; 47 | /// 48 | /// let time = 101; 49 | /// let mut l = LogLine::new("some_path", "value"); 50 | /// assert!(l.time != time); 51 | /// 52 | /// l = l.time(time); 53 | /// assert_eq!(l.time, time); 54 | /// ``` 55 | pub fn time(mut self, time: i64) -> LogLine { 56 | self.time = time; 57 | self 58 | } 59 | 60 | /// Insert a new field into LogLine 61 | /// 62 | /// Fields are distinct from tags. A 'field' is related to data that has 63 | /// been parsed out of LogLine.value and will be treated specially by 64 | /// supporting sinks. For instance, the firehose sink will put the field 65 | /// _into_ the payload where tags will be associated metadata that define 66 | /// groups of related LogLines. 
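/// # Examples
/// ```
/// use cernan::metric::LogLine;
///
/// let l = LogLine::new("some_path", "value").insert_field("status", "200");
/// assert_eq!(l.fields.get("status"), Some(&"200".to_string()));
/// ```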
67 | pub fn insert_field(mut self, key: S, val: S) -> LogLine 68 | where 69 | S: Into, 70 | { 71 | self.fields.insert(key.into(), val.into()); 72 | self 73 | } 74 | 75 | /// Insert a tag into the LogLine 76 | /// 77 | /// This inserts a key/value pair into the LogLine, returning the previous 78 | /// value if the key already existed. 79 | pub fn insert_tag(&mut self, key: S, val: S) -> Option 80 | where 81 | S: Into, 82 | { 83 | if let Some(ref mut tags) = self.tags { 84 | tags.insert(key.into(), val.into()) 85 | } else { 86 | let mut tags = TagMap::default(); 87 | let res = tags.insert(key.into(), val.into()); 88 | self.tags = Some(tags); 89 | res 90 | } 91 | } 92 | 93 | /// Remove a tag from the Telemetry 94 | /// 95 | /// This removes a key/value pair from the Telemetry, returning the previous 96 | /// value if the key existed. 97 | pub fn remove_tag(&mut self, key: &str) -> Option { 98 | if let Some(ref mut tags) = self.tags { 99 | tags.remove(key) 100 | } else { 101 | None 102 | } 103 | } 104 | 105 | /// Overlay a tag into the LogLine 106 | /// 107 | /// This function inserts a new key and value into the LogLine's tags. If 108 | /// the key was already present the old value is replaced. 109 | pub fn overlay_tag(mut self, key: S, val: S) -> LogLine 110 | where 111 | S: Into, 112 | { 113 | let _ = self.insert_tag(key, val); 114 | self 115 | } 116 | 117 | /// Overlay a TagMap on the LogLine's tags 118 | /// 119 | /// This function overlays a TagMap onto the LogLine's existing tags. If a 120 | /// key is present in both TagMaps the one from 'map' will be preferred. 121 | pub fn overlay_tags_from_map(mut self, map: &TagMap) -> LogLine { 122 | if let Some(ref mut tags) = self.tags { 123 | for (k, v) in map.iter() { 124 | tags.insert(k.clone(), v.clone()); 125 | } 126 | } else if !map.is_empty() { 127 | self.tags = Some(map.clone()); 128 | } 129 | self 130 | } 131 | 132 | /// Get a value from tags, either interior or default 133 | pub fn get_from_tags<'a>( 134 | &'a mut self, 135 | key: &'a str, 136 | defaults: &'a TagMap, 137 | ) -> Option<&'a String> { 138 | if let Some(ref mut tags) = self.tags { 139 | match tags.get(key) { 140 | Some(v) => Some(v), 141 | None => defaults.get(key), 142 | } 143 | } else { 144 | defaults.get(key) 145 | } 146 | } 147 | 148 | /// Iterate tags, layering in defaults when needed 149 | /// 150 | /// The defaults serves to fill 'holes' in the Telemetry's view of the 151 | /// tags. We avoid shipping tags through the whole system at the expense of 152 | /// slightly more complicated call-sites in sinks. 153 | pub fn tags<'a>(&'a self, defaults: &'a TagMap) -> TagIter<'a> { 154 | if let Some(ref tags) = self.tags { 155 | TagIter::Double { 156 | iters_exhausted: false, 157 | seen_keys: HashSet::new(), 158 | defaults: defaults.iter(), 159 | iters: tags.iter(), 160 | } 161 | } else { 162 | TagIter::Single { 163 | defaults: defaults.iter(), 164 | } 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/metric/mod.rs: -------------------------------------------------------------------------------- 1 | //! `metric` is a collection of the abstract datatypes that cernan operates 2 | //! over, plus related metadata. The main show here is 3 | //! `metric::Event`. Everything branches down from that. 
4 | mod ackbag; 5 | mod event; 6 | mod logline; 7 | mod telemetry; 8 | 9 | pub use self::ackbag::global_ack_bag; 10 | pub use self::event::{Encoding, Event, Metadata}; 11 | pub use self::logline::LogLine; 12 | #[cfg(test)] 13 | pub use self::telemetry::Value; 14 | pub use self::telemetry::{AggregationMethod, Telemetry}; 15 | use crate::util; 16 | use std::cmp; 17 | use std::collections::{hash_map, HashSet}; 18 | 19 | /// A common type in cernan, a map from string to string 20 | pub type TagMap = util::HashMap; 21 | 22 | /// Compare two tagmaps 23 | /// 24 | /// K/Vs are compared lexographically unless the maps are of different length, 25 | /// in which case length is the comparator. 26 | pub fn cmp_tagmap( 27 | lhs: &Option, 28 | rhs: &Option, 29 | ) -> Option { 30 | match (lhs, rhs) { 31 | (&Some(ref l), &Some(ref r)) => { 32 | if l.len() != r.len() { 33 | l.len().partial_cmp(&r.len()) 34 | } else { 35 | l.iter().partial_cmp(r) 36 | } 37 | } 38 | _ => Some(cmp::Ordering::Equal), 39 | } 40 | } 41 | 42 | #[allow(missing_docs)] 43 | pub enum TagIter<'a> { 44 | Single { 45 | defaults: hash_map::Iter<'a, String, String>, 46 | }, 47 | Double { 48 | seen_keys: HashSet, 49 | iters: hash_map::Iter<'a, String, String>, 50 | defaults: hash_map::Iter<'a, String, String>, 51 | iters_exhausted: bool, 52 | }, 53 | } 54 | 55 | impl<'a> Iterator for TagIter<'a> { 56 | type Item = (&'a String, &'a String); 57 | 58 | fn next(&mut self) -> Option { 59 | match *self { 60 | TagIter::Single { ref mut defaults } => defaults.next(), 61 | TagIter::Double { 62 | ref mut seen_keys, 63 | ref mut iters, 64 | ref mut defaults, 65 | ref mut iters_exhausted, 66 | } => loop { 67 | if *iters_exhausted { 68 | if let Some((k, v)) = defaults.next() { 69 | if seen_keys.insert(k.to_string()) { 70 | return Some((k, v)); 71 | } 72 | } else { 73 | return None; 74 | } 75 | } else if let Some((k, v)) = iters.next() { 76 | seen_keys.insert(k.to_string()); 77 | return Some((k, v)); 78 | } else { 79 | *iters_exhausted = true; 80 | continue; 81 | } 82 | }, 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/protocols/graphite.rs: -------------------------------------------------------------------------------- 1 | //! Handle the graphite protocol. Graphite is a timestamped, simple text-based 2 | //! protocol for telemetry. 3 | 4 | use crate::metric::{AggregationMethod, Telemetry}; 5 | use std::str::FromStr; 6 | use std::sync; 7 | 8 | /// Parse a string for graphite data into a `metric::Telemetry` if possible. 
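/// A minimal sketch (mirrors the unit test below):
///
/// ```
/// use cernan::metric::Telemetry;
/// use cernan::protocols::graphite::parse_graphite;
/// use std::sync;
///
/// let mut res = Vec::new();
/// let template = sync::Arc::new(Some(Telemetry::default()));
/// assert!(parse_graphite("fst 1 101\n", &mut res, &template));
/// assert_eq!(res[0].name, "fst");
/// assert_eq!(res[0].value(), Some(1.0));
/// ```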
9 | pub fn parse_graphite( 10 | source: &str, 11 | res: &mut Vec, 12 | metric: &sync::Arc>, 13 | ) -> bool { 14 | let mut iter = source.split_whitespace(); 15 | while let Some(name) = iter.next() { 16 | match iter.next() { 17 | Some(val) => match iter.next() { 18 | Some(time) => { 19 | let parsed_val = match f64::from_str(val) { 20 | Ok(f) => f, 21 | Err(_) => return false, 22 | }; 23 | let parsed_time = match i64::from_str(time) { 24 | Ok(t) => t, 25 | Err(_) => return false, 26 | }; 27 | let metric = sync::Arc::make_mut(&mut sync::Arc::clone(metric)) 28 | .take() 29 | .unwrap(); 30 | res.push( 31 | metric 32 | .thaw() 33 | .name(name) 34 | .value(parsed_val) 35 | .kind(AggregationMethod::Set) 36 | .timestamp(parsed_time) 37 | .harden() 38 | .unwrap(), 39 | ); 40 | } 41 | None => return false, 42 | }, 43 | None => return false, 44 | } 45 | } 46 | !res.is_empty() 47 | } 48 | 49 | #[cfg(test)] 50 | mod tests { 51 | use super::*; 52 | use crate::metric::{AggregationMethod, Telemetry}; 53 | use chrono::{TimeZone, Utc}; 54 | use std::sync; 55 | 56 | #[test] 57 | fn test_parse_graphite() { 58 | let pyld = "fst 1 101\nsnd -2.0 202\nthr 3 303\nfth@fth 4 404\nfv%fv 5 505\ns-th 6 606\n"; 59 | let mut res = Vec::new(); 60 | let metric = sync::Arc::new(Some(Telemetry::default())); 61 | assert!(parse_graphite(pyld, &mut res, &metric)); 62 | 63 | assert_eq!(res[0].kind(), AggregationMethod::Set); 64 | assert_eq!(res[0].name, "fst"); 65 | assert_eq!(res[0].value(), Some(1.0)); 66 | assert_eq!(res[0].timestamp, Utc.timestamp(101, 0).timestamp()); 67 | 68 | assert_eq!(res[1].kind(), AggregationMethod::Set); 69 | assert_eq!(res[1].name, "snd"); 70 | assert_eq!(res[1].value(), Some(-2.0)); 71 | assert_eq!(res[1].timestamp, Utc.timestamp(202, 0).timestamp()); 72 | 73 | assert_eq!(res[2].kind(), AggregationMethod::Set); 74 | assert_eq!(res[2].name, "thr"); 75 | assert_eq!(res[2].value(), Some(3.0)); 76 | assert_eq!(res[2].timestamp, Utc.timestamp(303, 0).timestamp()); 77 | 78 | assert_eq!(res[3].kind(), AggregationMethod::Set); 79 | assert_eq!(res[3].name, "fth@fth"); 80 | assert_eq!(res[3].value(), Some(4.0)); 81 | assert_eq!(res[3].timestamp, Utc.timestamp(404, 0).timestamp()); 82 | 83 | assert_eq!(res[4].kind(), AggregationMethod::Set); 84 | assert_eq!(res[4].name, "fv%fv"); 85 | assert_eq!(res[4].value(), Some(5.0)); 86 | assert_eq!(res[4].timestamp, Utc.timestamp(505, 0).timestamp()); 87 | 88 | assert_eq!(res[5].kind(), AggregationMethod::Set); 89 | assert_eq!(res[5].name, "s-th"); 90 | assert_eq!(res[5].value(), Some(6.0)); 91 | assert_eq!(res[5].timestamp, Utc.timestamp(606, 0).timestamp()); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/protocols/mod.rs: -------------------------------------------------------------------------------- 1 | //! The input protocols that cernan must parse. Not all sources are reflected 2 | //! here. These modules are used by the sources to do their work. 3 | 4 | #![allow(renamed_and_removed_lints)] 5 | 6 | pub mod graphite; 7 | pub mod native; 8 | pub mod prometheus; 9 | pub mod statsd; 10 | -------------------------------------------------------------------------------- /src/sink/console.rs: -------------------------------------------------------------------------------- 1 | //! Console Event logger. 
2 | 3 | use crate::buckets::Buckets; 4 | use crate::metric::{AggregationMethod, LogLine, TagMap, Telemetry}; 5 | use crate::sink::{Sink, Valve}; 6 | use crate::source::flushes_per_second; 7 | use chrono::naive::NaiveDateTime; 8 | use chrono::offset::Utc; 9 | use chrono::DateTime; 10 | 11 | /// The 'console' sink exists for development convenience. The sink will 12 | /// aggregate according to [buckets](../buckets/struct.Buckets.html) method and 13 | /// print each `flush-interval` to stdout. 14 | pub struct Console { 15 | aggrs: Buckets, 16 | buffer: Vec, 17 | flush_interval: u64, 18 | tags: TagMap, 19 | } 20 | 21 | /// The configuration struct for Console. There's not a whole lot to configure 22 | /// here, independent of other sinks, but Console does do aggregations and that 23 | /// requires knowing what the user wants for `bin_width`. 24 | #[derive(Clone, Debug, Deserialize)] 25 | pub struct ConsoleConfig { 26 | /// The sink's unique name in the routing topology. 27 | pub config_path: Option, 28 | /// Sets the bin width for Console's underlying 29 | /// [bucket](../buckets/struct.Bucket.html). 30 | pub bin_width: i64, 31 | /// The tags to be applied to all `metric::Event`s streaming through this 32 | /// sink. These tags will overwrite any tags carried by the `metric::Event` 33 | /// itself. 34 | pub tags: TagMap, 35 | /// The sink specific `flush_interval`. 36 | pub flush_interval: u64, 37 | } 38 | 39 | impl Default for ConsoleConfig { 40 | fn default() -> ConsoleConfig { 41 | ConsoleConfig { 42 | bin_width: 1, 43 | flush_interval: 60 * flushes_per_second(), 44 | config_path: None, 45 | tags: TagMap::default(), 46 | } 47 | } 48 | } 49 | 50 | impl ConsoleConfig { 51 | /// Convenience method to create a ConsoleConfig with `bin_width` equal to 52 | /// 1. 
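/// A minimal sketch:
///
/// ```
/// use cernan::metric::TagMap;
/// use cernan::sink::ConsoleConfig;
///
/// let config = ConsoleConfig::new("sinks.console".to_string(), 60, TagMap::default());
/// assert_eq!(config.bin_width, 1);
/// ```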
53 | pub fn new( 54 | config_path: String, 55 | flush_interval: u64, 56 | tags: TagMap, 57 | ) -> ConsoleConfig { 58 | ConsoleConfig { 59 | config_path: Some(config_path), 60 | bin_width: 1, 61 | flush_interval, 62 | tags, 63 | } 64 | } 65 | } 66 | 67 | impl Sink for Console { 68 | fn init(config: ConsoleConfig) -> Self { 69 | Console { 70 | aggrs: Buckets::new(config.bin_width), 71 | buffer: Vec::new(), 72 | flush_interval: config.flush_interval, 73 | tags: config.tags, 74 | } 75 | } 76 | 77 | fn valve_state(&self) -> Valve { 78 | Valve::Open 79 | } 80 | 81 | fn deliver(&mut self, point: Telemetry) { 82 | self.aggrs.add(point); 83 | } 84 | 85 | fn deliver_line(&mut self, line: LogLine) { 86 | self.buffer.append(&mut vec![line]); 87 | } 88 | 89 | fn flush_interval(&self) -> Option { 90 | Some(self.flush_interval) 91 | } 92 | 93 | fn flush(&mut self) { 94 | println!("Flushing lines: {}", Utc::now().to_rfc3339()); 95 | for line in &self.buffer { 96 | print!("{} {}: {}", format_time(line.time), line.path, line.value); 97 | for (k, v) in line.tags(&self.tags) { 98 | print!(" {}={}", k, v); 99 | } 100 | println!(); 101 | } 102 | self.buffer.clear(); 103 | 104 | println!("Flushing metrics: {}", Utc::now().to_rfc3339()); 105 | 106 | let mut sums = String::new(); 107 | let mut sets = String::new(); 108 | let mut summaries = String::new(); 109 | let mut histograms = String::new(); 110 | 111 | for telem in self.aggrs.iter() { 112 | match telem.kind() { 113 | AggregationMethod::Histogram => { 114 | use quantiles::histogram::Bound; 115 | let tgt = &mut histograms; 116 | if let Some(bin_iter) = telem.bins() { 117 | for &(bound, val) in bin_iter { 118 | tgt.push_str(" "); 119 | tgt.push_str(&telem.name); 120 | tgt.push_str("_"); 121 | match bound { 122 | Bound::Finite(bnd) => { 123 | tgt.push_str(&bnd.to_string()); 124 | } 125 | Bound::PosInf => { 126 | tgt.push_str("pos_inf"); 127 | } 128 | }; 129 | tgt.push_str("("); 130 | tgt.push_str(&telem.timestamp.to_string()); 131 | tgt.push_str("): "); 132 | tgt.push_str(&val.to_string()); 133 | tgt.push_str("\n"); 134 | } 135 | } 136 | } 137 | AggregationMethod::Sum => { 138 | let tgt = &mut sums; 139 | if let Some(f) = telem.sum() { 140 | tgt.push_str(" "); 141 | tgt.push_str(&telem.name); 142 | tgt.push_str("("); 143 | tgt.push_str(&telem.timestamp.to_string()); 144 | tgt.push_str("): "); 145 | tgt.push_str(&f.to_string()); 146 | tgt.push_str("\n"); 147 | } 148 | } 149 | AggregationMethod::Set => { 150 | let tgt = &mut sets; 151 | if let Some(f) = telem.set() { 152 | tgt.push_str(" "); 153 | tgt.push_str(&telem.name); 154 | tgt.push_str("("); 155 | tgt.push_str(&telem.timestamp.to_string()); 156 | tgt.push_str("): "); 157 | tgt.push_str(&f.to_string()); 158 | tgt.push_str("\n"); 159 | } 160 | } 161 | AggregationMethod::Summarize => { 162 | let tgt = &mut summaries; 163 | for tup in &[ 164 | ("min", 0.0), 165 | ("max", 1.0), 166 | ("50", 0.5), 167 | ("90", 0.90), 168 | ("99", 0.99), 169 | ("999", 0.999), 170 | ] { 171 | let stat: &str = tup.0; 172 | let quant: f64 = tup.1; 173 | if let Some(f) = telem.query(quant) { 174 | tgt.push_str(" "); 175 | tgt.push_str(&telem.name); 176 | tgt.push_str(": "); 177 | tgt.push_str(stat); 178 | tgt.push_str(" "); 179 | tgt.push_str(&f.to_string()); 180 | tgt.push_str("\n"); 181 | } 182 | } 183 | } 184 | } 185 | } 186 | println!(" sums:"); 187 | print!("{}", sums); 188 | println!(" sets:"); 189 | print!("{}", sets); 190 | println!(" summaries:"); 191 | print!("{}", summaries); 192 | println!(" histograms:"); 193 | 
print!("{}", histograms); 194 | 195 | self.aggrs.reset(); 196 | } 197 | 198 | fn shutdown(mut self) { 199 | self.flush(); 200 | } 201 | } 202 | 203 | #[inline] 204 | fn format_time(time: i64) -> String { 205 | let naive_time = NaiveDateTime::from_timestamp(time, 0); 206 | let utc_time: DateTime = DateTime::from_utc(naive_time, Utc); 207 | format!("{}", utc_time.format("%Y-%m-%dT%H:%M:%S%.3fZ")) 208 | } 209 | -------------------------------------------------------------------------------- /src/sink/elasticsearch.rs: -------------------------------------------------------------------------------- 1 | //! `ElasticSearch` is a documentation indexing engine. 2 | 3 | use crate::metric::{LogLine, TagMap}; 4 | use crate::sink::{Sink, Valve}; 5 | use crate::source::flushes_per_second; 6 | use chrono::naive::NaiveDateTime; 7 | use chrono::offset::Utc; 8 | use chrono::DateTime; 9 | use elastic::client::responses::bulk; 10 | use elastic::error; 11 | use elastic::error::Result; 12 | use elastic::prelude::*; 13 | use std::cmp; 14 | use std::error::Error; 15 | use std::sync::atomic::{AtomicUsize, Ordering}; 16 | use uuid; 17 | 18 | /// Total deliveries made 19 | pub static ELASTIC_RECORDS_DELIVERY: AtomicUsize = AtomicUsize::new(0); 20 | /// Total internal buffer entries 21 | pub static ELASTIC_INTERNAL_BUFFER_LEN: AtomicUsize = AtomicUsize::new(0); 22 | /// Total records delivered in the last delivery 23 | pub static ELASTIC_RECORDS_TOTAL_DELIVERED: AtomicUsize = AtomicUsize::new(0); 24 | /// Total records that failed to be delivered due to error 25 | pub static ELASTIC_RECORDS_TOTAL_FAILED: AtomicUsize = AtomicUsize::new(0); 26 | /// Unknown error occurred during attempted flush 27 | pub static ELASTIC_ERROR_UNKNOWN: AtomicUsize = AtomicUsize::new(0); 28 | /// Total number of index bulk action errors 29 | pub static ELASTIC_BULK_ACTION_INDEX_ERR: AtomicUsize = AtomicUsize::new(0); 30 | /// Total number of create bulk action errors 31 | pub static ELASTIC_BULK_ACTION_CREATE_ERR: AtomicUsize = AtomicUsize::new(0); 32 | /// Total number of update bulk action errors 33 | pub static ELASTIC_BULK_ACTION_UPDATE_ERR: AtomicUsize = AtomicUsize::new(0); 34 | /// Total number of delete bulk action errors 35 | pub static ELASTIC_BULK_ACTION_DELETE_ERR: AtomicUsize = AtomicUsize::new(0); 36 | 37 | /// Total number of api errors due to index not found 38 | pub static ELASTIC_ERROR_API_INDEX_NOT_FOUND: AtomicUsize = AtomicUsize::new(0); 39 | /// Total number of api errors due to parsing 40 | pub static ELASTIC_ERROR_API_PARSING: AtomicUsize = AtomicUsize::new(0); 41 | /// Total number of api errors due to mapper parsing 42 | pub static ELASTIC_ERROR_API_MAPPER_PARSING: AtomicUsize = AtomicUsize::new(0); 43 | /// Total number of api errors due to action request validation 44 | pub static ELASTIC_ERROR_API_ACTION_REQUEST_VALIDATION: AtomicUsize = 45 | AtomicUsize::new(0); 46 | /// Total number of api errors due to missing document 47 | pub static ELASTIC_ERROR_API_DOCUMENT_MISSING: AtomicUsize = AtomicUsize::new(0); 48 | /// Total number of api errors due to index already existing 49 | pub static ELASTIC_ERROR_API_INDEX_ALREADY_EXISTS: AtomicUsize = AtomicUsize::new(0); 50 | /// Total number of api errors due to unknown reasons 51 | pub static ELASTIC_ERROR_API_UNKNOWN: AtomicUsize = AtomicUsize::new(0); 52 | /// Total number of client errors, no specific reasons 53 | pub static ELASTIC_ERROR_CLIENT: AtomicUsize = AtomicUsize::new(0); 54 | 55 | /// Configuration for the Elasticsearch sink 56 | /// 57 | /// 
Elasticsearch is an open-source document indexing engine. It can be used for 58 | /// performing searches over corpus, which for cernan's use is 59 | /// `metric::LogLine`. 60 | #[derive(Debug, Clone)] 61 | pub struct ElasticsearchConfig { 62 | /// The unique name of the sink in the routing topology 63 | pub config_path: Option, 64 | /// The Elasticsearch index prefix. This prefix will be added to the 65 | /// automatically created date-based index of this sink. 66 | pub index_prefix: Option, 67 | /// The _type of the Elasticsearch index 68 | pub index_type: String, 69 | /// Determines whether to use HTTP or HTTPS when publishing to 70 | /// Elasticsearch. 71 | pub secure: bool, 72 | /// Determine how many times to attempt the delivery of a log line before 73 | /// dropping it from the buffer. Failures of a global bulk request does not 74 | /// count against this limit. 75 | pub delivery_attempt_limit: u8, 76 | /// The Elasticsearch host. May be an IP address or DNS hostname. 77 | pub host: String, 78 | /// The Elasticsearch port. 79 | pub port: usize, 80 | /// The sink's specific flush interval. 81 | pub flush_interval: u64, 82 | /// The tags to be applied to all `metric::Event`s streaming through this 83 | /// sink. These tags will overwrite any tags carried by the `metric::Event` 84 | /// itself. 85 | pub tags: TagMap, 86 | } 87 | 88 | impl Default for ElasticsearchConfig { 89 | fn default() -> Self { 90 | ElasticsearchConfig { 91 | config_path: Some("sinks.elasticsearch".to_string()), 92 | secure: false, 93 | host: "127.0.0.1".to_string(), 94 | index_prefix: None, 95 | index_type: "payload".to_string(), 96 | delivery_attempt_limit: 10, 97 | port: 9200, 98 | flush_interval: flushes_per_second(), 99 | tags: TagMap::default(), 100 | } 101 | } 102 | } 103 | 104 | struct Line { 105 | attempts: u8, 106 | uuid: uuid::Uuid, 107 | line: LogLine, 108 | } 109 | 110 | /// The elasticsearch sink struct. 111 | /// 112 | /// Refer to the documentation on `ElasticsearchConfig` for more details. 
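/// Each flush serializes the internal buffer into a single `_bulk`
/// request: for every buffered `LogLine`, an `index` action header line
/// followed by the JSON document payload.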
113 | pub struct Elasticsearch { 114 | buffer: Vec, 115 | secure: bool, 116 | host: String, 117 | port: usize, 118 | delivery_attempt_limit: u8, 119 | index_prefix: Option, 120 | index_type: String, 121 | flush_interval: u64, 122 | tags: TagMap, 123 | } 124 | 125 | impl Elasticsearch { 126 | fn bulk_body(&self, buffer: &mut String) { 127 | assert!(!self.buffer.is_empty()); 128 | use serde_json::{to_string, Value}; 129 | for m in &self.buffer { 130 | let uuid = m.uuid.hyphenated().to_string(); 131 | let line = &m.line; 132 | let header: Value = json!({ 133 | "index": { 134 | "_index" : idx(&self.index_prefix, line.time), 135 | "_type" : self.index_type.clone(), 136 | "_id" : uuid.clone(), 137 | } 138 | }); 139 | buffer.push_str(&to_string(&header).unwrap()); 140 | buffer.push('\n'); 141 | let mut payload: Value = json!({ 142 | "uuid": uuid, 143 | "path": line.path.clone(), 144 | "payload": line.value.clone(), 145 | "timestamp": format_time(line.time), 146 | }); 147 | let obj = payload.as_object_mut().unwrap(); 148 | for (k, v) in line.tags(&self.tags) { 149 | obj.insert(k.clone(), Value::String(v.clone())); 150 | } 151 | for (k, v) in &line.fields { 152 | obj.insert(k.clone(), Value::String(v.clone())); 153 | } 154 | buffer.push_str(&to_string(&obj).unwrap()); 155 | buffer.push('\n'); 156 | } 157 | } 158 | } 159 | 160 | impl Sink for Elasticsearch { 161 | fn init(config: ElasticsearchConfig) -> Self { 162 | Elasticsearch { 163 | buffer: Vec::new(), 164 | secure: config.secure, 165 | host: config.host, 166 | port: config.port, 167 | index_prefix: config.index_prefix, 168 | index_type: config.index_type, 169 | delivery_attempt_limit: config.delivery_attempt_limit, 170 | flush_interval: config.flush_interval, 171 | tags: config.tags, 172 | } 173 | } 174 | 175 | fn flush_interval(&self) -> Option { 176 | Some(self.flush_interval) 177 | } 178 | 179 | #[allow(clippy::cyclomatic_complexity)] 180 | fn flush(&mut self) { 181 | if self.buffer.is_empty() { 182 | return; 183 | } 184 | 185 | let proto = if self.secure { "https" } else { "http" }; 186 | let params = 187 | RequestParams::new(format!("{}://{}:{}", proto, self.host, self.port)); 188 | let client = SyncClientBuilder::from_params(params).build().unwrap(); 189 | 190 | let mut buffer = String::with_capacity(4048); 191 | self.bulk_body(&mut buffer); 192 | if let Ok(snd) = client.request(BulkRequest::new(buffer)).send() { 193 | let bulk_resp: Result = snd.into_response::(); 194 | ELASTIC_INTERNAL_BUFFER_LEN.store(self.buffer.len(), Ordering::Relaxed); 195 | match bulk_resp { 196 | Ok(bulk) => { 197 | ELASTIC_RECORDS_DELIVERY.fetch_add(1, Ordering::Relaxed); 198 | for item in bulk.iter() { 199 | match item { 200 | Ok(item) => { 201 | let uuid = uuid::Uuid::parse_str(item.id()) 202 | .expect("catastrophic error, TID not a UUID"); 203 | let mut idx = 0; 204 | for i in 0..self.buffer.len() { 205 | match self.buffer[i].uuid.cmp(&uuid) { 206 | cmp::Ordering::Equal => { 207 | break; 208 | } 209 | _ => idx += 1, 210 | } 211 | } 212 | self.buffer.remove(idx); 213 | ELASTIC_RECORDS_TOTAL_DELIVERED 214 | .fetch_add(1, Ordering::Relaxed); 215 | } 216 | Err(item) => { 217 | let uuid = uuid::Uuid::parse_str(item.id()) 218 | .expect("catastrophic error, TID not a UUID"); 219 | let mut idx = 0; 220 | for i in 0..self.buffer.len() { 221 | match self.buffer[i].uuid.cmp(&uuid) { 222 | cmp::Ordering::Equal => { 223 | break; 224 | } 225 | _ => idx += 1, 226 | } 227 | } 228 | self.buffer[idx].attempts += 1; 229 | if self.buffer[idx].attempts 230 | > 
self.delivery_attempt_limit 231 | { 232 | self.buffer.remove(idx); 233 | } 234 | ELASTIC_RECORDS_TOTAL_FAILED 235 | .fetch_add(1, Ordering::Relaxed); 236 | if let Some(source) = item.source() { 237 | debug!( 238 | "Failed to write item with error {}, source {}", 239 | item.description(), 240 | source 241 | ); 242 | } else { 243 | debug!( 244 | "Failed to write item with error {}", 245 | item.description() 246 | ); 247 | } 248 | match item.action() { 249 | bulk::Action::Index => { 250 | ELASTIC_BULK_ACTION_INDEX_ERR 251 | .fetch_add(1, Ordering::Relaxed) 252 | } 253 | bulk::Action::Create => { 254 | ELASTIC_BULK_ACTION_CREATE_ERR 255 | .fetch_add(1, Ordering::Relaxed) 256 | } 257 | bulk::Action::Update => { 258 | ELASTIC_BULK_ACTION_UPDATE_ERR 259 | .fetch_add(1, Ordering::Relaxed) 260 | } 261 | bulk::Action::Delete => { 262 | ELASTIC_BULK_ACTION_DELETE_ERR 263 | .fetch_add(1, Ordering::Relaxed) 264 | } 265 | }; 266 | } 267 | } 268 | } 269 | } 270 | Err(err) => { 271 | match err { 272 | error::Error::Api(ref api_error) => { 273 | use elastic::error::ApiError; 274 | match *api_error { 275 | ApiError::IndexNotFound { ref index } => { 276 | ELASTIC_ERROR_API_INDEX_NOT_FOUND 277 | .fetch_add(1, Ordering::Relaxed); 278 | debug!("Unable to write, API Error (Index Not Found): {}", index); 279 | } 280 | ApiError::Parsing { ref reason, .. } => { 281 | ELASTIC_ERROR_API_PARSING 282 | .fetch_add(1, Ordering::Relaxed); 283 | debug!( 284 | "Unable to write, API Error (Parsing): {}", 285 | reason 286 | ); 287 | } 288 | ApiError::MapperParsing { ref reason, .. } => { 289 | ELASTIC_ERROR_API_MAPPER_PARSING 290 | .fetch_add(1, Ordering::Relaxed); 291 | debug!("Unable to write, API Error (Mapper Parsing): {}", reason); 292 | } 293 | ApiError::ActionRequestValidation { 294 | ref reason, 295 | .. 296 | } => { 297 | ELASTIC_ERROR_API_ACTION_REQUEST_VALIDATION 298 | .fetch_add(1, Ordering::Relaxed); 299 | debug!( 300 | "Unable to write, API Error (Action Request Validation): {}", 301 | reason 302 | ); 303 | } 304 | ApiError::DocumentMissing { ref index, .. } => { 305 | ELASTIC_ERROR_API_DOCUMENT_MISSING 306 | .fetch_add(1, Ordering::Relaxed); 307 | debug!("Unable to write, API Error (Document Missing): {}", index); 308 | } 309 | ApiError::IndexAlreadyExists { ref index, .. 
} => { 310 | ELASTIC_ERROR_API_INDEX_ALREADY_EXISTS 311 | .fetch_add(1, Ordering::Relaxed); 312 | debug!( 313 | "Unable to write, API Error (Index Already Exists): {}", 314 | index 315 | ); 316 | } 317 | _ => { 318 | ELASTIC_ERROR_API_UNKNOWN 319 | .fetch_add(1, Ordering::Relaxed); 320 | debug!("Unable to write, API Error (Unknown)"); 321 | } 322 | } 323 | } 324 | error::Error::Client(ref client_error) => { 325 | ELASTIC_ERROR_CLIENT.fetch_add(1, Ordering::Relaxed); 326 | debug!( 327 | "Unable to write, client error: {}", 328 | client_error.description() 329 | ); 330 | } 331 | } 332 | } 333 | } 334 | } 335 | } 336 | 337 | fn shutdown(mut self) { 338 | self.flush(); 339 | } 340 | 341 | fn deliver_line(&mut self, line: LogLine) { 342 | let uuid = uuid::Uuid::new_v4(); 343 | self.buffer.push(Line { 344 | uuid: uuid, 345 | line: line, 346 | attempts: 0, 347 | }); 348 | } 349 | 350 | fn valve_state(&self) -> Valve { 351 | if self.buffer.len() > 10_000 { 352 | Valve::Closed 353 | } else { 354 | Valve::Open 355 | } 356 | } 357 | } 358 | 359 | #[inline] 360 | fn format_time(time: i64) -> String { 361 | let naive_time = NaiveDateTime::from_timestamp(time, 0); 362 | let utc_time: DateTime = DateTime::from_utc(naive_time, Utc); 363 | format!("{}", utc_time.format("%+")) 364 | } 365 | 366 | #[inline] 367 | fn idx(prefix: &Option, time: i64) -> String { 368 | let naive_time = NaiveDateTime::from_timestamp(time, 0); 369 | let utc_time: DateTime = DateTime::from_utc(naive_time, Utc); 370 | match *prefix { 371 | Some(ref p) => format!("{}-{}", p, utc_time.format("%Y-%m-%d")), 372 | None => format!("{}", utc_time.format("%Y-%m-%d")), 373 | } 374 | } 375 | -------------------------------------------------------------------------------- /src/sink/mod.rs: -------------------------------------------------------------------------------- 1 | //! A 'sink' is a final destination for telemetry and log lines. That is, a 2 | //! 'sink' is that which is at the end of a `source -> filter -> filter -> 3 | //! ... -> sink` chain. The sink has no obligations with regard to the telemetry 4 | //! and log lines it receives, other than to receive them. Individual sinks make 5 | //! different choices. 6 | 7 | use crate::metric::{Encoding, Event, LogLine, Metadata, Telemetry}; 8 | use crate::thread; 9 | use crate::time; 10 | use crate::util::Valve; 11 | use hopper; 12 | use std::marker::PhantomData; 13 | use uuid::Uuid; 14 | 15 | mod console; 16 | pub mod elasticsearch; 17 | pub mod influxdb; 18 | pub mod kafka; 19 | mod native; 20 | mod null; 21 | pub mod prometheus; 22 | pub mod wavefront; 23 | 24 | pub use self::console::{Console, ConsoleConfig}; 25 | pub use self::elasticsearch::{Elasticsearch, ElasticsearchConfig}; 26 | pub use self::influxdb::{InfluxDB, InfluxDBConfig}; 27 | pub use self::kafka::{Kafka, KafkaConfig}; 28 | pub use self::native::{Native, NativeConfig}; 29 | pub use self::null::{Null, NullConfig}; 30 | pub use self::prometheus::{Prometheus, PrometheusConfig}; 31 | pub use self::wavefront::{Wavefront, WavefrontConfig}; 32 | 33 | /// Generic interface used to capture global sink configuration 34 | /// parameters as well as sink specific parameters. 35 | /// 36 | /// Stored configuration is consumed when the sink is spawned, 37 | /// resulting in a new thread executing the given sink. 
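/// A minimal sketch (hypothetical channel and names) of the intended
/// lifecycle, assuming a `hopper` receiver has already been built by the
/// routing topology:
///
/// ```ignore
/// let runnable = RunnableSink::<Null, NullConfig>::new(
///     recv,                                // hopper receiver of metric::Event
///     vec!["sources.statsd".to_string()],  // upstream source names (hypothetical)
///     NullConfig::new("sinks.null".to_string()),
/// );
/// let _handle = runnable.run(); // consumes self, spawns the sink thread
/// ```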
38 | pub struct RunnableSink 39 | where 40 | S: Send + Sink, 41 | SConfig: 'static + Send + Clone, 42 | { 43 | recv: hopper::Receiver, 44 | sources: Vec, 45 | state: S, 46 | 47 | // Yes, compiler, we know that we aren't storing 48 | // anything of type SConfig. 49 | config: PhantomData, 50 | } 51 | 52 | impl RunnableSink 53 | where 54 | S: 'static + Send + Sink, 55 | SConfig: 'static + Clone + Send, 56 | { 57 | /// Generic constructor for RunnableSink - execution wrapper around objects 58 | /// implementing Sink. 59 | pub fn new( 60 | recv: hopper::Receiver, 61 | sources: Vec, 62 | config: SConfig, 63 | ) -> RunnableSink { 64 | RunnableSink { 65 | recv: recv, 66 | sources: sources, 67 | state: S::init(config), 68 | config: PhantomData, 69 | } 70 | } 71 | 72 | /// Spawns / consumes the given stateful sink, returning the corresponding 73 | /// thread. 74 | pub fn run(self) -> thread::ThreadHandle { 75 | thread::spawn(move |_poll| { 76 | self.consume(); 77 | }) 78 | } 79 | 80 | fn consume(mut self) { 81 | let mut attempts = 0; 82 | let mut recv = self.recv.into_iter(); 83 | let mut last_flush_idx = 0; 84 | let mut total_shutdowns = 0; 85 | // The run-loop of a sink is two nested loops. The outer loop pulls a 86 | // value from the hopper queue. If that value is Some the inner loop 87 | // tries to do something with it, only discarding it at such time as 88 | // it's been delivered to the Sink. 89 | loop { 90 | let nxt = recv.next(); 91 | if nxt.is_none() { 92 | time::delay(attempts); 93 | attempts += 1; 94 | continue; 95 | } 96 | attempts = 0; 97 | let event = nxt.unwrap(); 98 | loop { 99 | // We have to be careful here not to dump a value until it's 100 | // already been delivered _and_ be sure we at least attempt to 101 | // make progress on delivery. There are two conditions we have 102 | // to look out for most carefully: 103 | // 104 | // 1. Is the valve_state closed? 105 | // 2. Does the flush_interval match our flush index? 106 | // 107 | // If the valve state is closed we attempt to flush the sink to 108 | // clear the valve, hold on to the value and loop around again 109 | // after a delay. If the flush_interval is Some and DOES match 110 | // then we flush. If the flush_interval is Some and DOES NOT 111 | // match then we do not flush. If the flush_interval is NONE 112 | // then we never flush. 113 | match self.state.valve_state() { 114 | Valve::Open => match event { 115 | Event::TimerFlush(idx) => { 116 | // Flush timers are interesting. The timer thread 117 | // sends a TimerFlush pulse once a second and it's 118 | // possible that a sink will have multiple Sources / 119 | // Filters pushing down into it. That means multiple 120 | // TimerFlush values for the same time index. 121 | // 122 | // What we do to avoid duplicating time pulses is 123 | // keep track of a 'last_flush_idx', our current 124 | // time and only reset to a new time when the idx in 125 | // the pulse is greater than the last one we've 126 | // seen. If it's not, we ignore it. 127 | if idx > last_flush_idx { 128 | // Now, because sinks will not want to flush 129 | // every timer pulse we query the flush_interval 130 | // of the sink. If the interval and the idx 131 | // match up, we flush. Else, not. 
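                                // For example (illustrative numbers): with
                                // flush_interval == 600, only pulses whose idx
                                // is a multiple of 600 trigger a flush below; a
                                // flush_interval of 0 flushes on every pulse,
                                // and a None from flush_interval() means pulses
                                // never trigger a flush at all.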
132 | if let Some(flush_interval) = 133 | self.state.flush_interval() 134 | { 135 | if flush_interval == 0 || idx % flush_interval == 0 136 | { 137 | self.state.flush(); 138 | } 139 | } 140 | last_flush_idx = idx; 141 | } 142 | break; 143 | } 144 | Event::Telemetry(metric) => { 145 | self.state.deliver(metric); 146 | break; 147 | } 148 | Event::Log(line) => { 149 | self.state.deliver_line(line); 150 | break; 151 | } 152 | Event::Raw { 153 | order_by, 154 | encoding, 155 | bytes, 156 | metadata, 157 | connection_id, 158 | } => { 159 | self.state.deliver_raw( 160 | order_by, 161 | encoding, 162 | bytes, 163 | metadata, 164 | connection_id, 165 | ); 166 | break; 167 | } 168 | Event::Shutdown => { 169 | // Invariant - In order to ensure at least once delivery 170 | // at the sink level, the following properties must hold: 171 | // 172 | // 1) An upstream source injects a Shutdown event after 173 | // all of its events have been processed. 174 | // 175 | // 2) Sources shutdown only after receiving Shutdown 176 | // from each of its 177 | // upstream sources/filters. 178 | total_shutdowns += 1; 179 | if total_shutdowns >= self.sources.len() { 180 | trace!( 181 | "Received shutdown from every configured source: {:?}", 182 | self.sources 183 | ); 184 | self.state.shutdown(); 185 | return; 186 | } 187 | } 188 | }, 189 | Valve::Closed => { 190 | self.state.flush(); 191 | continue; 192 | } 193 | } 194 | } 195 | } 196 | } 197 | } 198 | 199 | /// A 'sink' is a sink for metrics. 200 | pub trait Sink 201 | where 202 | Self: 'static + Send + Sized, 203 | SConfig: 'static + Send + Clone, 204 | { 205 | /// Generic constructor for sinks implementing this trait. 206 | fn new( 207 | recv: hopper::Receiver, 208 | sources: Vec, 209 | config: SConfig, 210 | ) -> RunnableSink { 211 | RunnableSink::::new(recv, sources, config) 212 | } 213 | 214 | /// Constructs a new sink. 215 | fn init(config: SConfig) -> Self; 216 | 217 | /// Lookup the `Sink`'s specific flush interval. This determines how often a 218 | /// sink will obey the periodic flush pulse. 219 | /// 220 | /// If the value is `None` this is a signal that the sink will NEVER flush 221 | /// EXCEPT in the case where the sink's valve_state is Closed. 222 | fn flush_interval(&self) -> Option; 223 | /// Perform the `Sink` specific flush. The rate at which this occurs is 224 | /// determined by the global `flush_interval` or the sink specific flush 225 | /// interval. Pulses occur at a rate of once per second, subject to 226 | /// communication delays in the routing topology. 227 | fn flush(&mut self) -> (); 228 | /// Lookup the `Sink` valve state. See `Valve` documentation for more 229 | /// information. 230 | fn valve_state(&self) -> Valve { 231 | // never close up shop 232 | Valve::Open 233 | } 234 | /// Deliver a `Telemetry` to the `Sink`. Exact behaviour varies by 235 | /// implementation. 236 | fn deliver(&mut self, _telem: Telemetry) { 237 | // nothing, intentionally 238 | } 239 | /// Deliver a `LogLine` to the `Sink`. Exact behaviour varies by 240 | /// implementation. 241 | fn deliver_line(&mut self, _line: LogLine) { 242 | // nothing, intentionally 243 | } 244 | /// Deliver a 'Raw' series of encoded bytes to the sink. 245 | fn deliver_raw( 246 | &mut self, 247 | _order_by: u64, 248 | _encoding: Encoding, 249 | _bytes: Vec, 250 | _metadata: Option, 251 | _connection_id: Option, 252 | ) { 253 | // Not all sinks accept raw events. By default, we do nothing. 254 | } 255 | /// Provide a hook to shutdown a sink. 
This is necessary for sinks which 256 | /// have their own long-running threads. 257 | fn shutdown(self) -> (); 258 | } 259 | -------------------------------------------------------------------------------- /src/sink/native.rs: -------------------------------------------------------------------------------- 1 | //! Sink for Cernan's native protocol. 2 | 3 | use crate::metric; 4 | use crate::protocols::native::{AggregationMethod, LogLine, Payload, Telemetry}; 5 | use crate::sink::Sink; 6 | use crate::source::flushes_per_second; 7 | use crate::time; 8 | use byteorder::{BigEndian, ByteOrder}; 9 | use protobuf::repeated::RepeatedField; 10 | use protobuf::stream::CodedOutputStream; 11 | use protobuf::Message; 12 | use std::collections::HashMap; 13 | use std::io::BufWriter; 14 | use std::mem::replace; 15 | use std::net::{TcpStream, ToSocketAddrs}; 16 | 17 | /// The native sink 18 | /// 19 | /// This sink is the pair to the native source. The native source/sink use or 20 | /// consume cernan's native protocol, defined 21 | /// `resources/protobufs/native.proto`. Clients may use the native protocol 22 | /// without having to obey the translation required in other sources or 23 | /// operators may set up cernan to cernan communication. 24 | pub struct Native { 25 | port: u16, 26 | host: String, 27 | buffer: Vec, 28 | flush_interval: u64, 29 | delivery_attempts: u32, 30 | stream: Option, 31 | tags: metric::TagMap, 32 | } 33 | 34 | /// Configuration for the native sink 35 | #[derive(Clone, Debug, Deserialize)] 36 | pub struct NativeConfig { 37 | /// The port to communicate with the native host 38 | pub port: u16, 39 | /// The native cernan host to communicate with. May be an IP address or DNS 40 | /// hostname. 41 | pub host: String, 42 | /// The sink's unique name in the routing topology. 43 | pub config_path: Option, 44 | /// The sink's specific flush interval. 45 | pub flush_interval: u64, 46 | /// The tags to be applied to all `metric::Event`s streaming through this 47 | /// sink. These tags will overwrite any tags carried by the `metric::Event` 48 | /// itself. 
49 | pub tags: metric::TagMap, 50 | } 51 | 52 | impl Default for NativeConfig { 53 | fn default() -> Self { 54 | NativeConfig { 55 | port: 1972, 56 | host: "localhost".to_string(), 57 | config_path: None, 58 | flush_interval: 60 * flushes_per_second(), 59 | tags: metric::TagMap::default(), 60 | } 61 | } 62 | } 63 | 64 | fn connect(host: &str, port: u16) -> Option { 65 | let addrs = (host, port).to_socket_addrs(); 66 | match addrs { 67 | Ok(srv) => { 68 | let ips: Vec<_> = srv.collect(); 69 | for ip in ips { 70 | match TcpStream::connect(ip) { 71 | Ok(stream) => return Some(stream), 72 | Err(e) => info!( 73 | "Unable to connect to proxy at {} using addr {} with error \ 74 | {}", 75 | host, ip, e 76 | ), 77 | } 78 | } 79 | None 80 | } 81 | Err(e) => { 82 | info!( 83 | "Unable to perform DNS lookup on host {} with error {}", 84 | host, e 85 | ); 86 | None 87 | } 88 | } 89 | } 90 | 91 | impl Sink for Native { 92 | fn init(config: NativeConfig) -> Self { 93 | let stream = connect(&config.host, config.port); 94 | Native { 95 | port: config.port, 96 | host: config.host, 97 | buffer: Vec::new(), 98 | flush_interval: config.flush_interval, 99 | delivery_attempts: 0, 100 | stream, 101 | tags: config.tags, 102 | } 103 | } 104 | 105 | fn deliver(&mut self, telemetry: metric::Telemetry) { 106 | self.buffer.push(metric::Event::Telemetry(telemetry)); 107 | } 108 | 109 | fn deliver_line(&mut self, line: metric::LogLine) { 110 | self.buffer.push(metric::Event::Log(line)); 111 | } 112 | 113 | fn flush_interval(&self) -> Option { 114 | Some(self.flush_interval) 115 | } 116 | 117 | fn flush(&mut self) { 118 | let mut points = Vec::with_capacity(1024); 119 | let mut lines = Vec::with_capacity(1024); 120 | 121 | for ev in self.buffer.drain(..) { 122 | match ev { 123 | metric::Event::Telemetry(mut m) => { 124 | let mut telem = Telemetry::new(); 125 | telem.set_name(replace(&mut m.name, Default::default())); 126 | let method = match m.kind() { 127 | metric::AggregationMethod::Histogram => AggregationMethod::BIN, 128 | metric::AggregationMethod::Sum => AggregationMethod::SUM, 129 | metric::AggregationMethod::Set => AggregationMethod::SET, 130 | metric::AggregationMethod::Summarize => { 131 | AggregationMethod::SUMMARIZE 132 | } 133 | }; 134 | let persist = m.persist; 135 | telem.set_persisted(persist); 136 | telem.set_method(method); 137 | let mut meta = HashMap::new(); 138 | // TODO 139 | // 140 | // Learn how to consume bits of the metric without having to 141 | // clone like crazy 142 | for (k, v) in m.tags(&self.tags) { 143 | meta.insert(k.to_string(), v.to_string()); 144 | } 145 | telem.set_metadata(meta); 146 | telem.set_timestamp_ms(m.timestamp * 1000); // FIXME #166 147 | telem.set_samples(m.samples()); 148 | // TODO set bin_bounds. 
What we do is set the counts for the 149 | // bins as set_samples above, then bin_bounds comes from 150 | // elsewhere 151 | points.push(telem); 152 | } 153 | metric::Event::Log(l) => { 154 | let mut ll = LogLine::new(); 155 | let mut meta = HashMap::new(); 156 | // TODO 157 | // 158 | // Learn how to consume bits of the metric without having to 159 | // clone like crazy 160 | for (k, v) in l.tags(&self.tags) { 161 | meta.insert(k.clone(), v.clone()); 162 | } 163 | ll.set_path(l.path); 164 | ll.set_value(l.value); 165 | ll.set_metadata(meta); 166 | ll.set_timestamp_ms(l.time * 1000); // FIXME #166 167 | 168 | lines.push(ll); 169 | } 170 | _ => {} 171 | } 172 | } 173 | 174 | let mut pyld = Payload::new(); 175 | pyld.set_points(RepeatedField::from_vec(points)); 176 | pyld.set_lines(RepeatedField::from_vec(lines)); 177 | 178 | loop { 179 | let mut delivery_failure = false; 180 | if let Some(ref mut stream) = self.stream { 181 | let mut bufwrite = BufWriter::new(stream); 182 | let mut stream = CodedOutputStream::new(&mut bufwrite); 183 | let mut sz_buf = [0; 4]; 184 | let pyld_len = pyld.compute_size(); 185 | BigEndian::write_u32(&mut sz_buf, pyld_len); 186 | stream.write_raw_bytes(&sz_buf).unwrap(); 187 | let res = pyld.write_to_with_cached_sizes(&mut stream); 188 | if res.is_ok() { 189 | self.buffer.clear(); 190 | return; 191 | } else { 192 | self.delivery_attempts = self.delivery_attempts.saturating_add(1); 193 | delivery_failure = true; 194 | } 195 | } else { 196 | time::delay(self.delivery_attempts); 197 | self.stream = connect(&self.host, self.port); 198 | } 199 | if delivery_failure { 200 | self.stream = None 201 | } 202 | } 203 | } 204 | 205 | fn shutdown(mut self) { 206 | self.flush(); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/sink/null.rs: -------------------------------------------------------------------------------- 1 | //! Sink equivalent of /dev/null. 2 | use crate::sink::{Sink, Valve}; 3 | 4 | /// Null sink 5 | /// 6 | /// This sink is intended for testing and demonstration. Every `metric::Event` 7 | /// it receives will be deallocated. 8 | pub struct Null {} 9 | 10 | /// Configuration for the `Null` sink 11 | #[derive(Clone, Debug, Deserialize)] 12 | pub struct NullConfig { 13 | /// The sink's unique name in the routing topology. 
14 | pub config_path: String, 15 | } 16 | 17 | impl NullConfig { 18 | /// Create a new `NullConfig` 19 | pub fn new(config_path: String) -> NullConfig { 20 | NullConfig { config_path } 21 | } 22 | } 23 | 24 | impl Sink<NullConfig> for Null { 25 | fn init(_config: NullConfig) -> Self { 26 | Null {} 27 | } 28 | 29 | fn valve_state(&self) -> Valve { 30 | Valve::Open 31 | } 32 | 33 | fn flush_interval(&self) -> Option<u64> { 34 | Some(1) 35 | } 36 | 37 | fn flush(&mut self) { 38 | // do nothing 39 | } 40 | 41 | fn shutdown(mut self) { 42 | self.flush(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/source/file/file_server.rs: -------------------------------------------------------------------------------- 1 | use crate::metric; 2 | use crate::source; 3 | use crate::source::file::file_watcher::FileWatcher; 4 | use crate::source::internal::report_full_telemetry; 5 | use crate::util; 6 | use crate::util::send; 7 | use glob::glob; 8 | use mio; 9 | use std::mem; 10 | use std::path::PathBuf; 11 | use std::str; 12 | use std::time; 13 | 14 | /// `FileServer` is a Source which cooperatively schedules reads over files, 15 | /// converting the lines of said files into `LogLine` structures. As 16 | /// `FileServer` is intended to be useful across multiple operating systems with 17 | /// POSIX filesystem semantics `FileServer` must poll for changes. That is, no 18 | /// event notification is used by `FileServer`. 19 | /// 20 | /// `FileServer` is configured on a path to watch. The files do _not_ need to 21 | /// exist at cernan startup. `FileServer` will discover new files which match 22 | /// its path in at most 60 seconds. 23 | pub struct FileServer { 24 | pattern: PathBuf, 25 | max_read_bytes: usize, 26 | } 27 | 28 | /// The configuration struct for `FileServer`. 29 | #[derive(Clone, Debug, Deserialize)] 30 | pub struct FileServerConfig { 31 | /// The path that `FileServer` will watch. Globs are allowed and 32 | /// `FileServer` will watch multiple files. 33 | pub path: Option<PathBuf>, 34 | /// The maximum number of bytes to read from a file before switching to a 35 | /// new file. 36 | pub max_read_bytes: usize, 37 | /// The forwards which `FileServer` will obey. 38 | pub forwards: Vec<String>, 39 | /// The configured name of FileServer. 40 | pub config_path: Option<String>, 41 | } 42 | 43 | impl Default for FileServerConfig { 44 | fn default() -> Self { 45 | FileServerConfig { 46 | path: None, 47 | max_read_bytes: 2048, 48 | forwards: Vec::default(), 49 | config_path: None, 50 | } 51 | } 52 | } 53 | 54 | /// `FileServer` as Source 55 | /// 56 | /// The 'run' of `FileServer` performs the cooperative scheduling of reads over 57 | /// `FileServer`'s configured files. Much care has been taken to make this 58 | /// scheduling 'fair', meaning busy files do not drown out quiet files or vice 59 | /// versa, but there is no one perfect approach. Very fast files _will_ be lost if 60 | /// your system aggressively rolls log files. `FileServer` will keep a file 61 | /// handle open but should your system move so quickly that a file disappears 62 | /// before cernan is able to open it the contents will be lost. This should be a 63 | /// rare occurrence. 64 | /// 65 | /// Specific operating systems support evented interfaces that correct this 66 | /// problem but your intrepid authors know of no generic solution.
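/// A minimal sketch (hypothetical glob) of standing the source up by hand,
/// assuming the surrounding routing machinery supplies `chans` and `poller`:
///
/// ```ignore
/// use cernan::source::Source;
///
/// let config = FileServerConfig {
///     path: Some(std::path::PathBuf::from("/var/log/*.log")), // glob pattern
///     ..FileServerConfig::default()
/// };
/// FileServer::init(config).run(chans, poller);
/// ```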
67 | impl source::Source<FileServerConfig> for FileServer { 68 | /// Make a FileServer 69 | fn init(config: FileServerConfig) -> Self { 70 | let pattern = config.path.expect("must specify a 'path' for FileServer"); 71 | FileServer { 72 | pattern: pattern, 73 | max_read_bytes: config.max_read_bytes, 74 | } 75 | } 76 | 77 | fn run(self, mut chans: util::Channel, poller: mio::Poll) { 78 | let mut buffer = String::new(); 79 | 80 | let mut fp_map: util::HashMap<PathBuf, FileWatcher> = Default::default(); 81 | let mut fp_map_alt: util::HashMap<PathBuf, FileWatcher> = Default::default(); 82 | 83 | let mut backoff_cap: usize = 1; 84 | let mut lines = Vec::new(); 85 | // Alright friends, how does this work? 86 | // 87 | // We want to avoid burning up users' CPUs. To do this we sleep after 88 | // reading lines out of files. But! We want to be responsive as well. We 89 | // keep track of a 'backoff_cap' to decide how long we'll wait in any 90 | // given loop. This cap grows each time we fail to read lines in an 91 | // exponential fashion to some hard-coded cap. 92 | loop { 93 | let mut global_bytes_read: usize = 0; 94 | // glob poll 95 | for entry in glob(self.pattern.to_str().expect("no ability to glob")) 96 | .expect("Failed to read glob pattern") 97 | { 98 | if let Ok(path) = entry { 99 | let entry = fp_map.entry(path.clone()); 100 | if let Ok(fw) = FileWatcher::new(&path) { 101 | entry.or_insert(fw); 102 | }; 103 | } 104 | } 105 | // line polling 106 | for (path, mut watcher) in fp_map.drain() { 107 | let mut bytes_read: usize = 0; 108 | while let Ok(sz) = watcher.read_line(&mut buffer) { 109 | if sz > 0 { 110 | bytes_read += sz; 111 | lines.push(metric::LogLine::new( 112 | path.to_str().expect("not a valid path"), 113 | &buffer, 114 | )); 115 | buffer.clear(); 116 | } else { 117 | break; 118 | } 119 | if bytes_read > self.max_read_bytes { 120 | break; 121 | } 122 | } 123 | report_full_telemetry( 124 | "cernan.sources.file.bytes_read", 125 | bytes_read as f64, 126 | Some(vec![( 127 | "file_path", 128 | path.to_str().expect("not a valid path"), 129 | )]), 130 | ); 131 | // A FileWatcher is dead when the underlying file has 132 | // disappeared. If the FileWatcher is dead we don't move it into 133 | // fp_map_alt, which deallocates it. 134 | if !watcher.dead() { 135 | fp_map_alt.insert(path, watcher); 136 | } 137 | global_bytes_read = global_bytes_read.saturating_add(bytes_read); 138 | } 139 | for l in lines.drain(..) { 140 | send(&mut chans, metric::Event::new_log(l)); 141 | } 142 | // We've drained the live FileWatchers into fp_map_alt in the line 143 | // polling loop. Now we swap them back into fp_map so next time we 144 | // loop through we'll read from the live FileWatchers. 145 | mem::swap(&mut fp_map, &mut fp_map_alt); 146 | // When no bytes have been read we double the backoff_cap, 147 | // limited by the hard-coded cap. Else, we reset the backoff_cap to its 148 | // minimum on the assumption that next time through there will be 149 | // more lines to read promptly.
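            // For example (illustrative values): consecutive idle loops grow
            // the cap 1 -> 2 -> 4 -> ... -> 2_048, where it pins; a single
            // productive read snaps it back to 1.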
150 | if global_bytes_read == 0 { 151 | let lim = backoff_cap.saturating_mul(2); 152 | if lim > 2_048 { 153 | backoff_cap = 2_048; 154 | } else { 155 | backoff_cap = lim; 156 | } 157 | } else { 158 | backoff_cap = 1; 159 | } 160 | let backoff = backoff_cap.saturating_sub(global_bytes_read); 161 | let mut events = mio::Events::with_capacity(1024); 162 | match poller.poll( 163 | &mut events, 164 | Some(time::Duration::from_millis(backoff as u64)), 165 | ) { 166 | Err(e) => panic!(format!("Failed during poll {:?}", e)), 167 | Ok(0) => {} 168 | Ok(_num_events) => { 169 | // File server doesn't poll for anything other than SYSTEM events. 170 | // As currently there are no system events other than SHUTDOWN, 171 | // we immediately exit. 172 | send(&mut chans, metric::Event::Shutdown); 173 | return; 174 | } 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/source/file/file_watcher.rs: -------------------------------------------------------------------------------- 1 | use crate::source::internal::report_full_telemetry; 2 | use std::fs; 3 | use std::io; 4 | use std::io::BufRead; 5 | use std::io::Seek; 6 | use std::os::unix::fs::MetadataExt; 7 | use std::path::PathBuf; 8 | 9 | /// The `FileWatcher` struct defines the polling-based state machine which reads 10 | /// from a file path, transparently updating the underlying file descriptor when 11 | /// the file has been rolled over, as is common for logs. 12 | /// 13 | /// The `FileWatcher` is expected to live for the lifetime of the file 14 | /// path. `FileServer` is responsible for clearing away `FileWatchers` which no 15 | /// longer exist. 16 | pub struct FileWatcher { 17 | pub path: PathBuf, 18 | reader: Option<io::BufReader<fs::File>>, 19 | file_id: Option<(u64, u64)>, 20 | previous_size: u64, 21 | reopen: bool, 22 | } 23 | 24 | type Devno = u64; 25 | type Ino = u64; 26 | type FileId = (Devno, Ino); 27 | 28 | #[inline] 29 | fn file_id(path: &PathBuf) -> Option<FileId> { 30 | if let Ok(metadata) = fs::metadata(path) { 31 | let dev = metadata.dev(); 32 | let ino = metadata.ino(); 33 | Some((dev, ino)) 34 | } else { 35 | None 36 | } 37 | } 38 | 39 | impl FileWatcher { 40 | /// Create a new `FileWatcher` 41 | /// 42 | /// The input path will be used by `FileWatcher` to prime its state 43 | /// machine. A `FileWatcher` tracks _only one_ file. If the path does not yet 44 | /// exist the watcher starts in a pending state and begins reading once the file appears; any other IO error is returned to the caller.
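    /// A minimal sketch (hypothetical path) of the expected call pattern:
    ///
    /// ```ignore
    /// let path = std::path::PathBuf::from("/var/log/syslog");
    /// let mut watcher = FileWatcher::new(&path)?;
    /// let mut buffer = String::new();
    /// // The watcher seeks to the end of the file on open, so this reads 0
    /// // bytes until new writes arrive.
    /// let sz = watcher.read_line(&mut buffer)?;
    /// ```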
45 | pub fn new(path: &PathBuf) -> io::Result { 46 | match fs::File::open(&path) { 47 | Ok(f) => { 48 | let metadata = f.metadata()?; 49 | let dev = metadata.dev(); 50 | let ino = metadata.ino(); 51 | let mut rdr = io::BufReader::new(f); 52 | assert!(rdr.seek(io::SeekFrom::End(0)).is_ok()); 53 | Ok(FileWatcher { 54 | path: path.clone(), 55 | reader: Some(rdr), 56 | file_id: Some((dev, ino)), 57 | previous_size: 0, 58 | reopen: false, 59 | }) 60 | } 61 | Err(e) => match e.kind() { 62 | io::ErrorKind::NotFound => { 63 | let fw = { 64 | FileWatcher { 65 | path: path.clone(), 66 | reader: None, 67 | file_id: None, 68 | previous_size: 0, 69 | reopen: false, 70 | } 71 | }; 72 | Ok(fw) 73 | } 74 | _ => Err(e), 75 | }, 76 | } 77 | } 78 | 79 | fn open_at_start(&mut self) { 80 | if let Ok(f) = fs::File::open(&self.path) { 81 | let metadata = f.metadata().unwrap(); // we _must_ be able to read the metadata 82 | let dev = metadata.dev(); 83 | let ino = metadata.ino(); 84 | self.file_id = Some((dev, ino)); 85 | self.previous_size = metadata.size(); 86 | self.reader = Some(io::BufReader::new(f)); 87 | if self.file_id.is_none() { 88 | // It's possible that between opening the file and reading its 89 | // ID the file will have been deleted. This is that branch. 90 | self.file_id = None; 91 | self.reader = None; 92 | } else { 93 | report_full_telemetry( 94 | "cernan.sources.file.switch", 95 | 1.0, 96 | Some(vec![( 97 | "file_path", 98 | self.path.to_str().expect("could not make path"), 99 | )]), 100 | ); 101 | } 102 | } else { 103 | self.reader = None; 104 | self.file_id = None; 105 | } 106 | self.reopen = false; 107 | } 108 | 109 | pub fn dead(&self) -> bool { 110 | self.reader.is_none() && self.file_id.is_none() 111 | } 112 | 113 | /// Read a single line from the underlying file 114 | /// 115 | /// This function will attempt to read a new line from its file, blocking, 116 | /// up to some maximum but unspecified amount of time. `read_line` will open 117 | /// a new file handler at need, transparently to the caller. 118 | pub fn read_line(&mut self, mut buffer: &mut String) -> io::Result { 119 | if self.reopen { 120 | self.open_at_start(); 121 | } 122 | if let Some(ref mut reader) = self.reader { 123 | // Every read we detect the current_size of the file and compare 124 | // against the previous_size. There are three cases to consider: 125 | // 126 | // * current_size > previous_size 127 | // * current_size == previous_size 128 | // * current_size < previous_size 129 | // 130 | // In the last case we must consider that the file has been 131 | // truncated and we can no longer trust our seek position 132 | // in-file. We MUST seek back to position 0. This is the _simplest_ 133 | // case to handle. 134 | // 135 | // Consider the equality case. It's possible that NO WRITES have 136 | // come into the file _or_ that the file has been truncated and 137 | // coincidentally the new writes exactly match the byte size of the 138 | // previous writes. THESE WRITES WILL BE LOST. 139 | // 140 | // Now the greater than inequality. All of the equality 141 | // considerations hold for this case. Also, consider if a write 142 | // straddles the line between previous_size and current_size. Then 143 | // we will be UNABLE to determine the proper start index of this 144 | // write and we WILL return a partial write of length 145 | // absolute_write_idx - previous_size. 
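            // A concrete illustration (hypothetical sizes): previous_size ==
            // 100 and the file is truncated then rewritten with 40 bytes.
            // current_size (40) < previous_size (100), so below we seek back
            // to 0 and report the 60 missing bytes as
            // cernan.sources.file.truncation telemetry.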
146 | let current_size = reader.get_ref().metadata().unwrap().size(); 147 | if self.previous_size > current_size { 148 | assert!(reader.seek(io::SeekFrom::Start(0)).is_ok()); 149 | report_full_telemetry( 150 | "cernan.sources.file.truncation", 151 | (self.previous_size - current_size) as f64, 152 | Some(vec![( 153 | "file_path", 154 | self.path.to_str().expect("could not make path"), 155 | )]), 156 | ); 157 | } 158 | self.previous_size = current_size; 159 | // match here on error, if metadata doesn't match up open_at_start 160 | // new reader and let it catch on the next looparound 161 | match reader.read_line(&mut buffer) { 162 | Ok(0) => { 163 | if file_id(&self.path) != self.file_id { 164 | self.reopen = true; 165 | } 166 | Ok(0) 167 | } 168 | Ok(sz) => { 169 | assert_eq!(sz, buffer.len()); 170 | buffer.pop(); 171 | Ok(buffer.len()) 172 | } 173 | Err(e) => { 174 | if let io::ErrorKind::NotFound = e.kind() { 175 | self.reopen = true; 176 | } 177 | Err(e) 178 | } 179 | } 180 | } else { 181 | self.open_at_start(); 182 | Ok(0) 183 | } 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/source/flush.rs: -------------------------------------------------------------------------------- 1 | use crate::metric; 2 | use crate::source; 3 | use crate::util; 4 | use crate::util::send; 5 | use mio; 6 | use std::thread::sleep; 7 | use std::time::Duration; 8 | 9 | /// The source of all flush pulses. 10 | pub struct FlushTimer; 11 | 12 | /// Nil config for `FlushTimer`. 13 | #[derive(Clone, Debug, Deserialize)] 14 | pub struct FlushTimerConfig; 15 | 16 | /// Returns the number of discrete flushes per second. 17 | pub fn flushes_per_second() -> u64 { 18 | // With 100 flushes per second, we have a maximum precision of 10ms. 19 | // Anything more than this is probably asking for the hopper queues to be 20 | // filled more by flushes than metrics. 21 | 100 22 | } 23 | 24 | impl source::Source for FlushTimer { 25 | /// Create a new FlushTimer. This will not produce a new thread, that must 26 | /// be managed by the end-user. 27 | fn init(_config: FlushTimerConfig) -> Self { 28 | FlushTimer {} 29 | } 30 | 31 | fn run(self, mut chans: util::Channel, _poller: mio::Poll) { 32 | let flush_duration = Duration::from_millis(1000 / flushes_per_second()); 33 | // idx will _always_ increase. If it's kept at u64 or greater it will 34 | // overflow long past the collapse of our industrial civilization even 35 | // if the flush interval is set to a millisecond. 36 | // 37 | // Point being, there's a theoretical overflow problem here but it's not 38 | // going to be hit in practice. 39 | let mut idx: u64 = 0; 40 | loop { 41 | // We start with TimerFlush(1) as receivers start with 42 | // TimerFlush(0). This will update their last_flush_idx seen at 43 | // system boot. 
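            // Illustratively: with flushes_per_second() == 100 the sleep
            // below is 10ms, so receivers see TimerFlush(1), TimerFlush(2),
            // ... at roughly 10ms intervals and gate their own flushes off
            // that idx.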
44 | idx += 1; 45 | sleep(flush_duration); 46 | send(&mut chans, metric::Event::TimerFlush(idx)); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/source/graphite.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | use crate::metric; 3 | use crate::protocols::graphite::parse_graphite; 4 | use crate::source::{TCPConfig, TCPStreamHandler, TCP}; 5 | use crate::util; 6 | use crate::util::send; 7 | use mio; 8 | use std::io::prelude::*; 9 | use std::io::BufReader; 10 | use std::str; 11 | use std::sync; 12 | use std::sync::atomic::{AtomicUsize, Ordering}; 13 | 14 | pub static GRAPHITE_NEW_PEER: AtomicUsize = AtomicUsize::new(0); 15 | pub static GRAPHITE_GOOD_PACKET: AtomicUsize = AtomicUsize::new(0); 16 | pub static GRAPHITE_TELEM: AtomicUsize = AtomicUsize::new(0); 17 | pub static GRAPHITE_BAD_PACKET: AtomicUsize = AtomicUsize::new(0); 18 | 19 | /// Configured for the `metric::Telemetry` source. 20 | #[derive(Debug, Deserialize, Clone)] 21 | pub struct GraphiteConfig { 22 | /// The host that the source will listen on. May be an IP address or a DNS 23 | /// hostname. 24 | pub host: String, 25 | /// The port that the source will listen on. 26 | pub port: u16, 27 | /// The forwards that the source will send all its Telemetry. 28 | pub forwards: Vec, 29 | /// The unique name of the source in the routing topology. 30 | pub config_path: Option, 31 | } 32 | 33 | impl Default for GraphiteConfig { 34 | fn default() -> GraphiteConfig { 35 | GraphiteConfig { 36 | host: "localhost".to_string(), 37 | port: 2003, 38 | forwards: Vec::new(), 39 | config_path: Some("sources.graphite".to_string()), 40 | } 41 | } 42 | } 43 | 44 | impl From for TCPConfig { 45 | fn from(item: GraphiteConfig) -> Self { 46 | TCPConfig { 47 | host: item.host, 48 | port: item.port, 49 | forwards: item.forwards, 50 | config_path: item.config_path, 51 | } 52 | } 53 | } 54 | 55 | #[derive(Default, Debug, Clone, Deserialize)] 56 | pub struct GraphiteStreamHandler; 57 | 58 | impl TCPStreamHandler for GraphiteStreamHandler { 59 | fn handle_stream( 60 | &mut self, 61 | mut chans: util::Channel, 62 | poller: &mio::Poll, 63 | stream: mio::net::TcpStream, 64 | ) { 65 | let mut line = String::new(); 66 | let mut res = Vec::new(); 67 | let mut line_reader = BufReader::new(stream); 68 | let basic_metric = sync::Arc::new(Some(metric::Telemetry::default())); 69 | 70 | loop { 71 | let mut events = mio::Events::with_capacity(1024); 72 | match poller.poll(&mut events, None) { 73 | Err(e) => panic!("Failed during poll {:?}", e), 74 | Ok(_num_events) => { 75 | for event in events { 76 | match event.token() { 77 | constants::SYSTEM => return, 78 | _stream_token => { 79 | while let Ok(len) = line_reader.read_line(&mut line) { 80 | if len > 0 { 81 | if parse_graphite( 82 | &line, 83 | &mut res, 84 | &basic_metric, 85 | ) { 86 | assert!(!res.is_empty()); 87 | GRAPHITE_GOOD_PACKET 88 | .fetch_add(1, Ordering::Relaxed); 89 | GRAPHITE_TELEM 90 | .fetch_add(1, Ordering::Relaxed); 91 | for m in res.drain(..) 
{ 92 | send( 93 | &mut chans, 94 | metric::Event::Telemetry(m), 95 | ); 96 | } 97 | line.clear(); 98 | } else { 99 | GRAPHITE_BAD_PACKET 100 | .fetch_add(1, Ordering::Relaxed); 101 | error!("bad packet: {:?}", line); 102 | line.clear(); 103 | } 104 | } else { 105 | break; 106 | } 107 | } 108 | } 109 | } 110 | } 111 | } 112 | } 113 | } 114 | } 115 | } 116 | 117 | /// Graphite protocol source 118 | /// 119 | /// This source produces `metric::Telemetry` from the graphite protocol. 120 | pub type Graphite = TCP; 121 | -------------------------------------------------------------------------------- /src/source/mod.rs: -------------------------------------------------------------------------------- 1 | //! Staging ground for all sources 2 | //! 3 | //! In cernan a `Source` is a place where all `metric::Event` come from, feeding 4 | //! down into the source's forwards for further processing. Statsd is a source 5 | //! that creates `Telemetry`, `FileServer` is a source that creates `LogLine`s. 6 | use crate::thread; 7 | use crate::util; 8 | use mio; 9 | use std::marker::PhantomData; 10 | 11 | mod avro; 12 | mod file; 13 | mod flush; 14 | mod graphite; 15 | mod internal; 16 | mod native; 17 | mod nonblocking; 18 | mod statsd; 19 | mod tcp; 20 | 21 | pub use self::avro::Avro; 22 | pub use self::file::{FileServer, FileServerConfig}; 23 | pub use self::flush::{flushes_per_second, FlushTimer, FlushTimerConfig}; 24 | pub use self::graphite::{Graphite, GraphiteConfig}; 25 | pub use self::internal::{report_full_telemetry, Internal, InternalConfig}; 26 | pub use self::native::{NativeServer, NativeServerConfig}; 27 | use self::nonblocking::{BufferedPayload, PayloadErr}; 28 | pub use self::statsd::{Statsd, StatsdConfig, StatsdParseConfig}; 29 | pub use self::tcp::{TCPConfig, TCPStreamHandler, TCP}; 30 | 31 | /// Generic interface used to capture global source configuration 32 | /// parameters as well as source specific parameters. 33 | /// 34 | /// Stored configuration is consumed when the source is spawned, 35 | /// resulting in a new thread which executes the given source. 36 | pub struct RunnableSource 37 | where 38 | S: Send + Source, 39 | SConfig: 'static + Send + Clone, 40 | { 41 | chans: util::Channel, 42 | source: S, 43 | 44 | // Yes, compiler, we know that we aren't storing 45 | // anything of type SConfig. 46 | config: PhantomData, 47 | } 48 | 49 | impl RunnableSource 50 | where 51 | S: Send + Source, 52 | SConfig: 'static + Send + Clone, 53 | { 54 | /// Constructs a new RunnableSource. 55 | pub fn new(chans: util::Channel, config: SConfig) -> Self { 56 | RunnableSource { 57 | chans: chans, 58 | config: PhantomData, 59 | source: S::init(config), 60 | } 61 | } 62 | 63 | /// Spawns a thread corresponding to the given RunnableSource, consuming 64 | /// the given RunnableSource in the process. 65 | pub fn run(self) -> thread::ThreadHandle { 66 | thread::spawn(move |poller| self.source.run(self.chans, poller)) 67 | } 68 | } 69 | 70 | /// cernan Source, the originator of all `metric::Event`. 71 | /// 72 | /// A cernan Source creates all `metric::Event`, doing so by listening to 73 | /// network IO, reading from files, etc etc. All sources push into the routing 74 | /// topology. 75 | pub trait Source 76 | where 77 | Self: 'static + Send + Sized, 78 | SConfig: 'static + Send + Clone, 79 | { 80 | /// Constructs a so-called runnable source for the given Source and 81 | /// config.` See RunnableSource. 
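    /// A minimal sketch (hypothetical channel) of standing a concrete source
    /// up through this constructor:
    ///
    /// ```ignore
    /// let runnable = Statsd::new(chans, StatsdConfig::default());
    /// let _handle = runnable.run(); // spawns the source thread
    /// ```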
82 | fn new(chans: util::Channel, config: SConfig) -> RunnableSource { 83 | RunnableSource::::new(chans, config) 84 | } 85 | 86 | /// Initializes state for the given Source. 87 | fn init(config: SConfig) -> Self; 88 | 89 | /// Run method invoked by RunnableSource. 90 | /// It is from this method that Sources produce metric::Events. 91 | fn run(self, chans: util::Channel, poller: mio::Poll); 92 | } 93 | -------------------------------------------------------------------------------- /src/source/native.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | use crate::metric; 3 | use crate::protocols::native::{AggregationMethod, Payload}; 4 | use crate::source::{BufferedPayload, PayloadErr, TCPConfig, TCPStreamHandler, TCP}; 5 | use crate::util; 6 | use mio; 7 | use protobuf; 8 | use std::net; 9 | use std::str; 10 | use std::sync::atomic::{AtomicUsize, Ordering}; 11 | 12 | /// Total payloads processed. 13 | pub static NATIVE_PAYLOAD_SUCCESS_SUM: AtomicUsize = AtomicUsize::new(0); 14 | /// Total fatal parse failures. 15 | pub static NATIVE_PAYLOAD_PARSE_FAILURE_SUM: AtomicUsize = AtomicUsize::new(0); 16 | 17 | /// The native source 18 | /// 19 | /// This source is the pair to the native sink. The native source/sink use or 20 | /// consume cernan's native protocol, defined 21 | /// `resources/protobufs/native.proto`. Clients may use the native protocol 22 | /// without having to obey the translation required in other sources or 23 | /// operators may set up cernan to cernan communication. 24 | 25 | /// Configuration for the native source 26 | #[derive(Debug, Clone, Deserialize)] 27 | pub struct NativeServerConfig { 28 | /// The IP address the native source will bind to. 29 | pub ip: String, 30 | /// The port the source will listen on. 31 | pub port: u16, 32 | /// The forwards for the native source to send its Telemetry along. 33 | pub forwards: Vec, 34 | /// The unique name for the source in the routing topology. 
35 | pub config_path: Option, 36 | } 37 | 38 | impl Default for NativeServerConfig { 39 | fn default() -> Self { 40 | NativeServerConfig { 41 | ip: "0.0.0.0".to_string(), 42 | port: 1972, 43 | forwards: Vec::default(), 44 | config_path: None, 45 | } 46 | } 47 | } 48 | 49 | impl From for TCPConfig { 50 | fn from(item: NativeServerConfig) -> Self { 51 | TCPConfig { 52 | host: item.ip, 53 | port: item.port, 54 | forwards: item.forwards, 55 | config_path: item.config_path, 56 | } 57 | } 58 | } 59 | 60 | #[derive(Default, Debug, Clone, Deserialize)] 61 | pub struct NativeStreamHandler; 62 | 63 | impl TCPStreamHandler for NativeStreamHandler { 64 | fn handle_stream( 65 | &mut self, 66 | chans: util::Channel, 67 | poller: &mio::Poll, 68 | stream: mio::net::TcpStream, 69 | ) { 70 | let mut streaming = true; 71 | let mut reader = BufferedPayload::new(stream.try_clone().unwrap(), 1_048_576); 72 | while streaming { 73 | let mut events = mio::Events::with_capacity(1024); 74 | match poller.poll(&mut events, None) { 75 | Err(e) => panic!("Failed during poll {:?}", e), 76 | Ok(_num_events) => { 77 | for event in events { 78 | match event.token() { 79 | constants::SYSTEM => { 80 | streaming = false; 81 | break; 82 | } 83 | _stream_token => { 84 | while streaming { 85 | match reader.read() { 86 | Ok(mut raw) => { 87 | let handle_res = self 88 | .handle_stream_payload( 89 | chans.clone(), 90 | &mut raw, 91 | ); 92 | if handle_res.is_err() { 93 | NATIVE_PAYLOAD_PARSE_FAILURE_SUM 94 | .fetch_add(1, Ordering::Relaxed); 95 | streaming = false; 96 | break; 97 | } 98 | NATIVE_PAYLOAD_SUCCESS_SUM 99 | .fetch_add(1, Ordering::Relaxed); 100 | } 101 | Err(PayloadErr::WouldBlock) => { 102 | // Not enough data yet. Try again. 103 | break; 104 | } 105 | Err(PayloadErr::EOF) => { 106 | // Client went away. Shut it down 107 | // (gracefully). 108 | trace!("TCP stream closed."); 109 | streaming = false; 110 | break; 111 | } 112 | Err(e) => { 113 | error!("Failed to process native payload! {:?}", e); 114 | streaming = false; 115 | break; 116 | } 117 | } 118 | } 119 | } 120 | } 121 | } 122 | } // events processing 123 | } // poll 124 | } // while connected 125 | 126 | // On some systems shutting down an already closed connection (client or 127 | // otherwise) results in an Err. See - 128 | // https://doc.rust-lang.org/beta/std/net/struct.TcpStream.html#platform-specific-behavior 129 | let _shutdown_result = stream.shutdown(net::Shutdown::Both); 130 | } // handle_stream 131 | } 132 | 133 | impl NativeStreamHandler { 134 | fn handle_stream_payload( 135 | &mut self, 136 | mut chans: util::Channel, 137 | buf: &mut Vec, 138 | ) -> Result<(), protobuf::ProtobufError> { 139 | match protobuf::parse_from_bytes::(buf) { 140 | // TODO we have to handle bin_bounds. We'll use samples to get 141 | // the values of each bounds' counter. 
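            // Note: `buf` here holds exactly one length-delimited frame, i.e.
            // the protobuf body that followed a 4-byte big-endian length
            // header (see BufferedPayload in source/nonblocking.rs).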
142 | Ok(mut pyld) => { 143 | for mut point in pyld.take_points().into_iter() { 144 | let name: String = point.take_name(); 145 | let smpls: Vec = point.take_samples(); 146 | let aggr_type: AggregationMethod = point.get_method(); 147 | let mut meta = point.take_metadata(); 148 | // FIXME #166 149 | let ts: i64 = (point.get_timestamp_ms() as f64 * 0.001) as i64; 150 | 151 | if smpls.is_empty() { 152 | continue; 153 | } 154 | let mut metric = metric::Telemetry::new().name(name); 155 | metric = metric.value(smpls[0]); 156 | metric = match aggr_type { 157 | AggregationMethod::SET => { 158 | metric.kind(metric::AggregationMethod::Set) 159 | } 160 | AggregationMethod::SUM => { 161 | metric.kind(metric::AggregationMethod::Sum) 162 | } 163 | AggregationMethod::SUMMARIZE => { 164 | metric.kind(metric::AggregationMethod::Summarize) 165 | } 166 | AggregationMethod::BIN => { 167 | metric.kind(metric::AggregationMethod::Histogram) 168 | } 169 | }; 170 | metric = metric.persist(point.get_persisted()); 171 | metric = metric.timestamp(ts); 172 | let mut metric = metric.harden().unwrap(); // todo don't unwrap 173 | for (key, value) in meta.drain() { 174 | metric = metric.overlay_tag(key, value); 175 | } 176 | for smpl in &smpls[1..] { 177 | metric = metric.insert(*smpl); 178 | } 179 | util::send(&mut chans, metric::Event::new_telemetry(metric)); 180 | } 181 | for mut line in pyld.take_lines().into_iter() { 182 | let path: String = line.take_path(); 183 | let value: String = line.take_value(); 184 | let mut meta = line.take_metadata(); 185 | // FIXME #166 186 | let ts: i64 = (line.get_timestamp_ms() as f64 * 0.001) as i64; 187 | 188 | let mut logline = metric::LogLine::new(path, value); 189 | logline = logline.time(ts); 190 | for (key, value) in meta.drain() { 191 | logline = logline.overlay_tag(key, value); 192 | } 193 | util::send(&mut chans, metric::Event::new_log(logline)); 194 | } 195 | Ok(()) 196 | } 197 | Err(err) => { 198 | trace!("Unable to read payload: {:?}", err); 199 | Err(err) 200 | } 201 | } 202 | } 203 | } 204 | 205 | /// Source for Cernan's native protocol. 206 | pub type NativeServer = TCP; 207 | -------------------------------------------------------------------------------- /src/source/nonblocking.rs: -------------------------------------------------------------------------------- 1 | //! Handy interfaces for nonblocking streams. 2 | 3 | use byteorder::{BigEndian, ReadBytesExt}; 4 | use mio; 5 | use std::io::{Read, Write}; 6 | use std::{io, mem}; 7 | 8 | /// Like `std::net::TcpStream::write_all`, except it handles `WouldBlock` too. 9 | pub fn write_all( 10 | mut stream: &mio::net::TcpStream, 11 | bytes: &[u8], 12 | ) -> Result<(), io::Error> { 13 | let mut written = 0; 14 | 15 | while written < bytes.len() { 16 | match stream.write(&bytes[written..]) { 17 | Ok(bytes_written) => { 18 | written += bytes_written; 19 | } 20 | 21 | Err(e) => match e.kind() { 22 | io::ErrorKind::WouldBlock | io::ErrorKind::Interrupted => { 23 | continue; 24 | } 25 | 26 | _ => { 27 | error!("Failed to write bytes onto stream! {:?}", e); 28 | return Err(e); 29 | } 30 | }, 31 | } 32 | } 33 | Ok(()) 34 | } 35 | 36 | /// Handler error types returned by `handle_avro_payload`. 37 | #[derive(Debug)] 38 | pub enum PayloadErr { 39 | /// End of stream has been reached. 40 | EOF, 41 | /// Not enough data present to construct the payload. 42 | /// Try again later. 43 | WouldBlock, 44 | /// An IO error occured. 45 | IO(io::Error), 46 | /// Payload parsing failure. 
47 | Protocol(String), 48 | /// The length prefix is too large to be allocated 49 | LengthTooLarge, 50 | } 51 | 52 | impl From<io::Error> for PayloadErr { 53 | fn from(e: io::Error) -> PayloadErr { 54 | if e.kind() == io::ErrorKind::WouldBlock { 55 | PayloadErr::WouldBlock 56 | } else if e.kind() == io::ErrorKind::UnexpectedEof { 57 | PayloadErr::EOF 58 | } else { 59 | PayloadErr::IO(e) 60 | } 61 | } 62 | } 63 | 64 | impl From<String> for PayloadErr { 65 | fn from(s: String) -> PayloadErr { 66 | PayloadErr::Protocol(s) 67 | } 68 | } 69 | 70 | /// Buffered length-prefixed payload. 71 | /// 72 | /// For use on blocking or non-blocking streams. 73 | pub struct BufferedPayload { 74 | /// Size of the expected payload in bytes. When None, this value is read 75 | /// off the underlying stream as a big-endian u32. 76 | payload_size: Option<usize>, 77 | 78 | /// The maximum allowable payload size. If a payload_size comes in over the 79 | /// wire that is greater than this limit we close the connection. 80 | max_payload_size: usize, 81 | 82 | /// Position in the payload byte vector we are receiving into. 83 | payload_pos: usize, 84 | 85 | /// Bytes comprising the payload. 86 | payload: Vec<u8>, 87 | 88 | /// Inner buffer where bytes from the underlying stream are staged. 89 | buffer: io::BufReader<mio::net::TcpStream>, 90 | } 91 | 92 | impl BufferedPayload { 93 | /// Constructs a new BufferedPayload. 94 | pub fn new(stream: mio::net::TcpStream, max_payload_size: usize) -> Self { 95 | BufferedPayload { 96 | payload_size: None, 97 | max_payload_size: max_payload_size, 98 | payload_pos: 0, 99 | payload: Vec::new(), 100 | buffer: io::BufReader::new(stream), 101 | } 102 | } 103 | 104 | /// Reads buffered data from the underlying stream. If enough data is 105 | /// present, a single payload 106 | /// is constructed and returned. 107 | /// 108 | /// On non-blocking streams, it is up to the user to call 109 | /// this method repeatedly until PayloadErr::WouldBlock 110 | /// is returned. 111 | pub fn read(&mut self) -> Result<Vec<u8>, PayloadErr> { 112 | // Are we actively reading a payload already? 113 | if self.payload_size.is_none() { 114 | self.read_length()?; 115 | } 116 | if self.payload_size.unwrap() > self.max_payload_size { 117 | return Err(PayloadErr::LengthTooLarge); 118 | } 119 | 120 | self.read_payload()?; 121 | 122 | // By this point we assert that we have read exactly 123 | // 1 payload off the buffer. We may have read partial 124 | // or entire other payloads off the wire. Additional bytes 125 | // will persist in buffer for later parsing. 126 | Ok(mem::replace(&mut self.payload, Vec::new())) 127 | } 128 | 129 | /// Reads the payload's length from the wire, caching the result. 130 | /// 131 | /// If a cached value already exists, this function is a no-op. 132 | fn read_length(&mut self) -> Result<(), PayloadErr> { 133 | if self.payload_size.is_none() { 134 | self.payload_size = Some(self.buffer.read_u32::<BigEndian>()? as usize); 135 | }; 136 | Ok(()) 137 | } 138 | 139 | /// Attempts to read at least one payload worth of data. If there 140 | /// isn't enough data between the inner buffer and the underlying stream, 141 | /// then PayloadErr::WouldBlock is returned. 142 | fn read_payload(&mut self) -> Result<(), PayloadErr> { 143 | // At this point we can assume that we have successfully 144 | // read the length off the wire.
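        // Illustrative trace (hypothetical sizes): a 300-byte payload that
        // arrives in reads of 200 then 100 bytes leaves payload_pos at 200
        // after the first pass, then completes and resets
        // payload_size/payload_pos on the second.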
145 | let payload_size = self.payload_size.unwrap(); 146 | 147 | if self.payload.len() != payload_size { 148 | trace!("Resizing internal buffer to {:?}", payload_size); 149 | self.payload.resize(payload_size, 0); 150 | } 151 | 152 | loop { 153 | match self 154 | .buffer 155 | .read(&mut self.payload[self.payload_pos..payload_size]) 156 | { 157 | Ok(0) => return Err(PayloadErr::EOF), 158 | 159 | Ok(bytes_read) if (self.payload_pos + bytes_read) == payload_size => { 160 | // We successfully pulled a payload off the wire. 161 | // Reset bytes remaining for the next payload. 162 | self.payload_size = None; 163 | self.payload_pos = 0; 164 | return Ok(()); 165 | } 166 | 167 | Ok(bytes_read) => { 168 | // We read some data, but not yet enough. 169 | // Store the difference and try again later. 170 | self.payload_pos += bytes_read; 171 | continue; 172 | } 173 | 174 | Err(e) => return Err(e.into()), 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/source/statsd.rs: -------------------------------------------------------------------------------- 1 | use crate::constants; 2 | use crate::metric; 3 | use crate::protocols::statsd::parse_statsd; 4 | use crate::source; 5 | use crate::util; 6 | use crate::util::send; 7 | use mio; 8 | use regex::Regex; 9 | use std::io::ErrorKind; 10 | use std::net::ToSocketAddrs; 11 | use std::str; 12 | use std::sync; 13 | use std::sync::atomic::{AtomicUsize, Ordering}; 14 | 15 | pub static STATSD_GOOD_PACKET: AtomicUsize = AtomicUsize::new(0); 16 | pub static STATSD_BAD_PACKET: AtomicUsize = AtomicUsize::new(0); 17 | 18 | /// The statsd source 19 | /// 20 | /// Statsd is a collection of protocols, originally spawned by the telemetering 21 | /// work done out of Etsy. Cernan tries to support a cow-path subset of the 22 | /// statsd protocol family. 23 | pub struct Statsd { 24 | conns: util::TokenSlab, 25 | parse_config: sync::Arc, 26 | } 27 | 28 | /// The mask type for metrics in `StatsdParseConfig`. 29 | pub type Mask = Regex; 30 | 31 | /// The bound type for metrics in `StatsdParseConfig`. 32 | pub type Bounds = Vec; 33 | 34 | /// Configuration for the statsd parser 35 | #[derive(Debug, Clone)] 36 | pub struct StatsdParseConfig { 37 | /// Set specific bin masks for timeseries according to their name. The name 38 | /// may be a [regex](https://crates.io/crates/regex) match, such like 39 | /// 'foo.*'. In this case all metrics prefixed by 'foo.' which are timer or 40 | /// histogram will be interpreted as a histogram. 41 | pub histogram_masks: Vec<(Mask, Bounds)>, 42 | /// Configure the error bound for a statsd timer or histogram. Cernan does 43 | /// not compute precise quantiles but approximations with a guaranteed upper 44 | /// bound on the error of approximation. This allows the end-user to set 45 | /// that. 46 | pub summarize_error_bound: f64, 47 | } 48 | 49 | impl Default for StatsdParseConfig { 50 | fn default() -> StatsdParseConfig { 51 | StatsdParseConfig { 52 | histogram_masks: vec![], 53 | summarize_error_bound: 0.01, 54 | } 55 | } 56 | } 57 | 58 | /// Configuration for the statsd source. 59 | #[derive(Debug, Clone)] 60 | pub struct StatsdConfig { 61 | /// The host for the statsd protocol to bind to. 62 | pub host: String, 63 | /// The port for the statsd source to listen on. 64 | pub port: u16, 65 | /// The forwards that statsd will send its telemetry on to. 66 | pub forwards: Vec, 67 | /// The unique name for the source in the routing topology. 
68 | pub config_path: Option, 69 | /// Configuration for the parsing of statsd lines 70 | pub parse_config: StatsdParseConfig, 71 | } 72 | 73 | impl Default for StatsdConfig { 74 | fn default() -> StatsdConfig { 75 | StatsdConfig { 76 | host: "localhost".to_string(), 77 | port: 8125, 78 | forwards: Vec::new(), 79 | config_path: None, 80 | parse_config: StatsdParseConfig::default(), 81 | } 82 | } 83 | } 84 | 85 | enum StatsdHandlerErr { 86 | Fatal, 87 | } 88 | 89 | impl Statsd { 90 | fn handle_datagrams( 91 | &self, 92 | mut chans: &mut util::Channel, 93 | socket: &mio::net::UdpSocket, 94 | mut buf: &mut Vec, 95 | ) -> Result<(), StatsdHandlerErr> { 96 | let mut metrics = Vec::new(); 97 | let basic_metric = sync::Arc::new(Some(metric::Telemetry::default())); 98 | loop { 99 | match socket.recv_from(&mut buf) { 100 | Ok((len, _)) => match str::from_utf8(&buf[..len]) { 101 | Ok(val) => { 102 | if parse_statsd( 103 | val, 104 | &mut metrics, 105 | &basic_metric, 106 | &self.parse_config, 107 | ) { 108 | for m in metrics.drain(..) { 109 | send(&mut chans, metric::Event::new_telemetry(m)); 110 | } 111 | STATSD_GOOD_PACKET.fetch_add(1, Ordering::Relaxed); 112 | } else { 113 | STATSD_BAD_PACKET.fetch_add(1, Ordering::Relaxed); 114 | error!("BAD PACKET: {:?}", val); 115 | } 116 | } 117 | Err(e) => { 118 | error!("Payload not valid UTF-8: {:?}", e); 119 | } 120 | }, 121 | Err(e) => match e.kind() { 122 | ErrorKind::WouldBlock => { 123 | break; 124 | } 125 | _ => { 126 | error!("Could not read UDP socket with error {:?}", e); 127 | return Err(StatsdHandlerErr::Fatal); 128 | } 129 | }, 130 | } 131 | } 132 | Ok(()) 133 | } 134 | } 135 | 136 | impl source::Source for Statsd { 137 | /// Create and spawn a new statsd source 138 | fn init(config: StatsdConfig) -> Self { 139 | let mut conns = util::TokenSlab::::new(); 140 | let addrs = (config.host.as_str(), config.port).to_socket_addrs(); 141 | match addrs { 142 | Ok(ips) => { 143 | for addr in ips { 144 | let socket = mio::net::UdpSocket::bind(&addr) 145 | .expect("Unable to bind to UDP socket"); 146 | conns.insert(socket); 147 | } 148 | } 149 | Err(e) => { 150 | info!( 151 | "Unable to perform DNS lookup on host {} with error {}", 152 | config.host, e 153 | ); 154 | } 155 | }; 156 | 157 | Statsd { 158 | conns: conns, 159 | parse_config: sync::Arc::new(config.parse_config), 160 | } 161 | } 162 | 163 | fn run(self, mut chans: util::Channel, poller: mio::Poll) { 164 | for (idx, socket) in self.conns.iter() { 165 | if let Err(e) = poller.register( 166 | socket, 167 | mio::Token::from(idx), 168 | mio::Ready::readable(), 169 | mio::PollOpt::edge(), 170 | ) { 171 | error!("Failed to register {:?} - {:?}!", socket, e); 172 | } 173 | } 174 | 175 | let mut buf = vec![0; 16_250]; 176 | loop { 177 | let mut events = mio::Events::with_capacity(1024); 178 | match poller.poll(&mut events, None) { 179 | Ok(_num_events) => { 180 | for event in events { 181 | match event.token() { 182 | constants::SYSTEM => { 183 | send(&mut chans, metric::Event::Shutdown); 184 | return; 185 | } 186 | 187 | token => { 188 | let socket = &self.conns[token]; 189 | if let Err(_e) = 190 | self.handle_datagrams(&mut chans, socket, &mut buf) 191 | { 192 | error!( 193 | "Deregistering {:?} due to unrecoverable error!", 194 | *socket 195 | ); 196 | } 197 | } 198 | } 199 | } 200 | } 201 | Err(e) => panic!(format!("Failed during poll {:?}", e)), 202 | } 203 | } // loop 204 | } // run 205 | } 206 | -------------------------------------------------------------------------------- /src/source/tcp.rs: 
--------------------------------------------------------------------------------
/src/source/tcp.rs:
--------------------------------------------------------------------------------
1 | use crate::constants;
2 | use crate::metric;
3 | use crate::source::Source;
4 | use crate::thread;
5 | use crate::util;
6 | use hopper;
7 | use mio;
8 | use std;
9 | use std::io::ErrorKind;
10 | use std::marker::PhantomData;
11 | use std::net::ToSocketAddrs;
12 | 
13 | /// Configuration for the TCP source.
14 | #[derive(Debug, Deserialize, Clone)]
15 | pub struct TCPConfig {
16 |     /// The unique name of the source in the routing topology.
17 |     pub config_path: Option<String>,
18 |     /// The host that the source will listen on. May be an IP address or a DNS
19 |     /// hostname.
20 |     pub host: String,
21 |     /// The port that the source will listen on.
22 |     pub port: u16,
23 |     /// The forwards to which the source will send all of its Telemetry.
24 |     pub forwards: Vec<String>,
25 | }
26 | 
27 | impl Default for TCPConfig {
28 |     fn default() -> TCPConfig {
29 |         TCPConfig {
30 |             host: "localhost".to_string(),
31 |             port: 8080,
32 |             forwards: Vec::new(),
33 |             config_path: Some("sources.tcp".to_string()),
34 |         }
35 |     }
36 | }
37 | 
38 | /// Simple single threaded TCP Stream handler.
39 | pub trait TCPStreamHandler: 'static + Default + Clone + Sync + Send {
40 |     /// Constructs a new handler for `mio::net::TcpStream`s.
41 |     fn new() -> Self {
42 |         Default::default()
43 |     }
44 | 
45 |     /// Handles a single TCP stream.
46 |     fn handle_stream(
47 |         &mut self,
48 |         _: util::Channel,
49 |         _: &mio::Poll,
50 |         _: mio::net::TcpStream,
51 |     ) -> ();
52 | }
53 | 
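// Editor's note: the handler below is an illustrative sketch, not part of the
// original tree. It is the smallest possible implementation of the
// `TCPStreamHandler` contract above; a real handler would register `stream`
// with `poller` and service reads until the SYSTEM token fires.
#[derive(Clone, Default)]
pub struct NullStreamHandler;

impl TCPStreamHandler for NullStreamHandler {
    fn handle_stream(
        &mut self,
        _chans: util::Channel,
        _poller: &mio::Poll,
        _stream: mio::net::TcpStream,
    ) {
        // Dropping the stream here closes the connection immediately.
    }
}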
54 | /// State for a TCP backed source.
55 | pub struct TCP<H: TCPStreamHandler> {
56 |     listeners: util::TokenSlab<mio::net::TcpListener>,
57 |     stream_events: mio::Registration,
58 |     stream_events_token: mio::Token,
59 |     handlers: thread::ThreadPool,
60 |     phantom: PhantomData<H>,
61 | }
62 | 
63 | impl<H> Source<TCPConfig> for TCP<H>
64 | where
65 |     H: TCPStreamHandler,
66 | {
67 |     /// Constructs and starts a new TCP source.
68 |     fn init(config: TCPConfig) -> Self {
69 |         // Create registrations for all TCP interfaces and stream handlers.
70 |         //
71 |         // Note - Due to restrictions in mio, we must construct these registrations
72 |         // here as we are assuming this function is called directly from the main
73 |         // process. Registrations must be bound to a mio poller by the subordinate
74 |         // thread.
75 |         let addrs = (config.host.as_str(), config.port).to_socket_addrs();
76 |         let mut listeners = util::TokenSlab::<mio::net::TcpListener>::new();
77 |         match addrs {
78 |             Ok(ips) => {
79 |                 let ips: Vec<_> = ips.collect();
80 |                 for addr in ips {
81 |                     let listener = mio::net::TcpListener::bind(&addr)
82 |                         .expect("Unable to bind to TCP socket");
83 |                     info!("Registering listener for {:?}", addr);
84 |                     listeners.insert(listener);
85 |                 }
86 |             }
87 | 
88 |             Err(e) => {
89 |                 panic!(
90 |                     "Unable to perform DNS lookup on {:?}:{:?} with error {}",
91 |                     config.host.as_str(),
92 |                     config.port,
93 |                     e
94 |                 );
95 |             }
96 |         };
97 | 
98 |         let (stream_events, stream_events_readiness) = mio::Registration::new2();
99 |         let stream_events_token = mio::Token::from(listeners.count());
100 |         let thread_pool = thread::ThreadPool::new(Some(stream_events_readiness));
101 |         TCP {
102 |             listeners: listeners,
103 |             stream_events: stream_events,
104 |             stream_events_token: stream_events_token,
105 |             handlers: thread_pool,
106 |             phantom: PhantomData,
107 |         }
108 |     }
109 | 
110 |     /// Starts the accept loop.
111 |     fn run(self, chans: util::Channel, poller: mio::Poll) {
112 |         for (idx, listener) in self.listeners.iter() {
113 |             if let Err(e) = poller.register(
114 |                 listener,
115 |                 mio::Token::from(idx),
116 |                 mio::Ready::readable(),
117 |                 mio::PollOpt::edge(),
118 |             ) {
119 |                 error!("Failed to register {:?} - {:?}!", listener, e);
120 |             }
121 |         }
122 | 
123 |         if let Err(e) = poller.register(
124 |             &self.stream_events,
125 |             self.stream_events_token,
126 |             mio::Ready::readable(),
127 |             mio::PollOpt::edge(),
128 |         ) {
129 |             error!("Failed to register stream events - {:?}!", e);
130 |         };
131 | 
132 |         self.accept_loop(chans, &poller)
133 |     }
134 | }
135 | 
136 | impl<H> TCP<H>
137 | where
138 |     H: TCPStreamHandler,
139 | {
140 |     fn accept_loop(mut self, mut chans: util::Channel, poll: &mio::Poll) {
141 |         loop {
142 |             let mut events = mio::Events::with_capacity(1024);
143 |             match poll.poll(&mut events, None) {
144 |                 Err(e) => panic!("Failed during poll {:?}", e),
145 |                 Ok(_num_events) => {
146 |                     for event in events {
147 |                         match event.token() {
148 |                             constants::SYSTEM => {
149 |                                 self.handlers.shutdown();
150 |                                 util::send(&mut chans, metric::Event::Shutdown);
151 |                                 return;
152 |                             }
153 |                             listener_token => {
154 |                                 if listener_token == self.stream_events_token {
155 |                                     // Mio event corresponding to a StreamHandler.
156 |                                     // Currently, the only StreamHandler event flags
157 |                                     // the StreamHandler as terminated. Cleanup state.
158 |                                     let ready = self.handlers.join_ready();
159 |                                     trace!(
160 |                                         "Removed {:?} terminated stream handlers.",
161 |                                         ready.len()
162 |                                     );
163 |                                 } else if let Err(e) =
164 |                                     self.spawn_stream_handlers(&chans, listener_token)
165 |                                 {
166 |                                     let listener = &self.listeners[listener_token];
167 |                                     error!("Failed to spawn stream handlers! {:?}", e);
168 |                                     error!("Deregistering listener for {:?} due to unrecoverable error!", *listener);
169 |                                     let _ = poll.deregister(listener);
170 |                                 }
171 |                             }
172 |                         }
173 |                     }
174 |                 }
175 |             }
176 |         }
177 |     }
178 | 
179 |     fn spawn_stream_handlers(
180 |         &mut self,
181 |         chans: &[hopper::Sender<metric::Event>],
182 |         listener_token: mio::Token,
183 |     ) -> Result<(), std::io::Error> {
184 |         let listener = &self.listeners[listener_token];
185 |         loop {
186 |             match listener.accept() {
187 |                 Ok((stream, _addr)) => {
188 |                     // Actually spawn the stream handler
189 |                     let rchans = chans.to_owned();
190 |                     self.handlers.spawn(move |poller| {
191 |                         // Note - Stream handlers are allowed to crash without
192 |                         // compromising Cernan's ability to gracefully shutdown.
193 |                         poller
194 |                             .register(
195 |                                 &stream,
196 |                                 mio::Token(0),
197 |                                 mio::Ready::readable(),
198 |                                 mio::PollOpt::edge(),
199 |                             )
200 |                             .unwrap();
201 | 
202 |                         let mut handler = H::new();
203 |                         handler.handle_stream(rchans, &poller, stream);
204 |                     });
205 |                 }
206 |                 Err(e) => match e.kind() {
207 |                     ErrorKind::ConnectionAborted
208 |                     | ErrorKind::Interrupted
209 |                     | ErrorKind::TimedOut => {
210 |                         // Connection was closed before we could accept or
211 |                         // we were interrupted. Press on.
212 |                         continue;
213 |                     }
214 |                     ErrorKind::WouldBlock => {
215 |                         // Out of connections to accept. Wrap it up.
216 |                         return Ok(());
217 |                     }
218 |                     _ => return Err(e),
219 |                 },
220 |             };
221 |         }
222 |     }
223 | }
224 | 
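// Editor's note: the test below is an illustrative sketch, not part of the
// original tree. It wires the source to a poller-owning thread in the way
// cernan's binary does; `NullStreamHandler` is the editor's example handler
// above, and port 0 asks the OS for an ephemeral port.
#[cfg(test)]
mod wiring_example {
    use super::*;
    use crate::thread::Stoppable;

    #[test]
    fn starts_and_stops() {
        let config = TCPConfig {
            host: "127.0.0.1".to_string(),
            port: 0,
            ..TCPConfig::default()
        };
        let source = TCP::<NullStreamHandler>::init(config);
        // `thread::spawn` pre-registers the SYSTEM shutdown token on the
        // child's poller; `run` watches for it in its accept loop.
        let handle = thread::spawn(move |poller| source.run(Vec::new(), poller));
        handle.shutdown();
    }
}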
--------------------------------------------------------------------------------
/src/thread.rs:
--------------------------------------------------------------------------------
1 | //! Mio enabled threading library.
2 | use crate::constants;
3 | use crate::util;
4 | use mio;
5 | use std::option;
6 | use std::sync;
7 | use std::thread;
8 | 
9 | /// Event polling structure. Alias of `mio::Poll`.
10 | pub type Poll = mio::Poll;
11 | /// Events buffer type. Alias of `mio::Events`.
12 | pub type Events = mio::Events;
13 | 
14 | /// Mio enabled thread state.
15 | pub struct ThreadHandle {
16 |     /// JoinHandle for the executing thread.
17 |     pub handle: thread::JoinHandle<()>,
18 | 
19 |     /// Readiness signal used to notify the given thread when an event is ready
20 |     /// to be consumed on the SYSTEM channel.
21 |     shutdown_event: mio::SetReadiness,
22 | }
23 | 
24 | /// Trait for stoppable processes.
25 | pub trait Stoppable {
26 |     /// Join the given process, blocking until it exits.
27 |     fn join(self) -> ();
28 | 
29 |     /// Gracefully shutdown the process, blocking until exit.
30 |     fn shutdown(self) -> ();
31 | }
32 | 
33 | impl Stoppable for ThreadHandle {
34 |     /// Join the given Thread, blocking until it exits.
35 |     fn join(self) {
36 |         self.handle.join().expect("Failed to join child thread!");
37 |     }
38 | 
39 |     /// Gracefully shutdown the given Thread, blocking until it exits.
40 |     ///
41 |     /// Note - It is the responsibility of the developer to ensure
42 |     /// that thread logic polls for events occurring on the SYSTEM token.
43 |     fn shutdown(self) {
44 |         self.shutdown_event
45 |             .set_readiness(mio::Ready::readable())
46 |             .expect("Failed to notify child thread of shutdown!");
47 |         self.join();
48 |     }
49 | }
50 | 
51 | /// Spawns a new thread executing the provided closure.
52 | pub fn spawn<F>(f: F) -> ThreadHandle
53 | where
54 |     F: Send + 'static + FnOnce(mio::Poll) -> (),
55 | {
56 |     let child_poller = mio::Poll::new().unwrap();
57 |     let (shutdown_event_registration, shutdown_event) = mio::Registration::new2();
58 |     ThreadHandle {
59 |         shutdown_event: shutdown_event,
60 |         handle: thread::spawn(move || {
61 |             child_poller
62 |                 .register(
63 |                     &shutdown_event_registration,
64 |                     constants::SYSTEM,
65 |                     mio::Ready::readable(),
66 |                     mio::PollOpt::edge(),
67 |                 )
68 |                 .expect("Failed to register system pipe");
69 | 
70 |             f(child_poller);
71 |         }),
72 |     }
73 | }
74 | 
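// Editor's note: the test below is an illustrative sketch, not part of the
// original tree. It demonstrates the contract documented on `Stoppable`: the
// child owns the poller and must watch for `constants::SYSTEM`, which
// `shutdown` flags readable before joining.
#[cfg(test)]
mod spawn_example {
    use super::*;

    #[test]
    fn child_observes_shutdown() {
        let handle = spawn(|poller| {
            let mut events = Events::with_capacity(16);
            loop {
                poller.poll(&mut events, None).unwrap();
                if events.iter().any(|ev| ev.token() == constants::SYSTEM) {
                    // Graceful exit: the parent signalled shutdown.
                    return;
                }
            }
        });
        handle.shutdown();
    }
}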
75 | /// mio Eventable ThreadPool.
76 | pub struct ThreadPool {
77 |     /// thread_id counter.
78 |     thread_id: usize,
79 |     /// Listing of all the joinable threads in the pool.
80 |     joinable: sync::Arc<sync::Mutex<Vec<usize>>>,
81 |     /// Mapping of thread_id to ThreadHandle.
82 |     threads: util::HashMap<usize, ThreadHandle>,
83 |     /// Mio readiness flagging when threads finish execution.
84 |     thread_event_readiness: option::Option<mio::SetReadiness>,
85 | }
86 | 
87 | impl ThreadPool {
88 |     /// Construct a new ThreadPool.
89 |     pub fn new(thread_events_readiness: option::Option<mio::SetReadiness>) -> Self {
90 |         ThreadPool {
91 |             thread_id: 0,
92 |             joinable: sync::Arc::new(sync::Mutex::new(Vec::new())),
93 |             thread_event_readiness: thread_events_readiness,
94 |             threads: util::HashMap::default(),
95 |         }
96 |     }
97 | 
98 |     /// Spawn a new thread and assign it to the pool.
99 |     pub fn spawn<F>(&mut self, f: F) -> usize
100 |     where
101 |         F: Send + 'static + FnOnce(mio::Poll) -> (),
102 |     {
103 |         let id = self.next_thread_id();
104 |         let joinable_arc = self.joinable.clone();
105 |         let thread_event_readiness = self.thread_event_readiness.clone();
106 |         let handler = spawn(move |poller| {
107 |             f(poller);
108 | 
109 |             let mut joinable = joinable_arc.lock().unwrap();
110 |             joinable.push(id);
111 | 
112 |             if let Some(readiness) = thread_event_readiness {
113 |                 readiness
114 |                     .set_readiness(mio::Ready::readable())
115 |                     .expect("Failed to flag readiness for ThreadPool event!");
116 |             }
117 |         });
118 |         self.threads.insert(id, handler);
119 |         id
120 |     }
121 | 
122 |     fn next_thread_id(&mut self) -> usize {
123 |         let thread_id = self.thread_id;
124 |         self.thread_id += 1;
125 |         thread_id
126 |     }
127 | 
128 |     /// Block on completion of all executing threads.
129 |     pub fn join(mut self) -> Vec<usize> {
130 |         self.threads.drain().for_each(|(_, h)| h.join());
131 |         self.join_ready()
132 |     }
133 | 
134 |     /// Join all completed threads.
135 |     pub fn join_ready(&mut self) -> Vec<usize> {
136 |         let mut joinable = self.joinable.lock().unwrap();
137 |         let mut joined = Vec::new();
138 |         while let Some(id) = joinable.pop() {
139 |             if let Some(handle) = self.threads.remove(&id) {
140 |                 handle.join();
141 |             }
142 |             joined.push(id);
143 |         }
144 |         joined
145 |     }
146 | 
147 |     /// Serially signal shutdown and block for completion of all threads.
148 |     pub fn shutdown(mut self) -> Vec<usize> {
149 |         self.threads.drain().for_each(|(_, h)| h.shutdown());
150 |         self.join_ready()
151 |     }
152 | }
153 | 
--------------------------------------------------------------------------------
/src/time.rs:
--------------------------------------------------------------------------------
1 | //! Collection of time utilities for cernan
2 | //!
3 | //! Time in cernan is not based strictly on wall-clock. We keep a global clock
4 | //! for cernan and update it ourselves periodically. See `update_time` in this
5 | //! module for more details.
6 | 
7 | use chrono::offset::Utc;
8 | use std::sync::atomic::{AtomicUsize, Ordering};
9 | use std::{thread, time};
10 | 
11 | lazy_static! {
12 |     static ref NOW: AtomicUsize = AtomicUsize::new(Utc::now().timestamp() as usize);
13 | }
14 | 
15 | /// Return the current time in epoch seconds
16 | pub fn now() -> i64 {
17 |     NOW.load(Ordering::Relaxed) as i64
18 | }
19 | 
20 | /// Update cernan's view of time every 500ms. Time is in UTC.
21 | pub fn update_time() {
22 |     let dur = time::Duration::from_millis(500);
23 |     loop {
24 |         thread::sleep(dur);
25 |         let now = Utc::now().timestamp() as usize;
26 |         let order = Ordering::Relaxed;
27 |         NOW.store(now, order);
28 |     }
29 | }
30 | 
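// Editor's note: the test below is an illustrative sketch, not part of the
// original tree. The cached clock only advances while some thread runs
// `update_time`, so embedders dedicate a thread to it at startup, as here.
#[cfg(test)]
mod clock_example {
    use super::*;

    #[test]
    fn clock_is_driven_by_update_time() {
        // `update_time` never returns; detach it as cernan's binary does.
        thread::spawn(update_time);
        let t0 = now();
        thread::sleep(time::Duration::from_millis(1100));
        // Reads are cheap atomic loads and never move backward.
        assert!(now() >= t0);
    }
}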
31 | /// Pause a thread of execution
32 | ///
33 | /// This function pauses the thread of execution for a length of time derived
34 | /// from the number of attempts made so far. The attempts input exponentially
35 | /// increases the length of the delay, from 0 milliseconds up to a cap of 512.
36 | /// A delay of attempt x will pause the thread of execution for:
37 | ///
38 | /// - x = 0 -> 0 ms
39 | /// - x = 1 -> 1 ms
40 | /// - 2 <= x < 9 -> 2**x ms; x >= 9 -> 512 ms
41 | #[inline]
42 | pub fn delay(attempts: u32) {
43 |     let delay = match attempts {
44 |         0 => return,
45 |         1 => 1,
46 |         2 => 4,
47 |         3 => 8,
48 |         4 => 16,
49 |         5 => 32,
50 |         6 => 64,
51 |         7 => 128,
52 |         8 => 256,
53 |         _ => 512,
54 |     };
55 |     let sleep_time = time::Duration::from_millis(delay as u64);
56 |     thread::sleep(sleep_time);
57 | }
58 | 
--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
1 | //! Utility module, a grab-bag of functionality
2 | use crate::constants;
3 | use crate::metric;
4 | use hopper;
5 | use mio;
6 | use seahash::SeaHasher;
7 | use slab;
8 | use std::collections;
9 | use std::hash;
10 | use std::ops::{Index, IndexMut};
11 | use std::sync::atomic::{AtomicUsize, Ordering};
12 | 
13 | /// Number of dropped events due to channel being totally full
14 | pub static UTIL_SEND_HOPPER_ERROR_FULL: AtomicUsize = AtomicUsize::new(0);
15 | 
16 | /// Cernan hashmap
17 | ///
18 | /// In most cases where cernan needs a hashmap we've got smallish inputs as keys
19 | /// and, more, have a smallish number of total elements (< 100k) to store in the
20 | /// map. This hashmap is specialized to address that common use-case.
21 | pub type HashMap<K, V> =
22 |     collections::HashMap<K, V, hash::BuildHasherDefault<SeaHasher>>;
23 | 
24 | /// A vector of `hopper::Sender`s.
25 | pub type Channel = Vec<hopper::Sender<metric::Event>>;
26 | 
27 | /// Send a `metric::Event` into a `Channel`.
28 | pub fn send(chans: &mut Channel, mut event: metric::Event) {
29 |     if chans.is_empty() {
30 |         // Nothing to send to.
31 |         return;
32 |     }
33 | 
34 |     let max: usize = chans.len().saturating_sub(1);
35 |     if max != 0 {
36 |         for chan in &mut chans[1..] {
37 |             let mut snd_event = event.clone();
38 |             while let Err(res) = chan.send(snd_event) {
39 |                 // There are a variety of errors that hopper will signal back up
40 |                 // when we do a send. The only one we care about is
41 |                 // `Error::Full`, meaning that all disk and memory buffer space
42 |                 // is consumed. We drop the event on the floor in that case.
43 |                 match res.1 {
44 |                     hopper::Error::Full => {
45 |                         UTIL_SEND_HOPPER_ERROR_FULL.fetch_add(1, Ordering::Relaxed);
46 |                         break;
47 |                     }
48 |                     _ => {
49 |                         snd_event = res.0;
50 |                         continue;
51 |                     }
52 |                 }
53 |             }
54 |         }
55 |     }
56 |     while let Err(res) = chans[0].send(event) {
57 |         match res.1 {
58 |             hopper::Error::Full => {
59 |                 UTIL_SEND_HOPPER_ERROR_FULL.fetch_add(1, Ordering::Relaxed);
60 |                 break;
61 |             }
62 |             _ => {
63 |                 event = res.0;
64 |                 continue;
65 |             }
66 |         }
67 |     }
68 | }
69 | 
70 | /// Determine the state of a buffering queue, whether open or closed.
71 | ///
72 | /// Cernan is architected to be a push-based system. It copes with demand rushes
73 | /// by buffering to disk -- via the hopper queues -- and rejecting memory-based
74 | /// storage with overload signals. This signal, in particular, limits the amount
75 | /// of information delivered to a filter / sink by declaring that said filter /
76 | /// sink's input 'valve' is closed. Exactly how and why a filter / sink declares
77 | /// its valve state is left to the implementation.
78 | #[derive(Debug, PartialEq)]
79 | pub enum Valve {
80 |     /// In the `Open` state a filter / sink will accept new inputs
81 |     Open,
82 |     /// In the `Closed` state a filter / sink will reject new inputs, backing
83 |     /// them up in the communication queue.
84 |     Closed,
85 | }
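// Editor's note: the function below is an illustrative sketch, not part of the
// original tree, of how a filter / sink might compute its valve state. The
// high-water mark is a made-up parameter; real implementations choose their
// own criteria.
#[allow(dead_code)]
fn example_valve_state(buffered_events: usize, high_water: usize) -> Valve {
    if buffered_events >= high_water {
        // Reject new inputs; upstream hopper queues absorb the backlog.
        Valve::Closed
    } else {
        Valve::Open
    }
}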
84 | Closed, 85 | } 86 | 87 | #[inline] 88 | fn token_to_idx(token: mio::Token) -> usize { 89 | match token { 90 | mio::Token(idx) => idx, 91 | } 92 | } 93 | 94 | /// Wrapper around Slab 95 | pub struct TokenSlab { 96 | token_count: usize, 97 | tokens: slab::Slab, 98 | } 99 | 100 | impl Default for TokenSlab { 101 | fn default() -> Self { 102 | Self::new() 103 | } 104 | } 105 | 106 | impl Index for TokenSlab { 107 | type Output = E; 108 | 109 | /// Returns Evented object corresponding to Token. 110 | fn index(&self, token: mio::Token) -> &E { 111 | &self.tokens[token_to_idx(token)] 112 | } 113 | } 114 | 115 | impl IndexMut for TokenSlab { 116 | fn index_mut(&mut self, token: mio::Token) -> &mut E { 117 | &mut self.tokens[token_to_idx(token)] 118 | } 119 | } 120 | 121 | /// Interface wrapping a subset of Slab such 122 | /// that we can magically translate indices to 123 | /// `mio::token`. 124 | impl TokenSlab { 125 | /// Constructs a new TokenSlab with a capacity derived from the value 126 | /// of constants::SYSTEM. 127 | pub fn new() -> TokenSlab { 128 | TokenSlab { 129 | token_count: 0, 130 | tokens: slab::Slab::with_capacity(token_to_idx(constants::SYSTEM)), 131 | } 132 | } 133 | 134 | /// Iterates over the underlying slab mapping index to mio::Evented. 135 | pub fn iter(&self) -> slab::Iter { 136 | self.tokens.iter() 137 | } 138 | 139 | /// Return the number of tokens stored in the TokenSlab. 140 | pub fn count(&self) -> usize { 141 | self.token_count 142 | } 143 | 144 | /// Inserts a new Evented into the slab, returning a mio::Token 145 | /// corresponding to the index of the newly inserted type. 146 | pub fn insert(&mut self, thing: E) -> mio::Token { 147 | let idx = self.tokens.insert(thing); 148 | self.token_count += 1; 149 | mio::Token::from(idx) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /upload-artifact.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | 6 | function usage() { 7 | echo "$0 " 8 | } 9 | 10 | if [ -z "$1" ]; then 11 | usage 12 | exit 1 13 | fi 14 | 15 | VERSION="${1}" 16 | 17 | mkdir -p target/artifact 18 | TARGET="target/artifact/cernan" 19 | 20 | container_id=$(docker create quay.io/postmates/cernan:$VERSION) 21 | # Output of docker cp is always a tar archive regardless of source 22 | docker cp $container_id:/usr/bin/cernan - | tar x -C target/artifact 23 | docker rm -v $container_id 24 | 25 | DEST="s3://artifacts.postmates.com/binaries/cernan/cernan-$VERSION" 26 | 27 | aws s3 cp $TARGET $DEST 28 | --------------------------------------------------------------------------------