├── .github ├── arcon_logo.png ├── arcon_overview.jpg ├── arcon_vision.png └── workflows │ ├── ci.yaml │ └── gh-pages.yml ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── arcon ├── Cargo.toml ├── arcon_allocator │ ├── Cargo.toml │ ├── README.md │ └── src │ │ └── lib.rs ├── arcon_state │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── backend │ │ ├── handles.rs │ │ ├── macros.rs │ │ ├── metrics_utils.rs │ │ ├── mod.rs │ │ ├── ops.rs │ │ ├── rocks │ │ │ ├── aggregator_ops.rs │ │ │ ├── map_ops.rs │ │ │ ├── mod.rs │ │ │ ├── reducer_ops.rs │ │ │ ├── value_ops.rs │ │ │ └── vec_ops.rs │ │ ├── serialization.rs │ │ ├── sled │ │ │ ├── aggregator_ops.rs │ │ │ ├── map_ops.rs │ │ │ ├── mod.rs │ │ │ ├── reducer_ops.rs │ │ │ ├── value_ops.rs │ │ │ └── vec_ops.rs │ │ └── test_common.rs │ │ ├── data.rs │ │ ├── error.rs │ │ └── lib.rs ├── benches │ ├── allocator.rs │ ├── buffer_pool.rs │ ├── hash_table.rs │ └── value.rs └── src │ ├── application │ ├── builder.rs │ ├── conf │ │ ├── logger.rs │ │ └── mod.rs │ └── mod.rs │ ├── buffer │ ├── event │ │ └── mod.rs │ ├── mod.rs │ └── network │ │ └── mod.rs │ ├── data │ ├── arrow.rs │ ├── flight_serde.rs │ ├── mod.rs │ ├── partition.rs │ └── ser_id.rs │ ├── dataflow │ ├── builder.rs │ ├── conf.rs │ ├── constructor.rs │ ├── dfg.rs │ ├── mod.rs │ ├── sink │ │ └── mod.rs │ ├── source │ │ ├── kafka.rs │ │ └── mod.rs │ └── stream │ │ ├── arrow │ │ └── mod.rs │ │ ├── filter.rs │ │ ├── keyed │ │ └── mod.rs │ │ ├── map.rs │ │ ├── mod.rs │ │ ├── operator.rs │ │ └── partition.rs │ ├── error │ ├── mod.rs │ ├── source.rs │ └── timer.rs │ ├── index │ ├── appender │ │ ├── eager.rs │ │ └── mod.rs │ ├── hash_table │ │ ├── bitmask.rs │ │ ├── eager.rs │ │ ├── generic.rs │ │ ├── mod.rs │ │ ├── sse2.rs │ │ └── table.rs │ ├── mod.rs │ ├── value │ │ ├── eager.rs │ │ ├── local.rs │ │ └── mod.rs │ └── window │ │ ├── appender.rs │ │ ├── arrow.rs │ │ ├── incremental.rs │ │ └── mod.rs │ ├── lib.rs │ ├── manager │ ├── epoch.rs │ ├── mod.rs │ ├── node.rs │ ├── snapshot.rs │ ├── source.rs │ └── state.rs │ ├── metrics │ ├── ewma.rs │ ├── log_recorder.rs │ ├── meter.rs │ ├── mod.rs │ ├── perf_event.rs │ └── runtime_metrics.rs │ ├── stream │ ├── channel │ │ ├── mod.rs │ │ └── strategy │ │ │ ├── broadcast.rs │ │ │ ├── forward.rs │ │ │ ├── keyed.rs │ │ │ └── mod.rs │ ├── mod.rs │ ├── node │ │ ├── common.rs │ │ ├── debug.rs │ │ ├── mod.rs │ │ ├── source.rs │ │ └── timer.rs │ ├── operator │ │ ├── function │ │ │ ├── filter.rs │ │ │ ├── flatmap.rs │ │ │ ├── map.rs │ │ │ ├── map_in_place.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── sink │ │ │ ├── kafka.rs │ │ │ ├── local_file.rs │ │ │ ├── measure.rs │ │ │ ├── mod.rs │ │ │ └── socket.rs │ │ └── window │ │ │ ├── assigner.rs │ │ │ └── mod.rs │ ├── source │ │ ├── kafka.rs │ │ ├── local_file.rs │ │ ├── mod.rs │ │ ├── schema.rs │ │ └── socket.rs │ └── time │ │ └── mod.rs │ ├── table │ └── mod.rs │ ├── test │ ├── arcon_state.rs │ ├── keyby_integration.rs │ └── mod.rs │ └── util │ ├── io.rs │ ├── mod.rs │ ├── prost_helpers.rs │ └── system_killer.rs ├── arcon_build ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── arcon_macros ├── Cargo.toml ├── README.md ├── src │ ├── app.rs │ ├── arcon.rs │ ├── arrow.rs │ ├── decoder.rs │ ├── lib.rs │ ├── proto.rs │ └── state.rs └── tests │ └── decoder_test.rs ├── arcon_tests ├── Cargo.toml ├── build.rs └── src │ ├── basic_v3.proto │ ├── lib.rs │ └── proto_derive_test.rs ├── arcon_util ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── clippy.toml ├── docs ├── README.md ├── 
config.toml └── content │ ├── _index.md │ ├── authors │ ├── _index.md │ └── max-meldrum.md │ ├── blog │ ├── _index.md │ ├── arrow.md │ └── dev_update_22_08_10.md │ ├── docs │ ├── _index.md │ ├── arcon │ │ ├── _index.md │ │ ├── about.md │ │ ├── community.md │ │ └── roadmap.md │ └── getting-started │ │ ├── _index.md │ │ ├── data-types.md │ │ └── quick-start.md │ └── privacy-policy │ └── _index.md ├── examples ├── Cargo.toml ├── custom_operator.rs ├── file.rs ├── kafka_source.rs ├── stateful.rs └── window.rs ├── run_checks.sh └── rustfmt.toml /.github/arcon_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cda-group/arcon/e6d266579b134bbd1fd52c4a5ee30646f89dd914/.github/arcon_logo.png -------------------------------------------------------------------------------- /.github/arcon_overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cda-group/arcon/e6d266579b134bbd1fd52c4a5ee30646f89dd914/.github/arcon_overview.jpg -------------------------------------------------------------------------------- /.github/arcon_vision.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cda-group/arcon/e6d266579b134bbd1fd52c4a5ee30646f89dd914/.github/arcon_vision.png -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: ["master"] 5 | pull_request: 6 | branches: ["master"] 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | msrv: 1.56.1 11 | 12 | jobs: 13 | rustfmt: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: checkout 17 | uses: actions/checkout@v2 18 | - name: install toolchain 19 | uses: actions-rs/toolchain@v1 20 | with: 21 | toolchain: stable 22 | default: true 23 | profile: minimal 24 | components: rustfmt 25 | - name: rustfmt 26 | uses: actions-rs/cargo@v1 27 | with: 28 | command: fmt 29 | args: --all -- --check 30 | 31 | clippy: 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: actions/checkout@v2 35 | - name: install toolchain 36 | uses: actions-rs/toolchain@v1 37 | with: 38 | toolchain: stable 39 | default: true 40 | profile: minimal 41 | components: clippy 42 | - name: clippy 43 | uses: actions-rs/clippy-check@v1 44 | with: 45 | token: ${{ secrets.GITHUB_TOKEN }} 46 | args: --workspace --all-features --all-targets -- -D warnings 47 | 48 | test: 49 | runs-on: ${{ matrix.os }} 50 | continue-on-error: ${{ matrix.experimental }} 51 | strategy: 52 | fail-fast: false 53 | matrix: 54 | build: [Linux, macOS] 55 | include: 56 | - build: Linux 57 | os: ubuntu-latest 58 | experimental: false 59 | - build: macOS 60 | os: macos-latest 61 | experimental: false 62 | steps: 63 | - uses: actions/checkout@v2 64 | - name: install toolchain 65 | uses: actions-rs/toolchain@v1 66 | with: 67 | toolchain: ${{ env.msrv }} 68 | default: true 69 | profile: minimal 70 | - name: Install cargo-hack 71 | run: cargo install cargo-hack 72 | 73 | # Run tests with all features on workspace 74 | - name: test all --all-features 75 | uses: actions-rs/cargo@v1 76 | with: 77 | command: test 78 | args: --all --all-features 79 | 80 | # Run tests for each Arcon feature 81 | - name: test --each-feature 82 | run: cargo hack test --each-feature 83 | working-directory: arcon 84 | 85 | # Verify benches 86 | - name: bench check 87 | uses: 
actions-rs/cargo@v1 88 | with: 89 | command: check 90 | args: --benches --all --all-features 91 | -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: github pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build: 10 | name: Publish website 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout main 14 | uses: actions/checkout@v3.0.0 15 | - name: Build and deploy 16 | uses: shalzz/zola-deploy-action@master 17 | env: 18 | BUILD_DIR: docs 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .swp 3 | .swo 4 | *.rs.swm 5 | *.rs.swn 6 | *.rs.swl 7 | *.rs.swo 8 | *.rs.swp 9 | *.proto.swp 10 | target/ 11 | shell.nix 12 | Cargo.lock 13 | 14 | op_* 15 | 16 | # Sublime Text 17 | *.sublime-project 18 | *.sublime-workspace 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "docs/themes/adidoks"] 2 | path = docs/themes/adidoks 3 | url = https://github.com/aaranxu/adidoks.git 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Arcon 2 | 3 | All contributions are appreciated, whether it is fixing a typo, refactoring existing code, or adding a new feature! 4 | 5 | If you are unsure where to start, have a look at the open issues here on GitHub. 6 | 7 | ## Getting Started 8 | 9 | Fork the arcon repository and create a new branch for the feature you aim to work on. Keep the master branch of the fork clean and have it track arcon's master. 10 | 11 | Create a new branch: 12 | 13 | ```bash 14 | git checkout -b my_feature_branch 15 | ``` 16 | Add the upstream remote (SSH): 17 | 18 | ```bash 19 | git remote add upstream git@github.com:cda-group/arcon.git 20 | ``` 21 | Add the upstream remote (HTTPS): 22 | 23 | ```bash 24 | git remote add upstream https://github.com/cda-group/arcon.git 25 | ``` 26 | Whenever you need to sync your fork: 27 | 28 | ```bash 29 | git pull upstream master 30 | ``` 31 | 32 | ## Pull Requests 33 | 34 | Arcon uses a squash-and-merge strategy for all pull requests. This means that all commits of a pull request will be squashed into a single commit. 35 | 36 | Some general tips for creating Pull Requests: 37 | 38 | 1. Provide a description of what your PR adds to Arcon. 39 | 2. Motivate your changes. If the PR now uses library X rather than Y to solve a problem, please motivate the change. 40 | 3. Keep the PR simple, that is, try not to add several features into a single PR. 41 | 4. Connect the PR/commit to a GitHub issue, e.g., "closes #4". 42 | 43 | 44 | Before submitting a PR, make sure to run all related tests and verifications to catch potential errors locally rather than at the CI.
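For example, you can run the same checks as the CI workflow shown above (see also `run_checks.sh` in the repository root):

```bash
cargo fmt --all -- --check
cargo clippy --workspace --all-features --all-targets -- -D warnings
cargo test --all --all-features
```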
45 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "arcon", 4 | "arcon_build", 5 | "arcon_util", 6 | "arcon_tests", 7 | "examples" 8 | ] 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
<p align="center">
2 | <img src=".github/arcon_logo.png" alt="Arcon logo">
3 | </p>
4 | 5 | # Arcon 6 | 7 | Arcon is a library for building state-first streaming applications in Rust. 8 | 9 | ![ci](https://github.com/cda-group/arcon/workflows/ci/badge.svg) 10 | [![Cargo](https://img.shields.io/badge/crates.io-v0.2.1-orange)](https://crates.io/crates/arcon) 11 | [![Documentation](https://docs.rs/arcon/badge.svg)](https://docs.rs/arcon) 12 | [![project chat](https://img.shields.io/badge/zulip-join%20chat-ff69b4)](https://arcon.zulipchat.com) 13 | [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](https://github.com/cda-group/arcon) 14 | 15 | #### Project Status 16 | 17 | Arcon is in development and should be considered experimental until further notice. 18 | 19 | The APIs may break, and you should not be running Arcon with important data! 20 | 21 | #### Rust Version 22 | 23 | Arcon builds against the latest stable release, and the current MSRV is 1.56.1. 24 | 25 | #### Roadmap 26 | 27 | See the roadmap [here](https://github.com/cda-group/arcon/projects/1). 28 | 29 | ## Highlights 30 | 31 | * Out-of-order Processing 32 | * Event-time & Watermarks 33 | * Epoch Snapshotting for Exactly-once Processing 34 | * Hybrid Row (Protobuf) / Columnar (Arrow) System 35 | * Modular State Backend Abstraction 36 | 37 | ## Example 38 | 39 | ```rust,no_run 40 | #[arcon::app] 41 | fn main() { 42 | (0..100u64) 43 | .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)) 44 | .filter(|x| *x > 50) 45 | .map(|x| x * 10) 46 | .print() 47 | } 48 | ``` 49 | 50 | More examples can be found [here](examples). 51 | 52 | ## Project Layout 53 | 54 | * [`arcon`]: Arcon crate 55 | * [`arcon_build`]: Protobuf builder 56 | * [`arcon_macros`]: Arcon derive macros 57 | * [`arcon_tests`]: Integration tests 58 | * [`arcon_util`]: Common Arcon utilities 59 | * [`docs`]: Project website docs 60 | * [`examples`]: Example applications 61 | 62 | [`arcon`]: arcon 63 | [`arcon_build`]: arcon_build 64 | [`arcon_macros`]: arcon_macros 65 | [`arcon_tests`]: arcon_tests 66 | [`arcon_util`]: arcon_util 67 | [`docs`]: docs 68 | [`examples`]: examples 69 | 70 | ## Contributing 71 | 72 | See [Contributing](CONTRIBUTING.md). 73 | 74 | ## Community 75 | 76 | Arcon is an ambitious project with many different development & research areas. 77 | 78 | If you find Arcon interesting and want to learn more, then join the [Zulip](https://arcon.zulipchat.com) community! 79 | 80 | ## Acknowledgements 81 | 82 | Arcon is influenced by many great projects, whether in implementation, code practices, or project structure: 83 | 84 | - [Tokio](https://github.com/tokio-rs/tokio) 85 | - [Datafusion](https://github.com/apache/arrow-datafusion) 86 | - [Apache Flink](https://github.com/apache/flink) 87 | - [Sled](https://github.com/spacejam/sled) 88 | 89 | ## License 90 | 91 | This project is licensed under the [Apache-2.0 license](LICENSE). 92 | 93 | ## Contribution 94 | 95 | Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Arcon by you shall be licensed as Apache-2.0, without any additional terms or conditions.
-------------------------------------------------------------------------------- /arcon/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon" 3 | version = "0.2.1" 4 | authors = ["Max Meldrum "] 5 | edition = "2018" 6 | rust-version = "1.56" 7 | readme = "../README.md" 8 | license = "Apache-2.0" 9 | repository = "https://github.com/cda-group/arcon" 10 | homepage = "https://github.com/cda-group/arcon" 11 | description = """ 12 | A runtime for writing streaming applications 13 | """ 14 | 15 | [features] 16 | default = [] 17 | rocksdb = ["arcon_state/rocks"] 18 | kafka = ["rdkafka", "futures", "serde_json"] 19 | socket = ["tokio", "tokio-util", "futures", "serde", "serde_json"] 20 | hardware_counters = ["perf-event", "metrics"] 21 | prometheus_exporter = ["metrics-exporter-prometheus", "metrics"] 22 | allocator_metrics = ["arcon_allocator/metrics"] 23 | state_metrics = ["arcon_state/metrics"] 24 | 25 | [dependencies] 26 | arcon_allocator = { version = "0.2", path = "arcon_allocator" } 27 | arcon_macros = { version = "0.2", path = "../arcon_macros" } 28 | arcon_state = { version = "0.2", path = "arcon_state" } 29 | arcon_util = { version = "0.2", path = "../arcon_util", features = ["hasher"] } 30 | 31 | kompact = "0.11" 32 | uuid = "0.8" 33 | cfg-if = "0.1.10" 34 | crossbeam-utils = "0.7" 35 | num_cpus = "1.0" 36 | hierarchical_hash_wheel_timer = "1.1" 37 | snafu = "0.6" 38 | arrow = "5.0" 39 | parquet = { version = "5.0", features = ["zstd"] } 40 | 41 | # Logging 42 | slog = "2" 43 | slog-async = "2" 44 | slog-term = "2" 45 | 46 | # Hashing 47 | fxhash = "0.2.1" 48 | 49 | # Serialisation 50 | prost = "0.9" 51 | bytes = "1.0" 52 | 53 | # Optional 54 | rdkafka = { version = "0.26", optional = true } 55 | tokio = { version = "1.0", optional = true, features = ["full"] } # TODO: figure out which are truly needed 56 | tokio-util = { version = "0.6", optional = true, features = ["full"] } 57 | futures = { version = "0.3", optional = true } 58 | serde_json = { version = "1.0.44", optional = true } 59 | serde = { version = "1.0.104", optional = true, features = ["derive"] } 60 | hocon = { version = "0.3", optional = true, default-features = false, features = ["serde-support"] } 61 | metrics = { version = "0.16.0", optional = true } 62 | metrics-exporter-prometheus = { version = "0.5.0", optional = true } 63 | 64 | [target.'cfg(target_os = "linux")'.dependencies] 65 | perf-event = { version = "0.4.7", optional = true } 66 | 67 | 68 | [dev-dependencies] 69 | tempfile = "3" 70 | rand = "0.6.5" 71 | slog = { version = "2.2", features = ["max_level_error"] } 72 | criterion = "0.3" 73 | once_cell = "1.3.1" 74 | 75 | [[bench]] 76 | name = "allocator" 77 | harness = false 78 | 79 | [[bench]] 80 | name = "buffer_pool" 81 | harness = false 82 | 83 | [[bench]] 84 | name = "hash_table" 85 | harness = false 86 | 87 | [[bench]] 88 | name = "value" 89 | harness = false -------------------------------------------------------------------------------- /arcon/arcon_allocator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon_allocator" 3 | version = "0.2.1" 4 | authors = ["Max Meldrum "] 5 | repository = "https://github.com/cda-group/arcon" 6 | readme = "README.md" 7 | license = "Apache-2.0" 8 | homepage = "https://github.com/cda-group/arcon" 9 | description = """ 10 | Allocator for Arcon 11 | """ 12 | edition = "2018" 13 | 14 | [dependencies] 15 | fxhash = "0.2.1" 16 | snafu 
= "0.6" 17 | metrics = { version = "0.16.0", optional = true } 18 | -------------------------------------------------------------------------------- /arcon/arcon_allocator/README.md: -------------------------------------------------------------------------------- 1 | ![ci](https://github.com/cda-group/arcon/workflows/ci/badge.svg) 2 | [![Cargo](https://img.shields.io/badge/crates.io-v0.2.1-orange)](https://crates.io/crates/arcon_allocator) 3 | 4 | # arcon allocator 5 | 6 | This crate is not intended to be used directly. It is a part of the [Arcon project](https://github.com/cda-group/arcon) 7 | -------------------------------------------------------------------------------- /arcon/arcon_state/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon_state" 3 | version = "0.2.1" 4 | authors = ["Mikołaj Robakowski , Max Meldrum "] 5 | edition = "2018" 6 | readme = "README.md" 7 | license = "Apache-2.0" 8 | repository = "https://github.com/cda-group/arcon" 9 | description = """ 10 | State management features for Arcon 11 | """ 12 | 13 | [features] 14 | default = ["sled", "sled_checkpoints"] 15 | rocks = ["rocksdb"] 16 | sled_checkpoints = ["sled"] 17 | 18 | [dependencies] 19 | prost = "0.9" 20 | bytes = "1.0" 21 | snafu = "0.6" 22 | cfg-if = "0.1.10" 23 | sled = { version = "0.34", optional = true } 24 | metrics = { version = "0.16.0", optional = true } 25 | 26 | [dependencies.rocksdb] 27 | default-features = false 28 | features = ["lz4"] 29 | version = "0.17" 30 | optional = true 31 | 32 | [dev-dependencies] 33 | tempfile = "3" 34 | -------------------------------------------------------------------------------- /arcon/arcon_state/README.md: -------------------------------------------------------------------------------- 1 | ![ci](https://github.com/cda-group/arcon/workflows/ci/badge.svg) 2 | [![Cargo](https://img.shields.io/badge/crates.io-v0.2.1-orange)](https://crates.io/crates/arcon) 3 | 4 | # arcon state 5 | 6 | This crate is not intended to be used directly. It is a part of the [Arcon project](https://github.com/cda-group/arcon) 7 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/macros.rs: -------------------------------------------------------------------------------- 1 | // region helper macros for `with_backend_type!` 2 | #[doc(hidden)] 3 | #[cfg(feature = "rocks")] 4 | #[macro_export] 5 | macro_rules! cfg_if_rocks { 6 | (@pat $i: pat) => { 7 | $i 8 | }; 9 | ($($body:tt)*) => { 10 | $($body)* 11 | }; 12 | } 13 | 14 | #[doc(hidden)] 15 | #[cfg(not(feature = "rocks"))] 16 | #[macro_export] 17 | macro_rules! cfg_if_rocks { 18 | (@pat $i: pat) => { 19 | _ 20 | }; 21 | ($($body:tt)*) => { 22 | unreachable!() 23 | }; 24 | } 25 | 26 | #[doc(hidden)] 27 | #[cfg(feature = "sled")] 28 | #[macro_export] 29 | macro_rules! cfg_if_sled { 30 | (@pat $i: pat) => { 31 | $i 32 | }; 33 | ($($body:tt)*) => { 34 | $($body)* 35 | }; 36 | } 37 | 38 | #[doc(hidden)] 39 | #[cfg(not(feature = "sled"))] 40 | #[macro_export] 41 | macro_rules! cfg_if_sled { 42 | (@pat $i: pat) => { 43 | _ 44 | }; 45 | ($($body:tt)*) => { 46 | unreachable!() 47 | }; 48 | } 49 | // endregion 50 | 51 | /// Runs `$body` with `$type_ident` bound to a concrete state backend type based on the runtime 52 | /// value of `$type_value` (which has to be of type [BackendType](crate::BackendType)). 
The caller 53 | /// has to make sure that the return type of this expression is the same regardless of what 54 | /// `$type_ident` happens to be bound to. 55 | /// 56 | /// # Examples 57 | /// 58 | /// ```no_run 59 | /// # extern crate arcon_state; 60 | /// # use arcon_state::backend::{BackendType, Backend}; 61 | /// # use arcon_state::with_backend_type; 62 | /// # use std::any::Any; 63 | /// let runtime_type = BackendType::Sled; 64 | /// let boxed: Box = with_backend_type!(runtime_type, 65 | /// |SB| Box::new(SB::create("test_dir".as_ref(), "testDB".to_string()).unwrap()) as Box 66 | /// ); 67 | /// ``` 68 | #[macro_export] 69 | macro_rules! with_backend_type { 70 | ($type_value:expr, |$type_ident:ident| $body:expr) => {{ 71 | use $crate::backend::BackendType::*; 72 | #[allow(unreachable_patterns)] 73 | match $type_value { 74 | $crate::cfg_if_sled!(@pat Sled) => { 75 | $crate::cfg_if_sled! { 76 | type $type_ident = $crate::backend::sled::Sled; 77 | $body 78 | } 79 | } 80 | $crate::cfg_if_rocks!(@pat Rocks) => { 81 | $crate::cfg_if_rocks! { 82 | type $type_ident = $crate::backend::rocks::Rocks; 83 | $body 84 | } 85 | } 86 | } 87 | }}; 88 | } 89 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/metrics_utils.rs: -------------------------------------------------------------------------------- 1 | use metrics::counter; 2 | 3 | #[inline] 4 | pub fn record_bytes_written(handle_name: &str, total_bytes: u64, backend_name: &str) { 5 | counter!(format!("{}_bytes_written", handle_name), total_bytes, "backend" => backend_name.to_string()); 6 | } 7 | 8 | #[inline] 9 | pub fn record_bytes_read(handle_name: &str, total_bytes: u64, backend_name: &str) { 10 | counter!(format!("{}_bytes_read", handle_name), total_bytes, "backend" => backend_name.to_string()); 11 | } 12 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/rocks/reducer_ops.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "metrics")] 2 | use crate::metrics_utils::*; 3 | use crate::{ 4 | data::{Metakey, Value}, 5 | error::*, 6 | rocks::default_write_opts, 7 | serialization::protobuf, 8 | Handle, Reducer, ReducerOps, ReducerState, Rocks, 9 | }; 10 | 11 | use rocksdb::{merge_operator::MergeFn, MergeOperands}; 12 | 13 | impl ReducerOps for Rocks { 14 | fn reducer_clear, IK: Metakey, N: Metakey>( 15 | &self, 16 | handle: &Handle, IK, N>, 17 | ) -> Result<()> { 18 | let key = handle.serialize_metakeys()?; 19 | self.remove(&handle.id, &key)?; 20 | Ok(()) 21 | } 22 | 23 | fn reducer_get, IK: Metakey, N: Metakey>( 24 | &self, 25 | handle: &Handle, IK, N>, 26 | ) -> Result> { 27 | let key = handle.serialize_metakeys()?; 28 | if let Some(storage) = self.get(&handle.id, &key)? { 29 | #[cfg(feature = "metrics")] 30 | record_bytes_read(handle.name(), storage.len() as u64, self.name.as_str()); 31 | let value = protobuf::deserialize(&*storage)?; 32 | Ok(Some(value)) 33 | } else { 34 | Ok(None) 35 | } 36 | } 37 | 38 | fn reducer_reduce, IK: Metakey, N: Metakey>( 39 | &self, 40 | handle: &Handle, IK, N>, 41 | value: T, 42 | ) -> Result<()> { 43 | let key = handle.serialize_metakeys()?; 44 | let serialized = protobuf::serialize(&value)?; 45 | 46 | let cf = self.get_cf_handle(&handle.id)?; 47 | // See the make_reducer_merge function in this module. Its result is set as the merging 48 | // operator for this state. 
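// For illustration (not in the original source): RocksDB applies merge
// operators lazily, at read or compaction time, so the merge_cf_opt call
// below only enqueues the serialized operand. With a reducer such as
// |a: &u64, b: &u64| *a + *b, pending operands 1, 2 and 3 fold into 6.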
49 | #[cfg(feature = "metrics")] 50 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 51 | Ok(self 52 | .db() 53 | .merge_cf_opt(cf, key, serialized, &default_write_opts())?) 54 | } 55 | } 56 | 57 | pub fn make_reducer_merge(reduce_fn: F) -> impl MergeFn + Clone 58 | where 59 | F: Reducer, 60 | T: Value, 61 | { 62 | move |_key: &[u8], first: Option<&[u8]>, rest: &mut MergeOperands| { 63 | let res: Result> = first 64 | .into_iter() 65 | .chain(rest) 66 | .map(|bytes| protobuf::deserialize::(bytes)) 67 | .try_fold(None, |acc, value| match acc { 68 | None => Ok(Some(value?)), 69 | Some(old) => Ok(Some(reduce_fn(&old, &value?))), 70 | }); 71 | 72 | // TODO: change eprintlns to actual logs 73 | // we don't really have a way to send results back to rust across rocksdb ffi, so we just 74 | // log the errors 75 | match res { 76 | Ok(Some(v)) => match protobuf::serialize(&v) { 77 | Ok(serialized) => Some(serialized), 78 | Err(e) => { 79 | eprintln!("reduce state merge result serialization error: {}", e); 80 | None 81 | } 82 | }, 83 | Ok(None) => { 84 | eprintln!("reducing state merge result is None???"); 85 | None 86 | } 87 | Err(e) => { 88 | eprintln!("reducing state merge error: {}", e); 89 | None 90 | } 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/rocks/value_ops.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{Metakey, Value}, 3 | error::*, 4 | serialization::protobuf, 5 | Handle, Rocks, ValueOps, ValueState, 6 | }; 7 | 8 | #[cfg(feature = "metrics")] 9 | use crate::metrics_utils::*; 10 | 11 | impl ValueOps for Rocks { 12 | fn value_clear( 13 | &self, 14 | handle: &Handle, IK, N>, 15 | ) -> Result<()> { 16 | let key = handle.serialize_metakeys()?; 17 | self.remove(&handle.id, &key)?; 18 | Ok(()) 19 | } 20 | 21 | fn value_get( 22 | &self, 23 | handle: &Handle, IK, N>, 24 | ) -> Result> { 25 | let key = handle.serialize_metakeys()?; 26 | if let Some(serialized) = self.get(&handle.id, &key)? { 27 | #[cfg(feature = "metrics")] 28 | record_bytes_read(handle.name(), serialized.len() as u64, self.name.as_str()); 29 | let value = protobuf::deserialize(&serialized)?; 30 | Ok(Some(value)) 31 | } else { 32 | Ok(None) 33 | } 34 | } 35 | 36 | fn value_set( 37 | &self, 38 | handle: &Handle, IK, N>, 39 | value: T, 40 | ) -> Result> { 41 | let key = handle.serialize_metakeys()?; 42 | let old = if let Some(serialized) = self.get(&handle.id, &key)? 
{ 43 | let value = protobuf::deserialize(&serialized)?; 44 | Some(value) 45 | } else { 46 | None 47 | }; 48 | let serialized = protobuf::serialize(&value)?; 49 | #[cfg(feature = "metrics")] 50 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 51 | self.put(&handle.id, key, serialized)?; 52 | Ok(old) 53 | } 54 | 55 | fn value_fast_set( 56 | &self, 57 | handle: &Handle, IK, N>, 58 | value: T, 59 | ) -> Result<()> { 60 | let key = handle.serialize_metakeys()?; 61 | let serialized = protobuf::serialize(&value)?; 62 | #[cfg(feature = "metrics")] 63 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 64 | self.put(&handle.id, key, serialized)?; 65 | Ok(()) 66 | } 67 | 68 | fn value_fast_set_by_ref( 69 | &self, 70 | handle: &Handle, IK, N>, 71 | value: &T, 72 | ) -> Result<()> { 73 | let key = handle.serialize_metakeys()?; 74 | let serialized = protobuf::serialize(value)?; 75 | #[cfg(feature = "metrics")] 76 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 77 | self.put(&handle.id, key, serialized)?; 78 | Ok(()) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/sled/aggregator_ops.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::Metakey, error::*, serialization::protobuf, sled::Sled, Aggregator, AggregatorOps, 3 | AggregatorState, Handle, 4 | }; 5 | 6 | use sled::MergeOperator; 7 | use std::iter; 8 | 9 | #[cfg(feature = "metrics")] 10 | use crate::metrics_utils::*; 11 | 12 | pub(crate) const ACCUMULATOR_MARKER: u8 = 0xAC; 13 | pub(crate) const VALUE_MARKER: u8 = 0x00; 14 | 15 | impl AggregatorOps for Sled { 16 | fn aggregator_clear( 17 | &self, 18 | handle: &Handle, IK, N>, 19 | ) -> Result<()> { 20 | let key = handle.serialize_metakeys()?; 21 | self.remove(&handle.id, &key)?; 22 | Ok(()) 23 | } 24 | 25 | fn aggregator_get( 26 | &self, 27 | handle: &Handle, IK, N>, 28 | ) -> Result<::Result> { 29 | let key = handle.serialize_metakeys()?; 30 | 31 | if let Some(serialized) = self.get(&handle.id, &key)? { 32 | assert_eq!(serialized[0], ACCUMULATOR_MARKER); 33 | let serialized = &serialized[1..]; 34 | #[cfg(feature = "metrics")] 35 | record_bytes_read(handle.name(), serialized.len() as u64, self.name.as_str()); 36 | let current_accumulator = protobuf::deserialize(serialized)?; 37 | Ok(handle 38 | .extra_data 39 | .accumulator_into_result(current_accumulator)) 40 | } else { 41 | Ok(handle 42 | .extra_data 43 | .accumulator_into_result(handle.extra_data.create_accumulator())) 44 | } 45 | } 46 | 47 | fn aggregator_aggregate( 48 | &self, 49 | handle: &Handle, IK, N>, 50 | value: ::Input, 51 | ) -> Result<()> { 52 | let key = handle.serialize_metakeys()?; 53 | let mut serialized = vec![VALUE_MARKER]; 54 | protobuf::serialize_into(&mut serialized, &value)?; 55 | #[cfg(feature = "metrics")] 56 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 57 | 58 | // See the make_aggregator_merge function in this module. Its result is set as the merging operator for this state. 
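// For illustration (not in the original source): the VALUE_MARKER prefix
// written above lets make_aggregator_merge distinguish raw input values from
// already-folded accumulators, which carry ACCUMULATOR_MARKER as their first byte.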
59 | self.tree(&handle.id)?.merge(key, serialized)?; 60 | 61 | Ok(()) 62 | } 63 | } 64 | 65 | pub fn make_aggregator_merge(aggregator: A) -> impl MergeOperator + 'static 66 | where 67 | A: Aggregator, 68 | { 69 | move |_key: &[u8], existent: Option<&[u8]>, new: &[u8]| { 70 | let mut all_slices = existent.into_iter().chain(iter::once(new)); 71 | 72 | let first = all_slices.next(); 73 | let mut accumulator = { 74 | match first { 75 | Some([ACCUMULATOR_MARKER, accumulator_bytes @ ..]) => { 76 | protobuf::deserialize(accumulator_bytes).ok()? 77 | } 78 | Some([VALUE_MARKER, value_bytes @ ..]) => { 79 | let value: A::Input = protobuf::deserialize(value_bytes).ok()?; 80 | let mut acc = aggregator.create_accumulator(); 81 | aggregator.add(&mut acc, value); 82 | acc 83 | } 84 | Some(_) => { 85 | eprintln!("unknown operand in aggregate merge operator"); 86 | return None; 87 | } 88 | None => aggregator.create_accumulator(), 89 | } 90 | }; 91 | 92 | for slice in all_slices { 93 | match slice { 94 | [ACCUMULATOR_MARKER, accumulator_bytes @ ..] => { 95 | let second_acc = protobuf::deserialize(accumulator_bytes).ok()?; 96 | accumulator = aggregator.merge_accumulators(accumulator, second_acc); 97 | } 98 | [VALUE_MARKER, value_bytes @ ..] => { 99 | let value = protobuf::deserialize(value_bytes).ok()?; 100 | aggregator.add(&mut accumulator, value); 101 | } 102 | _ => { 103 | eprintln!("unknown operand in aggregate merge operator"); 104 | return None; 105 | } 106 | } 107 | } 108 | 109 | let mut result = vec![ACCUMULATOR_MARKER]; 110 | protobuf::serialize_into(&mut result, &accumulator).ok()?; 111 | 112 | Some(result) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/sled/reducer_ops.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{Metakey, Value}, 3 | error::*, 4 | serialization::protobuf, 5 | sled::Sled, 6 | Handle, Reducer, ReducerOps, ReducerState, 7 | }; 8 | 9 | #[cfg(feature = "metrics")] 10 | use crate::metrics_utils::*; 11 | 12 | use sled::MergeOperator; 13 | use std::iter; 14 | 15 | impl ReducerOps for Sled { 16 | fn reducer_clear, IK: Metakey, N: Metakey>( 17 | &self, 18 | handle: &Handle, IK, N>, 19 | ) -> Result<()> { 20 | let key = handle.serialize_metakeys()?; 21 | self.remove(&handle.id, &key)?; 22 | Ok(()) 23 | } 24 | 25 | fn reducer_get, IK: Metakey, N: Metakey>( 26 | &self, 27 | handle: &Handle, IK, N>, 28 | ) -> Result> { 29 | let key = handle.serialize_metakeys()?; 30 | if let Some(storage) = self.get(&handle.id, &key)? { 31 | #[cfg(feature = "metrics")] 32 | record_bytes_read(handle.name(), storage.len() as u64, self.name.as_str()); 33 | let value = protobuf::deserialize(&*storage)?; 34 | Ok(Some(value)) 35 | } else { 36 | Ok(None) 37 | } 38 | } 39 | 40 | fn reducer_reduce, IK: Metakey, N: Metakey>( 41 | &self, 42 | handle: &Handle, IK, N>, 43 | value: T, 44 | ) -> Result<()> { 45 | let key = handle.serialize_metakeys()?; 46 | let serialized = protobuf::serialize(&value)?; 47 | #[cfg(feature = "metrics")] 48 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 49 | 50 | // See the make_reducer_merge function in this module. Its result is set as the merging 51 | // operator for this state. 
52 | self.tree(&handle.id)?.merge(key, serialized)?; 53 | 54 | Ok(()) 55 | } 56 | } 57 | 58 | pub fn make_reducer_merge(reduce_fn: F) -> impl MergeOperator + 'static 59 | where 60 | F: Reducer, 61 | T: Value, 62 | { 63 | move |_key: &[u8], existent: Option<&[u8]>, new: &[u8]| { 64 | let res = existent 65 | .into_iter() 66 | .chain(iter::once(new)) 67 | .map(|bytes| protobuf::deserialize(bytes)) 68 | .try_fold(None, |acc, value| -> Result<_> { 69 | match acc { 70 | None => Ok(Some(value?)), 71 | Some(old) => Ok(Some(reduce_fn(&old, &value?))), 72 | } 73 | }); 74 | 75 | // TODO: change eprintlns to actual logs 76 | // we don't really have a way to send results back to rust across Sled ffi, so we just log the errors 77 | match res { 78 | Ok(Some(v)) => match protobuf::serialize(&v) { 79 | Ok(serialized) => Some(serialized), 80 | Err(e) => { 81 | eprintln!("reduce state merge result serialization error: {}", e); 82 | None 83 | } 84 | }, 85 | Ok(None) => { 86 | eprintln!("reducing state merge result is None???"); 87 | None 88 | } 89 | Err(e) => { 90 | eprintln!("reducing state merge error: {}", e); 91 | None 92 | } 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/backend/sled/value_ops.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{Metakey, Value}, 3 | error::*, 4 | serialization::protobuf, 5 | sled::Sled, 6 | Handle, ValueOps, ValueState, 7 | }; 8 | 9 | #[cfg(feature = "metrics")] 10 | use crate::metrics_utils::*; 11 | 12 | impl ValueOps for Sled { 13 | fn value_clear( 14 | &self, 15 | handle: &Handle, IK, N>, 16 | ) -> Result<()> { 17 | let key = handle.serialize_metakeys()?; 18 | self.remove(&handle.id, &key)?; 19 | Ok(()) 20 | } 21 | 22 | fn value_get( 23 | &self, 24 | handle: &Handle, IK, N>, 25 | ) -> Result> { 26 | let key = handle.serialize_metakeys()?; 27 | if let Some(serialized) = self.get(&handle.id, &key)? { 28 | #[cfg(feature = "metrics")] 29 | record_bytes_read(handle.name(), serialized.len() as u64, self.name.as_str()); 30 | let value = protobuf::deserialize(&serialized)?; 31 | Ok(Some(value)) 32 | } else { 33 | Ok(None) 34 | } 35 | } 36 | 37 | fn value_set( 38 | &self, 39 | handle: &Handle, IK, N>, 40 | value: T, 41 | ) -> Result> { 42 | let key = handle.serialize_metakeys()?; 43 | let serialized = protobuf::serialize(&value)?; 44 | #[cfg(feature = "metrics")] 45 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 46 | let old = match self.put(&handle.id, &key, &serialized)? 
{ 47 | Some(bytes) => Some(protobuf::deserialize(bytes.as_ref())?), 48 | None => None, 49 | }; 50 | Ok(old) 51 | } 52 | 53 | fn value_fast_set( 54 | &self, 55 | handle: &Handle, IK, N>, 56 | value: T, 57 | ) -> Result<()> { 58 | let key = handle.serialize_metakeys()?; 59 | let serialized = protobuf::serialize(&value)?; 60 | #[cfg(feature = "metrics")] 61 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 62 | self.put(&handle.id, &key, &serialized)?; 63 | Ok(()) 64 | } 65 | 66 | fn value_fast_set_by_ref( 67 | &self, 68 | handle: &Handle, IK, N>, 69 | value: &T, 70 | ) -> Result<()> { 71 | let key = handle.serialize_metakeys()?; 72 | let serialized = protobuf::serialize(value)?; 73 | #[cfg(feature = "metrics")] 74 | record_bytes_written(handle.name(), serialized.len() as u64, self.name.as_str()); 75 | self.put(&handle.id, &key, &serialized)?; 76 | Ok(()) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/data.rs: -------------------------------------------------------------------------------- 1 | use crate::backend::serialization::fixed_bytes::FixedBytes; 2 | 3 | pub trait Value: prost::Message + Default + Clone + 'static {} 4 | impl Value for T where T: prost::Message + Default + Clone + 'static {} 5 | 6 | pub trait Key: prost::Message + Default + Clone + 'static {} 7 | impl Key for T where T: prost::Message + Default + Clone + 'static {} 8 | 9 | pub trait Metakey: FixedBytes + Copy + Clone + Send + Sync + 'static {} 10 | impl Metakey for T where T: FixedBytes + Copy + Clone + Send + Sync + 'static {} 11 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/error.rs: -------------------------------------------------------------------------------- 1 | pub use snafu::{ensure, ErrorCompat, OptionExt, ResultExt}; 2 | use snafu::{Backtrace, Snafu}; 3 | #[cfg(feature = "rocks")] 4 | use std::collections::HashSet; 5 | use std::{io, path::PathBuf, result::Result as StdResult}; 6 | 7 | pub type Result = StdResult; 8 | 9 | #[derive(Debug, Snafu)] 10 | #[snafu(visibility = "pub(crate)")] 11 | pub enum ArconStateError { 12 | #[snafu(context(false))] 13 | IO { 14 | source: io::Error, 15 | backtrace: Backtrace, 16 | }, 17 | #[snafu(display("Invalid path: {}", path.display()))] 18 | InvalidPath { path: PathBuf, backtrace: Backtrace }, 19 | #[snafu(display( 20 | "Encountered unknown node when trying to restore: {:?}. 
Known nodes: {:?}", 21 | unknown_node, 22 | known_nodes 23 | ))] 24 | UnknownNode { 25 | unknown_node: String, 26 | known_nodes: Vec, 27 | backtrace: Backtrace, 28 | }, 29 | #[snafu(display("Destination buffer is too short: {} < {}", dest_len, needed))] 30 | FixedBytesSerializationError { 31 | dest_len: usize, 32 | needed: usize, 33 | backtrace: Backtrace, 34 | }, 35 | #[snafu(display("Source buffer is too short: {} < {}", source_len, needed))] 36 | FixedBytesDeserializationError { 37 | source_len: usize, 38 | needed: usize, 39 | backtrace: Backtrace, 40 | }, 41 | #[snafu(context(false))] 42 | ProtobufDecodeError { 43 | source: prost::DecodeError, 44 | backtrace: Backtrace, 45 | }, 46 | #[snafu(context(false))] 47 | ProtobufEncodeError { 48 | source: prost::EncodeError, 49 | backtrace: Backtrace, 50 | }, 51 | 52 | #[snafu(display("Value in InMemory state backend is of incorrect type"))] 53 | InMemoryWrongType { backtrace: Backtrace }, 54 | 55 | #[cfg(feature = "rocks")] 56 | #[snafu(display("Could not find the requested column family: {:?}", cf_name))] 57 | RocksMissingColumnFamily { 58 | cf_name: String, 59 | backtrace: Backtrace, 60 | }, 61 | #[cfg(feature = "rocks")] 62 | #[snafu(display("Could not find options for column family: {:?}", cf_name))] 63 | RocksMissingOptions { 64 | cf_name: String, 65 | backtrace: Backtrace, 66 | }, 67 | #[cfg(feature = "rocks")] 68 | #[snafu(context(false))] 69 | RocksError { 70 | source: rocksdb::Error, 71 | backtrace: Backtrace, 72 | }, 73 | #[cfg(feature = "rocks")] 74 | #[snafu(display("Rocks state backend is uninitialized! Unknown cfs: {:?}", unknown_cfs))] 75 | RocksUninitialized { 76 | backtrace: Backtrace, 77 | unknown_cfs: HashSet, 78 | }, 79 | #[cfg(feature = "rocks")] 80 | #[snafu(display("Rocks restore directory is not empty: {}", dir.display()))] 81 | RocksRestoreDirNotEmpty { backtrace: Backtrace, dir: PathBuf }, 82 | #[cfg(feature = "sled")] 83 | #[snafu(context(false))] 84 | SledError { 85 | source: ::sled::Error, 86 | backtrace: Backtrace, 87 | }, 88 | #[snafu(display("Error : {}", msg))] 89 | Unknown { msg: String }, 90 | } 91 | -------------------------------------------------------------------------------- /arcon/arcon_state/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `arcon_state` contains all state management related functionality for the arcon system. 
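//!
//! Two `Backend` implementations are currently provided: `sled` (enabled by
//! default) and `rocks` (behind the `rocks` feature flag). The
//! `with_backend_type!` macro in the backend module dispatches over them at runtime.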
2 | 3 | /// State Backend Implementations 4 | pub mod backend; 5 | /// State Trait types 6 | pub mod data; 7 | /// Error utilities 8 | pub mod error; 9 | 10 | #[doc(hidden)] 11 | pub use crate::backend::*; 12 | -------------------------------------------------------------------------------- /arcon/benches/allocator.rs: -------------------------------------------------------------------------------- 1 | use arcon_allocator::{Alloc, Allocator}; 2 | use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion}; 3 | 4 | const ALLOC_SIZE: usize = 1024; 5 | 6 | fn arcon_allocator(c: &mut Criterion) { 7 | let mut group = c.benchmark_group("arcon_allocator"); 8 | group.bench_function("arcon_alloc", arcon_alloc); 9 | group.bench_function("rust_vec_alloc", rust_vec_alloc); 10 | 11 | group.finish() 12 | } 13 | 14 | fn arcon_alloc(b: &mut Bencher) { 15 | let mut a = Allocator::new(81920); 16 | b.iter(|| { 17 | if let Ok(Alloc(id, _)) = unsafe { a.alloc::(ALLOC_SIZE) } { 18 | unsafe { a.dealloc(id) }; 19 | } 20 | }); 21 | } 22 | 23 | fn rust_vec_alloc(b: &mut Bencher) { 24 | b.iter(|| { 25 | black_box(Vec::::with_capacity(ALLOC_SIZE)); 26 | }); 27 | } 28 | 29 | criterion_group!(benches, arcon_allocator); 30 | criterion_main!(benches); 31 | -------------------------------------------------------------------------------- /arcon/src/application/builder.rs: -------------------------------------------------------------------------------- 1 | use super::conf::ApplicationConf; 2 | use super::Application; 3 | use crate::dataflow::{ 4 | dfg::{DFGNodeKind, GlobalNodeId}, 5 | stream::Context, 6 | }; 7 | 8 | /// A Builder for Arcon Applications 9 | /// 10 | /// ApplicationBuilder may be created through any type that implement the extension 11 | /// trait [ToBuilderExt](../../dataflow/sink/trait.ToBuilderExt.html). 12 | /// 13 | /// ## Usage 14 | /// ```no_run 15 | /// use arcon::prelude::*; 16 | /// let mut builder: ApplicationBuilder = (0..100u64) 17 | /// .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)) 18 | /// .print() 19 | /// .builder(); 20 | /// 21 | /// let app: Application = builder.build(); 22 | /// ``` 23 | #[derive(Default)] 24 | pub struct ApplicationBuilder { 25 | ctx: Context, 26 | name: Option, 27 | debug: bool, 28 | conf: ApplicationConf, 29 | } 30 | 31 | impl ApplicationBuilder { 32 | pub(crate) fn new(ctx: Context, debug: bool) -> ApplicationBuilder { 33 | ApplicationBuilder { 34 | ctx, 35 | name: None, 36 | debug, 37 | conf: ApplicationConf::default(), 38 | } 39 | } 40 | /// Sets the name of the application 41 | pub fn name(&mut self, name: impl Into) -> &mut Self { 42 | self.name = Some(name.into()); 43 | self 44 | } 45 | 46 | /// Sets the configuration that is used during the build phase 47 | pub fn config(&mut self, conf: ApplicationConf) -> &mut Self { 48 | self.conf = conf; 49 | self 50 | } 51 | 52 | /// Build an Arcon application 53 | /// 54 | /// Note that this method only builds the application. In order 55 | /// to start it, see the following [method](Application::run). 
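/// A minimal sketch (assuming a `builder` constructed as in the example above):
/// ```ignore
/// let mut app: Application = builder.build();
/// app.run(); // start the application, see [Application::run]
/// ```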
56 | pub fn build(&mut self) -> Application { 57 | let mut app = Application::with_conf(self.conf.clone()); 58 | if self.debug { 59 | app.with_debug_node(); 60 | } 61 | 62 | let mut output_channels = Vec::new(); 63 | 64 | for dfg_node in self.ctx.dfg.graph.iter().rev() { 65 | let operator_id = dfg_node.get_operator_id(); 66 | let input_channels = dfg_node.get_input_channels(); 67 | let node_ids = dfg_node 68 | .get_node_ids() 69 | .iter() 70 | .map(|id| GlobalNodeId { 71 | operator_id, 72 | node_id: *id, 73 | }) 74 | .collect(); 75 | match &dfg_node.kind { 76 | DFGNodeKind::Source(source_factory) => { 77 | let sources = 78 | source_factory.build_source(output_channels.clone(), Vec::new(), &mut app); 79 | app.set_source_manager(sources); 80 | } 81 | DFGNodeKind::Node(constructor) => { 82 | let components = constructor.build_nodes( 83 | node_ids, 84 | input_channels.to_vec(), 85 | output_channels.clone(), 86 | Vec::new(), 87 | &mut app, 88 | ); 89 | output_channels = components.iter().map(|(_, c)| c.clone()).collect(); 90 | } 91 | DFGNodeKind::Placeholder => { 92 | panic!("Critical Error, Stream built incorrectly"); 93 | } 94 | } 95 | } 96 | app 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /arcon/src/application/conf/logger.rs: -------------------------------------------------------------------------------- 1 | use slog::{o, Drain, Fuse, Logger}; 2 | use slog_async::Async; 3 | use std::{fs::OpenOptions, sync::Arc}; 4 | 5 | /// Alias for logger in Arcon 6 | pub type ArconLogger = Logger>>; 7 | 8 | pub const ARCON_LOG_NAME: &str = "arcon.log"; 9 | pub const KOMPACT_LOG_NAME: &str = "kompact.log"; 10 | 11 | /// Defines a logger type 12 | #[derive(Clone, Copy, Debug)] 13 | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] 14 | pub enum LoggerType { 15 | /// Logs output directly to the terminal 16 | Terminal, 17 | /// Logs output to file 18 | File, 19 | } 20 | 21 | impl Default for LoggerType { 22 | fn default() -> Self { 23 | LoggerType::Terminal 24 | } 25 | } 26 | pub fn term_logger() -> ArconLogger { 27 | let decorator = slog_term::TermDecorator::new().build(); 28 | let drain = slog_term::FullFormat::new(decorator).build().fuse(); 29 | let drain = slog_async::Async::new(drain).chan_size(1024).build().fuse(); 30 | 31 | slog::Logger::root_typed( 32 | Arc::new(drain), 33 | o!( 34 | "location" => slog::PushFnValue(|r: &slog::Record<'_>, ser: slog::PushFnValueSerializer<'_>| { 35 | ser.emit(format_args!("{}:{}", r.file(), r.line())) 36 | })), 37 | ) 38 | } 39 | 40 | pub fn file_logger(log_path: &str) -> ArconLogger { 41 | let file = OpenOptions::new() 42 | .create(true) 43 | .write(true) 44 | .truncate(true) 45 | .open(log_path) 46 | .unwrap(); 47 | 48 | let decorator = slog_term::PlainDecorator::new(file); 49 | let drain = slog_term::FullFormat::new(decorator).build().fuse(); 50 | let drain = slog_async::Async::new(drain).chan_size(1024).build().fuse(); 51 | 52 | slog::Logger::root_typed( 53 | Arc::new(drain), 54 | o!( 55 | "location" => slog::PushFnValue(|r: &slog::Record<'_>, ser: slog::PushFnValueSerializer<'_>| { 56 | ser.emit(format_args!("{}:{}", r.file(), r.line())) 57 | })), 58 | ) 59 | } 60 | -------------------------------------------------------------------------------- /arcon/src/buffer/mod.rs: -------------------------------------------------------------------------------- 1 | /// EventBuffer and BufferPool implementations 2 | pub mod event; 3 | /// NetworkBuffer backed by Arcon's allocator 4 | pub mod network; 5 | 
-------------------------------------------------------------------------------- /arcon/src/buffer/network/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::error::*; 2 | use arcon_allocator::{Alloc, AllocId, Allocator}; 3 | use kompact::net::buffers::Chunk; 4 | use std::sync::{Arc, Mutex}; 5 | 6 | /// A Buffer backed by the [ArconAllocator] 7 | /// 8 | /// Is intended to be used by Kompact's network implementation 9 | pub(crate) struct NetworkBuffer { 10 | /// A raw pointer to our allocated memory block 11 | ptr: *mut u8, 12 | /// Reference to the allocator 13 | /// 14 | /// Used to dealloc `ptr` when the NetworkBuffer is dropped 15 | allocator: Arc>, 16 | /// A unique identifier for the allocation 17 | id: AllocId, 18 | /// How many data elements there are in `ptr` 19 | capacity: usize, 20 | } 21 | 22 | impl NetworkBuffer { 23 | /// Creates a new NetworkBuffer 24 | #[inline] 25 | #[allow(dead_code)] 26 | pub fn new(capacity: usize, allocator: Arc>) -> ArconResult { 27 | let mut a = allocator.lock().unwrap(); 28 | 29 | match unsafe { a.alloc::(capacity) } { 30 | Ok(Alloc(id, ptr)) => Ok(NetworkBuffer { 31 | ptr, 32 | allocator: allocator.clone(), 33 | id, 34 | capacity, 35 | }), 36 | Err(err) => Err(Error::Unsupported { 37 | msg: err.to_string(), 38 | }), 39 | } 40 | } 41 | 42 | /// Returns the capacity of the buffer 43 | #[inline] 44 | #[allow(dead_code)] 45 | pub fn capacity(&self) -> usize { 46 | self.capacity 47 | } 48 | } 49 | 50 | impl Drop for NetworkBuffer { 51 | fn drop(&mut self) { 52 | let mut allocator = self.allocator.lock().unwrap(); 53 | // Instruct the allocator to dealloc 54 | unsafe { allocator.dealloc(self.id) }; 55 | } 56 | } 57 | 58 | unsafe impl Send for NetworkBuffer {} 59 | 60 | impl Chunk for NetworkBuffer { 61 | fn as_mut_ptr(&mut self) -> *mut u8 { 62 | self.ptr 63 | } 64 | fn len(&self) -> usize { 65 | self.capacity 66 | } 67 | } 68 | 69 | #[cfg(test)] 70 | mod tests { 71 | use super::*; 72 | 73 | #[test] 74 | fn network_buffer_test() { 75 | // This test does not do much. Just need to ensure allocation and the drop of the NetworkBuffer works correctly. 
76 | let total_bytes = 1024; 77 | let allocator = Arc::new(Mutex::new(Allocator::new(total_bytes))); 78 | { 79 | let buffer: NetworkBuffer = NetworkBuffer::new(512, allocator.clone()).unwrap(); 80 | assert_eq!(buffer.capacity(), 512); 81 | } 82 | // Buffer is dropped, check allocator 83 | let a = allocator.lock().unwrap(); 84 | assert_eq!(a.total_allocations(), 1); 85 | assert_eq!(a.bytes_remaining(), total_bytes); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /arcon/src/data/arrow.rs: -------------------------------------------------------------------------------- 1 | use crate::table::MutableTable; 2 | use arrow::{ 3 | array::{ 4 | ArrayBuilder, BinaryBuilder, BooleanBuilder, Float32Builder, Float64Builder, Int32Builder, 5 | Int64Builder, StringBuilder, StructBuilder, UInt32Builder, UInt64Builder, 6 | }, 7 | datatypes::{DataType, Schema}, 8 | error::ArrowError, 9 | }; 10 | 11 | /// Represents an Arcon type that can be converted to Arrow 12 | pub trait ToArrow { 13 | /// Type to help the runtime know which builder to use 14 | type Builder: ArrayBuilder; 15 | /// Returns the underlying Arrow [DataType] 16 | fn arrow_type() -> DataType; 17 | /// Return the Arrow Schema 18 | fn schema() -> Schema; 19 | /// Creates a new MutableTable 20 | fn table() -> MutableTable; 21 | /// Used to append `self` to an Arrow StructBuilder 22 | fn append(self, builder: &mut StructBuilder, timestamp: Option) -> Result<(), ArrowError>; 23 | } 24 | 25 | macro_rules! to_arrow { 26 | ($type:ty, $builder_type:ty, $arrow_type:expr) => { 27 | impl ToArrow for $type { 28 | type Builder = $builder_type; 29 | 30 | fn arrow_type() -> DataType { 31 | $arrow_type 32 | } 33 | fn schema() -> Schema { 34 | unreachable!( 35 | "Operation not possible for single value {}", 36 | stringify!($type) 37 | ); 38 | } 39 | fn table() -> MutableTable { 40 | unreachable!( 41 | "Operation not possible for single value {}", 42 | stringify!($type) 43 | ); 44 | } 45 | fn append(self, _: &mut StructBuilder, _: Option) -> Result<(), ArrowError> { 46 | unreachable!( 47 | "Operation not possible for single value {}", 48 | stringify!($type) 49 | ); 50 | } 51 | } 52 | }; 53 | } 54 | 55 | // Map types to Arrow Types 56 | to_arrow!(u64, UInt64Builder, DataType::UInt64); 57 | to_arrow!(u32, UInt32Builder, DataType::UInt32); 58 | to_arrow!(i64, Int64Builder, DataType::Int64); 59 | to_arrow!(i32, Int32Builder, DataType::Int32); 60 | to_arrow!(f64, Float64Builder, DataType::Float64); 61 | to_arrow!(f32, Float32Builder, DataType::Float32); 62 | to_arrow!(bool, BooleanBuilder, DataType::Boolean); 63 | to_arrow!(String, StringBuilder, DataType::Utf8); 64 | to_arrow!(Vec, BinaryBuilder, DataType::Binary); 65 | -------------------------------------------------------------------------------- /arcon/src/data/partition.rs: -------------------------------------------------------------------------------- 1 | use prost::*; 2 | use std::hash::Hash; 3 | 4 | /// Defines the total amount of keys in the key space 5 | const MAX_KEY: u64 = 65535; 6 | 7 | pub fn create_shards(total: u64) -> Vec { 8 | assert!( 9 | total < MAX_KEY, 10 | "Attempted to create more shards than allowed keys {}", 11 | MAX_KEY 12 | ); 13 | (0..total) 14 | .into_iter() 15 | .map(|index| { 16 | let start = (index * MAX_KEY + total - 1) / total; 17 | let end = ((index + 1) * MAX_KEY - 1) / total; 18 | Shard::new(index, KeyRange::new(start, end)) 19 | }) 20 | .collect() 21 | } 22 | 23 | pub fn shard_lookup(data: &K, total_shards: u64) -> u64 24 | 
where 25 | K: Hash + ?Sized, 26 | { 27 | let mut hasher = arcon_util::key_hasher(); 28 | data.hash(&mut hasher); 29 | 30 | shard_lookup_with_key(hasher.finish32() as u64, total_shards) 31 | } 32 | 33 | #[inline] 34 | pub fn shard_lookup_with_key(hashed_key: u64, total_shards: u64) -> u64 { 35 | let key = hashed_key % MAX_KEY; 36 | key * total_shards / MAX_KEY 37 | } 38 | 39 | /// A Shard is responsible for a contiguous range of keys 40 | #[derive(Debug)] 41 | pub struct Shard { 42 | /// Shard Identifier 43 | id: u64, 44 | /// Range of keys the shard is responsible for 45 | range: KeyRange, 46 | } 47 | 48 | impl Shard { 49 | pub fn new(id: u64, range: KeyRange) -> Self { 50 | Self { id, range } 51 | } 52 | } 53 | 54 | /// A Key Range with a start and end position 55 | #[derive(Message, PartialEq, Clone)] 56 | pub struct KeyRange { 57 | /// Start of the Key Range 58 | #[prost(uint64)] 59 | pub start: u64, 60 | /// End of the Key Range 61 | #[prost(uint64)] 62 | pub end: u64, 63 | } 64 | 65 | impl KeyRange { 66 | /// Creates a new KeyRange 67 | pub fn new(start: u64, end: u64) -> KeyRange { 68 | assert!(start < end, "start range has to be smaller than end range"); 69 | KeyRange { start, end } 70 | } 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use super::*; 76 | 77 | #[test] 78 | fn shard_test() { 79 | let shards = create_shards(4); 80 | assert_eq!(shards[0].range, KeyRange::new(0, 16383)); 81 | assert_eq!(shards[1].range, KeyRange::new(16384, 32767)); 82 | assert_eq!(shards[2].range, KeyRange::new(32768, 49151)); 83 | assert_eq!(shards[3].range, KeyRange::new(49152, 65534)); 84 | } 85 | 86 | #[test] 87 | fn shard_lookup_test() { 88 | let shards = create_shards(4); 89 | let total_shards = shards.len() as u64; 90 | 91 | let s1 = shard_lookup("a", total_shards); 92 | let s2 = shard_lookup("a", total_shards); 93 | assert_eq!(s1, s2); 94 | 95 | let s3 = shard_lookup("b", total_shards); 96 | assert_ne!(s2, s3); 97 | 98 | // verify tuples work.. 
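// (tuples implement Hash by hashing each element in order, so a pair such as
// (10, "user") acts as one stable composite key)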
99 | assert_eq!( 100 | shard_lookup(&(10, "user"), total_shards), 101 | shard_lookup(&(10, "user"), total_shards) 102 | ); 103 | assert_ne!( 104 | shard_lookup(&(10, "user"), total_shards), 105 | shard_lookup(&(11, "other_user"), total_shards) 106 | ); 107 | 108 | let mut hit: [bool; 4] = [false; 4]; 109 | 110 | // all shards ought to be hit 111 | for i in 0..100 { 112 | let shard = shard_lookup(&i, total_shards); 113 | hit[shard as usize] = true; 114 | } 115 | assert_eq!(hit, [true, true, true, true]); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /arcon/src/data/ser_id.rs: -------------------------------------------------------------------------------- 1 | use kompact::prelude::SerId; 2 | 3 | pub const NEVER_ID: SerId = 49; 4 | 5 | // Serialisation IDs for Arcon primitives 6 | pub const RELIABLE_U32_ID: SerId = 51; 7 | pub const RELIABLE_U64_ID: SerId = 53; 8 | pub const RELIABLE_I32_ID: SerId = 55; 9 | pub const RELIABLE_I64_ID: SerId = 57; 10 | pub const RELIABLE_F32_ID: SerId = 59; 11 | pub const RELIABLE_F64_ID: SerId = 61; 12 | pub const RELIABLE_STRING_ID: SerId = 63; 13 | pub const RELIABLE_BOOLEAN_ID: SerId = 65; 14 | -------------------------------------------------------------------------------- /arcon/src/dataflow/builder.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::StateID, 3 | dataflow::conf::{DefaultBackend, OperatorConf, SourceConf}, 4 | index::{ArconState, EMPTY_STATE_ID}, 5 | stream::{ 6 | operator::Operator, 7 | source::Source, 8 | time::{ArconTime, Time}, 9 | }, 10 | }; 11 | use arcon_state::Backend; 12 | use std::sync::Arc; 13 | 14 | /// Operator Builder 15 | /// 16 | /// Defines everything needed in order for Arcon to instantiate 17 | /// and manage an Operator during runtime. 
18 | /// 19 | /// ```no_run 20 | /// use arcon::prelude::*; 21 | /// let builder = OperatorBuilder { 22 | /// operator: Arc::new(|| Map::new(|x: u64| x + 10)), 23 | /// state: Arc::new(|_backend: Arc| EmptyState), 24 | /// conf: Default::default(), 25 | /// }; 26 | ///``` 27 | #[derive(Clone)] 28 | pub struct OperatorBuilder { 29 | /// Operator Constructor 30 | pub operator: Arc OP + Send + Sync + 'static>, 31 | /// State Constructor 32 | pub state: Arc) -> OP::OperatorState + Send + Sync + 'static>, 33 | /// Operator Config 34 | pub conf: OperatorConf, 35 | } 36 | 37 | impl OperatorBuilder { 38 | pub(crate) fn create_backend( 39 | &self, 40 | state_dir: std::path::PathBuf, 41 | name: String, 42 | ) -> Arc { 43 | Arc::new(Backend::create(&state_dir, name).unwrap()) 44 | } 45 | pub(crate) fn _state_id(&self) -> StateID { 46 | let mut state_id = OP::OperatorState::STATE_ID.to_owned(); 47 | if state_id == EMPTY_STATE_ID { 48 | // create unique identifier so there is no clash between empty states 49 | let unique_id = uuid::Uuid::new_v4().to_string(); 50 | state_id = format!("{}_{}", state_id, unique_id); 51 | } 52 | state_id 53 | } 54 | } 55 | 56 | type SourceIndex = usize; 57 | type TotalSources = usize; 58 | 59 | #[derive(Clone)] 60 | pub enum SourceBuilderType 61 | where 62 | S: Source, 63 | B: Backend, 64 | { 65 | Single(SourceBuilder), 66 | Parallel(ParallelSourceBuilder), 67 | } 68 | 69 | impl SourceBuilderType 70 | where 71 | S: Source, 72 | B: Backend, 73 | { 74 | pub fn parallelism(&self) -> usize { 75 | match self { 76 | SourceBuilderType::Single(_) => 1, 77 | SourceBuilderType::Parallel(builder) => builder.parallelism, 78 | } 79 | } 80 | pub fn time(&self) -> ArconTime { 81 | match self { 82 | SourceBuilderType::Single(builder) => builder.conf.time, 83 | SourceBuilderType::Parallel(builder) => builder.conf.time, 84 | } 85 | } 86 | } 87 | 88 | /// Source Builder 89 | /// 90 | /// Defines how Sources are constructed and managed during runtime. 
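///
/// A minimal sketch of building one by hand (hedged: the generic parameters
/// are elided in this listing, and the iterator source mirrors the
/// iterator-to-stream constructor in `dataflow/source/mod.rs`):
/// ```no_run
/// use arcon::prelude::*;
/// let builder = SourceBuilder {
///     constructor: Arc::new(|_backend| (0..100u64).into_iter()),
///     conf: SourceConf::default(),
/// };
/// ```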
91 | #[derive(Clone)] 92 | pub struct SourceBuilder { 93 | /// Source Constructor 94 | pub constructor: Arc) -> S + Send + Sync + 'static>, 95 | /// Source Config 96 | pub conf: SourceConf, 97 | } 98 | 99 | #[derive(Clone)] 100 | pub struct ParallelSourceBuilder { 101 | /// Source Constructor 102 | pub constructor: 103 | Arc, SourceIndex, TotalSources) -> S + Send + Sync + 'static>, 104 | /// Source Config 105 | pub conf: SourceConf, 106 | /// Source Parallelism 107 | pub parallelism: usize, 108 | } 109 | 110 | #[derive(Clone)] 111 | pub struct KeyBuilder { 112 | pub extractor: Arc<(dyn Fn(&T) -> u64 + Send + Sync)>, 113 | } 114 | 115 | impl KeyBuilder { 116 | pub fn get_key(&self, event: &T) -> u64 { 117 | (self.extractor)(event) 118 | } 119 | } 120 | 121 | /// Enum containing different window assigner types 122 | #[derive(Clone, Copy)] 123 | pub enum Assigner { 124 | Sliding { 125 | length: Time, 126 | slide: Time, 127 | late_arrival: Time, 128 | }, 129 | Tumbling { 130 | length: Time, 131 | late_arrival: Time, 132 | }, 133 | } 134 | -------------------------------------------------------------------------------- /arcon/src/dataflow/conf.rs: -------------------------------------------------------------------------------- 1 | use super::builder::Assigner; 2 | use crate::{data::ArconType, stream::time::ArconTime}; 3 | use std::sync::Arc; 4 | 5 | #[cfg(all(feature = "hardware_counters", target_os = "linux"))] 6 | use crate::metrics::perf_event::PerfEvents; 7 | 8 | // Defines a Default State Backend for high-level operators that do not use any 9 | // custom-defined state but still need a backend defined for internal runtime state. 10 | cfg_if::cfg_if! { 11 | if #[cfg(feature = "rocksdb")] { 12 | #[cfg(not(test))] 13 | pub type DefaultBackend = arcon_state::Rocks; 14 | #[cfg(test)] 15 | pub type DefaultBackend = arcon_state::Sled; 16 | } else { 17 | pub type DefaultBackend = arcon_state::Sled; 18 | } 19 | } 20 | 21 | /// Defines how the runtime will manage the 22 | /// parallelism for a specific Arcon Operator. 23 | #[derive(Copy, Clone, Debug)] 24 | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] 25 | pub enum ParallelismStrategy { 26 | /// Use a static number of Arcon nodes 27 | Static(usize), 28 | /// Tells the runtime to manage the parallelism 29 | Managed, 30 | } 31 | 32 | impl Default for ParallelismStrategy { 33 | fn default() -> Self { 34 | // static for now until managed is complete and stable.. 35 | ParallelismStrategy::Static(1) 36 | } 37 | } 38 | 39 | /// Defines whether a stream is Keyed or Local 40 | /// 41 | /// Streams are by default Keyed in Arcon. 42 | #[derive(Clone, Copy, Debug)] 43 | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] 44 | pub enum StreamKind { 45 | Keyed, 46 | Local, 47 | } 48 | 49 | impl Default for StreamKind { 50 | fn default() -> Self { 51 | StreamKind::Keyed 52 | } 53 | } 54 | 55 | /// Operator Configuration 56 | /// 57 | /// Defines how an Operator is to be executed on Arcon. 58 | #[derive(Default, Clone, Debug)] 59 | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] 60 | pub struct OperatorConf { 61 | /// Parallelism Strategy for this Operator 62 | pub parallelism_strategy: ParallelismStrategy, 63 | /// Defines the type of Stream, by default streams are Keyed in Arcon.
64 | pub stream_kind: StreamKind, 65 | #[cfg(all(feature = "hardware_counters", target_os = "linux"))] 66 | pub perf_events: PerfEvents, 67 | } 68 | 69 | impl OperatorConf { 70 | /// Load an OperatorConf from a File using the Hocon format 71 | #[cfg(all(feature = "serde", feature = "hocon"))] 72 | pub fn from_file(path: impl AsRef) -> OperatorConf { 73 | // okay to panic here as this is during setup code... 74 | let data = std::fs::read_to_string(path).unwrap(); 75 | hocon::HoconLoader::new() 76 | .load_str(&data) 77 | .unwrap() 78 | .resolve() 79 | .unwrap() 80 | } 81 | } 82 | 83 | pub type TimestampExtractor = Arc u64 + Send + Sync>; 84 | 85 | /// Source Configuration 86 | #[derive(Clone)] 87 | pub struct SourceConf { 88 | pub extractor: Option>, 89 | pub time: ArconTime, 90 | pub batch_size: usize, 91 | pub name: String, 92 | } 93 | 94 | impl SourceConf { 95 | /// Set [ArconTime] to be used for a Source 96 | pub fn set_arcon_time(&mut self, time: ArconTime) { 97 | self.time = time; 98 | } 99 | /// Set a Timestamp Extractor for a Source 100 | pub fn set_timestamp_extractor(&mut self, f: impl Fn(&S) -> u64 + Send + Sync + 'static) { 101 | self.extractor = Some(Arc::new(f)); 102 | } 103 | // Set batch size per process iteration 104 | pub fn set_batch_size(&mut self, size: usize) { 105 | self.batch_size = size; 106 | } 107 | 108 | pub fn set_source_name(&mut self, name: String) { 109 | self.name = name; 110 | } 111 | } 112 | 113 | impl Default for SourceConf { 114 | fn default() -> Self { 115 | Self { 116 | extractor: None, 117 | time: Default::default(), 118 | batch_size: 1024, 119 | name: format!("source_{}", uuid::Uuid::new_v4()), 120 | } 121 | } 122 | } 123 | 124 | #[derive(Clone, Copy)] 125 | pub struct WindowConf { 126 | pub assigner: Assigner, 127 | } 128 | -------------------------------------------------------------------------------- /arcon/src/dataflow/mod.rs: -------------------------------------------------------------------------------- 1 | /// Builder types used in the API 2 | pub mod builder; 3 | /// Dataflow configurations for Operators and Sources 4 | pub mod conf; 5 | /// Sink utilities and extension traits 6 | pub mod sink; 7 | /// Sources that can be converted into Streams 8 | pub mod source; 9 | /// High-level Stream types that users may perform a series of transformations on 10 | pub mod stream; 11 | 12 | /// Runtime constructors 13 | pub(crate) mod constructor; 14 | /// Logical Dataflow Graph 15 | pub(crate) mod dfg; 16 | -------------------------------------------------------------------------------- /arcon/src/dataflow/sink/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::application::ApplicationBuilder; 2 | use crate::data::ArconType; 3 | use crate::dataflow::dfg::ChannelKind; 4 | use crate::dataflow::stream::Stream; 5 | use crate::stream::operator::sink::measure::MeasureSink; 6 | use crate::{ 7 | dataflow::stream::OperatorExt, 8 | dataflow::{ 9 | builder::OperatorBuilder, 10 | conf::{OperatorConf, ParallelismStrategy}, 11 | }, 12 | index::EmptyState, 13 | }; 14 | use std::sync::Arc; 15 | 16 | /// Extension trait for sinks 17 | pub trait ToSinkExt { 18 | /// Print the stream outputs 19 | /// 20 | /// # Usage 21 | /// ```no_run 22 | /// use arcon::prelude::*; 23 | /// let sink = (0..10u64) 24 | /// .to_stream(|conf| { 25 | /// conf.set_arcon_time(ArconTime::Process); 26 | /// }) 27 | /// .print(); 28 | /// ``` 29 | fn print(self) -> Sink; 30 | /// Ignore the stream outputs 31 | /// 32 | /// # Usage 33 | /// 
```no_run 34 | /// use arcon::prelude::*; 35 | /// let sink = (0..10u64) 36 | /// .to_stream(|conf| { 37 | /// conf.set_arcon_time(ArconTime::Process); 38 | /// }) 39 | /// .ignore(); 40 | /// ``` 41 | fn ignore(self) -> Sink; 42 | /// Insert the stream outputs to a Debug Node 43 | /// 44 | /// # Usage 45 | /// ```no_run 46 | /// use arcon::prelude::*; 47 | /// use std::time::Duration; 48 | /// 49 | /// let mut app = (0..10i32) 50 | /// .to_stream(|conf| { 51 | /// conf.set_arcon_time(ArconTime::Process); 52 | /// }) 53 | /// .filter(|x| *x < 5) 54 | /// .debug() 55 | /// .builder() 56 | /// .build(); 57 | /// 58 | /// app.run(); 59 | /// 60 | /// std::thread::sleep(Duration::from_millis(1000)); 61 | /// 62 | /// let debug_node = app.get_debug_node::().unwrap(); 63 | /// debug_node.on_definition(|cd| { 64 | /// assert_eq!(cd.data.len(), 5); 65 | /// }); 66 | /// ``` 67 | fn debug(self) -> Sink; 68 | /// Send stream outputs to a Measure Sink 69 | /// 70 | /// # Usage 71 | /// ```no_run 72 | /// use arcon::prelude::*; 73 | /// let sink = (0..10u64) 74 | /// .to_stream(|conf| { 75 | /// conf.set_arcon_time(ArconTime::Process); 76 | /// }) 77 | /// .measure(10000000); 78 | /// ``` 79 | fn measure(self, log_freq: u64) -> Sink; 80 | } 81 | 82 | /// A Sink struct that implements the [ToBuilderExt] trait 83 | /// 84 | /// Note that Arcon currently doesn't have a specific Sink trait at this moment. 85 | pub struct Sink { 86 | stream: Stream, 87 | debug: bool, 88 | } 89 | 90 | impl ToSinkExt for Stream { 91 | fn print(mut self) -> Sink { 92 | self.set_channel_kind(ChannelKind::Console); 93 | Sink { 94 | stream: self, 95 | debug: false, 96 | } 97 | } 98 | fn debug(mut self) -> Sink { 99 | self.set_channel_kind(ChannelKind::Forward); 100 | Sink { 101 | stream: self, 102 | debug: true, 103 | } 104 | } 105 | fn ignore(mut self) -> Sink { 106 | self.set_channel_kind(ChannelKind::Mute); 107 | Sink { 108 | stream: self, 109 | debug: false, 110 | } 111 | } 112 | fn measure(self, log_freq: u64) -> Sink { 113 | let mut stream = self.operator(OperatorBuilder { 114 | operator: Arc::new(move || MeasureSink::new(log_freq)), 115 | state: Arc::new(|_| EmptyState), 116 | conf: OperatorConf { 117 | parallelism_strategy: ParallelismStrategy::Static(1), 118 | ..Default::default() 119 | }, 120 | }); 121 | stream.set_channel_kind(ChannelKind::Mute); 122 | Sink { 123 | stream, 124 | debug: false, 125 | } 126 | } 127 | } 128 | 129 | mod private { 130 | use super::*; 131 | pub trait Sealed {} 132 | impl Sealed for Sink {} 133 | } 134 | 135 | /// Extension trait for types that can be turned into [ApplicationBuilder] 136 | pub trait ToBuilderExt: private::Sealed { 137 | fn builder(self) -> ApplicationBuilder; 138 | } 139 | 140 | impl ToBuilderExt for Sink { 141 | fn builder(mut self) -> ApplicationBuilder { 142 | self.stream.move_last_node(); 143 | ApplicationBuilder::new(self.stream.ctx, self.debug) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /arcon/src/dataflow/source/kafka.rs: -------------------------------------------------------------------------------- 1 | use super::super::stream::Stream; 2 | use super::ToStreamExt; 3 | use crate::dataflow::builder::ParallelSourceBuilder; 4 | use crate::dataflow::source::SourceBuilderType; 5 | use crate::dataflow::{conf::DefaultBackend, conf::SourceConf}; 6 | use crate::prelude::KafkaConsumerConf; 7 | use crate::stream::source::{ 8 | kafka::{KafkaConsumer, KafkaConsumerState}, 9 | schema::SourceSchema, 10 | }; 11 | use 
std::sync::Arc; 12 | 13 | /// An unbounded Kafka Source 14 | /// 15 | /// Returns a [`Stream`] object that users may execute transformations on. 16 | /// 17 | /// # Example 18 | /// ```no_run 19 | /// use arcon::prelude::*; 20 | /// 21 | /// let consumer_conf = KafkaConsumerConf::default() 22 | /// .with_topic("test") 23 | /// .set("group.id", "test") 24 | /// .set("bootstrap.servers", "localhost:9092") 25 | /// .set("enable.auto.commit", "false"); 26 | /// 27 | /// let paralellism = 2; 28 | /// let stream = KafkaSource::new(consumer_conf, ProtoSchema::new(), paralellism) 29 | /// .to_stream(|conf| { 30 | /// conf.set_arcon_time(ArconTime::Event); 31 | /// conf.set_timestamp_extractor(|x: &u64| *x); 32 | /// }); 33 | /// ``` 34 | pub struct KafkaSource { 35 | kafka_conf: KafkaConsumerConf, 36 | schema: S, 37 | parallelism: usize, 38 | } 39 | 40 | impl KafkaSource { 41 | pub fn new(kafka_conf: KafkaConsumerConf, schema: S, parallelism: usize) -> Self { 42 | Self { 43 | kafka_conf, 44 | schema, 45 | parallelism, 46 | } 47 | } 48 | } 49 | 50 | impl ToStreamExt for KafkaSource { 51 | fn to_stream)>(self, f: F) -> Stream { 52 | let mut conf = SourceConf::default(); 53 | f(&mut conf); 54 | 55 | let kafka_conf = self.kafka_conf; 56 | let schema = self.schema; 57 | let parallelism = self.parallelism; 58 | 59 | let builder = ParallelSourceBuilder { 60 | constructor: Arc::new(move |backend: Arc, index, total_sources| { 61 | KafkaConsumer::new( 62 | kafka_conf.clone(), 63 | KafkaConsumerState::new(backend), 64 | schema.clone(), 65 | index, 66 | total_sources, 67 | ) 68 | }), 69 | conf, 70 | parallelism, 71 | }; 72 | super::source_to_stream(SourceBuilderType::Parallel(builder)) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /arcon/src/dataflow/source/mod.rs: -------------------------------------------------------------------------------- 1 | use super::builder::SourceBuilderType; 2 | use super::stream::Stream; 3 | use crate::stream::source::Source; 4 | use crate::{data::ArconType, dataflow::conf::DefaultBackend}; 5 | use crate::{ 6 | dataflow::{ 7 | builder::SourceBuilder, 8 | conf::SourceConf, 9 | constructor::{SourceConstructor, TypedSourceFactory}, 10 | dfg::{DFGNode, DFGNodeKind}, 11 | stream::Context, 12 | }, 13 | prelude::*, 14 | }; 15 | use arcon_state::Backend; 16 | use std::rc::Rc; 17 | use std::sync::Arc; 18 | 19 | #[cfg(feature = "kafka")] 20 | pub mod kafka; 21 | 22 | #[cfg(feature = "kafka")] 23 | pub use kafka::KafkaSource; 24 | 25 | /// Extension trait for types that can be converted to streams 26 | pub trait ToStreamExt { 27 | /// Convert a source type to a [Stream] 28 | /// 29 | /// # Example 30 | /// ```no_run 31 | /// use arcon::prelude::*; 32 | /// let stream: Stream = (0..100u64) 33 | /// .to_stream(|conf| { 34 | /// conf.set_arcon_time(ArconTime::Process); 35 | /// }); 36 | /// ``` 37 | fn to_stream)>(self, f: F) -> Stream; 38 | } 39 | 40 | fn source_to_stream(builder_type: SourceBuilderType) -> Stream 41 | where 42 | S: Source, 43 | B: Backend, 44 | { 45 | let parallelism = builder_type.parallelism(); 46 | let time = builder_type.time(); 47 | let manager_constructor = 48 | SourceConstructor::new(String::from("source_manager"), builder_type, time); 49 | let mut ctx = Context::default(); 50 | let typed_source_factory: Rc> = Rc::new(manager_constructor); 51 | let operator_id = 0; // source is the first operator 52 | let dfg_node = DFGNode::new(DFGNodeKind::Placeholder, operator_id, parallelism, vec![]); 53 | 
ctx.dfg.insert(dfg_node); 54 | Stream::new(ctx, typed_source_factory) 55 | } 56 | 57 | impl ToStreamExt for I 58 | where 59 | I: IntoIterator + 'static + Clone + Send + Sync, 60 | I::IntoIter: Send, 61 | I::Item: ArconType, 62 | { 63 | fn to_stream)>(self, f: F) -> Stream { 64 | let mut conf = SourceConf::default(); 65 | f(&mut conf); 66 | let builder = SourceBuilder { 67 | constructor: Arc::new(move |_: Arc| self.clone().into_iter()), 68 | conf, 69 | }; 70 | source_to_stream(SourceBuilderType::Single(builder)) 71 | } 72 | } 73 | 74 | /// A Local File Source 75 | /// 76 | /// Returns a [`Stream`] object that users may execute transformations on. 77 | /// 78 | /// # Example 79 | /// ```no_run 80 | /// use arcon::prelude::*; 81 | /// 82 | /// let stream: Stream = LocalFileSource::new("path_to_file") 83 | /// .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)); 84 | /// ``` 85 | #[derive(Clone)] 86 | pub struct LocalFileSource { 87 | path: String, 88 | } 89 | impl LocalFileSource { 90 | pub fn new(path: impl Into) -> Self { 91 | Self { path: path.into() } 92 | } 93 | } 94 | 95 | impl ToStreamExt for LocalFileSource 96 | where 97 | A: ArconType + std::str::FromStr + std::fmt::Display, 98 | ::Err: std::fmt::Display, 99 | { 100 | fn to_stream)>(self, f: F) -> Stream { 101 | let mut conf = SourceConf::default(); 102 | f(&mut conf); 103 | let builder = SourceBuilder { 104 | constructor: Arc::new(move |_: Arc| { 105 | use crate::stream::source::local_file::LocalFileSourceImpl; 106 | LocalFileSourceImpl::new(self.path.clone()) 107 | }), 108 | conf, 109 | }; 110 | source_to_stream(SourceBuilderType::Single(builder)) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/arrow/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::data::arrow::ToArrow; 2 | use crate::data::ArconType; 3 | use crate::dataflow::stream::Stream; 4 | 5 | /// A Stream that supports Arrow analytics 6 | pub struct ArrowStream { 7 | pub(crate) stream: Stream, 8 | } 9 | 10 | impl ArrowStream { 11 | pub(super) fn from(stream: Stream) -> Self { 12 | Self { stream } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/filter.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::ArconType, 3 | dataflow::{ 4 | builder::OperatorBuilder, 5 | stream::{OperatorExt, Stream}, 6 | }, 7 | index::EmptyState, 8 | stream::operator::function, 9 | util::ArconFnBounds, 10 | }; 11 | use std::sync::Arc; 12 | 13 | /// Extension trait for filter operations 14 | pub trait FilterExt { 15 | /// Filter out records based on the given predicate 16 | /// 17 | /// # Example 18 | /// ```rust 19 | /// use arcon::prelude::*; 20 | /// let stream: Stream = (0..100) 21 | /// .to_stream(|conf| { 22 | /// conf.set_arcon_time(ArconTime::Process); 23 | /// }) 24 | /// .filter(|x| x < &50); 25 | /// ``` 26 | fn filter bool + ArconFnBounds>(self, f: F) -> Self; 27 | } 28 | 29 | impl FilterExt for Stream { 30 | #[must_use] 31 | fn filter bool + ArconFnBounds>(self, f: F) -> Self { 32 | self.operator(OperatorBuilder { 33 | operator: Arc::new(move || function::Filter::new(f.clone())), 34 | state: Arc::new(|_| EmptyState), 35 | conf: Default::default(), 36 | }) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/keyed/mod.rs: 
-------------------------------------------------------------------------------- 1 | use crate::data::ArconType; 2 | use crate::dataflow::stream::Stream; 3 | 4 | /// Represents a stream that has been keyed 5 | pub struct KeyedStream { 6 | pub(crate) stream: Stream, 7 | } 8 | 9 | impl KeyedStream { 10 | pub(super) fn from(stream: Stream) -> Self { 11 | Self { stream } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/map.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::ArconType, 3 | dataflow::{ 4 | builder::OperatorBuilder, 5 | stream::{OperatorExt, Stream}, 6 | }, 7 | index::EmptyState, 8 | stream::operator::function, 9 | util::ArconFnBounds, 10 | }; 11 | use std::sync::Arc; 12 | 13 | /// Extension trait for map operations 14 | pub trait MapExt { 15 | /// Map each stream record to a possibly new type 16 | /// 17 | /// # Example 18 | /// ```rust 19 | /// use arcon::prelude::*; 20 | /// let stream: Stream = (0..100) 21 | /// .to_stream(|conf| { 22 | /// conf.set_arcon_time(ArconTime::Process); 23 | /// }) 24 | /// .map(|x| x + 10); 25 | /// ``` 26 | fn map OUT + ArconFnBounds>(self, f: F) -> Stream; 27 | /// Map each record in place keeping the same stream type 28 | /// 29 | /// # Example 30 | /// ```rust 31 | /// use arcon::prelude::*; 32 | /// let stream: Stream = (0..100) 33 | /// .to_stream(|conf| { 34 | /// conf.set_arcon_time(ArconTime::Process); 35 | /// }) 36 | /// .map_in_place(|x| *x += 10); 37 | /// ``` 38 | fn map_in_place(self, f: F) -> Stream; 39 | /// Akin to [Iterator::flat_map] but on a Stream 40 | /// 41 | /// # Example 42 | /// ```rust 43 | /// use arcon::prelude::*; 44 | /// let stream: Stream = (0..100) 45 | /// .to_stream(|conf| { 46 | /// conf.set_arcon_time(ArconTime::Process); 47 | /// }) 48 | /// .flat_map(|x| (0..x)); 49 | /// ``` 50 | fn flat_map(self, f: F) -> Stream 51 | where 52 | I: IntoIterator + 'static, 53 | I::Item: ArconType, 54 | F: Fn(T) -> I + ArconFnBounds; 55 | } 56 | 57 | impl MapExt for Stream { 58 | #[must_use] 59 | fn map OUT + ArconFnBounds>(self, f: F) -> Stream { 60 | self.operator(OperatorBuilder { 61 | operator: Arc::new(move || function::Map::new(f.clone())), 62 | state: Arc::new(|_| EmptyState), 63 | conf: Default::default(), 64 | }) 65 | } 66 | #[must_use] 67 | fn map_in_place(self, f: F) -> Stream { 68 | self.operator(OperatorBuilder { 69 | operator: Arc::new(move || function::MapInPlace::new(f.clone())), 70 | state: Arc::new(|_| EmptyState), 71 | conf: Default::default(), 72 | }) 73 | } 74 | #[must_use] 75 | fn flat_map(self, f: F) -> Stream 76 | where 77 | I: IntoIterator + 'static, 78 | I::Item: ArconType, 79 | F: Fn(T) -> I + ArconFnBounds, 80 | { 81 | self.operator(OperatorBuilder { 82 | operator: Arc::new(move || function::FlatMap::new(f.clone())), 83 | state: Arc::new(|_| EmptyState), 84 | conf: Default::default(), 85 | }) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod filter; 2 | pub mod map; 3 | pub mod operator; 4 | pub mod partition; 5 | 6 | #[allow(dead_code)] 7 | pub mod arrow; 8 | #[allow(dead_code)] 9 | pub mod keyed; 10 | 11 | use crate::{ 12 | data::ArconType, 13 | dataflow::{ 14 | constructor::*, 15 | dfg::{ChannelKind, DFGNodeKind, OperatorId, DFG}, 16 | }, 17 | }; 18 | use std::rc::Rc; 19 | 20 | pub 
use filter::FilterExt; 21 | pub use map::MapExt; 22 | pub use operator::OperatorExt; 23 | pub use partition::PartitionExt; 24 | 25 | pub use super::builder::KeyBuilder; 26 | pub use keyed::KeyedStream; 27 | 28 | /// Represents a possibly infinite stream of records 29 | pub struct Stream { 30 | // ID of the node which outputs this stream. 31 | prev_dfg_id: OperatorId, 32 | pub(crate) ctx: Context, 33 | key_builder: Option>, 34 | last_node: Option>>, 35 | source: Option>>, 36 | } 37 | 38 | impl Stream { 39 | /// Move the optional last_node/source-Factory into the DFG struct, will no longer be mutable after this. 40 | pub(crate) fn move_last_node(&mut self) { 41 | if let Some(node) = self.last_node.take() { 42 | let prev_dfg_node = self.ctx.dfg.get_mut(&self.prev_dfg_id); 43 | assert!(matches!(&prev_dfg_node.kind, DFGNodeKind::Placeholder)); // Make sure nothing bad has happened 44 | prev_dfg_node.kind = DFGNodeKind::Node(node.untype()); 45 | } else if let Some(source) = self.source.take() { 46 | let prev_dfg_node = self.ctx.dfg.get_mut(&self.prev_dfg_id); 47 | assert!(matches!(&prev_dfg_node.kind, DFGNodeKind::Placeholder)); // Make sure nothing bad has happened 48 | prev_dfg_node.kind = DFGNodeKind::Source(source.untype()); 49 | } 50 | } 51 | pub(crate) fn set_channel_kind(&mut self, channel_kind: ChannelKind) { 52 | if let Some(ref mut node_factory) = self.last_node { 53 | Rc::get_mut(node_factory) 54 | .unwrap() 55 | .set_channel_kind(channel_kind); 56 | } else if let Some(ref mut source_factory) = self.source { 57 | Rc::get_mut(source_factory) 58 | .unwrap() 59 | .set_channel_kind(channel_kind); 60 | } else { 61 | panic!("Nothing to configure ChannelKind on!"); 62 | } 63 | } 64 | 65 | pub(crate) fn new(ctx: Context, source: Rc>) -> Self { 66 | Self { 67 | prev_dfg_id: 0, 68 | ctx, 69 | last_node: None, 70 | key_builder: None, 71 | source: Some(source), 72 | } 73 | } 74 | } 75 | 76 | #[derive(Default)] 77 | pub(crate) struct Context { 78 | pub(crate) dfg: DFG, 79 | } 80 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/operator.rs: -------------------------------------------------------------------------------- 1 | use crate::data::ArconType; 2 | use crate::dataflow::stream::Stream; 3 | use crate::dataflow::{ 4 | builder::OperatorBuilder, 5 | conf::{DefaultBackend, ParallelismStrategy}, 6 | constructor::*, 7 | dfg::{DFGNode, DFGNodeKind}, 8 | }; 9 | use crate::stream::operator::Operator; 10 | use std::rc::Rc; 11 | use std::sync::Arc; 12 | 13 | use super::keyed::KeyedStream; 14 | 15 | /// Extension trait for creating an [Operator] 16 | pub trait OperatorExt { 17 | /// Add an [`Operator`] to the dataflow graph 18 | /// 19 | /// Example 20 | /// ```no_run 21 | /// use arcon::prelude::*; 22 | /// 23 | /// let stream: Stream = (0..100u64) 24 | /// .to_stream(|conf| { 25 | /// conf.set_arcon_time(ArconTime::Process); 26 | /// }) 27 | /// .operator(OperatorBuilder { 28 | /// operator: Arc::new(|| Map::new(|x| x + 10)), 29 | /// state: Arc::new(|_| EmptyState), 30 | /// conf: Default::default(), 31 | /// }); 32 | /// ``` 33 | fn operator + 'static>( 34 | self, 35 | builder: OperatorBuilder, 36 | ) -> Stream; 37 | } 38 | 39 | impl OperatorExt for Stream { 40 | #[must_use] 41 | fn operator + 'static>( 42 | mut self, 43 | builder: OperatorBuilder, 44 | ) -> Stream { 45 | // No more mutations on the previous node, move it from the stream.current_node to the DFG Graph 46 | self.move_last_node(); 47 | 48 | let paralellism = match 
builder.conf.parallelism_strategy { 49 | ParallelismStrategy::Static(num) => num, 50 | _ => unreachable!("Managed Parallelism not Supported yet"), 51 | }; 52 | 53 | let prev_dfg_node = self.ctx.dfg.get_mut(&self.prev_dfg_id); 54 | let incoming_channels = prev_dfg_node.get_node_ids(); 55 | let operator_id = prev_dfg_node.get_operator_id() + 1; 56 | 57 | let node_constructor = NodeConstructor::::new( 58 | format!("Operator_{}", operator_id), 59 | Arc::new(builder), 60 | self.key_builder.take(), 61 | ); 62 | 63 | let dfg_node = DFGNode::new( 64 | DFGNodeKind::Placeholder, // The NodeFactory will be inserted into the DFG when it is finalized 65 | operator_id, 66 | paralellism, 67 | incoming_channels, 68 | ); 69 | prev_dfg_node.set_outgoing_channels(dfg_node.get_node_ids()); 70 | let next_dfg_id = self.ctx.dfg.insert(dfg_node); 71 | 72 | self.prev_dfg_id = next_dfg_id; 73 | Stream { 74 | prev_dfg_id: self.prev_dfg_id, 75 | ctx: self.ctx, 76 | last_node: Some(Rc::new(node_constructor)), 77 | key_builder: None, 78 | source: None, 79 | } 80 | } 81 | } 82 | 83 | impl OperatorExt for KeyedStream { 84 | fn operator + 'static>( 85 | self, 86 | builder: OperatorBuilder, 87 | ) -> Stream { 88 | self.stream.operator(builder) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /arcon/src/dataflow/stream/partition.rs: -------------------------------------------------------------------------------- 1 | use crate::data::ArconType; 2 | use crate::dataflow::stream::{KeyBuilder, KeyedStream, Stream}; 3 | use crate::util::ArconFnBounds; 4 | use std::{hash::Hash, hash::Hasher, rc::Rc, sync::Arc}; 5 | 6 | /// Extension trait for partitioning schemes 7 | pub trait PartitionExt { 8 | /// Consistently partition the Stream using the given key extractor method. 9 | /// 10 | /// The key extractor function must be deterministic, for two identical events it 11 | /// must return the same key whenever it is called. 
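/// (For example, extracting a stable field such as `|user: &User| &user.id`,
/// where `User` is a hypothetical event type for illustration; closures that
/// read mutable captures, clocks, or random state break this contract.)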
12 | /// 13 | /// Example 14 | /// ```rust 15 | /// use arcon::prelude::*; 16 | /// let stream: KeyedStream = (0..100) 17 | /// .to_stream(|conf| { 18 | /// conf.set_arcon_time(ArconTime::Process); 19 | /// }) 20 | /// .key_by(|i: &u64| i); 21 | /// ``` 22 | fn key_by &KEY + ArconFnBounds>( 23 | self, 24 | key_extractor: F, 25 | ) -> KeyedStream; 26 | } 27 | 28 | impl PartitionExt for Stream { 29 | #[must_use] 30 | fn key_by &KEY + ArconFnBounds>( 31 | mut self, 32 | key_extractor: F, 33 | ) -> KeyedStream { 34 | let key_builder = KeyBuilder { 35 | extractor: Arc::new(move |d: &T| { 36 | let mut hasher = arcon_util::key_hasher(); 37 | key_extractor(d).hash(&mut hasher); 38 | hasher.finish() 39 | }), 40 | }; 41 | if let Some(ref mut node_factory) = self.last_node { 42 | let node_factory = Rc::get_mut(node_factory).unwrap(); 43 | node_factory.set_key_builder(key_builder.clone()); 44 | self.key_builder = Some(key_builder); 45 | } else if let Some(ref mut source_factory) = self.source { 46 | let source_factory = Rc::get_mut(source_factory).unwrap(); 47 | source_factory.set_key_builder(key_builder.clone()); 48 | self.key_builder = Some(key_builder); 49 | } else { 50 | panic!("Nothing to apply key_by on!"); 51 | } 52 | KeyedStream::from(self) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /arcon/src/error/source.rs: -------------------------------------------------------------------------------- 1 | use super::{ArconResult, Error}; 2 | use snafu::Snafu; 3 | use std::fmt::Debug; 4 | 5 | /// Nested result type for handling source errors 6 | pub type SourceResult = ArconResult>; 7 | 8 | /// Enum containing every type of error that a source may encounter 9 | #[derive(Debug, Snafu)] 10 | pub enum SourceError { 11 | #[snafu(display("Schema Error Encountered {}", msg))] 12 | Schema { msg: String }, 13 | #[snafu(display("Failed to parse data {}", msg))] 14 | Parse { msg: String }, 15 | #[cfg(feature = "kafka")] 16 | #[snafu(display("Encountered a Kafka error {}", error.to_string()))] 17 | Kafka { error: rdkafka::error::KafkaError }, 18 | } 19 | 20 | impl From for SourceResult { 21 | fn from(error: Error) -> Self { 22 | Err(error) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /arcon/src/error/timer.rs: -------------------------------------------------------------------------------- 1 | use super::{ArconResult, Error}; 2 | use snafu::Snafu; 3 | use std::fmt::Debug; 4 | 5 | /// TimerResult type utilised while scheduling timers 6 | pub type TimerResult = ArconResult>>; 7 | 8 | #[derive(Debug, Snafu)] 9 | #[snafu(display( 10 | "Attempted to schedule timer entry {:?} at {} when time is {}", 11 | entry, 12 | scheduled_time, 13 | current_time 14 | ))] 15 | pub struct TimerExpiredError { 16 | /// Current event time 17 | pub current_time: u64, 18 | /// The scheduled time 19 | pub scheduled_time: u64, 20 | /// Timer Entry 21 | pub entry: A, 22 | } 23 | 24 | impl From for TimerResult { 25 | fn from(error: Error) -> Self { 26 | Err(error) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /arcon/src/index/appender/eager.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | error::ArconResult, 3 | index::{AppenderIndex, IndexOps}, 4 | table::ImmutableTable, 5 | }; 6 | use arcon_state::{ 7 | backend::{ 8 | handles::{ActiveHandle, Handle}, 9 | Backend, VecState, 10 | }, 11 | data::Value, 12 | error::*, 13 | }; 14 | 
use std::sync::Arc; 15 | 16 | #[derive(Debug)] 17 | pub struct EagerAppender 18 | where 19 | V: Value, 20 | B: Backend, 21 | { 22 | /// A handle to the VecState 23 | handle: ActiveHandle, u64>, 24 | } 25 | 26 | impl EagerAppender 27 | where 28 | V: Value, 29 | B: Backend, 30 | { 31 | /// Creates an EagerAppender 32 | pub fn new(id: impl Into, backend: Arc) -> Self { 33 | let mut handle = Handle::vec(id.into()).with_item_key(0); 34 | backend.register_vec_handle(&mut handle); 35 | let handle: ActiveHandle, u64> = handle.activate(backend); 36 | EagerAppender { handle } 37 | } 38 | } 39 | 40 | impl IndexOps for EagerAppender 41 | where 42 | V: Value, 43 | B: Backend, 44 | { 45 | fn persist(&mut self) -> ArconResult<()> { 46 | Ok(()) 47 | } 48 | fn set_key(&mut self, key: u64) { 49 | self.handle.set_item_key(key); 50 | } 51 | fn table(&mut self) -> ArconResult> { 52 | Ok(None) 53 | } 54 | } 55 | 56 | impl AppenderIndex for EagerAppender 57 | where 58 | V: Value, 59 | B: Backend, 60 | { 61 | #[inline] 62 | fn append(&mut self, data: V) -> Result<()> { 63 | self.handle.append(data) 64 | } 65 | #[inline] 66 | fn consume(&mut self) -> Result> { 67 | let stored = self.handle.get()?; 68 | self.handle.clear()?; 69 | Ok(stored) 70 | } 71 | #[inline] 72 | fn len(&self) -> usize { 73 | self.handle.len().unwrap_or(0) 74 | } 75 | #[inline] 76 | fn is_empty(&self) -> bool { 77 | self.len() == 0 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /arcon/src/index/appender/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | error::ArconResult, 3 | index::{AppenderIndex, HashTable, IndexOps, IndexValue}, 4 | table::ImmutableTable, 5 | }; 6 | use arcon_state::{ 7 | backend::{handles::ActiveHandle, Backend, VecState}, 8 | error::*, 9 | }; 10 | use prost::*; 11 | use std::ops::{Deref, DerefMut}; 12 | 13 | const DEFAULT_APPENDER_SIZE: usize = 1024; 14 | 15 | pub mod eager; 16 | 17 | #[derive(Clone, Message)] 18 | pub struct ProstVec { 19 | #[prost(message, repeated, tag = "1")] 20 | data: Vec, 21 | } 22 | 23 | impl Deref for ProstVec 24 | where 25 | V: IndexValue, 26 | { 27 | type Target = Vec; 28 | 29 | fn deref(&self) -> &Self::Target { 30 | &self.data 31 | } 32 | } 33 | 34 | impl DerefMut for ProstVec 35 | where 36 | V: IndexValue, 37 | { 38 | fn deref_mut(&mut self) -> &mut Self::Target { 39 | &mut self.data 40 | } 41 | } 42 | 43 | /// An Index suitable for Non-associative Windows 44 | /// 45 | /// A backing [VecState] acts as an overflow vector when 46 | /// the data no longer fits in the specified in-memory capacity. 
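///
/// Note: in this revision the lazy variant below is a stub (its methods are
/// `unimplemented!()`); [eager::EagerAppender] is the working appender, and
/// the test module at the bottom of this file shows its usage.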
47 | pub struct LazyAppender 48 | where 49 | V: IndexValue, 50 | B: Backend, 51 | { 52 | current_key: u64, 53 | /// A handle to the VecState 54 | handle: ActiveHandle>, 55 | hash_table: HashTable, B>, 56 | } 57 | 58 | impl LazyAppender 59 | where 60 | V: IndexValue, 61 | B: Backend, 62 | { 63 | } 64 | 65 | impl IndexOps for LazyAppender 66 | where 67 | V: IndexValue, 68 | B: Backend, 69 | { 70 | fn persist(&mut self) -> ArconResult<()> { 71 | // for each modified, set handle key and drain 72 | unimplemented!(); 73 | } 74 | fn set_key(&mut self, key: u64) { 75 | self.current_key = key; 76 | } 77 | fn table(&mut self) -> ArconResult> { 78 | Ok(None) 79 | } 80 | } 81 | 82 | impl AppenderIndex for LazyAppender 83 | where 84 | V: IndexValue, 85 | B: Backend, 86 | { 87 | #[inline] 88 | fn append(&mut self, _: V) -> Result<()> { 89 | unimplemented!(); 90 | } 91 | #[inline] 92 | fn consume(&mut self) -> Result> { 93 | unimplemented!(); 94 | } 95 | #[inline] 96 | fn len(&self) -> usize { 97 | unimplemented!(); 98 | } 99 | #[inline] 100 | fn is_empty(&self) -> bool { 101 | unimplemented!(); 102 | } 103 | } 104 | 105 | #[cfg(test)] 106 | mod tests { 107 | use super::*; 108 | use crate::test_utils::temp_backend; 109 | use arcon_state::Sled; 110 | use eager::EagerAppender; 111 | use std::sync::Arc; 112 | 113 | fn index_test(mut index: impl AppenderIndex) -> Result<()> { 114 | index.set_key(0); 115 | for i in 0..1024 { 116 | index.append(i as u64)?; 117 | } 118 | 119 | assert_eq!(index.len(), 1024); 120 | let consumed = index.consume()?; 121 | assert_eq!(consumed.len(), 1024); 122 | 123 | for (c, i) in consumed.into_iter().enumerate() { 124 | assert_eq!(c as u64, i); 125 | } 126 | 127 | index.set_key(1); 128 | 129 | for i in 0..524 { 130 | index.append(i as u64)?; 131 | } 132 | 133 | assert_eq!(index.len(), 524); 134 | 135 | Ok(()) 136 | } 137 | 138 | #[test] 139 | fn eager_appender_test() { 140 | let backend = Arc::new(temp_backend::()); 141 | let index = EagerAppender::new("appender", backend); 142 | assert!(index_test(index).is_ok()); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /arcon/src/index/hash_table/bitmask.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Amanieu d'Antras 2 | // SPDX-License-Identifier: MIT 3 | 4 | use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_STRIDE}; 5 | #[cfg(feature = "nightly")] 6 | use core::intrinsics; 7 | 8 | /// A bit mask which contains the result of a `Match` operation on a `Group` and 9 | /// allows iterating through them. 10 | /// 11 | /// The bit mask is arranged so that low-order bits represent lower memory 12 | /// addresses for group match results. 13 | /// 14 | /// For implementation reasons, the bits in the set may be sparsely packed, so 15 | /// that there is only one bit-per-byte used (the high bit, 7). If this is the 16 | /// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be 17 | /// performed on counts/indices to normalize this difference. `BITMASK_MASK` is 18 | /// similarly a mask of all the actually-used bits. 19 | #[derive(Copy, Clone)] 20 | pub struct BitMask(pub BitMaskWord); 21 | 22 | #[allow(clippy::use_self)] 23 | impl BitMask { 24 | /// Returns a new `BitMask` with all bits inverted. 25 | #[inline] 26 | #[must_use] 27 | pub fn invert(self) -> Self { 28 | BitMask(self.0 ^ BITMASK_MASK) 29 | } 30 | 31 | /// Returns a new `BitMask` with the lowest bit removed. 
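/// (The classic `x & (x - 1)` bit trick: e.g. `0b0101` becomes `0b0100`.)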
32 | #[inline] 33 | #[must_use] 34 | pub fn remove_lowest_bit(self) -> Self { 35 | BitMask(self.0 & (self.0 - 1)) 36 | } 37 | /// Returns whether the `BitMask` has at least one set bit. 38 | #[inline] 39 | #[allow(dead_code)] 40 | pub fn any_bit_set(self) -> bool { 41 | self.0 != 0 42 | } 43 | 44 | /// Returns the first set bit in the `BitMask`, if there is one. 45 | #[inline] 46 | pub fn lowest_set_bit(self) -> Option { 47 | if self.0 == 0 { 48 | None 49 | } else { 50 | Some(unsafe { self.lowest_set_bit_nonzero() }) 51 | } 52 | } 53 | 54 | /// Returns the first set bit in the `BitMask`, if there is one. The 55 | /// bitmask must not be empty. 56 | #[inline] 57 | #[cfg(feature = "nightly")] 58 | pub unsafe fn lowest_set_bit_nonzero(self) -> usize { 59 | intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE 60 | } 61 | #[inline] 62 | #[cfg(not(feature = "nightly"))] 63 | pub unsafe fn lowest_set_bit_nonzero(self) -> usize { 64 | self.trailing_zeros() 65 | } 66 | 67 | /// Returns the number of trailing zeroes in the `BitMask`. 68 | #[inline] 69 | pub fn trailing_zeros(self) -> usize { 70 | // ARM doesn't have a trailing_zeroes instruction, and instead uses 71 | // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM 72 | // versions (pre-ARMv7) don't have RBIT and need to emulate it 73 | // instead. Since we only have 1 bit set in each byte on ARM, we can 74 | // use swap_bytes (REV) + leading_zeroes instead. 75 | if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { 76 | self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE 77 | } else { 78 | self.0.trailing_zeros() as usize / BITMASK_STRIDE 79 | } 80 | } 81 | } 82 | 83 | impl IntoIterator for BitMask { 84 | type Item = usize; 85 | type IntoIter = BitMaskIter; 86 | 87 | #[inline] 88 | fn into_iter(self) -> BitMaskIter { 89 | BitMaskIter(self) 90 | } 91 | } 92 | 93 | /// Iterator over the contents of a `BitMask`, returning the indices of set 94 | /// bits.
95 | pub struct BitMaskIter(BitMask); 96 | 97 | impl Iterator for BitMaskIter { 98 | type Item = usize; 99 | 100 | #[inline] 101 | fn next(&mut self) -> Option { 102 | let bit = self.0.lowest_set_bit()?; 103 | self.0 = self.0.remove_lowest_bit(); 104 | Some(bit) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /arcon/src/index/hash_table/eager.rs: -------------------------------------------------------------------------------- 1 | use crate::error::ArconResult; 2 | use crate::{index::IndexOps, table::ImmutableTable}; 3 | use arcon_state::{ 4 | backend::{ 5 | handles::{ActiveHandle, BoxedIteratorOfResult, Handle}, 6 | Backend, MapState, 7 | }, 8 | data::{Key, Value}, 9 | error::*, 10 | }; 11 | use std::sync::Arc; 12 | 13 | pub struct EagerHashTable 14 | where 15 | K: Key, 16 | V: Value, 17 | B: Backend, 18 | { 19 | /// Map Handle 20 | handle: ActiveHandle>, 21 | } 22 | 23 | impl EagerHashTable 24 | where 25 | K: Key, 26 | V: Value, 27 | B: Backend, 28 | { 29 | pub fn new(id: impl Into, backend: Arc) -> Self { 30 | let mut handle = Handle::map(id.into()); 31 | backend.register_map_handle(&mut handle); 32 | let handle = handle.activate(backend); 33 | Self { handle } 34 | } 35 | /// Insert a key-value record 36 | #[inline(always)] 37 | pub fn put(&mut self, key: K, value: V) -> Result<()> { 38 | self.handle.fast_insert(key, value) 39 | } 40 | 41 | #[inline(always)] 42 | pub fn get(&self, k: &K) -> Result> { 43 | self.handle.get(k) 44 | } 45 | 46 | #[inline(always)] 47 | pub fn remove(&self, k: &K) -> Result> { 48 | self.handle.remove(k) 49 | } 50 | #[inline(always)] 51 | pub fn contains(&self, k: &K) -> Result { 52 | self.handle.contains(k) 53 | } 54 | #[inline(always)] 55 | pub fn iter(&self) -> Result> { 56 | self.handle.iter() 57 | } 58 | } 59 | 60 | impl IndexOps for EagerHashTable 61 | where 62 | K: Key, 63 | V: Value, 64 | B: Backend, 65 | { 66 | fn persist(&mut self) -> ArconResult<()> { 67 | Ok(()) 68 | } 69 | fn set_key(&mut self, _: u64) {} 70 | fn table(&mut self) -> ArconResult> { 71 | Ok(None) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /arcon/src/index/value/eager.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | error::ArconResult, 3 | index::{IndexOps, IndexValue, ValueIndex}, 4 | table::ImmutableTable, 5 | }; 6 | use arcon_state::{ 7 | backend::{ 8 | handles::{ActiveHandle, Handle}, 9 | Backend, MapState, 10 | }, 11 | error::*, 12 | }; 13 | use std::{borrow::Cow, sync::Arc}; 14 | 15 | pub struct EagerValue 16 | where 17 | V: IndexValue, 18 | B: Backend, 19 | { 20 | /// A handle to the ValueState 21 | handle: ActiveHandle>, 22 | current_key: u64, 23 | } 24 | 25 | impl EagerValue 26 | where 27 | V: IndexValue, 28 | B: Backend, 29 | { 30 | /// Creates an EagerValue 31 | pub fn new(id: impl Into, backend: Arc) -> Self { 32 | let mut handle = Handle::map(id.into()); 33 | backend.register_map_handle(&mut handle); 34 | 35 | let handle: ActiveHandle> = handle.activate(backend); 36 | 37 | EagerValue { 38 | handle, 39 | current_key: 0, 40 | } 41 | } 42 | } 43 | 44 | impl ValueIndex for EagerValue 45 | where 46 | V: IndexValue, 47 | B: Backend, 48 | { 49 | fn put(&mut self, value: V) -> Result<()> { 50 | self.handle.fast_insert(self.current_key, value) 51 | } 52 | fn get(&self) -> Result>> { 53 | let value = self.handle.get(&self.current_key)?; 54 | Ok(value.map(Cow::Owned)) 55 | } 56 | fn take(&mut self) -> Result> { 57 | 
self.handle.remove(&self.current_key) 58 | } 59 | fn clear(&mut self) -> Result<()> { 60 | let _ = self.take()?; 61 | Ok(()) 62 | } 63 | fn rmw(&mut self, mut f: F) -> Result<()> 64 | where 65 | F: FnMut(&mut V) + Sized, 66 | { 67 | let value = self.get()?; 68 | if let Some(v) = value { 69 | let mut owned = v.into_owned(); 70 | f(&mut owned); 71 | self.put(owned) 72 | } else { 73 | self.put(V::default()) 74 | } 75 | } 76 | } 77 | 78 | impl IndexOps for EagerValue 79 | where 80 | V: IndexValue, 81 | B: Backend, 82 | { 83 | fn persist(&mut self) -> ArconResult<()> { 84 | Ok(()) 85 | } 86 | fn set_key(&mut self, key: u64) { 87 | self.current_key = key; 88 | } 89 | fn table(&mut self) -> ArconResult> { 90 | let mut table = V::table(); 91 | let values = self.handle.values()?; 92 | table 93 | .load(values.filter_map(|v| v.ok())) 94 | .map_err(|e| ArconStateError::Unknown { msg: e.to_string() })?; 95 | let imut = table 96 | .immutable() 97 | .map_err(|e| ArconStateError::Unknown { msg: e.to_string() })?; 98 | Ok(Some(imut)) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /arcon/src/index/value/local.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | error::ArconResult, 3 | index::{IndexOps, ValueIndex}, 4 | table::ImmutableTable, 5 | }; 6 | use arcon_state::{ 7 | backend::{ 8 | handles::{ActiveHandle, Handle}, 9 | Backend, ValueState, 10 | }, 11 | data::Value, 12 | error::*, 13 | }; 14 | use std::{borrow::Cow, sync::Arc}; 15 | 16 | pub struct LocalValue 17 | where 18 | V: Value, 19 | B: Backend, 20 | { 21 | /// The data itself 22 | data: Option, 23 | /// Modified flag 24 | modified: bool, 25 | /// A handle to the ValueState 26 | handle: ActiveHandle>, 27 | } 28 | 29 | impl LocalValue 30 | where 31 | V: Value, 32 | B: Backend, 33 | { 34 | /// Creates a LocalValue 35 | pub fn new(id: impl Into, backend: Arc) -> Self { 36 | let mut handle = Handle::value(id.into()); 37 | backend.register_value_handle(&mut handle); 38 | 39 | let handle = handle.activate(backend); 40 | 41 | // Attempt to fetch data from backend, otherwise set to default value.. 
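// (Both a missing value and a backend read error collapse to `V::default()`
// here. A combinator sketch of the same fallback, assuming `get` keeps its
// `Result<Option<V>>` signature: `handle.get().ok().flatten().unwrap_or_default()`.)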
42 | let data = match handle.get() { 43 | Ok(Some(v)) => v, 44 | Ok(None) => V::default(), 45 | Err(_) => V::default(), 46 | }; 47 | 48 | Self { 49 | data: Some(data), 50 | modified: false, 51 | handle, 52 | } 53 | } 54 | } 55 | 56 | impl ValueIndex for LocalValue 57 | where 58 | V: Value, 59 | B: Backend, 60 | { 61 | fn put(&mut self, value: V) -> Result<()> { 62 | self.data = Some(value); 63 | self.modified = true; 64 | Ok(()) 65 | } 66 | fn get(&self) -> Result>> { 67 | Ok(self.data.as_ref().map(|v| Cow::Borrowed(v))) 68 | } 69 | fn take(&mut self) -> Result> { 70 | let data = self.data.take(); 71 | let _ = self.handle.clear(); 72 | Ok(data) 73 | } 74 | fn clear(&mut self) -> Result<()> { 75 | let _ = self.take()?; 76 | Ok(()) 77 | } 78 | fn rmw(&mut self, mut f: F) -> Result<()> 79 | where 80 | F: FnMut(&mut V) + Sized, 81 | { 82 | if let Some(ref mut v) = self.data.as_mut() { 83 | // execute the modification 84 | f(v); 85 | // assume the data has actually been modified 86 | self.modified = true; 87 | } else { 88 | self.data = Some(V::default()); 89 | } 90 | 91 | Ok(()) 92 | } 93 | } 94 | 95 | impl IndexOps for LocalValue 96 | where 97 | V: Value, 98 | B: Backend, 99 | { 100 | fn persist(&mut self) -> ArconResult<()> { 101 | if let Some(data) = &self.data { 102 | // only push data to the handle if it has actually been modified 103 | if self.modified { 104 | self.handle.fast_set_by_ref(data)?; 105 | self.modified = false; 106 | } 107 | } 108 | Ok(()) 109 | } 110 | fn set_key(&mut self, _: u64) {} 111 | fn table(&mut self) -> ArconResult> { 112 | Ok(None) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /arcon/src/index/value/mod.rs: -------------------------------------------------------------------------------- 1 | use super::{HashTable, IndexOps, IndexValue, ValueIndex}; 2 | use crate::error::ArconResult; 3 | use crate::table::ImmutableTable; 4 | use arcon_state::{error::*, Backend}; 5 | use std::{borrow::Cow, sync::Arc}; 6 | 7 | mod eager; 8 | mod local; 9 | 10 | pub use eager::EagerValue; 11 | pub use local::LocalValue; 12 | 13 | /// A Lazy ValueIndex 14 | pub struct LazyValue 15 | where 16 | V: IndexValue, 17 | B: Backend, 18 | { 19 | current_key: u64, 20 | hash_table: HashTable, 21 | } 22 | 23 | impl LazyValue 24 | where 25 | V: IndexValue, 26 | B: Backend, 27 | { 28 | /// Creates a LazyValue 29 | pub fn new(id: impl Into, backend: Arc) -> Self { 30 | let hash_table = HashTable::new(id.into(), backend); 31 | 32 | Self { 33 | current_key: 0, 34 | hash_table, 35 | } 36 | } 37 | } 38 | 39 | impl ValueIndex for LazyValue 40 | where 41 | V: IndexValue, 42 | B: Backend, 43 | { 44 | #[inline] 45 | fn put(&mut self, value: V) -> Result<()> { 46 | self.hash_table.put(self.current_key, value) 47 | } 48 | #[inline] 49 | fn get(&self) -> Result>> { 50 | let value = self.hash_table.get(&self.current_key)?; 51 | Ok(value.map(|v| Cow::Borrowed(v))) 52 | } 53 | #[inline] 54 | fn take(&mut self) -> Result> { 55 | self.hash_table.remove(&self.current_key) 56 | } 57 | #[inline] 58 | fn clear(&mut self) -> Result<()> { 59 | let _ = self.take()?; 60 | Ok(()) 61 | } 62 | #[inline] 63 | fn rmw(&mut self, f: F) -> Result<()> 64 | where 65 | F: FnMut(&mut V) + Sized, 66 | { 67 | self.hash_table.rmw(&self.current_key, V::default, f) 68 | } 69 | } 70 | 71 | impl IndexOps for LazyValue 72 | where 73 | V: IndexValue, 74 | B: Backend, 75 | { 76 | #[inline] 77 | fn persist(&mut self) -> ArconResult<()> { 78 | self.hash_table.persist() 79 | } 80 | #[inline] 
81 | fn set_key(&mut self, key: u64) { 82 | self.current_key = key; 83 | } 84 | 85 | fn table(&mut self) -> ArconResult> { 86 | let (_, values) = self.hash_table.full_iter()?; 87 | let mut table = V::table(); 88 | table 89 | .load(values.filter_map(|v| v.ok())) 90 | .map_err(|e| ArconStateError::Unknown { msg: e.to_string() })?; 91 | let imut = table 92 | .immutable() 93 | .map_err(|e| ArconStateError::Unknown { msg: e.to_string() })?; 94 | Ok(Some(imut)) 95 | } 96 | } 97 | 98 | #[cfg(test)] 99 | mod tests { 100 | use super::*; 101 | use crate::test_utils::temp_backend; 102 | use arcon_state::Sled; 103 | use eager::EagerValue; 104 | use std::sync::Arc; 105 | 106 | fn index_test(mut index: impl ValueIndex) -> Result<()> { 107 | index.set_key(0); 108 | assert_eq!(index.get().unwrap(), None); 109 | index.put(10u64)?; 110 | let curr_value = index.get()?; 111 | assert_eq!(curr_value.unwrap().as_ref(), &10u64); 112 | index.rmw(|v| { 113 | *v += 10; 114 | })?; 115 | let curr_value = index.get()?; 116 | assert_eq!(curr_value.unwrap().as_ref(), &20u64); 117 | 118 | index.set_key(1); 119 | assert_eq!(index.get().unwrap(), None); 120 | index.put(5u64)?; 121 | index.clear()?; 122 | assert_eq!(index.get().unwrap(), None); 123 | 124 | index.set_key(0); 125 | let removed_value = index.take()?; 126 | assert_eq!(removed_value, Some(20u64)); 127 | 128 | Ok(()) 129 | } 130 | 131 | #[test] 132 | fn lazy_value_index_test() { 133 | let backend = Arc::new(temp_backend::()); 134 | let index: LazyValue = LazyValue::new("myvalue", backend); 135 | assert!(index_test(index).is_ok()); 136 | } 137 | #[test] 138 | fn eager_value_index_test() { 139 | let backend = Arc::new(temp_backend::()); 140 | let index: EagerValue = EagerValue::new("myvalue", backend); 141 | assert!(index_test(index).is_ok()); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /arcon/src/index/window/appender.rs: -------------------------------------------------------------------------------- 1 | use super::super::WindowIndex; 2 | use crate::data::ArconType; 3 | use crate::error::ArconResult; 4 | use crate::stream::operator::window::WindowContext; 5 | use crate::util::ArconFnBounds; 6 | use crate::{index::IndexOps, table::ImmutableTable}; 7 | use arcon_state::{backend::handles::ActiveHandle, Backend, Handle, VecState}; 8 | use std::sync::Arc; 9 | 10 | pub struct AppenderWindow 11 | where 12 | IN: ArconType, 13 | OUT: ArconType, 14 | F: Fn(&[IN]) -> OUT + ArconFnBounds, 15 | B: Backend, 16 | { 17 | handle: ActiveHandle, u64, u64>, 18 | materializer: F, 19 | } 20 | impl AppenderWindow 21 | where 22 | IN: ArconType, 23 | OUT: ArconType, 24 | F: Fn(&[IN]) -> OUT + ArconFnBounds, 25 | B: Backend, 26 | { 27 | pub fn new(backend: Arc, materializer: F) -> Self { 28 | let mut handle = Handle::vec("window_handle") 29 | .with_item_key(0) 30 | .with_namespace(0); 31 | 32 | backend.register_vec_handle(&mut handle); 33 | 34 | let handle = handle.activate(backend); 35 | 36 | Self { 37 | handle, 38 | materializer, 39 | } 40 | } 41 | } 42 | 43 | impl WindowIndex for AppenderWindow 44 | where 45 | IN: ArconType, 46 | OUT: ArconType, 47 | F: Fn(&[IN]) -> OUT + ArconFnBounds, 48 | B: Backend, 49 | { 50 | type IN = IN; 51 | type OUT = OUT; 52 | 53 | fn on_element(&mut self, element: Self::IN, ctx: WindowContext) -> ArconResult<()> { 54 | self.handle.set_item_key(ctx.key); 55 | self.handle.set_namespace(ctx.index); 56 | 57 | self.handle.append(element)?; 58 | Ok(()) 59 | } 60 | 61 | fn result(&mut self, ctx: 
WindowContext) -> ArconResult { 62 | self.handle.set_item_key(ctx.key); 63 | self.handle.set_namespace(ctx.index); 64 | 65 | let buf = self.handle.get()?; 66 | Ok((self.materializer)(&buf)) 67 | } 68 | 69 | fn clear(&mut self, ctx: WindowContext) -> ArconResult<()> { 70 | self.handle.set_item_key(ctx.key); 71 | self.handle.set_namespace(ctx.index); 72 | 73 | self.handle.clear()?; 74 | Ok(()) 75 | } 76 | } 77 | impl IndexOps for AppenderWindow 78 | where 79 | IN: ArconType, 80 | OUT: ArconType, 81 | F: Fn(&[IN]) -> OUT + ArconFnBounds, 82 | B: Backend, 83 | { 84 | fn persist(&mut self) -> ArconResult<()> { 85 | Ok(()) 86 | } 87 | fn set_key(&mut self, _: u64) {} 88 | fn table(&mut self) -> ArconResult> { 89 | Ok(None) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /arcon/src/index/window/arrow.rs: -------------------------------------------------------------------------------- 1 | use arrow::{datatypes::Schema, record_batch::RecordBatch}; 2 | 3 | use crate::{ 4 | index::{IndexOps, WindowIndex}, 5 | prelude::*, 6 | stream::operator::window::WindowContext, 7 | table::{to_record_batches, ImmutableTable, RawRecordBatch}, 8 | util::ArconFnBounds, 9 | }; 10 | use arcon_state::{backend::handles::ActiveHandle, Backend, VecState}; 11 | use std::marker::PhantomData; 12 | 13 | /// A window index for Arrow Data 14 | /// 15 | /// Elements are appended into RecordBatches and once a window is triggered, 16 | /// the underlying Arrow Schema and Vec is exposed. 17 | pub struct ArrowWindow 18 | where 19 | IN: ArconType + ToArrow, 20 | OUT: ArconType, 21 | F: Fn(Arc, Vec) -> ArconResult + ArconFnBounds, 22 | B: Backend, 23 | { 24 | handle: ActiveHandle, u64, u64>, 25 | map: std::collections::HashMap, 26 | udf: F, 27 | _marker: std::marker::PhantomData, 28 | } 29 | 30 | impl ArrowWindow 31 | where 32 | IN: ArconType + ToArrow, 33 | OUT: ArconType, 34 | F: Fn(Arc, Vec) -> ArconResult + ArconFnBounds, 35 | B: Backend, 36 | { 37 | pub fn new(backend: Arc, udf: F) -> Self { 38 | let mut handle = Handle::vec("window_handle") 39 | .with_item_key(0) 40 | .with_namespace(0); 41 | 42 | backend.register_vec_handle(&mut handle); 43 | 44 | let handle = handle.activate(backend); 45 | 46 | Self { 47 | handle, 48 | map: std::collections::HashMap::new(), 49 | udf, 50 | _marker: PhantomData, 51 | } 52 | } 53 | } 54 | 55 | impl WindowIndex for ArrowWindow 56 | where 57 | IN: ArconType + ToArrow, 58 | OUT: ArconType, 59 | F: Fn(Arc, Vec) -> ArconResult + ArconFnBounds, 60 | B: Backend, 61 | { 62 | type IN = IN; 63 | type OUT = OUT; 64 | 65 | fn on_element(&mut self, element: Self::IN, ctx: WindowContext) -> ArconResult<()> { 66 | let table = self.map.entry(ctx).or_insert_with(IN::table); 67 | table.append(element, None)?; 68 | 69 | Ok(()) 70 | } 71 | 72 | fn result(&mut self, ctx: WindowContext) -> ArconResult { 73 | let table = self.map.entry(ctx).or_insert_with(IN::table); 74 | self.handle.set_item_key(ctx.key); 75 | self.handle.set_namespace(ctx.index); 76 | 77 | // fetch in-memory batches 78 | let mut batches = table.batches()?; 79 | // fetch if any raw batches and append to the vector... 
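// (A window's rows live in two layers: fresh appends sit in the in-memory
// `map` until `persist()` flushes them to the backend as raw record batches,
// so building a result has to merge the in-memory batches with anything
// persisted earlier before handing them to the UDF.)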
80 | let raw_batches = self.handle.get()?; 81 | batches.append(&mut to_record_batches(Arc::new(IN::schema()), raw_batches)?); 82 | 83 | (self.udf)(Arc::new(IN::schema()), batches) 84 | } 85 | 86 | fn clear(&mut self, ctx: WindowContext) -> ArconResult<()> { 87 | // clear from memory layer 88 | let _ = self.map.remove(&ctx); 89 | 90 | // clear everything in the backend 91 | self.handle.set_item_key(ctx.key); 92 | self.handle.set_namespace(ctx.index); 93 | 94 | self.handle.clear()?; 95 | 96 | Ok(()) 97 | } 98 | } 99 | impl IndexOps for ArrowWindow 100 | where 101 | IN: ArconType + ToArrow, 102 | OUT: ArconType, 103 | F: Fn(Arc, Vec) -> ArconResult + ArconFnBounds, 104 | B: Backend, 105 | { 106 | fn persist(&mut self) -> ArconResult<()> { 107 | for (ctx, table) in self.map.iter_mut() { 108 | self.handle.set_item_key(ctx.key); 109 | self.handle.set_namespace(ctx.index); 110 | 111 | let batches = table.raw_batches()?; 112 | for batch in batches { 113 | self.handle.append(batch)?; 114 | } 115 | } 116 | Ok(()) 117 | } 118 | fn set_key(&mut self, _: u64) {} 119 | fn table(&mut self) -> ArconResult> { 120 | Ok(None) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /arcon/src/index/window/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod appender; 2 | pub mod arrow; 3 | pub mod incremental; 4 | 5 | #[cfg(test)] 6 | mod tests { 7 | use super::appender::*; 8 | use super::incremental::*; 9 | use crate::index::WindowIndex; 10 | use crate::stream::operator::window::WindowContext; 11 | use crate::test_utils::temp_backend; 12 | use arcon_state::Sled; 13 | use std::sync::Arc; 14 | 15 | #[test] 16 | fn sum_appender_window_test() { 17 | let backend = Arc::new(temp_backend::()); 18 | 19 | fn materializer(buffer: &[i32]) -> i32 { 20 | buffer.iter().sum() 21 | } 22 | 23 | let mut window = AppenderWindow::new(backend, &materializer); 24 | 25 | for i in 0..10 { 26 | let _ = window.on_element(i, WindowContext::new(0, 0)); 27 | } 28 | 29 | let sum = window.result(WindowContext::new(0, 0)).unwrap(); 30 | 31 | let expected: i32 = 45; 32 | assert_eq!(sum, expected); 33 | } 34 | 35 | #[test] 36 | fn sum_incremental_window_test() { 37 | let backend = Arc::new(temp_backend::()); 38 | 39 | fn init(i: i32) -> u64 { 40 | i as u64 41 | } 42 | fn aggregation(i: i32, agg: &u64) -> u64 { 43 | agg + i as u64 44 | } 45 | 46 | let mut window = IncrementalWindow::new(backend, &init, &aggregation); 47 | 48 | for i in 0..10 { 49 | let _ = window.on_element(i, WindowContext::new(0, 0)); 50 | } 51 | 52 | for i in 0..20 { 53 | let _ = window.on_element(i, WindowContext::new(1, 1)); 54 | } 55 | 56 | let sum_one = window.result(WindowContext::new(0, 0)).unwrap(); 57 | assert_eq!(sum_one, 45); 58 | let sum_two = window.result(WindowContext::new(1, 1)).unwrap(); 59 | assert_eq!(sum_two, 190); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /arcon/src/manager/mod.rs: -------------------------------------------------------------------------------- 1 | /// A [kompact] component that injects epoch markers into the dataflow 2 | pub mod epoch; 3 | /// A [kompact] component that keeps track of a set of Arcon nodes 4 | pub mod node; 5 | /// A [kompact] component that keeps track of one or more source components 6 | pub mod source; 7 | 8 | pub mod snapshot; 9 | -------------------------------------------------------------------------------- /arcon/src/manager/state.rs: 
-------------------------------------------------------------------------------- 1 | use super::snapshot::Snapshot; 2 | use fxhash::FxHashMap; 3 | use kompact::prelude::*; 4 | use std::{collections::HashSet, sync::mpsc::Sender}; 5 | 6 | pub type StateID = String; 7 | 8 | #[derive(Debug, Clone)] 9 | pub enum StateEvent { 10 | Snapshot(StateID, Snapshot), 11 | Register(StateID), 12 | } 13 | 14 | pub struct StateManagerPort; 15 | 16 | impl Port for StateManagerPort { 17 | type Indication = Never; 18 | type Request = StateEvent; 19 | } 20 | 21 | #[derive(ComponentDefinition, Actor)] 22 | pub struct StateManager { 23 | /// Component Context 24 | ctx: ComponentContext<Self>, 25 | /// Port for incoming events 26 | pub(crate) manager_port: ProvidedPort<StateManagerPort>, 27 | /// Set of registered state ids 28 | /// 29 | /// Used to verify that users do not watch for state ids that do not exist 30 | pub(crate) registered_state_ids: HashSet<StateID>, 31 | } 32 | 33 | impl StateManager { 34 | pub fn new() -> Self { 35 | Self { 36 | ctx: ComponentContext::uninitialised(), 37 | manager_port: ProvidedPort::uninitialised(), 38 | registered_state_ids: HashSet::new(), 39 | } 40 | } 41 | } 42 | 43 | impl Provide<StateManagerPort> for StateManager { 44 | fn handle(&mut self, event: StateEvent) -> Handled { 45 | debug!(self.ctx.log(), "Got Event {:?}", event); 46 | 47 | match event { 48 | StateEvent::Snapshot(_id, _snapshot) => {} 49 | StateEvent::Register(_id) => {} 50 | } 51 | Handled::Ok 52 | } 53 | } 54 | 55 | ignore_lifecycle!(StateManager); 56 | -------------------------------------------------------------------------------- /arcon/src/metrics/ewma.rs: -------------------------------------------------------------------------------- 1 | // Based off: https://github.com/infusionsoft/yammer-metrics/blob/master/metrics-core/src/main/java/com/codahale/metrics/EWMA.java 2 | 3 | const INTERVAL: isize = 5; 4 | const SECONDS_PER_MINUTE: f64 = 60.0; 5 | const ONE_MINUTE: usize = 1; 6 | const FIVE_MINUTES: usize = 5; 7 | const FIFTEEN_MINUTES: usize = 15; 8 | /// One-min rate smoothing factor: `1 - e^(-interval / 60 / 1)`, per the reference EWMA 9 | fn m1_alpha() -> f64 { 1.0 - (-(INTERVAL as f64) / SECONDS_PER_MINUTE / ONE_MINUTE as f64).exp() } 10 | /// Five-min rate smoothing factor 11 | fn m5_alpha() -> f64 { 1.0 - (-(INTERVAL as f64) / SECONDS_PER_MINUTE / FIVE_MINUTES as f64).exp() } 12 | /// Fifteen-min rate smoothing factor 13 | fn m15_alpha() -> f64 { 1.0 - (-(INTERVAL as f64) / SECONDS_PER_MINUTE / FIFTEEN_MINUTES as f64).exp() } 14 | 15 | /// Exponentially Weighted Moving Average 16 | #[derive(Clone, Debug, Default)] 17 | pub struct EWMA { 18 | initialised: bool, 19 | rate: f64, 20 | uncounted: u64, 21 | alpha: f64, 22 | interval: f64, 23 | } 24 | 25 | impl EWMA { 26 | #[inline] 27 | pub fn new(alpha: f64, interval: f64) -> EWMA { 28 | EWMA { 29 | initialised: false, 30 | rate: 0.0, 31 | uncounted: 0, 32 | alpha, 33 | interval, 34 | } 35 | } 36 | #[inline] 37 | pub fn one_min_ewma() -> EWMA { 38 | EWMA::new(m1_alpha(), INTERVAL as f64) 39 | } 40 | 41 | #[inline] 42 | pub fn five_min_ewma() -> EWMA { 43 | EWMA::new(m5_alpha(), INTERVAL as f64) 44 | } 45 | 46 | #[inline] 47 | pub fn fifteen_min_ewma() -> EWMA { 48 | EWMA::new(m15_alpha(), INTERVAL as f64) 49 | } 50 | 51 | #[inline] 52 | pub fn update(&mut self, n: u64) { 53 | self.uncounted += n; 54 | } 55 | 56 | #[inline] 57 | pub fn tick(&mut self) { 58 | let count = self.uncounted; 59 | self.uncounted = 0; // reset 60 | let instant_rate: f64 = count as f64 / self.interval; 61 | 62 | if self.initialised { 63 | self.rate += self.alpha * (instant_rate - self.rate); 64 | } else { 65 | self.rate = instant_rate; 66 | self.initialised = true; 67 | }
68 | } 69 | 70 | #[inline] 71 | pub fn get_rate(&self) -> f64 { 72 | // TODO: is more needed? 73 | self.rate 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /arcon/src/metrics/log_recorder.rs: -------------------------------------------------------------------------------- 1 | use crate::application::conf::logger::ArconLogger; 2 | use arcon::prelude::*; 3 | use metrics::{GaugeValue, Key, Recorder, Unit}; 4 | 5 | pub struct LogRecorder { 6 | pub(crate) logger: ArconLogger, 7 | } 8 | 9 | impl Recorder for LogRecorder { 10 | fn register_counter(&self, key: &Key, unit: Option<Unit>, description: Option<&'static str>) { 11 | info!( 12 | self.logger, 13 | "(counter) registered key {} with unit {:?} and description {:?}", 14 | key, 15 | unit, 16 | description 17 | ); 18 | } 19 | 20 | fn register_gauge(&self, key: &Key, unit: Option<Unit>, description: Option<&'static str>) { 21 | info!( 22 | self.logger, 23 | "(gauge) registered key {} with unit {:?} and description {:?}", key, unit, description 24 | ); 25 | } 26 | 27 | fn register_histogram(&self, key: &Key, unit: Option<Unit>, description: Option<&'static str>) { 28 | info!( 29 | self.logger, 30 | "(histogram) registered key {} with unit {:?} and description {:?}", 31 | key, 32 | unit, 33 | description 34 | ); 35 | } 36 | 37 | fn increment_counter(&self, key: &Key, value: u64) { 38 | info!(self.logger, "(counter) got value {} for key {}", value, key); 39 | } 40 | 41 | fn update_gauge(&self, key: &Key, value: GaugeValue) { 42 | info!(self.logger, "(gauge) got value {:?} for key {}", value, key); 43 | } 44 | 45 | fn record_histogram(&self, key: &Key, value: f64) { 46 | info!( 47 | self.logger, 48 | "(histogram) got value {} for key {}", value, key 49 | ); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /arcon/src/metrics/meter.rs: -------------------------------------------------------------------------------- 1 | // Based off: https://github.com/infusionsoft/yammer-metrics/blob/master/metrics-core/src/main/java/com/codahale/metrics/Meter.java 2 | 3 | use crate::metrics::ewma::EWMA; 4 | use std::time::Duration; 5 | 6 | /// Meter metric measuring throughput in various forms using EWMA 7 | #[derive(Clone, Debug, Default)] 8 | pub struct Meter { 9 | /// One-min rate 10 | m1_rate: EWMA, 11 | /// Five-min rate 12 | m5_rate: EWMA, 13 | /// Fifteen-min rate 14 | m15_rate: EWMA, 15 | /// Amount of marks 16 | count: u64, 17 | /// Time of start 18 | start_time: u64, 19 | /// Last time Meter was ticked 20 | last_tick: u64, 21 | } 22 | 23 | impl Meter { 24 | #[inline] 25 | pub fn new() -> Meter { 26 | let start_time = crate::util::get_system_time_nano(); 27 | Meter { 28 | m1_rate: EWMA::one_min_ewma(), 29 | m5_rate: EWMA::five_min_ewma(), 30 | m15_rate: EWMA::fifteen_min_ewma(), 31 | count: 0, 32 | start_time, 33 | last_tick: start_time, 34 | } 35 | } 36 | 37 | #[inline] 38 | pub fn mark(&mut self) { 39 | self.mark_n(1); 40 | } 41 | 42 | #[inline] 43 | pub fn mark_n(&mut self, n: u64) { 44 | self.tick_if_necessary(); 45 | self.count += n; 46 | self.m1_rate.update(n); 47 | self.m5_rate.update(n); 48 | self.m15_rate.update(n); 49 | } 50 | 51 | #[inline] 52 | fn tick_if_necessary(&mut self) { 53 | let old_tick = self.last_tick; 54 | // Add clock...
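// Note: no background timer drives these meters. Every mark() and rate getter calls
// tick_if_necessary(), which catches up lazily: it issues one tick() per full 5-second
// interval elapsed since last_tick, so after an idle stretch the EWMAs decay exactly
// as if they had been ticked on schedule.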
55 | let new_tick = crate::util::get_system_time_nano(); 56 | let age = new_tick - old_tick; 57 | 58 | let tick_interval = std::time::Duration::new(5, 0).as_nanos() as u64; 59 | if age > tick_interval { 60 | let new_interval_tick = new_tick - age % tick_interval; 61 | self.last_tick = new_interval_tick; 62 | let required_ticks = age / tick_interval; 63 | for _ in 0..required_ticks { 64 | self.m1_rate.tick(); 65 | self.m5_rate.tick(); 66 | self.m15_rate.tick(); 67 | } 68 | } 69 | } 70 | 71 | #[inline] 72 | pub fn get_count(&self) -> u64 { 73 | self.count 74 | } 75 | 76 | #[inline] 77 | pub fn get_one_min_rate(&mut self) -> f64 { 78 | self.tick_if_necessary(); 79 | self.m1_rate.get_rate() 80 | } 81 | 82 | #[inline] 83 | pub fn get_five_min_rate(&mut self) -> f64 { 84 | self.tick_if_necessary(); 85 | self.m5_rate.get_rate() 86 | } 87 | 88 | #[inline] 89 | pub fn get_fifteen_min_rate(&mut self) -> f64 { 90 | self.tick_if_necessary(); 91 | self.m15_rate.get_rate() 92 | } 93 | 94 | #[inline] 95 | pub fn get_mean_rate(&self) -> f64 { 96 | if self.count == 0 { 97 | 0.0 98 | } else { 99 | // Add clock instead.. 100 | let elapsed: f64 = (crate::util::get_system_time_nano() - self.start_time) as f64; 101 | self.count as f64 / (elapsed / Duration::new(1, 0).as_nanos() as f64) // elapsed is in nanos; divide by nanos-per-second to get a per-second rate 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /arcon/src/metrics/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::all)] 2 | 3 | mod ewma; 4 | mod meter; 5 | #[cfg(all(feature = "hardware_counters", target_os = "linux"))] 6 | pub mod perf_event; 7 | 8 | pub mod runtime_metrics; 9 | 10 | pub mod log_recorder; 11 | -------------------------------------------------------------------------------- /arcon/src/metrics/perf_event.rs: -------------------------------------------------------------------------------- 1 | use crate::prelude::alloc::fmt::Formatter; 2 | use perf_event::events::Hardware; 3 | use std::fmt; 4 | 5 | /// An enum representing supported hardware counters with perf events as enum options 6 | /// 7 | /// It is a wrapper around [Hardware] in order to support [Deserialize] 8 | #[derive(Clone, Debug)] 9 | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] 10 | pub enum HardwareCounter { 11 | CpuCycles, 12 | BranchMisses, 13 | Instructions, 14 | CacheReferences, 15 | CacheMisses, 16 | BranchInstructions, 17 | BusCycles, 18 | StalledCyclesFrontend, 19 | StalledCyclesBackend, 20 | RefCpuCycles, 21 | } 22 | 23 | impl HardwareCounter { 24 | pub(crate) fn get_hardware_kind(&self) -> Hardware { 25 | match self { 26 | HardwareCounter::CpuCycles => Hardware::CPU_CYCLES, 27 | HardwareCounter::Instructions => Hardware::INSTRUCTIONS, 28 | HardwareCounter::CacheReferences => Hardware::CACHE_REFERENCES, 29 | HardwareCounter::CacheMisses => Hardware::CACHE_MISSES, 30 | HardwareCounter::BranchInstructions => Hardware::BRANCH_INSTRUCTIONS, 31 | HardwareCounter::BranchMisses => Hardware::BRANCH_MISSES, 32 | HardwareCounter::BusCycles => Hardware::BUS_CYCLES, 33 | HardwareCounter::StalledCyclesFrontend => Hardware::STALLED_CYCLES_FRONTEND, 34 | HardwareCounter::StalledCyclesBackend => Hardware::STALLED_CYCLES_BACKEND, 35 | HardwareCounter::RefCpuCycles => Hardware::REF_CPU_CYCLES, 36 | } 37 | } 38 | } 39 | 40 | impl fmt::Display for HardwareCounter { 41 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 42 | match self { 43 | HardwareCounter::CpuCycles => write!(f, "cpu_cycles"), 44 |
HardwareCounter::Instructions => write!(f, "instructions"), 45 | HardwareCounter::CacheReferences => write!(f, "cache_references"), 46 | HardwareCounter::CacheMisses => write!(f, "cache_misses"), 47 | HardwareCounter::BranchInstructions => write!(f, "branch_instructions"), 48 | HardwareCounter::BranchMisses => write!(f, "branch_misses"), 49 | HardwareCounter::BusCycles => write!(f, "bus_cycles"), 50 | HardwareCounter::StalledCyclesFrontend => write!(f, "stalled_cycles_frontend"), 51 | HardwareCounter::StalledCyclesBackend => write!(f, "stalled_cycles_backend"), 52 | HardwareCounter::RefCpuCycles => write!(f, "ref_cpu_cycles"), 53 | } 54 | } 55 | } 56 | 57 | #[derive(Clone, Debug, Default)] 58 | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] 59 | pub struct PerfEvents { 60 | pub counters: Vec<HardwareCounter>, 61 | } 62 | 63 | impl PerfEvents { 64 | pub fn new() -> PerfEvents { 65 | PerfEvents { counters: vec![] } 66 | } 67 | 68 | pub fn add(&mut self, hardware_metric_kind: HardwareCounter) { 69 | self.counters.push(hardware_metric_kind); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /arcon/src/metrics/runtime_metrics.rs: -------------------------------------------------------------------------------- 1 | use crate::metrics::meter::Meter; 2 | 3 | #[derive(Clone, Debug, Default)] 4 | pub struct NodeMetrics { 5 | pub inbound_throughput: Meter, 6 | } 7 | 8 | impl NodeMetrics { 9 | pub fn new() -> NodeMetrics { 10 | NodeMetrics { 11 | inbound_throughput: Meter::new(), 12 | } 13 | } 14 | } 15 | pub struct SourceMetrics { 16 | pub incoming_message_rate: Meter, 17 | } 18 | impl SourceMetrics { 19 | pub fn new() -> SourceMetrics { 20 | SourceMetrics { 21 | incoming_message_rate: Meter::new(), 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /arcon/src/stream/channel/mod.rs: -------------------------------------------------------------------------------- 1 | /// Available Channel Strategies 2 | pub mod strategy; 3 | 4 | use crate::data::{flight_serde::FlightSerde, ArconMessage, ArconType}; 5 | use kompact::prelude::{ActorPath, ActorRefStrong}; 6 | 7 | /// A Channel represents a connection to another Component 8 | #[derive(Clone)] 9 | pub enum Channel<A: ArconType> { 10 | /// A typed local queue 11 | Local(ActorRefStrong<ArconMessage<A>>), 12 | /// Remote based queue containing a remote ActorPath identifier and an Arcon Serialiser 13 | #[allow(dead_code)] 14 | Remote(ActorPath, FlightSerde), 15 | } 16 | -------------------------------------------------------------------------------- /arcon/src/stream/channel/strategy/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{ 3 | flight_serde::{reliable_remote::ReliableSerde, FlightSerde}, 4 | ArconEvent, ArconMessage, ArconType, 5 | }, 6 | stream::channel::Channel, 7 | }; 8 | use kompact::prelude::{ComponentDefinition, SerError}; 9 | use std::sync::Arc; 10 | 11 | #[allow(dead_code)] 12 | pub mod broadcast; 13 | pub mod forward; 14 | pub mod keyed; 15 | 16 | /// A `ChannelStrategy` defines a strategy of how messages are sent downstream 17 | /// 18 | /// Common strategies include [one-to-one](forward::Forward) and [one-to-many](broadcast::Broadcast) 19 | pub enum ChannelStrategy<A> 20 | where 21 | A: ArconType, 22 | { 23 | /// Send messages to a single Component 24 | Forward(forward::Forward<A>), 25 | /// Broadcasts the message to a Vec of `Channels` 26 | #[allow(dead_code)] 27 | Broadcast(broadcast::Broadcast<A>), 28 | ///
Partition data to a set of `Channels` based on keyed hash 29 | Keyed(keyed::Keyed<A>), 30 | /// A strategy that prints to the console 31 | Console, 32 | /// A strategy that simply does nothing 33 | Mute, 34 | } 35 | 36 | impl<A> ChannelStrategy<A> 37 | where 38 | A: ArconType, 39 | { 40 | /// Add event to outgoing buffer 41 | /// 42 | /// Returns an iterator of (channel, message) pairs; output is yielded once buffers fill up or a marker event is pushed. 43 | #[inline] 44 | pub fn push( 45 | &mut self, 46 | event: ArconEvent<A>, 47 | ) -> impl IntoIterator<Item = (Arc<Channel<A>>, ArconMessage<A>)> { 48 | match self { 49 | ChannelStrategy::Forward(s) => s.add(event), 50 | ChannelStrategy::Keyed(s) => s.add(event), 51 | ChannelStrategy::Broadcast(s) => s.add(event), 52 | ChannelStrategy::Console => { 53 | println!("{:?}", event); 54 | Vec::new() 55 | } 56 | ChannelStrategy::Mute => Vec::new(), 57 | } 58 | } 59 | 60 | /// Returns number of outgoing channels 61 | #[inline] 62 | #[allow(dead_code)] 63 | pub fn num_channels(&self) -> usize { 64 | match self { 65 | ChannelStrategy::Forward(_) => 1, 66 | ChannelStrategy::Broadcast(s) => s.num_channels(), 67 | ChannelStrategy::Keyed(s) => s.num_channels(), 68 | ChannelStrategy::Console => 0, 69 | ChannelStrategy::Mute => 0, 70 | } 71 | } 72 | } 73 | 74 | /// `send` pushes an ArconMessage onto a Component queue 75 | /// 76 | /// The message may be sent to a local or remote component 77 | #[inline] 78 | pub(crate) fn send<A: ArconType>( 79 | channel: &Channel<A>, 80 | message: ArconMessage<A>, 81 | source: &impl ComponentDefinition, 82 | ) -> Result<(), SerError> { 83 | match channel { 84 | Channel::Local(actor_ref) => { 85 | actor_ref.tell(message); 86 | Ok(()) 87 | } 88 | Channel::Remote(actor_path, FlightSerde::Reliable) => { 89 | let reliable_msg = ReliableSerde(message.into()); 90 | actor_path.tell_serialised(reliable_msg, source) 91 | } 92 | } 93 | } 94 | 95 | #[cfg(test)] 96 | pub mod tests { 97 | #[derive(Arcon, prost::Message, Clone)] 98 | #[arcon(reliable_ser_id = 13, version = 1)] 99 | pub struct Input { 100 | #[prost(uint32, tag = "1")] 101 | pub id: u32, 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /arcon/src/stream/mod.rs: -------------------------------------------------------------------------------- 1 | /// Module containing the channel logic for arcon 2 | pub mod channel; 3 | /// Module containing arcon nodes that drive the streaming execution 4 | pub mod node; 5 | /// Module containing all available arcon streaming operators 6 | pub mod operator; 7 | /// Module containing all available arcon sources 8 | pub mod source; 9 | /// Module containing time logic within Arcon 10 | pub mod time; 11 | -------------------------------------------------------------------------------- /arcon/src/stream/node/common.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{ArconEvent, ArconType}; 2 | use crate::error::{ArconResult, Error}; 3 | use crate::reportable_error; 4 | use crate::stream::channel::strategy::{send, ChannelStrategy}; 5 | use kompact::prelude::{ComponentDefinition, SerError}; 6 | 7 | // Common helper function for adding events to a ChannelStrategy and possibly 8 | // dispatching Arcon messages.
9 | #[inline] 10 | pub fn add_outgoing_event<A: ArconType>( 11 | event: ArconEvent<A>, 12 | strategy: &mut ChannelStrategy<A>, 13 | cd: &impl ComponentDefinition, 14 | ) -> ArconResult<()> { 15 | for (channel, msg) in strategy.push(event) { 16 | match send(&channel, msg, cd) { 17 | Err(SerError::BufferError(msg)) | Err(SerError::NoBuffersAvailable(msg)) => { 18 | // TODO: actually handle it 19 | return Err(Error::Unsupported { msg }); 20 | } 21 | Err(SerError::InvalidData(msg)) 22 | | Err(SerError::InvalidType(msg)) 23 | | Err(SerError::Unknown(msg)) => { 24 | return reportable_error!("{}", msg); 25 | } 26 | Err(SerError::NoClone) => { 27 | return reportable_error!("Got Kompact's SerError::NoClone"); 28 | } 29 | Ok(_) => (), 30 | } 31 | } 32 | Ok(()) 33 | } 34 | -------------------------------------------------------------------------------- /arcon/src/stream/node/debug.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{flight_serde::reliable_remote::ReliableSerde, *}; 2 | use kompact::prelude::*; 3 | use std::collections::HashSet; 4 | 5 | /// A DebugNode is a debug version of [Node] 6 | /// 7 | /// DebugNodes act as sinks and are useful for tests and situations 8 | /// when one needs to verify dev pipelines. 9 | #[derive(ComponentDefinition)] 10 | pub struct DebugNode<IN> 11 | where 12 | IN: ArconType, 13 | { 14 | ctx: ComponentContext<DebugNode<IN>>, 15 | /// Buffer holding all received [ArconElement] 16 | pub data: Vec<ArconElement<IN>>, 17 | /// Buffer holding all received [Watermark] 18 | pub watermarks: Vec<Watermark>, 19 | /// Buffer holding all received [Epoch] 20 | pub epochs: Vec<Epoch>, 21 | /// Buffer holding all NodeIDs the DebugNode has received messages from 22 | pub senders: HashSet<NodeID>, 23 | } 24 | 25 | impl<IN> DebugNode<IN> 26 | where 27 | IN: ArconType, 28 | { 29 | pub fn new() -> DebugNode<IN> { 30 | DebugNode { 31 | ctx: ComponentContext::uninitialised(), 32 | data: Vec::new(), 33 | watermarks: Vec::new(), 34 | epochs: Vec::new(), 35 | senders: HashSet::new(), 36 | } 37 | } 38 | #[inline] 39 | fn handle_events<I>(&mut self, events: I) 40 | where 41 | I: IntoIterator<Item = ArconEventWrapper<IN>>, 42 | { 43 | for event in events.into_iter() { 44 | match event.unwrap() { 45 | ArconEvent::Element(e) => { 46 | info!(self.ctx.log(), "Sink element: {:?}", e.data); 47 | self.data.push(e); 48 | } 49 | ArconEvent::Watermark(w) => { 50 | self.watermarks.push(w); 51 | } 52 | ArconEvent::Epoch(e) => { 53 | self.epochs.push(e); 54 | } 55 | ArconEvent::Death(_) => {} 56 | } 57 | } 58 | } 59 | } 60 | 61 | impl<IN> Default for DebugNode<IN> 62 | where 63 | IN: ArconType, 64 | { 65 | fn default() -> Self { 66 | Self::new() 67 | } 68 | } 69 | 70 | impl<IN> ComponentLifecycle for DebugNode<IN> 71 | where 72 | IN: ArconType, 73 | { 74 | fn on_start(&mut self) -> Handled { 75 | debug!(self.ctx.log(), "Started Arcon DebugNode"); 76 | Handled::Ok 77 | } 78 | } 79 | 80 | impl<IN> Actor for DebugNode<IN> 81 | where 82 | IN: ArconType, 83 | { 84 | type Message = ArconMessage<IN>; 85 | 86 | fn receive_local(&mut self, msg: Self::Message) -> Handled { 87 | self.senders.insert(msg.sender); 88 | self.handle_events(msg.events); 89 | Handled::Ok 90 | } 91 | fn receive_network(&mut self, msg: NetMessage) -> Handled { 92 | let arcon_msg = match *msg.ser_id() { 93 | id if id == IN::RELIABLE_SER_ID => msg 94 | .try_deserialise::<ArconMessage<IN>, ReliableSerde<IN>>() 95 | .unwrap(), 96 | _ => { 97 | panic!("Unexpected deserialiser") 98 | } 99 | }; 100 | self.senders.insert(arcon_msg.sender); 101 | self.handle_events(arcon_msg.events); 102 | Handled::Ok 103 | } 104 | } 105 |
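Since a `DebugNode` buffers every element, watermark, and epoch it receives, tests attach it as the terminal sink and inspect its buffers after the pipeline has run. A minimal sketch of that pattern, mirroring the `.debug()` / `get_debug_node` usage in this crate's own operator tests further down:

```rust
// Sketch: DebugNode as a test sink (pattern from operator/function/mod.rs tests).
let mut app = (0..10i32)
    .to_stream(|conf| {
        conf.set_arcon_time(ArconTime::Process);
    })
    .map(|x| x * 2)
    .debug() // terminal DebugNode
    .builder()
    .build();

app.run();
std::thread::sleep(std::time::Duration::from_millis(1000));

let debug_node = app.get_debug_node::<i32>().unwrap();
debug_node.on_definition(|cd| {
    // data, watermarks, epochs, and senders are all available for assertions
    assert_eq!(cd.data.len(), 10);
});
```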
-------------------------------------------------------------------------------- /arcon/src/stream/operator/function/filter.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{ArconElement, ArconNever, ArconType}, 3 | error::*, 4 | index::{ArconState, EmptyState}, 5 | stream::operator::{Operator, OperatorContext}, 6 | util::ArconFnBounds, 7 | }; 8 | use std::marker::PhantomData; 9 | 10 | pub struct Filter 11 | where 12 | IN: ArconType, 13 | F: Fn(&IN, &mut S) -> bool + ArconFnBounds, 14 | S: ArconState, 15 | { 16 | udf: F, 17 | _marker: PhantomData bool>, 18 | } 19 | 20 | impl Filter bool, EmptyState> 21 | where 22 | IN: ArconType, 23 | { 24 | #[allow(clippy::new_ret_no_self)] 25 | pub fn new( 26 | udf: impl Fn(&IN) -> bool + ArconFnBounds, 27 | ) -> Filter bool + ArconFnBounds, EmptyState> { 28 | let udf = move |input: &IN, _: &mut EmptyState| udf(input); 29 | Filter { 30 | udf, 31 | _marker: Default::default(), 32 | } 33 | } 34 | } 35 | 36 | impl Filter 37 | where 38 | IN: ArconType, 39 | F: Fn(&IN, &mut S) -> bool + ArconFnBounds, 40 | S: ArconState, 41 | { 42 | pub fn stateful(udf: F) -> Self { 43 | Filter { 44 | udf, 45 | _marker: Default::default(), 46 | } 47 | } 48 | } 49 | 50 | impl Operator for Filter 51 | where 52 | IN: ArconType, 53 | F: Fn(&IN, &mut S) -> bool + ArconFnBounds, 54 | S: ArconState, 55 | { 56 | type IN = IN; 57 | type OUT = IN; 58 | type TimerState = ArconNever; 59 | type OperatorState = S; 60 | type ElementIterator = Option>; 61 | 62 | fn handle_element( 63 | &mut self, 64 | element: ArconElement, 65 | ctx: &mut OperatorContext, 66 | ) -> ArconResult { 67 | if (self.udf)(&element.data, ctx.state()) { 68 | Ok(Some(element)) 69 | } else { 70 | Ok(None) 71 | } 72 | } 73 | crate::ignore_timeout!(); 74 | } 75 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/function/flatmap.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{ArconElement, ArconNever, ArconType}, 3 | error::*, 4 | index::{ArconState, EmptyState}, 5 | stream::operator::{Operator, OperatorContext}, 6 | util::ArconFnBounds, 7 | }; 8 | use std::marker::PhantomData; 9 | 10 | pub struct FlatMap 11 | where 12 | IN: ArconType, 13 | OUTS: IntoIterator, 14 | OUTS::Item: ArconType, 15 | F: Fn(IN, &mut S) -> ArconResult + ArconFnBounds, 16 | S: ArconState, 17 | { 18 | udf: F, 19 | _marker: PhantomData ArconResult>, 20 | } 21 | 22 | impl FlatMap ArconResult, EmptyState> 23 | where 24 | IN: ArconType, 25 | OUTS: IntoIterator + 'static, 26 | OUTS::Item: ArconType, 27 | { 28 | #[allow(clippy::new_ret_no_self)] 29 | pub fn new( 30 | udf: impl Fn(IN) -> OUTS + ArconFnBounds, 31 | ) -> FlatMap< 32 | IN, 33 | OUTS, 34 | impl Fn(IN, &mut EmptyState) -> ArconResult + ArconFnBounds, 35 | EmptyState, 36 | > { 37 | let udf = move |input: IN, _: &mut EmptyState| Ok(udf(input)); 38 | FlatMap { 39 | udf, 40 | _marker: Default::default(), 41 | } 42 | } 43 | } 44 | 45 | impl FlatMap 46 | where 47 | IN: ArconType, 48 | OUTS: IntoIterator + 'static, 49 | OUTS::Item: ArconType, 50 | F: Fn(IN, &mut S) -> ArconResult + ArconFnBounds, 51 | S: ArconState, 52 | { 53 | pub fn stateful(udf: F) -> Self { 54 | FlatMap { 55 | udf, 56 | _marker: Default::default(), 57 | } 58 | } 59 | } 60 | 61 | impl Operator for FlatMap 62 | where 63 | IN: ArconType, 64 | OUTS: IntoIterator + 'static, 65 | OUTS::Item: ArconType, 66 | F: Fn(IN, &mut S) -> ArconResult + 
ArconFnBounds, 67 | S: ArconState, 68 | { 69 | type IN = IN; 70 | type OUT = OUTS::Item; 71 | type TimerState = ArconNever; 72 | type OperatorState = S; 73 | type ElementIterator = Box>>; 74 | 75 | fn handle_element( 76 | &mut self, 77 | element: ArconElement, 78 | ctx: &mut OperatorContext, 79 | ) -> ArconResult { 80 | let timestamp = element.timestamp; 81 | let result = (self.udf)(element.data, ctx.state())?; 82 | Ok(Box::new( 83 | result 84 | .into_iter() 85 | .map(move |e| ArconElement::with_timestamp(e, timestamp)), 86 | )) 87 | } 88 | 89 | crate::ignore_timeout!(); 90 | } 91 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/function/map.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{ArconElement, ArconNever, ArconType}, 3 | error::*, 4 | index::{ArconState, EmptyState}, 5 | stream::operator::{Operator, OperatorContext}, 6 | util::ArconFnBounds, 7 | }; 8 | use std::marker::PhantomData; 9 | 10 | pub struct Map 11 | where 12 | IN: ArconType, 13 | OUT: ArconType, 14 | F: Fn(IN, &mut S) -> ArconResult + ArconFnBounds, 15 | S: ArconState, 16 | { 17 | udf: F, 18 | _marker: PhantomData ArconResult>, 19 | } 20 | 21 | impl Map ArconResult, EmptyState> 22 | where 23 | IN: ArconType, 24 | OUT: ArconType, 25 | { 26 | #[allow(clippy::new_ret_no_self)] 27 | pub fn new( 28 | udf: impl Fn(IN) -> OUT + ArconFnBounds, 29 | ) -> Map ArconResult + ArconFnBounds, EmptyState> 30 | { 31 | let udf = move |input: IN, _: &mut EmptyState| { 32 | let output = udf(input); 33 | Ok(output) 34 | }; 35 | 36 | Map { 37 | udf, 38 | _marker: Default::default(), 39 | } 40 | } 41 | } 42 | 43 | impl Map 44 | where 45 | IN: ArconType, 46 | OUT: ArconType, 47 | F: Fn(IN, &mut S) -> ArconResult + ArconFnBounds, 48 | S: ArconState, 49 | { 50 | pub fn stateful(udf: F) -> Self { 51 | Map { 52 | udf, 53 | _marker: Default::default(), 54 | } 55 | } 56 | } 57 | 58 | impl Operator for Map 59 | where 60 | IN: ArconType, 61 | OUT: ArconType, 62 | F: Fn(IN, &mut S) -> ArconResult + ArconFnBounds, 63 | S: ArconState, 64 | { 65 | type IN = IN; 66 | type OUT = OUT; 67 | type TimerState = ArconNever; 68 | type OperatorState = S; 69 | type ElementIterator = std::iter::Once>; 70 | 71 | fn handle_element( 72 | &mut self, 73 | element: ArconElement, 74 | ctx: &mut OperatorContext, 75 | ) -> ArconResult { 76 | let data = (self.udf)(element.data, ctx.state())?; 77 | Ok(std::iter::once(ArconElement::with_timestamp( 78 | data, 79 | element.timestamp, 80 | ))) 81 | } 82 | 83 | crate::ignore_timeout!(); 84 | } 85 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/function/map_in_place.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{ArconElement, ArconNever, ArconType}, 3 | error::*, 4 | index::{ArconState, EmptyState}, 5 | stream::operator::{Operator, OperatorContext}, 6 | util::ArconFnBounds, 7 | }; 8 | use std::marker::PhantomData; 9 | 10 | pub struct MapInPlace 11 | where 12 | IN: ArconType, 13 | F: Fn(&mut IN, &mut S) -> ArconResult<()> + ArconFnBounds, 14 | S: ArconState, 15 | { 16 | udf: F, 17 | _marker: PhantomData ArconResult<()>>, 18 | } 19 | 20 | impl MapInPlace ArconResult<()>, EmptyState> 21 | where 22 | IN: ArconType, 23 | { 24 | #[allow(clippy::new_ret_no_self)] 25 | pub fn new( 26 | udf: impl Fn(&mut IN) + ArconFnBounds, 27 | ) -> MapInPlace< 28 | IN, 29 | impl Fn(&mut IN, &mut EmptyState) 
-> ArconResult<()> + ArconFnBounds, 30 | EmptyState, 31 | > { 32 | let udf = move |input: &mut IN, _: &mut EmptyState| { 33 | udf(input); 34 | Ok(()) 35 | }; 36 | MapInPlace { 37 | udf, 38 | _marker: Default::default(), 39 | } 40 | } 41 | } 42 | 43 | impl MapInPlace 44 | where 45 | IN: ArconType, 46 | F: Fn(&mut IN, &mut S) -> ArconResult<()> + ArconFnBounds, 47 | S: ArconState, 48 | { 49 | pub fn stateful(udf: F) -> Self { 50 | MapInPlace { 51 | udf, 52 | _marker: Default::default(), 53 | } 54 | } 55 | } 56 | 57 | impl Operator for MapInPlace 58 | where 59 | IN: ArconType, 60 | F: Fn(&mut IN, &mut S) -> ArconResult<()> + ArconFnBounds, 61 | S: ArconState, 62 | { 63 | type IN = IN; 64 | type OUT = IN; 65 | type TimerState = ArconNever; 66 | type OperatorState = S; 67 | type ElementIterator = std::iter::Once>; 68 | 69 | fn handle_element( 70 | &mut self, 71 | element: ArconElement, 72 | ctx: &mut OperatorContext, 73 | ) -> ArconResult { 74 | let mut elem = element; 75 | (self.udf)(&mut elem.data, ctx.state())?; 76 | Ok(std::iter::once(elem)) 77 | } 78 | 79 | crate::ignore_timeout!(); 80 | } 81 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/function/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod filter; 2 | pub mod flatmap; 3 | pub mod map; 4 | pub mod map_in_place; 5 | 6 | pub use filter::Filter; 7 | pub use flatmap::FlatMap; 8 | pub use map::Map; 9 | pub use map_in_place::MapInPlace; 10 | 11 | #[cfg(test)] 12 | mod tests { 13 | use crate::prelude::*; 14 | 15 | fn wait(millis: u64) { 16 | std::thread::sleep(std::time::Duration::from_millis(millis)); 17 | } 18 | 19 | #[test] 20 | fn map_test() { 21 | let app = (0..10) 22 | .to_stream(|conf| { 23 | conf.set_arcon_time(ArconTime::Process); 24 | }) 25 | .map(|x| x + 10) 26 | .debug() 27 | .builder() 28 | .build(); 29 | 30 | check_map_result(app); 31 | } 32 | 33 | #[test] 34 | fn map_in_place_test() { 35 | let app = (0..10) 36 | .to_stream(|conf| { 37 | conf.set_arcon_time(ArconTime::Process); 38 | }) 39 | .map_in_place(|x| *x += 10) 40 | .debug() 41 | .builder() 42 | .build(); 43 | 44 | check_map_result(app); 45 | } 46 | 47 | // helper to check common result between Map/MapInPlace 48 | fn check_map_result(mut app: Application) { 49 | app.run(); 50 | wait(1000); 51 | 52 | let debug_node = app.get_debug_node::().unwrap(); 53 | 54 | debug_node.on_definition(|cd| { 55 | let sum: i32 = cd.data.iter().map(|elem| elem.data).sum(); 56 | assert_eq!(sum, 145); 57 | }); 58 | } 59 | 60 | #[test] 61 | fn filter_test() { 62 | let mut app = (0..10i32) 63 | .to_stream(|conf| { 64 | conf.set_arcon_time(ArconTime::Process); 65 | }) 66 | .filter(|x| *x < 5) 67 | .debug() 68 | .builder() 69 | .build(); 70 | 71 | app.run(); 72 | wait(1000); 73 | 74 | let debug_node = app.get_debug_node::().unwrap(); 75 | 76 | debug_node.on_definition(|cd| { 77 | assert_eq!(cd.data.len(), 5); 78 | }); 79 | } 80 | 81 | #[test] 82 | fn flatmap_test() { 83 | let mut builder = (0..5i32) 84 | .to_stream(|conf| { 85 | conf.set_arcon_time(ArconTime::Process); 86 | }) 87 | .flat_map(|x| (0..x)) 88 | .debug() 89 | .builder(); 90 | 91 | let mut app = builder.build(); 92 | 93 | app.run(); 94 | wait(1000); 95 | 96 | let debug_node = app.get_debug_node::().unwrap(); 97 | 98 | debug_node.on_definition(|cd| { 99 | assert_eq!(cd.data.len(), 10); 100 | let sum: i32 = cd.data.iter().map(|elem| elem.data).sum(); 101 | assert_eq!(sum, 10); 102 | }); 103 | } 104 | } 105 | 
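Each function operator above also exposes a `stateful` constructor whose UDF receives an `&mut` handle to an `ArconState` as its second argument. A hedged sketch of a counting `Map` follows; `CountState` and the exact `HashTable` type parameters are assumptions for illustration, patterned on the `StreamingState` test later in this section:

```rust
// Sketch: a stateful Map counting occurrences per value (CountState is hypothetical).
#[derive(ArconState)]
pub struct CountState<B: Backend> {
    counts: HashTable<u64, u64, B>,
}

// The derive generates accessor methods (state.counts()) as used in test/arcon_state.rs.
let map = Map::stateful(|x: u64, state: &mut CountState<Sled>| {
    let seen = state.counts().get(&x)?.copied().unwrap_or(0) + 1;
    state.counts().put(x, seen)?;
    Ok(seen) // emit how many times `x` has been seen so far
});
```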
-------------------------------------------------------------------------------- /arcon/src/stream/operator/sink/kafka.rs: -------------------------------------------------------------------------------- 1 | use crate::prelude::*; 2 | use futures::executor::block_on; 3 | use rdkafka::{ 4 | config::ClientConfig, 5 | error::KafkaResult, 6 | producer::{FutureProducer, FutureRecord}, 7 | }; 8 | use std::cell::RefCell; 9 | 10 | /* 11 | KafkaSink: Buffers received elements 12 | Writes and commits buffers on epoch 13 | */ 14 | #[allow(dead_code)] 15 | pub struct KafkaSink 16 | where 17 | IN: ArconType + ::serde::Serialize + ::serde::de::DeserializeOwned, 18 | { 19 | bootstrap_server: String, 20 | topic: String, 21 | offset: u32, 22 | batch_size: u32, 23 | producer: FutureProducer, 24 | buffer: RefCell>>, 25 | } 26 | 27 | impl KafkaSink 28 | where 29 | IN: ArconType + ::serde::Serialize + ::serde::de::DeserializeOwned, 30 | { 31 | pub fn new(bootstrap_server: String, topic: String, offset: u32) -> KafkaSink { 32 | let mut config = ClientConfig::new(); 33 | config 34 | .set("group.id", "example_consumer_group_id") 35 | .set("bootstrap.servers", &bootstrap_server) 36 | .set("produce.offset.report", "true") 37 | .set("message.timeout.ms", "5000"); 38 | let result: KafkaResult = config.create(); 39 | match result { 40 | Ok(producer) => KafkaSink { 41 | bootstrap_server, 42 | topic, 43 | offset, 44 | batch_size: 100, 45 | producer, 46 | buffer: RefCell::new(Vec::new()), 47 | }, 48 | _ => { 49 | panic!("Failed to start KafkaSink"); 50 | } 51 | } 52 | } 53 | 54 | pub fn commit_buffer(&self) -> () { 55 | //println!("sink committing buffer"); 56 | // Will asynchronously try to write all messages in the buffer 57 | // But will block the thread until all commits are complete 58 | let mut futures = Vec::new(); 59 | for element in self.buffer.borrow_mut().drain(..) 
{ 60 | if let Ok(serialized) = serde_json::to_string(&element.data) { 61 | futures.push(self.producer.send( 62 | FutureRecord::to(&self.topic).payload(&serialized).key(&()), 63 | 0, // The future will return RDKafkaError::QueueFull without retrying 64 | )); 65 | } 66 | } 67 | 68 | // Write synchronously 69 | for future in futures { 70 | let _ = block_on(future); 71 | } 72 | } 73 | } 74 | 75 | impl Operator for KafkaSink 76 | where 77 | IN: ArconType + ::serde::Serialize + ::serde::de::DeserializeOwned, 78 | { 79 | type IN = IN; 80 | type OUT = ArconNever; 81 | type OperatorState = (); 82 | type TimerState = ArconNever; 83 | 84 | fn handle_element( 85 | &mut self, 86 | element: ArconElement, 87 | _ctx: OperatorContext, 88 | ) -> ArconResult<()> { 89 | self.buffer.borrow_mut().push(element); 90 | Ok(()) 91 | } 92 | crate::ignore_timeout!(); 93 | crate::ignore_persist!(); 94 | } 95 | 96 | // Tested via kafka_source 97 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/sink/local_file.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | data::{ArconElement, ArconNever, ArconType}, 3 | error::ArconResult, 4 | prelude::EmptyState, 5 | stream::operator::{Operator, OperatorContext}, 6 | }; 7 | use std::{ 8 | cell::RefCell, 9 | fs::{File, OpenOptions}, 10 | io::Write, 11 | marker::PhantomData, 12 | path::Path, 13 | }; 14 | 15 | pub struct LocalFileSink 16 | where 17 | IN: ArconType, 18 | { 19 | file: RefCell, 20 | _marker: PhantomData, 21 | } 22 | 23 | impl LocalFileSink 24 | where 25 | IN: ArconType, 26 | { 27 | pub fn new(file_path: impl AsRef) -> Self { 28 | let file = RefCell::new( 29 | OpenOptions::new() 30 | .write(true) 31 | .create(true) 32 | .append(true) 33 | .open(file_path) 34 | .expect("Failed to open file"), 35 | ); 36 | 37 | LocalFileSink { 38 | file, 39 | _marker: PhantomData, 40 | } 41 | } 42 | } 43 | 44 | impl Operator for LocalFileSink 45 | where 46 | IN: ArconType, 47 | { 48 | type IN = IN; 49 | type OUT = ArconNever; 50 | type TimerState = ArconNever; 51 | type OperatorState = EmptyState; 52 | type ElementIterator = std::iter::Empty>; 53 | 54 | fn handle_element( 55 | &mut self, 56 | element: ArconElement, 57 | _ctx: &mut OperatorContext, 58 | ) -> ArconResult { 59 | if let Err(err) = writeln!(self.file.borrow_mut(), "{:?}", element.data) { 60 | eprintln!("Error while writing to file sink {}", err); 61 | } 62 | Ok(std::iter::empty::>()) 63 | } 64 | crate::ignore_timeout!(); 65 | } 66 | 67 | #[cfg(test)] 68 | mod tests { 69 | use super::*; 70 | use crate::prelude::*; 71 | use std::io::{BufRead, BufReader}; 72 | use tempfile::NamedTempFile; 73 | 74 | #[test] 75 | fn local_file_sink_test() { 76 | let file = NamedTempFile::new().unwrap(); 77 | let file_path = file.path().to_string_lossy().into_owned(); 78 | 79 | let mut app = vec![6i32, 2i32, 15i32, 30i32] 80 | .to_stream(|conf| { 81 | conf.set_arcon_time(ArconTime::Process); 82 | }) 83 | .operator(OperatorBuilder { 84 | operator: Arc::new(move || LocalFileSink::new(&file_path)), 85 | state: Arc::new(|_| EmptyState), 86 | conf: OperatorConf { 87 | parallelism_strategy: ParallelismStrategy::Static(1), 88 | ..Default::default() 89 | }, 90 | }) 91 | .ignore() 92 | .builder() 93 | .build(); 94 | 95 | app.run(); 96 | 97 | std::thread::sleep(std::time::Duration::from_secs(1)); 98 | 99 | let file = File::open(file.path()).expect("no such file"); 100 | let buf = BufReader::new(file); 101 | let result: Vec = buf 102 | .lines() 103 
| .map(|l| l.unwrap().parse::().expect("could not parse line")) 104 | .collect(); 105 | 106 | let expected: Vec = vec![6, 2, 15, 30]; 107 | assert_eq!(result, expected); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/sink/measure.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{ArconElement, ArconNever, ArconType}; 2 | use crate::error::*; 3 | use crate::index::EmptyState; 4 | use crate::stream::operator::{Operator, OperatorContext}; 5 | use slog::*; 6 | use std::marker::PhantomData; 7 | 8 | // Throughput logic taken from: https://github.com/lsds/StreamBench/blob/master/yahoo-streaming-benchmark/src/main/scala/uk/ac/ic/imperial/benchmark/flink/YahooBenchmark.scala 9 | pub struct MeasureSink { 10 | log_freq: u64, 11 | last_total_recv: u64, 12 | last_time: u64, 13 | total_recv: u64, 14 | avg_throughput: f32, 15 | throughput_counter: u64, 16 | throughput_sum: f32, 17 | _marker: PhantomData, 18 | } 19 | 20 | impl MeasureSink { 21 | /// Creates a MeasureSink that logs throughput 22 | pub fn new(log_freq: u64) -> Self { 23 | Self { 24 | log_freq, 25 | last_total_recv: 0, 26 | last_time: 0, 27 | total_recv: 0, 28 | avg_throughput: 0.0, 29 | throughput_counter: 0, 30 | throughput_sum: 0.0, 31 | _marker: PhantomData, 32 | } 33 | } 34 | } 35 | 36 | impl Operator for MeasureSink { 37 | type IN = IN; 38 | type OUT = IN; 39 | type TimerState = ArconNever; 40 | type OperatorState = EmptyState; 41 | type ElementIterator = std::iter::Empty>; 42 | 43 | fn handle_element( 44 | &mut self, 45 | _: ArconElement, 46 | ctx: &mut OperatorContext, 47 | ) -> ArconResult { 48 | // When a latency marker is introduced for ArconEvent, calculate the end-to-end latency and report. 
49 | // https://github.com/cda-group/arcon/issues/235 50 | 51 | if self.total_recv == 0 { 52 | info!( 53 | ctx.log(), 54 | "ThroughputLogging {}, {}", 55 | crate::util::get_system_time(), 56 | self.total_recv 57 | ); 58 | } 59 | 60 | self.total_recv += 1; 61 | 62 | if self.total_recv % self.log_freq == 0 { 63 | let current_time = crate::util::get_system_time(); 64 | 65 | if current_time > self.last_time { 66 | let throughput = (self.total_recv - self.last_total_recv) as f32 67 | / (current_time - self.last_time) as f32 68 | * 1000.0; 69 | 70 | if throughput != 0.0 { 71 | self.throughput_counter += 1; 72 | self.throughput_sum += throughput; 73 | self.avg_throughput = self.throughput_sum / self.throughput_counter as f32; 74 | } 75 | 76 | info!( 77 | ctx.log(), 78 | "Throughput {}, Average {}", throughput, self.avg_throughput 79 | ); 80 | 81 | info!( 82 | ctx.log(), 83 | "ThroughputLogging at time {}, with total recv {}", 84 | crate::util::get_system_time(), 85 | self.total_recv 86 | ); 87 | self.last_time = current_time; 88 | self.last_total_recv = self.total_recv; 89 | } 90 | } 91 | 92 | Ok(std::iter::empty::>()) 93 | } 94 | 95 | crate::ignore_timeout!(); 96 | } 97 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/sink/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod local_file; 2 | 3 | #[cfg(feature = "socket")] 4 | #[allow(dead_code)] 5 | pub mod socket; 6 | 7 | pub mod measure; 8 | 9 | //#[cfg(feature = "kafka")] 10 | //pub mod kafka; 11 | -------------------------------------------------------------------------------- /arcon/src/stream/operator/window/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod assigner; 2 | 3 | pub use assigner::{WindowAssigner, WindowState}; 4 | 5 | use fxhash::FxHasher; 6 | use std::hash::{Hash, Hasher}; 7 | 8 | #[derive(prost::Message, Hash, PartialEq, Eq, Copy, Clone)] 9 | pub struct WindowContext { 10 | #[prost(uint64)] 11 | pub key: u64, 12 | #[prost(uint64)] 13 | pub index: u64, 14 | } 15 | 16 | impl WindowContext { 17 | pub fn new(key: u64, index: u64) -> Self { 18 | WindowContext { key, index } 19 | } 20 | } 21 | 22 | impl From for u64 { 23 | fn from(ctx: WindowContext) -> Self { 24 | let mut s = FxHasher::default(); 25 | ctx.hash(&mut s); 26 | s.finish() 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /arcon/src/stream/source/local_file.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{ 2 | source::{SourceError, SourceResult}, 3 | Error, 4 | }; 5 | 6 | use crate::{ 7 | data::ArconType, 8 | stream::source::{Poll, Source}, 9 | }; 10 | use std::{ 11 | fmt::Display, 12 | fs::File, 13 | io::{BufRead, BufReader}, 14 | str::FromStr, 15 | }; 16 | 17 | pub struct LocalFileSourceImpl 18 | where 19 | A: ArconType + FromStr + Display, 20 | ::Err: Display, 21 | { 22 | lines: std::io::Lines>, 23 | _marker: std::marker::PhantomData, 24 | } 25 | 26 | impl LocalFileSourceImpl 27 | where 28 | A: ArconType + FromStr + Display, 29 | ::Err: Display, 30 | { 31 | pub fn new(file_path: String) -> Self { 32 | let f = File::open(file_path).expect("failed to open file"); 33 | let reader = BufReader::new(f); 34 | let lines = reader.lines(); 35 | Self { 36 | lines, 37 | _marker: std::marker::PhantomData, 38 | } 39 | } 40 | } 41 | 42 | impl Source for LocalFileSourceImpl 43 | where 44 | A: ArconType + FromStr + 
Display, 45 | ::Err: Display, 46 | { 47 | type Item = A; 48 | 49 | fn poll_next(&mut self) -> SourceResult> { 50 | match self.lines.next() { 51 | Some(Ok(line)) => match line.parse::() { 52 | Ok(record) => Ok(Ok(Poll::Ready(record))), 53 | Err(err) => Ok(Err(SourceError::Parse { 54 | msg: err.to_string(), 55 | })), 56 | }, 57 | Some(Err(err)) => Err(Error::Io { error: err }), 58 | None => Ok(Ok(Poll::Done)), 59 | } 60 | } 61 | fn set_offset(&mut self, _: usize) {} 62 | } 63 | -------------------------------------------------------------------------------- /arcon/src/stream/source/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "kafka")] 2 | pub mod kafka; 3 | pub mod local_file; 4 | pub mod schema; 5 | 6 | use crate::{data::ArconType, error::source::SourceResult}; 7 | 8 | //#[cfg(feature = "socket")] 9 | //pub mod socket; 10 | 11 | /// Enum containing Poll responses for an Arcon source 12 | #[derive(Debug, Clone)] 13 | pub enum Poll { 14 | /// Makes the value `A` available 15 | Ready(A), 16 | /// Tells the runtime there is currently no records to process 17 | Pending, 18 | /// Indicates that the source is finished 19 | Done, 20 | } 21 | 22 | /// Defines an Arcon Source and the methods it must implement 23 | pub trait Source: Send + 'static { 24 | type Item: ArconType; 25 | /// Poll Source for an Item 26 | fn poll_next(&mut self) -> SourceResult>; 27 | /// Set offset for the source 28 | /// 29 | /// May be used by replayable sources to set a certain offset.. 30 | fn set_offset(&mut self, offset: usize); 31 | } 32 | 33 | // Implement Source for IntoIterator 34 | impl Source for I 35 | where 36 | I: Iterator + 'static + Send + 'static, 37 | I::Item: ArconType, 38 | { 39 | type Item = I::Item; 40 | 41 | fn poll_next(&mut self) -> SourceResult> { 42 | match self.next() { 43 | Some(item) => Ok(Ok(Poll::Ready(item))), 44 | None => Ok(Ok(Poll::Done)), 45 | } 46 | } 47 | fn set_offset(&mut self, _: usize) {} 48 | } 49 | 50 | #[cfg(test)] 51 | mod tests { 52 | use super::*; 53 | 54 | #[test] 55 | fn iterator_source_test() { 56 | fn sum(mut s: impl Source) -> u32 { 57 | let mut sum = 0; 58 | while let Poll::Ready(v) = s.poll_next().unwrap().unwrap() { 59 | sum += v; 60 | } 61 | sum 62 | } 63 | let v: Vec = vec![1, 2, 3, 4]; 64 | let sum = sum(v.into_iter()); 65 | assert_eq!(sum, 10); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /arcon/src/stream/source/schema.rs: -------------------------------------------------------------------------------- 1 | use crate::{data::ArconType, error::source::SourceError}; 2 | use arcon_state::backend::serialization::protobuf; 3 | 4 | pub trait SourceSchema: Send + Sync + Clone + 'static { 5 | type Data: ArconType; 6 | 7 | fn from_bytes(bytes: &[u8]) -> Result; 8 | } 9 | 10 | #[cfg(all(feature = "serde_json", feature = "serde"))] 11 | #[derive(Clone)] 12 | pub struct JsonSchema 13 | where 14 | IN: ArconType + ::serde::de::DeserializeOwned, 15 | { 16 | _marker: std::marker::PhantomData, 17 | } 18 | 19 | #[cfg(all(feature = "serde_json", feature = "serde"))] 20 | impl Default for JsonSchema 21 | where 22 | IN: ArconType + ::serde::de::DeserializeOwned, 23 | { 24 | fn default() -> Self { 25 | Self::new() 26 | } 27 | } 28 | 29 | #[cfg(all(feature = "serde_json", feature = "serde"))] 30 | impl JsonSchema 31 | where 32 | IN: ArconType + ::serde::de::DeserializeOwned, 33 | { 34 | pub fn new() -> Self { 35 | Self { 36 | _marker: std::marker::PhantomData, 37 | } 38 
| } 39 | } 40 | 41 | #[cfg(all(feature = "serde_json", feature = "serde"))] 42 | impl SourceSchema for JsonSchema 43 | where 44 | IN: ArconType + ::serde::de::DeserializeOwned, 45 | { 46 | type Data = IN; 47 | 48 | fn from_bytes(bytes: &[u8]) -> Result { 49 | let s = std::str::from_utf8(bytes).map_err(|err| SourceError::Parse { 50 | msg: err.to_string(), 51 | })?; 52 | 53 | match serde_json::from_str(s) { 54 | Ok(data) => Ok(data), 55 | Err(err) => Err(SourceError::Schema { 56 | msg: err.to_string(), 57 | }), 58 | } 59 | } 60 | } 61 | 62 | #[derive(Clone)] 63 | pub struct ProtoSchema 64 | where 65 | IN: ArconType, 66 | { 67 | _marker: std::marker::PhantomData, 68 | } 69 | 70 | impl Default for ProtoSchema 71 | where 72 | IN: ArconType, 73 | { 74 | fn default() -> Self { 75 | Self::new() 76 | } 77 | } 78 | 79 | impl ProtoSchema 80 | where 81 | IN: ArconType, 82 | { 83 | pub fn new() -> Self { 84 | Self { 85 | _marker: std::marker::PhantomData, 86 | } 87 | } 88 | } 89 | 90 | impl SourceSchema for ProtoSchema 91 | where 92 | IN: ArconType, 93 | { 94 | type Data = IN; 95 | 96 | fn from_bytes(bytes: &[u8]) -> Result { 97 | match protobuf::deserialize(bytes) { 98 | Ok(data) => Ok(data), 99 | Err(err) => Err(SourceError::Schema { 100 | msg: err.to_string(), 101 | }), 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /arcon/src/stream/time/mod.rs: -------------------------------------------------------------------------------- 1 | #[derive(PartialEq, Clone, Copy, Debug)] 2 | pub enum ArconTime { 3 | Event, 4 | Process, 5 | } 6 | 7 | impl Default for ArconTime { 8 | fn default() -> Self { 9 | ArconTime::Event 10 | } 11 | } 12 | 13 | #[derive(Clone, Copy)] 14 | pub struct Time(pub u64); 15 | 16 | impl Time { 17 | pub fn seconds(seconds: u64) -> Self { 18 | Time(seconds) 19 | } 20 | pub fn minutes(minutes: u64) -> Self { 21 | Time(minutes * 60) 22 | } 23 | pub fn hours(hours: u64) -> Self { 24 | Time(hours * 60 * 60) 25 | } 26 | pub fn days(days: u64) -> Self { 27 | Time(days * 24 * 60 * 60) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /arcon/src/test/arcon_state.rs: -------------------------------------------------------------------------------- 1 | use crate::prelude::*; 2 | use arcon_macros::ArconState; 3 | use arcon_state::Backend; 4 | use std::sync::Arc; 5 | 6 | #[derive(ArconState)] 7 | pub struct StreamingState { 8 | watermark: LazyValue, 9 | epoch: LazyValue, 10 | counters: HashTable, 11 | #[ephemeral] 12 | emph: u64, 13 | } 14 | 15 | #[test] 16 | fn streaming_state_test() -> ArconResult<()> { 17 | let backend = Arc::new(crate::test_utils::temp_backend::()); 18 | 19 | let mut state = StreamingState { 20 | watermark: LazyValue::new("_watermark", backend.clone()), 21 | epoch: LazyValue::new("_epoch", backend.clone()), 22 | counters: HashTable::new("_counters", backend), 23 | emph: 0, 24 | }; 25 | 26 | state.watermark().put(100)?; 27 | state.epoch().put(1)?; 28 | state.counters().put(10, 1)?; 29 | state.counters().put(12, 2)?; 30 | 31 | let watermark = state.watermark().get()?; 32 | assert_eq!(watermark.unwrap().as_ref(), &100); 33 | let epoch = state.epoch().get()?; 34 | assert_eq!(epoch.unwrap().as_ref(), &1); 35 | assert_eq!(state.counters().get(&10).unwrap(), Some(&1)); 36 | assert_eq!(state.counters().get(&12).unwrap(), Some(&2)); 37 | assert_eq!(state.emph(), &0); 38 | assert!(state.persist().is_ok()); 39 | Ok(()) 40 | } 41 | 
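For reference, the `Time` helpers shown a little earlier (stream/time/mod.rs) are plain unit conversions down to whole seconds, so window lengths compose arithmetically:

```rust
// Time helpers normalize everything to seconds.
assert_eq!(Time::seconds(90).0, 90);
assert_eq!(Time::minutes(2).0, 120);
assert_eq!(Time::hours(1).0, 3_600);
assert_eq!(Time::days(1).0, Time::hours(24).0);
```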
-------------------------------------------------------------------------------- /arcon/src/test/mod.rs: -------------------------------------------------------------------------------- 1 | mod arcon_state; 2 | mod keyby_integration; 3 | -------------------------------------------------------------------------------- /arcon/src/util/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "socket")] 2 | #[allow(dead_code)] 3 | pub mod io; 4 | 5 | pub mod prost_helpers; 6 | pub mod system_killer; 7 | 8 | use std::time::{SystemTime, UNIX_EPOCH}; 9 | 10 | #[inline] 11 | pub fn get_system_time() -> u64 { 12 | let start = SystemTime::now(); 13 | let since_the_epoch = start 14 | .duration_since(UNIX_EPOCH) 15 | .expect("Time went backwards"); 16 | 17 | since_the_epoch.as_millis() as u64 18 | } 19 | 20 | #[cfg(feature = "metrics")] 21 | #[inline] 22 | pub fn get_system_time_nano() -> u64 { 23 | let start = SystemTime::now(); 24 | let since_the_epoch = start 25 | .duration_since(UNIX_EPOCH) 26 | .expect("Time went backwards"); 27 | 28 | since_the_epoch.as_nanos() as u64 29 | } 30 | 31 | pub trait ArconFnBounds: Send + Sync + Clone + 'static {} 32 | impl ArconFnBounds for T where T: Send + Sync + Clone + 'static {} 33 | -------------------------------------------------------------------------------- /arcon/src/util/prost_helpers.rs: -------------------------------------------------------------------------------- 1 | use prost::Message; 2 | 3 | #[cfg_attr(feature = "arcon_serde", derive(serde::Serialize, serde::Deserialize))] 4 | #[derive(Message, Clone)] 5 | pub struct ProstOption { 6 | #[prost(message, tag = "1")] 7 | pub inner: Option, 8 | } 9 | 10 | impl From> for ProstOption { 11 | fn from(inner: Option) -> Self { 12 | ProstOption { inner } 13 | } 14 | } 15 | 16 | impl From> for Option { 17 | fn from(opt: ProstOption) -> Self { 18 | opt.inner 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /arcon/src/util/system_killer.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{ArconEvent, ArconMessage, ArconNever}; 2 | use kompact::prelude::*; 3 | 4 | /// Utility actor that shuts down the underlying kompact system on receiving an [`ArconMessage`] 5 | /// that contains an [`ArconEvent::Death`]. Designed to be connected _after_ a node with sink 6 | /// operator. 
7 | #[derive(ComponentDefinition)] 8 | pub struct SystemKiller { 9 | ctx: ComponentContext, 10 | } 11 | 12 | impl Actor for SystemKiller { 13 | type Message = ArconMessage; 14 | 15 | fn receive_local(&mut self, msg: Self::Message) -> Handled { 16 | for ev in msg.events.as_slice() { 17 | let ev = ev.unwrap_ref(); 18 | match ev { 19 | ArconEvent::Death(s) => { 20 | info!(self.log(), "Received Death event: {}", s); 21 | info!(self.log(), "Shutting down the kompact system"); 22 | self.ctx.system().shutdown_async(); 23 | } 24 | _ => trace!(self.log(), "Ignoring non-death event: {:?}", ev), 25 | } 26 | } 27 | Handled::Ok 28 | } 29 | 30 | fn receive_network(&mut self, _msg: NetMessage) -> Handled { 31 | // TODO: for now we ignore all network messages 32 | Handled::Ok 33 | } 34 | } 35 | 36 | ignore_lifecycle!(SystemKiller); 37 | 38 | impl SystemKiller { 39 | #[allow(dead_code)] 40 | pub fn new() -> SystemKiller { 41 | SystemKiller { 42 | ctx: ComponentContext::uninitialised(), 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /arcon_build/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon_build" 3 | version = "0.1.0" 4 | authors = ["Max Meldrum "] 5 | repository = "https://github.com/cda-group/arcon" 6 | edition = "2018" 7 | 8 | [dependencies] 9 | prost-build = "0.9" 10 | -------------------------------------------------------------------------------- /arcon_build/README.md: -------------------------------------------------------------------------------- 1 | ![ci](https://github.com/cda-group/arcon/workflows/ci/badge.svg) 2 | [![Cargo](https://img.shields.io/badge/crates.io-v0.1.0-orange)](https://crates.io/crates/arcon_build) 3 | 4 | # arcon build 5 | 6 | `arcon_build` is a crate for generating `arcon` data types from .proto files. 7 | -------------------------------------------------------------------------------- /arcon_build/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `arcon_build` is a wrapper around `prost-build` that adds the required attributes for it to work in arcon. 2 | //! 3 | //! ```toml 4 | //! [dependencies] 5 | //! arcon = 6 | //! serde = 7 | //! prost = 8 | //! abomonation = 9 | //! abomonation_derive = 10 | //! 11 | //! [build-dependencies] 12 | //! arcon_build = { version = } 13 | //! ``` 14 | //! 15 | //! ## Example .proto file 16 | //! 17 | //! ```proto 18 | //! syntax = "proto3"; 19 | //! 20 | //! package arcon_data; 21 | //! 22 | //! // unsafe_ser_id = 100 23 | //! // reliable_ser_id = 101 24 | //! // version = 1 25 | //! message Hello { 26 | //! string id = 1 27 | //! } 28 | //! ``` 29 | //! 30 | //! Generate the Rust code by creating a `build.rs` build script and use the 31 | //! `compile_protos` function: 32 | //! 33 | //! ```rust,no_run 34 | //! # use std::io::Result; 35 | //! fn main() -> Result<()> { 36 | //! arcon_build::compile_protos(&["src/path_to_file.proto"], &["src/"])?; 37 | //! Ok(()) 38 | //! } 39 | //! ``` 40 | 41 | use std::{io::Result, path::Path}; 42 | 43 | /// Compile Protobuf files with Arcon configured attributes 44 | pub fn compile_protos
<P>
(protos: &[P], includes: &[P]) -> Result<()> 45 | where 46 | P: AsRef, 47 | { 48 | let mut config = prost_build::Config::new(); 49 | 50 | config.type_attribute(".", "#[derive(arcon::Arcon)]"); 51 | config.compile_protos(protos, includes) 52 | } 53 | -------------------------------------------------------------------------------- /arcon_macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon_macros" 3 | version = "0.2.2" 4 | authors = ["Max Meldrum "] 5 | readme = "README.md" 6 | license = "Apache-2.0" 7 | repository = "https://github.com/cda-group/arcon" 8 | homepage = "https://github.com/cda-group/arcon" 9 | description = """ 10 | Macros for Arcon 11 | """ 12 | edition = "2018" 13 | 14 | [dependencies] 15 | syn = { version = "1.0", features = ["full"] } 16 | quote = "1.0" 17 | proc-macro2 = "1.0" 18 | 19 | [dev-dependencies] 20 | arcon = { version = "0.2", path = "../arcon" } 21 | prost = "0.9" 22 | 23 | [lib] 24 | proc-macro = true 25 | -------------------------------------------------------------------------------- /arcon_macros/README.md: -------------------------------------------------------------------------------- 1 | ![ci](https://github.com/cda-group/arcon/workflows/ci/badge.svg) 2 | [![Cargo](https://img.shields.io/badge/crates.io-v0.2.2-orange)](https://crates.io/crates/arcon_macros) 3 | 4 | # arcon macros 5 | 6 | This crate is not intended to be used directly. It is a part of the [Arcon project](https://github.com/cda-group/arcon) 7 | -------------------------------------------------------------------------------- /arcon_macros/src/arrow.rs: -------------------------------------------------------------------------------- 1 | use proc_macro::TokenStream; 2 | use syn::DeriveInput; 3 | 4 | pub fn derive_arrow(input: TokenStream) -> TokenStream { 5 | let input: DeriveInput = syn::parse(input).unwrap(); 6 | let name = &input.ident; 7 | 8 | if let syn::Data::Struct(ref s) = input.data { 9 | let mut arrow_types = Vec::new(); 10 | let mut builders = Vec::new(); 11 | 12 | if let syn::Fields::Named(ref fields_named) = s.fields { 13 | for (field_pos, field) in fields_named.named.iter().enumerate() { 14 | let ident = field.ident.clone(); 15 | let ty = &field.ty; 16 | let arrow_quote = quote! { ::arcon::Field::new(stringify!(#ident), <#ty as ToArrow>::arrow_type(), false), }; 17 | arrow_types.push(arrow_quote); 18 | 19 | let builder_quote = quote! { 20 | { 21 | let value = self.#ident; 22 | match builder.field_builder::<<#ty as ToArrow>::Builder>(#field_pos) { 23 | Some(b) => b.append_value(value)?, 24 | None => return Err(::arcon::ArrowError::SchemaError(format!("Failed to downcast Arrow Builder"))), 25 | } 26 | } 27 | }; 28 | builders.push(builder_quote); 29 | } 30 | 31 | let timestamp_pos = builders.len(); 32 | 33 | // Add nullable timestamp field 34 | let timestamp_quote = 35 | quote! { ::arcon::Field::new("_timestamp", ::arcon::DataType::UInt64, true), }; 36 | arrow_types.push(timestamp_quote); 37 | 38 | // builder quote for the last timestamp column. 39 | // assumes there is an timestamp: Option in scope 40 | let builder_quote = quote! 
{ 41 | match builder.field_builder::<::arcon::UInt64Builder>(#timestamp_pos) { 42 | Some(b) => { 43 | match timestamp { 44 | Some(ts) => b.append_value(ts)?, 45 | None => b.append_null()?, 46 | } 47 | } 48 | None => return Err(::arcon::ArrowError::SchemaError(format!("Failed to downcast Arrow Builder"))), 49 | } 50 | }; 51 | builders.push(builder_quote); 52 | } else { 53 | panic!("#[derive(Arrow)] requires named fields"); 54 | } 55 | 56 | let generics = &input.generics; 57 | let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); 58 | 59 | let fields: proc_macro2::TokenStream = { 60 | quote! { 61 | vec![#(#arrow_types)*] 62 | } 63 | }; 64 | 65 | let output: proc_macro2::TokenStream = { 66 | quote! { 67 | impl #impl_generics ::arcon::ToArrow for #name #ty_generics #where_clause { 68 | type Builder = ::arcon::StructBuilder; 69 | 70 | fn arrow_type() -> ::arcon::DataType { 71 | ::arcon::DataType::Struct(#fields) 72 | } 73 | fn schema() -> ::arcon::Schema { 74 | ::arcon::Schema::new(#fields) 75 | } 76 | fn append(self, builder: &mut ::arcon::StructBuilder, timestamp: Option<u64>) -> Result<(), ::arcon::ArrowError> { 77 | #(#builders)* 78 | Ok(()) 79 | } 80 | fn table() -> ::arcon::MutableTable { 81 | let builder = ::arcon::StructBuilder::from_fields(#fields, ::arcon::RECORD_BATCH_SIZE); 82 | let table_name = stringify!(#name).to_lowercase(); 83 | ::arcon::MutableTable::new(::arcon::RecordBatchBuilder::new(table_name, Self::schema(), builder)) 84 | } 85 | } 86 | } 87 | }; 88 | 89 | proc_macro::TokenStream::from(output) 90 | } else { 91 | panic!("#[derive(Arrow)] only works for structs"); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /arcon_macros/src/decoder.rs: -------------------------------------------------------------------------------- 1 | use proc_macro::TokenStream; 2 | use syn::{parse_macro_input, DeriveInput}; 3 | 4 | /// Implements [FromStr](std::str::FromStr) for a struct using a delimiter 5 | /// 6 | /// If no delimiter is specified, then `,` is chosen as default. 7 | /// Note: All inner fields of the struct need to implement [std::str::FromStr] for the macro to work. 8 | pub fn derive_decoder(delimiter: TokenStream, input: TokenStream) -> TokenStream { 9 | let item = parse_macro_input!(input as DeriveInput); 10 | let name = &item.ident; 11 | 12 | if let syn::Data::Struct(ref s) = item.data { 13 | let delim_str: String = delimiter.to_string(); 14 | // Set comma as the default if no other delimiter is specified. 15 | let delim = if delim_str.is_empty() { 16 | "," 17 | } else { 18 | &delim_str 19 | }; 20 | 21 | let mut idents = Vec::new(); 22 | for field in s.fields.iter() { 23 | match field.ident { 24 | Some(ref ident) => idents.push((ident.clone(), &field.ty)), 25 | None => panic!("Struct missing identity"), 26 | } 27 | } 28 | 29 | let mut field_quotes = Vec::new(); 30 | for (pos, (ident, ty)) in idents.iter().enumerate() { 31 | let parse = quote! { 32 | string_vec[#pos].parse::<#ty>().map_err(|_| String::from("Failed to parse field"))? 33 | }; 34 | let field_gen = quote! { #ident: #parse }; 35 | field_quotes.push(field_gen); 36 | } 37 | 38 | let from_str = quote! {Self{#(#field_quotes,)*}}; 39 | 40 | let output: proc_macro2::TokenStream = { 41 | quote!
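// Editorial note: the generated impl below splits the input on the delimiter and
// parses each field strictly by position; an input with fewer segments than the
// struct has fields will panic on the `string_vec[#pos]` index rather than return an Err.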
{ 42 | #item 43 | impl ::std::str::FromStr for #name { 44 | type Err = String; 45 | fn from_str(s: &str) -> ::std::result::Result<Self, Self::Err> { 46 | let string_vec: Vec<&str> = s.trim() 47 | .split(#delim) 48 | .collect::<Vec<&str>>() 49 | .iter() 50 | .map(|s| s.trim() as &str) 51 | .collect(); 52 | 53 | Ok(#from_str) 54 | } 55 | } 56 | } 57 | }; 58 | 59 | proc_macro::TokenStream::from(output) 60 | } else { 61 | panic!("#[decoder] is only defined for structs!"); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /arcon_macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::needless_doctest_main)] 2 | 3 | //! The arcon_macros crate contains macros used by arcon. 4 | 5 | #![recursion_limit = "128"] 6 | extern crate proc_macro; 7 | extern crate syn; 8 | #[macro_use] 9 | extern crate quote; 10 | 11 | use proc_macro::TokenStream; 12 | 13 | mod app; 14 | mod arcon; 15 | mod arrow; 16 | mod decoder; 17 | mod proto; 18 | mod state; 19 | 20 | /// Derive macro for declaring an ArconType 21 | /// 22 | /// ## Usage 23 | /// 24 | /// ```rust 25 | /// use arcon::prelude::*; 26 | /// 27 | /// #[arcon::proto] 28 | /// #[derive(Arcon, Clone)] 29 | /// pub struct ArconStruct { 30 | /// pub id: u32, 31 | /// pub timestamp: u64, 32 | /// } 33 | /// ``` 34 | #[proc_macro_derive(Arcon, attributes(arcon))] 35 | pub fn arcon(input: TokenStream) -> TokenStream { 36 | arcon::derive_arcon(input) 37 | } 38 | 39 | /// A macro that helps set up and run an [Application](../arcon/application/struct.Application.html). 40 | /// 41 | /// This macro is meant to simplify the creation of 42 | /// arcon applications that do not require complex configuration. For more flexibility, 43 | /// have a look at [ApplicationBuilder](../arcon/application/builder/struct.ApplicationBuilder.html).
44 | /// 45 | /// ## Usage 46 | /// 47 | /// ### With no arguments 48 | /// 49 | /// ```no_run 50 | /// #[arcon::app] 51 | /// fn main() { 52 | /// (0..100u64) 53 | /// .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)) 54 | /// .map(|x| x * 10) 55 | /// .print() 56 | /// } 57 | /// ``` 58 | /// 59 | /// Expands to the following: 60 | /// 61 | /// ```no_run 62 | /// fn main() { 63 | /// use arcon::prelude::*; 64 | /// let mut builder = (0..100u64) 65 | /// .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)) 66 | /// .map(|x| x * 10) 67 | /// .print() 68 | /// .builder(); 69 | /// 70 | /// builder 71 | /// .build() 72 | /// .run_and_block(); 73 | /// } 74 | /// ``` 75 | #[proc_macro_attribute] 76 | pub fn app(delimiter: TokenStream, input: TokenStream) -> TokenStream { 77 | app::main(delimiter, input) 78 | } 79 | 80 | /// Derive macro for declaring an ArconState 81 | /// 82 | /// ```rust 83 | /// use arcon::prelude::*; 84 | /// 85 | /// #[derive(ArconState)] 86 | /// pub struct StreamingState { 87 | /// values: LazyValue<u64>, 88 | /// } 89 | /// ``` 90 | #[proc_macro_derive(ArconState, attributes(ephemeral, table))] 91 | pub fn state(input: TokenStream) -> TokenStream { 92 | state::derive_state(input) 93 | } 94 | 95 | /// Derive macro for declaring an Arrow convertible type within the Arcon runtime 96 | /// 97 | /// ```rust 98 | /// use arcon::prelude::*; 99 | /// 100 | /// #[derive(Arrow)] 101 | /// pub struct ArrowStruct { 102 | /// pub id: u32, 103 | /// pub name: String, 104 | /// } 105 | /// ``` 106 | #[proc_macro_derive(Arrow)] 107 | pub fn arrow(input: TokenStream) -> TokenStream { 108 | arrow::derive_arrow(input) 109 | } 110 | 111 | /// Implements [std::str::FromStr] for a struct using a delimiter 112 | /// 113 | /// If no delimiter is specified, then `,` is chosen as default. 114 | /// Note: All inner fields of the struct need to implement [std::str::FromStr] for the macro to work. 115 | #[proc_macro_attribute] 116 | pub fn decoder(delimiter: TokenStream, input: TokenStream) -> TokenStream { 117 | decoder::derive_decoder(delimiter, input) 118 | } 119 | 120 | /// Helper macro to make a struct or enum prost-compatible without the need for annotations.
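///
/// As a rough illustration (an assumed sketch, not the macro's exact expansion), it
/// spares you hand-written prost annotations along these lines:
///
/// ```ignore
/// #[derive(Clone, PartialEq, ::prost::Message)]
/// struct Event {
///     #[prost(string, tag = "1")]
///     s: String,
///     // message-typed fields get a corresponding #[prost(message, ...)] attribute
///     #[prost(message, required, tag = "2")]
///     p: Point,
/// }
/// ```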
121 | /// 122 | /// ```rust 123 | /// use arcon::prelude::*; 124 | /// 125 | /// #[arcon::proto] 126 | /// struct Event { 127 | /// s: String, 128 | /// p: Point, 129 | /// } 130 | /// #[arcon::proto] 131 | /// struct Point { 132 | /// x: i32, 133 | /// y: i32, 134 | /// } 135 | /// ``` 136 | #[proc_macro_attribute] 137 | pub fn proto(_: TokenStream, input: TokenStream) -> TokenStream { 138 | proto::derive_proto(input) 139 | } 140 | -------------------------------------------------------------------------------- /arcon_macros/tests/decoder_test.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate arcon_macros; 3 | 4 | #[decoder(,)] 5 | pub struct CommaItem { 6 | id: u64, 7 | price: u32, 8 | } 9 | 10 | #[decoder(;)] 11 | pub struct SemicolonItem { 12 | id: u64, 13 | name: String, 14 | price: u32, 15 | } 16 | 17 | #[test] 18 | fn comma_test() { 19 | use std::str::FromStr; 20 | let item: CommaItem = CommaItem::from_str("100, 250").unwrap(); 21 | assert_eq!(item.id, 100); 22 | assert_eq!(item.price, 250); 23 | } 24 | 25 | #[test] 26 | fn semicolon_test() { 27 | use std::str::FromStr; 28 | let item = SemicolonItem::from_str("100;test;250").unwrap(); 29 | assert_eq!(item.id, 100); 30 | assert_eq!(item.name, String::from("test")); 31 | assert_eq!(item.price, 250); 32 | } 33 | -------------------------------------------------------------------------------- /arcon_tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon_tests" 3 | version = "0.0.0" 4 | authors = ["Max Meldrum "] 5 | edition = "2018" 6 | publish = false 7 | 8 | [dependencies] 9 | arcon = { path = "../arcon" } 10 | prost = "0.9" 11 | 12 | [build-dependencies] 13 | arcon_build = { path = "../arcon_build"} 14 | -------------------------------------------------------------------------------- /arcon_tests/build.rs: -------------------------------------------------------------------------------- 1 | fn main() -> Result<(), Box<dyn std::error::Error>> { 2 | arcon_build::compile_protos(&["src/basic_v3.proto"], &["src/"]).unwrap(); 3 | Ok(()) 4 | } 5 | -------------------------------------------------------------------------------- /arcon_tests/src/basic_v3.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package arcon_basic_v3; 4 | 5 | // unsafe_ser_id = 100 6 | // reliable_ser_id = 101 7 | // version = 1 8 | message Hello { 9 | string id = 1; 10 | } 11 | 12 | // unsafe_ser_id = 102 13 | // reliable_ser_id = 103 14 | // version = 1 15 | // keys = query,page_number 16 | message SearchRequest { 17 | string query = 1; 18 | int32 page_number = 2; 19 | int32 result_per_page = 3; 20 | Hello hello = 4; 21 | repeated string msgs = 5; 22 | } 23 | 24 | // unsafe_ser_id = 104 25 | // reliable_ser_id = 105 26 | // version = 1 27 | message SearchBatch { 28 | repeated SearchRequest requests = 1; 29 | string id = 2; 30 | } 31 | 32 | // unsafe_ser_id = 106 33 | // reliable_ser_id = 107 34 | // version = 1 35 | message NestedMessage { 36 | repeated SearchBatch batches = 1; 37 | bytes raw_bytes = 2; 38 | string id = 3; 39 | } 40 | -------------------------------------------------------------------------------- /arcon_tests/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Verifies that Protobuf files can be converted into an Arcon supported format. 2 | //! 3 | //! NOTE: Only Protobuf version 3 is being tested at the moment.
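//!
//! As an illustrative sketch (assumed usage, not part of the test suite), the
//! generated items are plain prost messages with public fields:
//!
//! ```ignore
//! let hello = basic_v3::Hello { id: String::from("arcon") };
//! assert_eq!(hello.id, "arcon");
//! ```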
4 | pub mod basic_v3 { 5 | include!(concat!(env!("OUT_DIR"), "/arcon_basic_v3.rs")); 6 | } 7 | 8 | /// Tests for deriving `prost` from structs. 9 | mod proto_derive_test; 10 | -------------------------------------------------------------------------------- /arcon_util/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arcon_util" 3 | version = "0.2.0" 4 | readme = "README.md" 5 | license = "Apache-2.0" 6 | repository = "https://github.com/cda-group/arcon" 7 | homepage = "https://github.com/cda-group/arcon" 8 | description = """ 9 | Utilities for Arcon 10 | """ 11 | edition = "2018" 12 | 13 | [features] 14 | default = [] 15 | hasher = ["mur3"] 16 | 17 | [dependencies] 18 | mur3 = { version = "0.1.0", optional = true } -------------------------------------------------------------------------------- /arcon_util/README.md: -------------------------------------------------------------------------------- 1 | ![ci](https://github.com/cda-group/arcon/workflows/ci/badge.svg) 2 | [![Cargo](https://img.shields.io/badge/crates.io-v0.2.0-orange)](https://crates.io/crates/arcon) 3 | 4 | # arcon util 5 | 6 | This crate is not intended to be used directly. It is a part of the [Arcon project](https://github.com/cda-group/arcon) 7 | -------------------------------------------------------------------------------- /arcon_util/src/lib.rs: -------------------------------------------------------------------------------- 1 | /// Alias type for Hasher used to shard data in Arcon 2 | /// 3 | /// Arcon uses MurmurHash3 4 | #[cfg(feature = "hasher")] 5 | pub type KeyHasher = mur3::Hasher32; 6 | 7 | /// Helper function to create [KeyHasher] 8 | #[cfg(feature = "hasher")] 9 | #[inline] 10 | pub fn key_hasher() -> KeyHasher { 11 | KeyHasher::with_seed(0) 12 | } 13 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | msrv = "1.56.1" 2 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Arcon Docs 2 | 3 | This directory contains the Arcon website that is powered by [Zola](https://www.getzola.org/). 4 | 5 | ## Building locally 6 | 7 | First you will need to install Zola. Check the instructions [here](https://www.getzola.org/documentation/getting-started/installation/). 
8 | 9 | Then within this directory, execute the following command: 10 | 11 | ```bash 12 | zola serve 13 | ``` -------------------------------------------------------------------------------- /docs/config.toml: -------------------------------------------------------------------------------- 1 | # The URL the site will be built for 2 | base_url = "https://cda-group.github.io/arcon" 3 | title = "Arcon" 4 | 5 | theme = "adidoks" 6 | 7 | # Whether to automatically compile all Sass files in the sass directory 8 | compile_sass = true 9 | 10 | taxonomies = [ 11 | {name = "authors"}, 12 | ] 13 | 14 | edit_page = false 15 | docs_repo = "https://github.com/cda-group/arcon" 16 | repo_branch = "master" 17 | 18 | # Whether to build a search index to be used later on by a JavaScript library 19 | build_search_index = true 20 | 21 | [markdown] 22 | # Whether to do syntax highlighting 23 | # Theme can be customised by setting the `highlight_theme` variable to a theme supported by Zola 24 | highlight_code = true 25 | 26 | [extra.footer] 27 | info = 'Powered by Zola and AdiDoks' 28 | 29 | [[extra.menu.social]] 30 | name = "GitHub" 31 | pre = '' 32 | url = "https://github.com/cda-group/arcon" 33 | post = "v0.1.0" 34 | weight = 20 35 | 36 | [extra] 37 | # Put all your custom variables here -------------------------------------------------------------------------------- /docs/content/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Arcon" 3 | 4 | # The homepage contents 5 | [extra] 6 | lead = 'State-first Streaming Applications in Rust' 7 | url = "/docs/arcon/about" 8 | url_button = "Learn More" 9 | repo_version = "" 10 | repo_license = "" 11 | repo_url = "" 12 | 13 | # Menu items 14 | [[extra.menu.main]] 15 | name = "Learn" 16 | section = "docs" 17 | url = "/docs/arcon/about" 18 | weight = 20 19 | 20 | [[extra.menu.main]] 21 | name = "Blog" 22 | section = "blog" 23 | url = "/blog/" 24 | weight = 20 25 | 26 | [[extra.list]] 27 | title = "Rust" 28 | content = 'Arcon is written entirely in Rust for performance and memory safety, with no garbage collection.' 29 | 30 | [[extra.list]] 31 | title = "Arrow/Parquet Integration" 32 | content = 'Arcon has first-class integration with both Apache Arrow and Apache Parquet.' 33 | 34 | [[extra.list]] 35 | title = "Temporal Stream State" 36 | content = 'Work-in-Progress Feature' 37 | 38 | [[extra.list]] 39 | title = "Exactly-Once" 40 | content = 'Exactly-Once consistency through epoch snapshotting' 41 | 42 | [[extra.list]] 43 | title = "Modular State Backends" 44 | content = 'Flexible state abstraction that allows multiple backends per application' 45 | 46 | +++ 47 | -------------------------------------------------------------------------------- /docs/content/authors/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Authors" 3 | description = "The authors of the blog articles." 4 | date = 2021-04-01T08:00:00+00:00 5 | updated = 2021-04-01T08:00:00+00:00 6 | draft = false 7 | 8 | # If you add a new author page in this section, please add a new item, 9 | # and the format is as follows: 10 | # 11 | # "author-name-in-url" = "the-full-path-of-the-author-page" 12 | # 13 | # Note: We use quoted keys here. 14 | [extra.author_pages] 15 | "max-meldrum" = "authors/max-meldrum.md" 16 | +++ 17 | 18 | The authors of the blog articles.
19 | -------------------------------------------------------------------------------- /docs/content/authors/max-meldrum.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Max Meldrum" 3 | description = "Lead developer of Arcon." 4 | date = 2021-04-01T08:50:45+00:00 5 | updated = 2021-04-01T08:50:45+00:00 6 | draft = false 7 | +++ 8 | 9 | Lead developer of **Arcon**. 10 | 11 | [@Max-Meldrum](https://github.com/Max-Meldrum) 12 | -------------------------------------------------------------------------------- /docs/content/blog/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Blog" 3 | description = "Blog" 4 | sort_by = "date" 5 | paginate_by = 2 6 | template = "blog/section.html" 7 | +++ 8 | -------------------------------------------------------------------------------- /docs/content/blog/dev_update_22_08_10.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Dev Update" 3 | description = "" 4 | date = 2022-08-10 5 | updated = 2022-08-10 6 | draft = false 7 | template = "blog/page.html" 8 | 9 | [taxonomies] 10 | authors = ["Max Meldrum"] 11 | 12 | [extra] 13 | lead = "" 14 | +++ 15 | 16 | This will be a short blog post giving a dev update on what is going on with Arcon. 17 | 18 | For the last few months, efforts have been put into Arcon's new runtime/kernel with a strong focus on the state management aspect. 19 | 20 | 21 | ## A Non-blocking Async Friendly Runtime 22 | 23 | The initial Arcon executor was implemented using [Kompact](https://github.com/kompics/kompact), a hybrid actor + component framework. And then, similarly to other systems, state management was offloaded to a store like [RocksDB](http://rocksdb.org/). While Kompact and RocksDB are excellent libraries by themselves, they are not a great fit for Arcon due to numerous reasons. 24 | 25 | **Blocking APIs** 26 | Arcon is an I/O bound system that also needs to manage a set of concurrent tasks in a non-blocking cooperative manner. This is not possible to achieve using synchronous frameworks such as Kompact and RocksDB. 27 | 28 | **Decoupled Executor + I/O** 29 | As noted in the following Redpanda blog [post](https://redpanda.com/blog/tpc-buffers), the advances in NVMe disks and Networks have turned the CPU into the new bottleneck of modern storage systems. This has made it essential to couple the CPU scheduler with non-blocking async I/O as a way of maximising CPU utilisation. Also, with a decoupled approach, we have no control over the execution of state compactions as they occur in separate threads. This may cause disturbance to the overall system performance and health. 30 | 31 | **Compilation times** 32 | RocksDB is a heavy project to compile, and as it's not native Rust code, it slows down the development process. 33 | 34 | The first stage is to implement the runtime for a single core, and this is currently what is in the works. 35 | The executor part will not be implemented from scratch, but here we rely on [glommio](https://github.com/DataDog/glommio), a thread-per-core io-uring async runtime. glommio offers a number of great [primitives](https://docs.rs/glommio/latest/glommio/io/index.html) for building storage solutions on top of io_uring and Direct I/O. 36 | 37 | Some other things glommio provides out of the box: 38 | 39 | 1. Cooperative scheduling 40 | 2. Various channels 41 | 3. Networking 42 | 4. Timers 43 | 5. 
Internal stats (I/O and CPU scheduler related). 44 | 45 | Taking a step back and re-architecting Arcon from zero has slowed the overall project progress. 46 | It has been a great learning process as it has given me the chance to dive into various technologies (e.g., io_uring, Direct I/O, Rust async/await). -------------------------------------------------------------------------------- /docs/content/docs/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Docs" 3 | description = "The documents of the AdiDoks theme." 4 | date = 2025-05-01T08:00:00+00:00 5 | updated = 2021-05-01T08:00:00+00:00 6 | sort_by = "weight" 7 | weight = 1 8 | template = "docs/section.html" 9 | +++ 10 | -------------------------------------------------------------------------------- /docs/content/docs/arcon/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Arcon" 3 | description = "Quick start and guides for installing the AdiDoks theme on your preferred operating system." 4 | date = 2025-05-01T08:00:00+00:00 5 | updated = 2021-05-01T08:00:00+00:00 6 | template = "docs/section.html" 7 | sort_by = "weight" 8 | weight = 1 9 | draft = false 10 | +++ 11 | -------------------------------------------------------------------------------- /docs/content/docs/arcon/about.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "About" 3 | description = "" 4 | date = 2021-05-01T18:20:00+00:00 5 | updated = 2021-05-01T18:20:00+00:00 6 | draft = false 7 | weight = 120 8 | sort_by = "weight" 9 | template = "docs/page.html" 10 | 11 | [extra] 12 | lead = "" 13 | toc = true 14 | top = false 15 | +++ 16 | 17 | Arcon is a library for building real-time analytics applications in Rust. The Arcon runtime is based on the Dataflow model, similar to systems such as Apache Flink and Timely Dataflow. 18 | 19 | The Arcon philosophy is state first. Most other streaming systems are output-centric and lack a way of working with internal state with support for time semantics. Arcon’s upcoming TSS query language allows extracting and operating on state snapshots consistently based on application-time constraints and interfacing with other systems for batch and warehouse analytics. 20 | 21 | Key features: 22 | 23 | * Out-of-order Processing 24 | * Event-time & Watermarks 25 | * Epoch Snapshotting for Exactly-once Processing 26 | * Hybrid Row (Protobuf) / Columnar (Arrow) System 27 | * Modular State Backend Abstraction 28 | 29 | 30 | ## Project Status 31 | 32 | We are working towards stabilising the APIs and the runtime. 33 | 34 | Arcon is in development and should be considered experimental until further notice. 35 | 36 | ## Background 37 | 38 | Arcon is a research project developed at KTH Royal Institute of Technology and RISE Research Institutes of Sweden in Stockholm, Sweden. -------------------------------------------------------------------------------- /docs/content/docs/arcon/community.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Community" 3 | description = "" 4 | date = 2021-05-01T18:20:00+00:00 5 | updated = 2021-05-01T18:20:00+00:00 6 | draft = false 7 | weight = 420 8 | sort_by = "weight" 9 | template = "docs/page.html" 10 | 11 | [extra] 12 | lead = "" 13 | toc = true 14 | top = false 15 | +++ 16 | 17 | Arcon is an ambitious project with many different development & research areas.
If you find Arcon interesting and want to learn more, then join the [Zulip](https://arcon.zulipchat.com) community! -------------------------------------------------------------------------------- /docs/content/docs/arcon/roadmap.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Roadmap" 3 | description = "" 4 | date = 2021-05-01T18:20:00+00:00 5 | updated = 2021-05-01T18:20:00+00:00 6 | draft = false 7 | weight = 320 8 | sort_by = "weight" 9 | template = "docs/page.html" 10 | 11 | [extra] 12 | lead = "" 13 | toc = true 14 | top = false 15 | +++ 16 | 17 | We use Github for managing the roadmap and you may find it [here](https://github.com/cda-group/arcon/projects/1). -------------------------------------------------------------------------------- /docs/content/docs/getting-started/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Getting Started" 3 | description = "Quick start and guides for installing the AdiDoks theme on your preferred operating system." 4 | date = 2025-05-01T08:00:00+00:00 5 | updated = 2021-05-01T08:00:00+00:00 6 | template = "docs/section.html" 7 | sort_by = "weight" 8 | weight = 2 9 | draft = false 10 | +++ 11 | -------------------------------------------------------------------------------- /docs/content/docs/getting-started/data-types.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Data Types" 3 | description = "Arcon Data Types" 4 | date = 2022-03-29T08:20:00+00:00 5 | updated = 2022-03-29T08:20:00+00:00 6 | draft = false 7 | weight = 20 8 | sort_by = "weight" 9 | template = "docs/page.html" 10 | 11 | [extra] 12 | lead = "This section covers Arcon's data types" 13 | toc = true 14 | top = false 15 | +++ 16 | 17 | 18 | ## Overview 19 | 20 | Arcon is a hybrid row and columnar system. 21 | 22 | |Type | Format | Usage | Crate | 23 | |---|---| --- | --- | 24 | |Row | Protobuf | Ingestion / OLTP | [prost](https://github.com/tokio-rs/prost) | 25 | |Columnar | Arrow/Parquet | Warehousing / Window analytics | [arrow](https://github.com/apache/arrow-rs) | 26 | 27 | ## Declaring an Arcon Type 28 | 29 | Types inside the Arcon runtime must implement the [ArconType](https://docs.rs/arcon/latest/arcon/prelude/trait.ArconType.html) trait. To define an ArconType, one can use the [Arcon](https://docs.rs/arcon/latest/arcon/derive.Arcon.html) derive macro. 30 | 31 | 32 | ```rust 33 | use arcon::prelude::*; 34 | 35 | #[arcon::proto] 36 | #[derive(Arcon, Copy, Clone)] 37 | pub struct Event { 38 | id: u64, 39 | data: f32, 40 | timestamp: u64, 41 | } 42 | ``` 43 | 44 | The [arcon::proto](https://docs.rs/arcon/latest/arcon/attr.proto.html) attribute macro helps remove some boilerplate annotations required by the ``prost`` crate. Note that the Arcon derive macro needs ``prost`` to be in scope. Make sure it's listed as a dependency: 45 | 46 | ```toml 47 | [dependencies] 48 | arcon = "0.2" 49 | prost = "0.9" 50 | ``` 51 | 52 | 53 | ## Enabling Arrow 54 | 55 | Data types within Arcon are not required to support Arrow conversion. However, the [Arrow](https://docs.rs/arcon/latest/arcon/derive.Arrow.html) derive macro signals that it's an Arrow type to the runtime. 56 | Arrow analytics is limited as of now. You can follow the progress [here](https://github.com/cda-group/arcon/issues/285). 
57 | 58 | ```rust 59 | use arcon::prelude::*; 60 | 61 | #[arcon::proto] 62 | #[derive(Arcon, Arrow, Copy, Clone)] 63 | pub struct ArrowEvent { 64 | id: u64, 65 | data: f32, 66 | } 67 | ``` -------------------------------------------------------------------------------- /docs/content/docs/getting-started/quick-start.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Quick Start" 3 | description = "Arcon Quick Start" 4 | date = 2021-05-01T08:20:00+00:00 5 | updated = 2021-05-01T08:20:00+00:00 6 | draft = false 7 | weight = 10 8 | sort_by = "weight" 9 | template = "docs/page.html" 10 | 11 | [extra] 12 | lead = "This section covers the steps of getting started with Arcon." 13 | toc = true 14 | top = false 15 | +++ 16 | 17 | # Setting up Rust 18 | 19 | The easiest way to install Rust is to use the [rustup](https://rustup.rs/) tool. 20 | 21 | # Cargo 22 | 23 | Add Arcon as a dependency in your ``Cargo.toml``. Note that we also include [prost](https://github.com/tokio-rs/prost) as it is required when declaring more complex Arcon data types. 24 | 25 | ```toml 26 | [dependencies] 27 | arcon = "0.2" 28 | prost = "0.9" 29 | ``` 30 | 31 | # Defining an Arcon application 32 | 33 | ```rust 34 | #[arcon::app] 35 | fn main() { 36 | (0..100u64) 37 | .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)) 38 | .filter(|x| *x > 50) 39 | .map(|x| x * 10) 40 | .print() 41 | } 42 | ``` -------------------------------------------------------------------------------- /docs/content/privacy-policy/_index.md: -------------------------------------------------------------------------------- 1 | +++ 2 | title = "Privacy Policy" 3 | description = "We do not use cookies and we do not collect any personal data." 4 | date = 2021-05-01T08:00:00+00:00 5 | updated = 2020-05-01T08:00:00+00:00 6 | draft = false 7 | 8 | [extra] 9 | class = "page single" 10 | +++ 11 | 12 | __TLDR__: We do not use cookies and we do not collect any personal data. 13 | 14 | ## Website visitors 15 | 16 | - No personal information is collected. 17 | - No information is stored in the browser. 18 | - No information is shared with, sent to or sold to third parties. 19 | - No information is shared with advertising companies. 20 | - No information is mined and harvested for personal and behavioral trends. 21 | - No information is monetized. 22 | 23 | ## Contact us 24 | 25 | [Contact us](https://github.com/aaranxu/adidoks) if you have any questions.
26 | 27 | Effective Date: _1st May 2021_ 28 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples" 3 | version = "0.0.0" 4 | authors = ["Max Meldrum "] 5 | edition = "2018" 6 | publish = false 7 | 8 | [features] 9 | kafka = ["arcon/kafka"] 10 | 11 | [dev-dependencies] 12 | arcon = { path = "../arcon" } 13 | prost = "0.9" 14 | 15 | [[example]] 16 | name = "custom_operator" 17 | path = "custom_operator.rs" 18 | 19 | [[example]] 20 | name = "stateful" 21 | path = "stateful.rs" 22 | 23 | [[example]] 24 | name = "window" 25 | path = "window.rs" 26 | 27 | [[example]] 28 | name = "kafka_source" 29 | path = "kafka_source.rs" 30 | required-features = ["kafka"] 31 | 32 | [[example]] 33 | name = "file" 34 | path = "file.rs" -------------------------------------------------------------------------------- /examples/custom_operator.rs: -------------------------------------------------------------------------------- 1 | use arcon::{ignore_timeout, prelude::*}; 2 | 3 | #[arcon::proto] 4 | #[derive(Arcon, Copy, Clone)] 5 | pub struct CustomEvent { 6 | pub id: u64, 7 | } 8 | 9 | #[derive(Default)] 10 | pub struct MyOperator; 11 | 12 | impl Operator for MyOperator { 13 | type IN = u64; 14 | type OUT = CustomEvent; 15 | type TimerState = ArconNever; 16 | type OperatorState = EmptyState; 17 | type ElementIterator = std::iter::Once<ArconElement<Self::OUT>>; 18 | 19 | fn handle_element( 20 | &mut self, 21 | element: ArconElement<Self::IN>, 22 | _ctx: &mut OperatorContext, 23 | ) -> ArconResult<Self::ElementIterator> { 24 | let custom_event = CustomEvent { id: element.data }; 25 | 26 | Ok(std::iter::once(ArconElement { 27 | data: custom_event, 28 | timestamp: element.timestamp, 29 | })) 30 | } 31 | 32 | ignore_timeout!(); 33 | } 34 | 35 | #[derive(Default)] 36 | pub struct TimerOperator; 37 | 38 | impl Operator for TimerOperator { 39 | type IN = CustomEvent; 40 | type OUT = CustomEvent; 41 | type TimerState = u64; 42 | type OperatorState = EmptyState; 43 | type ElementIterator = std::iter::Once<ArconElement<Self::OUT>>; 44 | 45 | fn handle_element( 46 | &mut self, 47 | element: ArconElement<Self::IN>, 48 | ctx: &mut OperatorContext, 49 | ) -> ArconResult<Self::ElementIterator> { 50 | let current_time = ctx.current_time()?; 51 | let time = current_time + 1000; 52 | 53 | if let Err(err) = ctx.schedule_at(time, element.data.id)?
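// Editorial note: `schedule_at` yields a nested result; the outer `?` propagates
// runtime errors, while an inner `Err` (handled below) presumably means this
// particular timer entry could not be scheduled.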
{ 54 | error!(ctx.log(), "Failed to schedule timer with err {}", err); 55 | } 56 | 57 | Ok(std::iter::once(element)) 58 | } 59 | 60 | fn handle_timeout( 61 | &mut self, 62 | timeout: Self::TimerState, 63 | ctx: &mut OperatorContext, 64 | ) -> ArconResult<Option<Self::ElementIterator>> { 65 | info!(ctx.log(), "Got a timer timeout for {:?}", timeout); 66 | Ok(None) 67 | } 68 | } 69 | 70 | #[arcon::app] 71 | fn main() { 72 | (0u64..10000000) 73 | .to_stream(|conf| { 74 | conf.set_timestamp_extractor(|x: &u64| *x); 75 | }) 76 | .operator(OperatorBuilder { 77 | operator: Arc::new(|| MyOperator), 78 | state: Arc::new(|_| EmptyState), 79 | conf: Default::default(), 80 | }) 81 | .operator(OperatorBuilder { 82 | operator: Arc::new(|| TimerOperator), 83 | state: Arc::new(|_| EmptyState), 84 | conf: Default::default(), 85 | }) 86 | .measure(1000000) 87 | } 88 | -------------------------------------------------------------------------------- /examples/file.rs: -------------------------------------------------------------------------------- 1 | #[arcon::app(name = "file")] 2 | fn main() { 3 | LocalFileSource::new("file_path") 4 | .to_stream(|conf| conf.set_arcon_time(ArconTime::Process)) 5 | .filter(|x| *x > 50) 6 | .map(|x: i32| x * 10) 7 | .print() 8 | } 9 | -------------------------------------------------------------------------------- /examples/kafka_source.rs: -------------------------------------------------------------------------------- 1 | #[arcon::app] 2 | fn main() { 3 | let consumer_conf = KafkaConsumerConf::default() 4 | .with_topic("test") 5 | .set("group.id", "test") 6 | .set("bootstrap.servers", "localhost:9092") 7 | .set("enable.auto.commit", "false"); 8 | 9 | let parallelism = 2; 10 | 11 | KafkaSource::new(consumer_conf, ProtoSchema::new(), parallelism) 12 | .to_stream(|conf| { 13 | conf.set_arcon_time(ArconTime::Event); 14 | conf.set_timestamp_extractor(|x: &u64| *x); 15 | }) 16 | .map(|x| x + 10) 17 | .print() 18 | } 19 | -------------------------------------------------------------------------------- /examples/stateful.rs: -------------------------------------------------------------------------------- 1 | use arcon::prelude::*; 2 | 3 | #[arcon::proto] 4 | #[derive(Arcon, Arrow, Copy, Clone)] 5 | pub struct Event { 6 | pub id: u64, 7 | pub data: f32, 8 | } 9 | 10 | #[derive(ArconState)] 11 | pub struct MyState<B: Backend> { 12 | #[table = "events"] 13 | events: EagerValue<Event, B>, 14 | } 15 | 16 | #[arcon::app] 17 | fn main() { 18 | (0..1000000) 19 | .map(|x| Event { id: x, data: 1.5 }) 20 | .to_stream(|conf| { 21 | conf.set_timestamp_extractor(|x: &Event| x.id); 22 | }) 23 | .key_by(|event: &Event| &event.id) 24 | .operator(OperatorBuilder { 25 | operator: Arc::new(|| { 26 | Map::stateful(|event, state: &mut MyState<_>| { 27 | state.events().put(event)?; 28 | Ok(event) 29 | }) 30 | }), 31 | state: Arc::new(|backend| MyState { 32 | events: EagerValue::new("_events", backend), 33 | }), 34 | conf: Default::default(), 35 | }) 36 | .ignore() 37 | } 38 | -------------------------------------------------------------------------------- /examples/window.rs: -------------------------------------------------------------------------------- 1 | fn window_sum(buffer: &[u64]) -> u64 { 2 | buffer.iter().sum() 3 | } 4 | 5 | #[arcon::app] 6 | fn main() { 7 | (0u64..100000) 8 | .to_stream(|conf| { 9 | conf.set_arcon_time(ArconTime::Event); 10 | conf.set_timestamp_extractor(|x: &u64| *x); 11 | }) 12 | .operator(OperatorBuilder { 13 | operator: Arc::new(|| { 14 | let conf = WindowConf { 15 | assigner: Assigner::Sliding { 16 | length:
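// Editorial note: a window of length 1000s sliding every 500s, so each event
// falls into two overlapping windows; late_arrival of 0s presumably admits no
// events arriving behind the watermark.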
Time::seconds(1000), 17 | slide: Time::seconds(500), 18 | late_arrival: Time::seconds(0), 19 | }, 20 | }; 21 | WindowAssigner::new(conf) 22 | }), 23 | state: Arc::new(|backend| { 24 | let index = AppenderWindow::new(backend.clone(), &window_sum); 25 | WindowState::new(index, backend) 26 | }), 27 | conf: OperatorConf::default(), 28 | }) 29 | .print() 30 | } 31 | -------------------------------------------------------------------------------- /run_checks.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o xtrace 4 | set -e 5 | 6 | cargo test --all --all-features 7 | cargo check --benches --all --all-features 8 | cargo fmt --all -- --check 9 | cargo clippy --workspace --all-targets --all-features -- -D warnings 10 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | use_field_init_shorthand = true 2 | --------------------------------------------------------------------------------