├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── runtime_benchmark.rs ├── crates ├── fluxus-api │ ├── Cargo.toml │ ├── README.md │ ├── src │ │ ├── io │ │ │ ├── collection_sink.rs │ │ │ ├── collection_source.rs │ │ │ └── mod.rs │ │ ├── lib.rs │ │ ├── operators │ │ │ ├── filter.rs │ │ │ ├── flat_map.rs │ │ │ ├── map.rs │ │ │ ├── mod.rs │ │ │ ├── window_aggregator.rs │ │ │ ├── window_skipper.rs │ │ │ └── window_sorter.rs │ │ └── stream │ │ │ ├── datastream.rs │ │ │ ├── mod.rs │ │ │ └── windowed_stream.rs │ └── tests │ │ ├── datastreams_test.rs │ │ ├── filter_test.rs │ │ └── windowed_stream_test.rs ├── fluxus-core │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── config.rs │ │ ├── error_handling │ │ ├── backpressure.rs │ │ ├── mod.rs │ │ └── retry_strategy.rs │ │ ├── lib.rs │ │ ├── metrics.rs │ │ └── pipeline │ │ ├── mod.rs │ │ ├── processor.rs │ │ └── status.rs ├── fluxus-runtime │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── lib.rs │ │ ├── runtime.rs │ │ ├── state.rs │ │ └── watermark.rs ├── fluxus-sinks │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── buffered.rs │ │ ├── console.rs │ │ ├── dummy_sink.rs │ │ ├── file.rs │ │ └── lib.rs ├── fluxus-sources │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── csv.rs │ │ ├── generator.rs │ │ └── lib.rs ├── fluxus-transformers │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── lib.rs │ │ ├── operator │ │ ├── builder.rs │ │ ├── filter.rs │ │ ├── map.rs │ │ ├── mod.rs │ │ ├── window_match.rs │ │ └── window_reduce.rs │ │ ├── transform_base.rs │ │ ├── transform_source.rs │ │ └── transform_source_with_operator.rs ├── fluxus-utils │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── error_converters.rs │ │ ├── lib.rs │ │ ├── models.rs │ │ ├── time.rs │ │ └── window.rs └── fluxus │ ├── Cargo.toml │ ├── README.md │ └── src │ └── lib.rs ├── docs ├── DESIGN.md ├── Logo.md ├── architecture.png └── images │ └── fluxus-logo.png └── examples ├── README.md ├── click-stream ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── event-timestamp ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── iot-devices ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── log-anomaly ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── network-log ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── remote-csv ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── stock-market ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── temperature-sensor ├── Cargo.toml ├── README.md └── src │ └── main.rs └── word-count ├── Cargo.toml ├── README.md └── src └── main.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | permissions: 3 | contents: read 4 | pull-requests: write 5 | 6 | on: 7 | push: 8 | branches: [ "main", "develop" ] 9 | pull_request: 10 | branches: [ "main", "develop" ] 11 | release: 12 | types: [ published ] 13 | 14 | env: 15 | CARGO_TERM_COLOR: always 16 | 17 | jobs: 18 | build: 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Install Rust toolchain 24 | uses: actions-rs/toolchain@v1 25 | with: 26 | toolchain: stable 27 | components: rustfmt, clippy 28 | override: true 29 | - name: Check formatting 30 | run: cargo fmt -- --check 31 | - name: Run clippy 32 | run: cargo clippy -- -D warnings 33 | - name: Build 34 | run: cargo build --verbose 35 | - name: Run tests 36 | run: cargo test --verbose 37 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .DS_Store 3 | .idea/ 4 | 5 | # Added by cargo 6 | # 7 | # already existing elements were commented out 8 | 9 | #/target 10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["crates/*", "examples/*"] 3 | 4 | resolver = "2" 5 | 6 | [workspace.package] 7 | version = "0.2.0" 8 | edition = "2024" 9 | license = "Apache-2.0" 10 | authors = ["Fluxus Team"] 11 | description = "Fluxus is a stream processing engine that provides a declarative and efficient way to process and analyze data streams." 12 | homepage = "https://github.com/lispking/fluxus" 13 | repository = "https://github.com/lispking/fluxus" 14 | readme = "README.md" 15 | categories = ["database", "development-tools", "asynchronous", "science"] 16 | keywords = [ 17 | "stream-processing", 18 | "real-time", 19 | "data-processing", 20 | "analytics", 21 | "async", 22 | ] 23 | 24 | [workspace.metadata.release] 25 | publish = true 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Fluxus Logo 3 |

4 | 5 | # Fluxus Stream Processing Engine 6 | 7 | [![Crates.io](https://img.shields.io/crates/v/fluxus-core.svg)](https://crates.io/crates/fluxus-core) 8 | [![Documentation](https://docs.rs/fluxus-core/badge.svg)](https://docs.rs/fluxus-core) 9 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache2.0-yellow.svg)](https://opensource.org/license/apache-2-0) 10 | [![Build Status](https://github.com/lispking/fluxus/actions/workflows/ci.yml/badge.svg)](https://github.com/lispking/fluxus/actions?query=branch%3Amain) 11 | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/lispking/fluxus) 12 | 13 | 14 | Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real-time data processing and analysis. 15 | 16 | ![Fluxus Architecture](docs/architecture.png) 17 | 18 | ## Features 19 | 20 | - High-performance stream processing 21 | - Flexible windowing operations (Tumbling, Sliding, Session windows) 22 | - Parallel processing support 23 | - Rich set of stream operations (map, filter, aggregate) 24 | - Type-safe API 25 | - Easy to use and extend 26 | 27 | ## Project Structure 28 | 29 | - `crates/fluxus` - Main crate containing the Fluxus engine and its dependencies 30 | - `crates/fluxus-api` - Core API definitions and interfaces 31 | - `crates/fluxus-core` - Core implementations and data structures 32 | - `crates/fluxus-runtime` - Runtime engine and execution environment 33 | - `crates/fluxus-sinks` - Sink implementations for different data sinks (e.g., Console, File) 34 | - `crates/fluxus-sources` - Source implementations for different data sources (e.g., CSV, Generator) 35 | - `crates/fluxus-transformers` - Transformations for stream processing (e.g., map, filter, aggregate) 36 | - `crates/fluxus-utils` - Utility functions and helpers 37 | - `examples` - Example applications demonstrating usage 38 |
39 | ## Examples 40 | 41 | The project includes several example applications that demonstrate different use cases: 42 | 43 | ### Word Count 44 | 45 | Simple word frequency analysis in text streams using tumbling windows. 46 | 47 | ```bash 48 | cargo run --example word-count 49 | ``` 50 | 51 | ### Temperature Sensor Analysis 52 | 53 | Processing and analyzing temperature sensor data with sliding windows. 54 | 55 | ```bash 56 | cargo run --example temperature-sensor 57 | ``` 58 | 59 | ### Click Stream Analysis 60 | 61 | Analyzing user click streams with session windows. 62 | 63 | ```bash 64 | cargo run --example click-stream 65 | ``` 66 | 67 | ### Network Log Analysis 68 | 69 | Processing network logs with sliding windows and aggregations. 70 | 71 | ```bash 72 | cargo run --example network-log 73 | ``` 74 | 75 | ### Remote CSV Data Processing 76 | 77 | Processing CSV data from remote sources like GitHub. 78 | 79 | ```bash 80 | cargo run --example remote-csv 81 | ``` 82 | 83 | ### View Available Examples 84 | 85 | To see all available examples and options: 86 | 87 | ```bash 88 | cargo run --example 89 | ``` 90 | 91 | ## Using Fluxus in Your Project 92 | 93 | To use Fluxus in your project, add it as a dependency using cargo: 94 | 95 | ```bash 96 | cargo add fluxus --features full 97 | ``` 98 | 99 | This will add Fluxus with all available features to your project. After adding the dependency, you can start using Fluxus in your code. Check out the examples section above and the quick example at the end of this README for usage. 100 |
101 | ## Getting Started 102 | 103 | 1. Clone the repository: 104 | 105 | ```bash 106 | git clone https://github.com/lispking/fluxus.git 107 | cd fluxus 108 | ``` 109 | 110 | 2. Build the project: 111 | 112 | ```bash 113 | cargo build 114 | ``` 115 | 116 | 3. Run the examples: 117 | 118 | ```bash 119 | cargo run --example [example-name] 120 | ``` 121 | 122 | ## Development 123 | 124 | ### Prerequisites 125 | 126 | - Rust 1.85+ (required by the Rust 2024 edition used in this workspace) 127 | - Cargo 128 | 129 | ### Building 130 | 131 | ```bash 132 | cargo build 133 | ``` 134 | 135 | ### Testing 136 | 137 | ```bash 138 | cargo test 139 | ``` 140 | 141 | ## License 142 | 143 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 144 | 145 | ## Star History 146 | 147 | [![Star History Chart](https://api.star-history.com/svg?repos=lispking/fluxus&type=Date)](https://www.star-history.com/#lispking/fluxus&Date) 148 | 149 | ### Thank you for your support and participation ❤️ 150 | 151 | 
152 | 153 | 154 | 155 |
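## Quick Example

A minimal pipeline sketch using the in-memory `CollectionSource` and `CollectionSink` helpers (adapted from the `fluxus-api` test suite). It assumes a Tokio runtime and that you depend on `fluxus-api` directly; if you use the `fluxus` facade crate, adjust the import paths to its re-exports.

```rust
use fluxus_api::{CollectionSink, CollectionSource, DataStream};

#[tokio::main]
async fn main() {
    // In-memory source; swap in a CSV or generator source for real workloads.
    let source = CollectionSource::new(vec![1, 2, 3, 4, 5]);
    let sink = CollectionSink::new();

    DataStream::new(source)
        .filter(|x| x % 2 == 0) // keep even numbers
        .map(|x| x * 10)        // transform each record
        .sink(sink.clone())     // drive the pipeline and collect the results
        .await
        .unwrap();

    println!("{:?}", sink.get_data()); // [20, 40]
}
```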
156 | -------------------------------------------------------------------------------- /benches/runtime_benchmark.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use criterion::{Criterion, criterion_group, criterion_main}; 3 | use fluxus_core::ParallelConfig; 4 | use fluxus_runtime::RuntimeContext; 5 | use fluxus_sinks::Sink; 6 | use fluxus_sources::Source; 7 | use fluxus_transformers::Operator; 8 | use fluxus_utils::models::{Record, StreamResult}; 9 | use std::sync::Arc; 10 | use tokio::sync::Mutex; 11 | 12 | // Dummy Source for benchmarking 13 | pub struct DummySource { 14 | data: Vec, 15 | index: usize, 16 | } 17 | 18 | #[async_trait] 19 | impl Source for DummySource { 20 | async fn init(&mut self) -> StreamResult<()> { 21 | Ok(()) 22 | } 23 | 24 | async fn next(&mut self) -> StreamResult>> { 25 | if self.index < self.data.len() { 26 | let record = Record::new(self.data[self.index]); 27 | self.index += 1; 28 | Ok(Some(record)) 29 | } else { 30 | Ok(None) 31 | } 32 | } 33 | 34 | async fn close(&mut self) -> StreamResult<()> { 35 | Ok(()) 36 | } 37 | } 38 | 39 | // Dummy Operator for benchmarking 40 | pub struct DummyOperator; 41 | 42 | #[async_trait] 43 | impl Operator for DummyOperator { 44 | async fn process(&mut self, record: Record) -> StreamResult>> { 45 | Ok(vec![record]) 46 | } 47 | 48 | async fn close(&mut self) -> StreamResult<()> { 49 | Ok(()) 50 | } 51 | } 52 | 53 | // Dummy Sink for benchmarking 54 | pub struct DummySink; 55 | 56 | #[async_trait] 57 | impl Sink for DummySink { 58 | async fn init(&mut self) -> StreamResult<()> { 59 | Ok(()) 60 | } 61 | async fn write(&mut self, _record: Record) -> StreamResult<()> { 62 | Ok(()) 63 | } 64 | async fn flush(&mut self) -> StreamResult<()> { 65 | Ok(()) 66 | } 67 | async fn close(&mut self) -> StreamResult<()> { 68 | Ok(()) 69 | } 70 | } 71 | 72 | fn criterion_benchmark(c: &mut Criterion) { 73 | let runtime = tokio::runtime::Runtime::new().unwrap(); 74 | let data_size = 10_000; 75 | let data: Vec = (0..data_size).collect(); 76 | 77 | c.bench_function("pipeline_execution", |b| { 78 | b.iter(|| { 79 | runtime.block_on(async { 80 | let parallel_config = ParallelConfig::default(); 81 | let runtime_context = RuntimeContext::new(parallel_config); 82 | 83 | let source = DummySource { 84 | data: data.clone(), 85 | index: 0, 86 | }; 87 | let operators: Vec + Send + Sync>>> = vec![ 88 | Arc::new(Mutex::new(DummyOperator)), 89 | Arc::new(Mutex::new(DummyOperator)), 90 | ]; 91 | let sink = DummySink; 92 | 93 | runtime_context 94 | .execute_pipeline(source, operators, sink) 95 | .await 96 | .unwrap(); 97 | }) 98 | }); 99 | }); 100 | } 101 | 102 | criterion_group!(benches, criterion_benchmark); 103 | criterion_main!(benches); 104 | -------------------------------------------------------------------------------- /crates/fluxus-api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-api" 3 | description = "High-level API for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | fluxus-core = { path = "../fluxus-core", version="0.2" } 13 | fluxus-runtime = { path = "../fluxus-runtime", version="0.2" } 14 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 15 | fluxus-sinks = { path = "../fluxus-sinks", 
version="0.2" } 16 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 17 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2" } 18 | 19 | tokio = { version = "1", features = ["full"] } 20 | futures = "0.3" 21 | async-trait = "0.1" 22 | anyhow = "1.0" 23 | tracing = "0.1" 24 | serde = { version = "1.0", features = ["derive"] } 25 | serde_json = "1.0" 26 | 27 | [dev-dependencies] 28 | tokio-test = "0.4.4" 29 | -------------------------------------------------------------------------------- /crates/fluxus-api/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus API 2 | 3 | Core API definitions and interfaces for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides the public API for building stream processing applications with Fluxus. It includes: 8 | 9 | - `DataStream` - The main abstraction for working with data streams 10 | - Source and Sink interfaces 11 | - Stream operations (map, filter, aggregate, etc.) 12 | - Window configurations 13 | - I/O utilities 14 | 15 | ## Key Components 16 | 17 | ### DataStream 18 | 19 | The `DataStream` type is the main entry point for building stream processing pipelines: 20 | 21 | ```rust 22 | DataStream::new(source) 23 | .map(|x| x * 2) 24 | .filter(|x| x > 0) 25 | .window(WindowConfig::Tumbling { size_ms: 1000 }) 26 | .aggregate(initial_state, |state, value| /* aggregation logic */) 27 | .sink(sink) 28 | ``` 29 | 30 | ### Windows 31 | 32 | Supported window types: 33 | - Tumbling Windows - Fixed-size, non-overlapping windows 34 | - Sliding Windows - Fixed-size windows that slide by a specified interval 35 | - Session Windows - Dynamic windows based on activity timeouts 36 | 37 | ### I/O 38 | 39 | Pre-built source and sink implementations: 40 | - `CollectionSource` - Create a stream from a collection 41 | - `CollectionSink` - Collect stream results into a collection 42 | - Additional I/O implementations for files, networks, etc. 43 | 44 | ## Usage 45 | 46 | Add this to your `Cargo.toml`: 47 | 48 | ```toml 49 | [dependencies] 50 | fluxus-api = "0.2" 51 | ``` 52 | 53 | See the `fluxus-examples` crate for complete usage examples. 
-------------------------------------------------------------------------------- /crates/fluxus-api/src/io/collection_sink.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sinks::Sink; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::sync::{Arc, Mutex}; 5 | 6 | /// A sink that collects elements into a Vec 7 | #[derive(Default, Clone)] 8 | pub struct CollectionSink { 9 | data: Arc>>, 10 | } 11 | 12 | impl CollectionSink { 13 | pub fn new() -> Self { 14 | Self { 15 | data: Arc::new(Mutex::new(Vec::new())), 16 | } 17 | } 18 | 19 | pub fn get_data(&self) -> Vec 20 | where 21 | T: Clone, 22 | { 23 | self.data 24 | .lock() 25 | .map_or_else(|p| p.into_inner().clone(), |d| d.clone()) 26 | } 27 | 28 | pub fn get_last_element(&self) -> Option 29 | where 30 | T: Clone, 31 | { 32 | self.data 33 | .lock() 34 | .map_or_else(|p| p.into_inner().last().cloned(), |d| d.last().cloned()) 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl Sink for CollectionSink 40 | where 41 | T: Clone + Send + Sync + 'static, 42 | { 43 | async fn init(&mut self) -> StreamResult<()> { 44 | Ok(()) 45 | } 46 | 47 | async fn write(&mut self, record: Record) -> StreamResult<()> { 48 | if let Ok(mut data) = self.data.lock() { 49 | data.push(record.data) 50 | } 51 | Ok(()) 52 | } 53 | 54 | async fn flush(&mut self) -> StreamResult<()> { 55 | Ok(()) 56 | } 57 | 58 | async fn close(&mut self) -> StreamResult<()> { 59 | Ok(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/io/collection_source.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sources::Source; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use fluxus_utils::time::current_time; 5 | use std::collections::VecDeque; 6 | 7 | /// A source that produces elements from a collection 8 | pub struct CollectionSource { 9 | data: VecDeque, 10 | } 11 | 12 | impl CollectionSource { 13 | pub fn new(data: impl IntoIterator) -> Self { 14 | Self { 15 | data: data.into_iter().collect(), 16 | } 17 | } 18 | } 19 | 20 | #[async_trait] 21 | impl Source for CollectionSource 22 | where 23 | T: Clone + Send + Sync + 'static, 24 | { 25 | async fn init(&mut self) -> StreamResult<()> { 26 | Ok(()) 27 | } 28 | 29 | async fn next(&mut self) -> StreamResult>> { 30 | let value = self.data.pop_front(); 31 | Ok(value.map(|data| Record { 32 | data, 33 | timestamp: current_time() as i64, 34 | })) 35 | } 36 | 37 | async fn close(&mut self) -> StreamResult<()> { 38 | Ok(()) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/io/mod.rs: -------------------------------------------------------------------------------- 1 | mod collection_sink; 2 | mod collection_source; 3 | 4 | pub use collection_sink::CollectionSink; 5 | pub use collection_source::CollectionSource; 6 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus API - High-level interface for stream processing 2 | //! 3 | //! This module provides a user-friendly API for building stream processing applications. 
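//!
//! # Example
//!
//! A minimal sketch (marked `ignore` so rustdoc does not try to execute it),
//! assuming the in-memory collection helpers and an async runtime; see the
//! crate tests for more complete pipelines:
//!
//! ```ignore
//! use fluxus_api::{CollectionSink, CollectionSource, DataStream};
//!
//! async fn run() {
//!     let source = CollectionSource::new(vec![1usize, 2, 3]);
//!     let sink = CollectionSink::new();
//!
//!     DataStream::new(source)
//!         .flat_map(|v| vec![v; v]) // repeat each value v times: 1, 2, 2, 3, 3, 3
//!         .sink(sink.clone())
//!         .await
//!         .unwrap();
//!
//!     assert_eq!(sink.get_data(), vec![1, 2, 2, 3, 3, 3]);
//! }
//! ```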
4 | 5 | pub mod io; 6 | pub mod operators; 7 | pub mod stream; 8 | 9 | pub use io::{CollectionSink, CollectionSource}; 10 | pub use stream::{DataStream, WindowedStream}; 11 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/filter.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | pub struct FilterOperator { 7 | f: F, 8 | _phantom: PhantomData, 9 | } 10 | 11 | impl FilterOperator 12 | where 13 | F: Fn(&T) -> bool, 14 | { 15 | pub fn new(f: F) -> Self { 16 | Self { 17 | f, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | #[async_trait] 24 | impl Operator for FilterOperator 25 | where 26 | T: Clone + Send + Sync + 'static, 27 | F: Fn(&T) -> bool + Send + Sync, 28 | { 29 | async fn process(&mut self, record: Record) -> StreamResult>> { 30 | if (self.f)(&record.data) { 31 | Ok(vec![record]) 32 | } else { 33 | Ok(vec![]) 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/flat_map.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | pub struct FlatMapOperator 7 | where 8 | I: IntoIterator, 9 | F: Fn(T) -> I, 10 | { 11 | f: F, 12 | _phantom: PhantomData<(T, R)>, 13 | } 14 | 15 | impl FlatMapOperator 16 | where 17 | I: IntoIterator, 18 | F: Fn(T) -> I, 19 | { 20 | pub fn new(f: F) -> Self { 21 | Self { 22 | f, 23 | _phantom: PhantomData, 24 | } 25 | } 26 | } 27 | 28 | #[async_trait] 29 | impl Operator for FlatMapOperator 30 | where 31 | T: Clone + Send + Sync + 'static, 32 | R: Clone + Send + Sync + 'static, 33 | F: Fn(T) -> I + Send + Sync, 34 | I: IntoIterator, 35 | { 36 | async fn process(&mut self, record: Record) -> StreamResult>> { 37 | let Record { data, timestamp } = record; 38 | let result = (self.f)(data); 39 | Ok(result 40 | .into_iter() 41 | .map(|r| Record::with_timestamp(r, timestamp)) 42 | .collect()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/map.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | pub struct MapOperator { 7 | f: F, 8 | _phantom: PhantomData<(T, R)>, 9 | } 10 | 11 | impl MapOperator 12 | where 13 | F: Fn(T) -> R, 14 | { 15 | pub fn new(f: F) -> Self { 16 | Self { 17 | f, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | #[async_trait] 24 | impl Operator for MapOperator 25 | where 26 | T: Clone + Send + Sync + 'static, 27 | R: Clone + Send + Sync + 'static, 28 | F: Fn(T) -> R + Send + Sync, 29 | { 30 | async fn process(&mut self, record: Record) -> StreamResult>> { 31 | let result = (self.f)(record.data); 32 | Ok(vec![Record::with_timestamp(result, record.timestamp)]) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/mod.rs: -------------------------------------------------------------------------------- 1 | mod filter; 2 | mod 
flat_map; 3 | mod map; 4 | mod window_aggregator; 5 | mod window_skipper; 6 | mod window_sorter; 7 | 8 | pub use filter::FilterOperator; 9 | pub use flat_map::FlatMapOperator; 10 | pub use map::MapOperator; 11 | pub use window_aggregator::WindowAggregator; 12 | pub use window_skipper::WindowSkipper; 13 | pub use window_sorter::SortOrder; 14 | pub use window_sorter::WindowSorter; 15 | pub use window_sorter::WindowTimestampSorter; 16 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/window_aggregator.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_runtime::state::KeyedStateBackend; 3 | use fluxus_transformers::Operator; 4 | use fluxus_utils::{ 5 | models::{Record, StreamResult}, 6 | window::WindowConfig, 7 | }; 8 | use std::marker::PhantomData; 9 | 10 | pub struct WindowAggregator { 11 | window_config: WindowConfig, 12 | init: A, 13 | f: F, 14 | state: KeyedStateBackend, 15 | _phantom: PhantomData, 16 | } 17 | 18 | impl WindowAggregator 19 | where 20 | A: Clone, 21 | F: Fn(A, T) -> A, 22 | { 23 | pub fn new(window_config: WindowConfig, init: A, f: F) -> Self { 24 | Self { 25 | window_config, 26 | init, 27 | f, 28 | state: KeyedStateBackend::new(), 29 | _phantom: PhantomData, 30 | } 31 | } 32 | 33 | fn get_window_keys(&self, timestamp: i64) -> Vec { 34 | self.window_config.window_type.get_window_keys(timestamp) 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl Operator for WindowAggregator 40 | where 41 | T: Clone + Send + Sync + 'static, 42 | A: Clone + Send + Sync + 'static, 43 | F: Fn(A, T) -> A + Send + Sync, 44 | { 45 | async fn process(&mut self, record: Record) -> StreamResult>> { 46 | let mut results = Vec::new(); 47 | 48 | for window_key in self.get_window_keys(record.timestamp) { 49 | let current = self 50 | .state 51 | .get(&window_key) 52 | .unwrap_or_else(|| self.init.clone()); 53 | let new_value = (self.f)(current, record.data.clone()); 54 | self.state.set(window_key, new_value.clone()); 55 | 56 | results.push(Record { 57 | data: new_value, 58 | timestamp: record.timestamp, 59 | }); 60 | } 61 | 62 | Ok(results) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/window_skipper.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::{ 4 | models::{Record, StreamResult}, 5 | window::WindowConfig, 6 | }; 7 | use std::{collections::HashMap, marker::PhantomData}; 8 | 9 | pub struct WindowSkipper { 10 | window_config: WindowConfig, 11 | n: usize, 12 | buffer: HashMap>, 13 | _phantom: PhantomData, 14 | } 15 | 16 | impl WindowSkipper 17 | where 18 | T: Clone, 19 | { 20 | pub fn new(window_config: WindowConfig, n: usize) -> Self { 21 | Self { 22 | window_config, 23 | n, 24 | buffer: HashMap::new(), 25 | _phantom: PhantomData, 26 | } 27 | } 28 | 29 | fn get_window_keys(&self, timestamp: i64) -> Vec { 30 | self.window_config.window_type.get_window_keys(timestamp) 31 | } 32 | } 33 | 34 | #[async_trait] 35 | impl Operator> for WindowSkipper 36 | where 37 | T: Clone + Send + Sync + 'static, 38 | { 39 | async fn process(&mut self, record: Record) -> StreamResult>>> { 40 | let mut results = Vec::new(); 41 | 42 | for window_key in self.get_window_keys(record.timestamp) { 43 | let records = self.buffer.entry(window_key).or_default(); 44 | 
records.push(record.data.clone()); 45 | let new_records = records.iter().skip(self.n).cloned().collect::>(); 46 | results.push(Record { 47 | data: new_records, 48 | timestamp: record.timestamp, 49 | }); 50 | } 51 | 52 | Ok(results) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/window_sorter.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_runtime::state::KeyedStateBackend; 3 | use fluxus_transformers::Operator; 4 | use fluxus_utils::{ 5 | models::{Record, StreamResult}, 6 | window::WindowConfig, 7 | }; 8 | use std::{cmp::Ordering, marker::PhantomData}; 9 | 10 | /// sort_by operator for windowed stream. 11 | pub struct WindowSorter { 12 | window_config: WindowConfig, 13 | f: F, 14 | state: KeyedStateBackend>, 15 | _phantom: PhantomData, 16 | } 17 | 18 | impl WindowSorter 19 | where 20 | F: FnMut(&T, &T) -> Ordering, 21 | { 22 | pub fn new(window_config: WindowConfig, f: F) -> Self { 23 | Self { 24 | window_config, 25 | f, 26 | state: KeyedStateBackend::new(), 27 | _phantom: PhantomData, 28 | } 29 | } 30 | 31 | fn get_window_keys(&self, timestamp: i64) -> Vec { 32 | self.window_config.window_type.get_window_keys(timestamp) 33 | } 34 | } 35 | 36 | #[async_trait] 37 | impl Operator> for WindowSorter 38 | where 39 | T: Clone + Send + Sync + 'static, 40 | F: FnMut(&T, &T) -> Ordering + Send + Sync, 41 | { 42 | async fn process(&mut self, record: Record) -> StreamResult>>> { 43 | let mut results = Vec::new(); 44 | 45 | for window_key in self.get_window_keys(record.timestamp) { 46 | let mut current = self.state.get(&window_key).unwrap_or_default(); 47 | let index = current 48 | .binary_search_by(|prob| (self.f)(prob, &record.data)) 49 | .unwrap_or_else(|i| i); 50 | current.insert(index, record.data.clone()); 51 | 52 | self.state.set(window_key, current.clone()); 53 | results.push(Record { 54 | data: current, 55 | timestamp: record.timestamp, 56 | }); 57 | } 58 | 59 | Ok(results) 60 | } 61 | } 62 | 63 | /// Specify sorting method of sort_by_ts 64 | #[derive(Debug, Clone, Copy)] 65 | pub enum SortOrder { 66 | Asc, 67 | Desc, 68 | } 69 | 70 | /// sort_by_ts operator for windowed stream. 
71 | pub struct WindowTimestampSorter { 72 | window_config: WindowConfig, 73 | method: SortOrder, 74 | state: KeyedStateBackend>>, 75 | _phantom: PhantomData, 76 | } 77 | 78 | impl WindowTimestampSorter { 79 | pub fn new(window_config: WindowConfig, method: SortOrder) -> Self { 80 | Self { 81 | window_config, 82 | method, 83 | state: KeyedStateBackend::new(), 84 | _phantom: PhantomData, 85 | } 86 | } 87 | 88 | fn get_window_keys(&self, timestamp: i64) -> Vec { 89 | self.window_config.window_type.get_window_keys(timestamp) 90 | } 91 | } 92 | 93 | #[async_trait] 94 | impl Operator> for WindowTimestampSorter 95 | where 96 | T: Clone + Send + Sync + 'static, 97 | { 98 | async fn process(&mut self, record: Record) -> StreamResult>>> { 99 | let mut raw_results = Vec::new(); 100 | for window_key in self.get_window_keys(record.timestamp) { 101 | let mut current = self.state.get(&window_key).unwrap_or_default(); 102 | let index = current 103 | .binary_search_by(|prob| match self.method { 104 | SortOrder::Asc => prob.timestamp.cmp(&record.timestamp), 105 | SortOrder::Desc => record.timestamp.cmp(&prob.timestamp), 106 | }) 107 | .unwrap_or_else(|i| i); 108 | current.insert(index, record.clone()); 109 | 110 | self.state.set(window_key, current.clone()); 111 | raw_results.push(Record { 112 | data: current, 113 | timestamp: record.timestamp, 114 | }); 115 | } 116 | let results = raw_results 117 | .into_iter() 118 | .map(|Record { data, timestamp }| { 119 | let data = data.into_iter().map(|rec| rec.data).collect(); 120 | Record { data, timestamp } 121 | }) 122 | .collect(); 123 | Ok(results) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/stream/datastream.rs: -------------------------------------------------------------------------------- 1 | use crate::operators::{FilterOperator, FlatMapOperator, MapOperator}; 2 | use fluxus_core::ParallelConfig; 3 | use fluxus_sinks::Sink; 4 | use fluxus_sources::Source; 5 | use fluxus_transformers::{ 6 | InnerOperator, InnerSource, Operator, TransformSource, TransformSourceWithOperator, 7 | }; 8 | use fluxus_utils::{ 9 | models::{StreamError, StreamResult}, 10 | window::WindowConfig, 11 | }; 12 | use std::sync::{ 13 | Arc, 14 | atomic::{AtomicUsize, Ordering}, 15 | }; 16 | 17 | use super::WindowedStream; 18 | 19 | /// DataStream represents a stream of data elements 20 | pub struct DataStream { 21 | pub(crate) source: Arc>, 22 | pub(crate) operators: Vec>>, 23 | pub(crate) parallel_config: Option, 24 | } 25 | 26 | impl DataStream 27 | where 28 | T: Clone + Send + Sync + 'static, 29 | { 30 | /// Create a new DataStream from a source 31 | pub fn new(source: S) -> Self 32 | where 33 | S: Source + Send + Sync + 'static, 34 | { 35 | Self { 36 | source: Arc::new(source), 37 | operators: Vec::new(), 38 | parallel_config: None, 39 | } 40 | } 41 | 42 | /// Set parallelism for the stream processing 43 | pub fn parallel(mut self, parallelism: usize) -> Self { 44 | self.parallel_config = Some(ParallelConfig { 45 | parallelism, 46 | buffer_size: 1024, 47 | preserve_order: true, 48 | }); 49 | self 50 | } 51 | 52 | /// Apply a map transformation 53 | pub fn map(self, f: F) -> DataStream 54 | where 55 | F: Fn(T) -> R + Send + Sync + 'static, 56 | R: Clone + Send + Sync + 'static, 57 | { 58 | let mapper = MapOperator::new(f); 59 | self.transform(mapper) 60 | } 61 | 62 | /// Apply a filter transformation 63 | pub fn filter(mut self, f: F) -> Self 64 | where 65 | F: Fn(&T) -> bool + Send + Sync + 'static, 66 | 
{ 67 | let filter = FilterOperator::new(f); 68 | self.operators.push(Arc::new(filter)); 69 | self 70 | } 71 | 72 | /// Apply a flat map transformation 73 | pub fn flat_map(self, f: F) -> DataStream 74 | where 75 | F: Fn(T) -> I + Send + Sync + 'static, 76 | R: Clone + Send + Sync + 'static, 77 | I: IntoIterator + Send + Sync + 'static, 78 | { 79 | self.transform(FlatMapOperator::new(f)) 80 | } 81 | 82 | /// Apply a limit transformation that keeps the first n elements 83 | pub fn limit(self, n: usize) -> Self { 84 | let n = AtomicUsize::new(n); 85 | self.filter(move |_| { 86 | if n.load(Ordering::SeqCst) > 0 { 87 | n.fetch_sub(1, Ordering::SeqCst); 88 | true 89 | } else { 90 | false 91 | } 92 | }) 93 | } 94 | 95 | /// Transform the stream using a custom operator 96 | pub fn transform(self, operator: O) -> DataStream 97 | where 98 | O: Operator + Send + Sync + 'static, 99 | R: Clone + Send + Sync + 'static, 100 | { 101 | let source = TransformSourceWithOperator::new(self.source, operator, self.operators); 102 | DataStream { 103 | source: Arc::new(source), 104 | operators: Vec::new(), 105 | parallel_config: self.parallel_config, 106 | } 107 | } 108 | 109 | /// Apply windowing to the stream 110 | pub fn window(self, config: WindowConfig) -> WindowedStream { 111 | WindowedStream { 112 | stream: self, 113 | window_config: config, 114 | } 115 | } 116 | 117 | /// Write the stream to a sink 118 | pub async fn sink(self, mut sink: K) -> StreamResult<()> 119 | where 120 | K: Sink + Send + Sync + 'static, 121 | { 122 | let mut source = TransformSource::new(self.source); 123 | source.set_operators(self.operators); 124 | 125 | loop { 126 | match source.next().await { 127 | Ok(Some(record)) => sink.write(record).await?, 128 | Ok(None) => break, 129 | Err(e) => match e { 130 | StreamError::EOF => break, 131 | StreamError::Wait(ms) => { 132 | tokio::time::sleep(std::time::Duration::from_millis(ms)).await 133 | } 134 | _ => return Err(e), 135 | }, 136 | } 137 | } 138 | 139 | sink.flush().await?; 140 | sink.close().await 141 | } 142 | } 143 | 144 | impl DataStream> 145 | where 146 | T: Clone + Send + Sync + 'static, 147 | { 148 | /// Flatten the stream 149 | pub fn flatten(self) -> DataStream { 150 | self.transform(FlatMapOperator::new(|v| v)) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/stream/mod.rs: -------------------------------------------------------------------------------- 1 | mod datastream; 2 | mod windowed_stream; 3 | 4 | pub use datastream::DataStream; 5 | pub use windowed_stream::WindowedStream; 6 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/stream/windowed_stream.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::{Ordering, Reverse}; 2 | use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque}; 3 | use std::hash::Hash; 4 | 5 | use fluxus_transformers::operator::{WindowAllOperator, WindowAnyOperator}; 6 | use fluxus_utils::window::WindowConfig; 7 | 8 | use crate::operators::{ 9 | SortOrder, WindowAggregator, WindowSkipper, WindowSorter, WindowTimestampSorter, 10 | }; 11 | use crate::stream::datastream::DataStream; 12 | 13 | /// Represents a windowed stream for aggregation operations 14 | pub struct WindowedStream { 15 | pub(crate) stream: DataStream, 16 | pub(crate) window_config: WindowConfig, 17 | } 18 | 19 | impl WindowedStream 20 | where 21 | T: Clone + Send + Sync + 'static, 22 | { 23 | /// 
Aggregate values in the window 24 | pub fn aggregate(self, init: A, f: F) -> DataStream 25 | where 26 | A: Clone + Send + Sync + 'static, 27 | F: Fn(A, T) -> A + Send + Sync + 'static, 28 | { 29 | let aggregator = WindowAggregator::new(self.window_config, init, f); 30 | self.stream.transform(aggregator) 31 | } 32 | 33 | pub fn any(self, f: F) -> DataStream 34 | where 35 | F: Fn(&T) -> bool + Send + Sync + 'static, 36 | { 37 | let anyer = WindowAnyOperator::new(f, self.window_config); 38 | self.stream.transform(anyer) 39 | } 40 | 41 | pub fn all(self, f: F) -> DataStream 42 | where 43 | F: Fn(&T) -> bool + Send + Sync + 'static, 44 | { 45 | let aller = WindowAllOperator::new(f, self.window_config); 46 | self.stream.transform(aller) 47 | } 48 | 49 | /// Limit the number of values in the window 50 | pub fn limit(self, n: usize) -> DataStream> { 51 | let limiter = WindowAggregator::new(self.window_config, vec![], move |mut acc, value| { 52 | if acc.len() < n { 53 | acc.push(value); 54 | } 55 | acc 56 | }); 57 | self.stream.transform(limiter) 58 | } 59 | 60 | /// Retain last n values in the window 61 | pub fn tail(self, n: usize) -> DataStream> { 62 | let init = VecDeque::with_capacity(n); 63 | let limiter = WindowAggregator::new(self.window_config, init, move |mut acc, value| { 64 | acc.push_back(value); 65 | if acc.len() > n { 66 | acc.pop_front(); 67 | } 68 | acc 69 | }); 70 | self.stream 71 | .transform(limiter) 72 | .map(|d| d.into_iter().collect()) 73 | } 74 | 75 | /// Sort values in the window 76 | pub fn sort_by(self, f: F) -> DataStream> 77 | where 78 | F: FnMut(&T, &T) -> Ordering + Send + Sync + 'static, 79 | { 80 | let sorter = WindowSorter::new(self.window_config, f); 81 | self.stream.transform(sorter) 82 | } 83 | 84 | /// Sort values in the window by timestamp 85 | pub fn sort_by_ts(self, order: SortOrder) -> DataStream> { 86 | let sorter = WindowTimestampSorter::new(self.window_config, order); 87 | self.stream.transform(sorter) 88 | } 89 | 90 | /// Sort values in the window by timestamp in ascending order 91 | pub fn sort_by_ts_asc(self) -> DataStream> { 92 | let sorter = WindowTimestampSorter::new(self.window_config, SortOrder::Asc); 93 | self.stream.transform(sorter) 94 | } 95 | 96 | /// Sort values in the window by timestamp in descending order 97 | pub fn sort_by_ts_desc(self) -> DataStream> { 98 | let sorter = WindowTimestampSorter::new(self.window_config, SortOrder::Desc); 99 | self.stream.transform(sorter) 100 | } 101 | 102 | /// Skip 103 | pub fn skip(self, n: usize) -> DataStream> { 104 | let skipper = WindowSkipper::new(self.window_config, n); 105 | self.stream.transform(skipper) 106 | } 107 | } 108 | 109 | impl WindowedStream 110 | where 111 | T: Ord + Clone + Send + Sync + 'static, 112 | { 113 | /// Sort values in specified order 114 | pub fn sort(self, ord: SortOrder) -> DataStream> { 115 | self.sort_by(move |v1, v2| match ord { 116 | SortOrder::Asc => v1.cmp(v2), 117 | SortOrder::Desc => v2.cmp(v1), 118 | }) 119 | } 120 | 121 | /// Get the top k values in the window, the values are sorted in descending order 122 | pub fn top_k(self, k: usize) -> DataStream> { 123 | let init = BinaryHeap::>::new(); 124 | let res = self.aggregate(init, move |mut heap, v| { 125 | heap.push(Reverse(v)); 126 | if heap.len() > k { 127 | heap.pop(); 128 | } 129 | heap 130 | }); 131 | res.map(|heap| { 132 | heap.into_sorted_vec() 133 | .into_iter() 134 | .map(|Reverse(v)| v) 135 | .collect() 136 | }) 137 | } 138 | } 139 | 140 | impl WindowedStream 141 | where 142 | T: Eq + Hash + Clone + 
Send + Sync + 'static, 143 | { 144 | /// Distinct values 145 | pub fn distinct(self) -> DataStream> { 146 | self.aggregate(HashSet::new(), |mut set, value| { 147 | set.insert(value); 148 | set 149 | }) 150 | } 151 | } 152 | 153 | impl WindowedStream 154 | where 155 | T: Clone + Send + Sync + 'static, 156 | { 157 | /// Distinct values by key. When the same key is encountered, the first occurrence of the value is retained 158 | pub fn distinct_by_key(self, f: F) -> DataStream> 159 | where 160 | F: Fn(&T) -> K + Sync + Send + 'static, 161 | K: Eq + Hash + Clone + Sync + Send + 'static, 162 | { 163 | let keys = HashSet::new(); 164 | let data = vec![]; 165 | self.aggregate((keys, data), move |(mut keys, mut data), value| { 166 | let k = f(&value); 167 | if !keys.contains(&k) { 168 | keys.insert(k); 169 | data.push(value); 170 | } 171 | (keys, data) 172 | }) 173 | .map(|(_, data)| data) 174 | } 175 | 176 | /// Get top k values by key. The values are sorted by key in descending order 177 | pub fn top_k_by_key(self, n: usize, f: F) -> DataStream> 178 | where 179 | F: Fn(&T) -> K + Sync + Send + 'static, 180 | K: Ord + Eq + Hash + Clone + Sync + Send + 'static, 181 | { 182 | // Store the top k keys 183 | let keys = BinaryHeap::>::new(); 184 | // Store the values by key 185 | let kvs: HashMap> = HashMap::new(); 186 | self.aggregate((keys, kvs), move |(mut keys, mut kvs), value| { 187 | let k = f(&value); 188 | 189 | keys.push(Reverse(k.clone())); 190 | kvs.entry(k).or_default().push(value); 191 | 192 | if keys.len() > n { 193 | if let Some(Reverse(min_k)) = keys.pop() { 194 | kvs.get_mut(&min_k).map(|v| v.pop()); 195 | } 196 | } 197 | (keys, kvs) 198 | }) 199 | .map(|(top_keys, mut kvs)| { 200 | top_keys 201 | .into_sorted_vec() 202 | .into_iter() 203 | .fold(vec![], move |mut acc, Reverse(k)| { 204 | let values = kvs.remove(&k).unwrap_or_default(); 205 | acc.extend(values); 206 | acc 207 | }) 208 | }) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /crates/fluxus-api/tests/datastreams_test.rs: -------------------------------------------------------------------------------- 1 | use fluxus_api::{CollectionSink, CollectionSource, DataStream}; 2 | 3 | #[test] 4 | fn test_limit() { 5 | tokio_test::block_on(async { 6 | let numbers = vec![1, 2, 3, 4, 5]; 7 | let source = CollectionSource::new(numbers); 8 | let sink = CollectionSink::new(); 9 | 10 | DataStream::new(source) 11 | .limit(2) 12 | .sink(sink.clone()) 13 | .await 14 | .unwrap(); 15 | 16 | let data = sink.get_data(); 17 | assert_eq!(data, vec![1, 2]); 18 | }) 19 | } 20 | 21 | #[test] 22 | fn test_windowed_limit() { 23 | tokio_test::block_on(async { 24 | let numbers = vec![1, 2, 3, 4, 5]; 25 | let source = CollectionSource::new(numbers); 26 | let sink = CollectionSink::new(); 27 | 28 | DataStream::new(source) 29 | .window(fluxus_utils::window::WindowConfig::global()) 30 | .limit(3) 31 | .sink(sink.clone()) 32 | .await 33 | .unwrap(); 34 | 35 | let data = sink.get_data(); 36 | assert_eq!( 37 | data, 38 | vec![ 39 | vec![1], 40 | vec![1, 2], 41 | vec![1, 2, 3], 42 | vec![1, 2, 3], 43 | vec![1, 2, 3], 44 | ] 45 | ); 46 | }) 47 | } 48 | 49 | #[test] 50 | fn test_tail() { 51 | tokio_test::block_on(async { 52 | let numbers = vec![1, 2, 3, 4, 5, 6]; 53 | let source = CollectionSource::new(numbers); 54 | let sink = CollectionSink::new(); 55 | DataStream::new(source) 56 | .window(fluxus_utils::window::WindowConfig::global()) 57 | .tail(3) 58 | .sink(sink.clone()) 59 | .await 60 | .unwrap(); 61 | 62 | 
let data = sink.get_data(); 63 | assert_eq!( 64 | data, 65 | vec![ 66 | vec![1], 67 | vec![1, 2], 68 | vec![1, 2, 3], 69 | vec![2, 3, 4], 70 | vec![3, 4, 5], 71 | vec![4, 5, 6], 72 | ] 73 | ); 74 | }) 75 | } 76 | 77 | #[test] 78 | fn test_flatten() { 79 | tokio_test::block_on(async { 80 | let numbers: Vec> = vec![vec![1, 2], vec![3, 4, 5]]; 81 | let source = CollectionSource::new(numbers); 82 | let sink = CollectionSink::new(); 83 | 84 | DataStream::new(source) 85 | .flatten() 86 | .sink(sink.clone()) 87 | .await 88 | .unwrap(); 89 | 90 | let data = sink.get_data(); 91 | assert_eq!(data, vec![1, 2, 3, 4, 5]); 92 | }) 93 | } 94 | 95 | #[test] 96 | fn test_flat_map() { 97 | tokio_test::block_on(async { 98 | let numbers: Vec = vec![1, 2, 3]; 99 | let source = CollectionSource::new(numbers); 100 | let sink = CollectionSink::new(); 101 | 102 | DataStream::new(source) 103 | .flat_map(|v| vec![v; v]) 104 | .sink(sink.clone()) 105 | .await 106 | .unwrap(); 107 | 108 | let data = sink.get_data(); 109 | assert_eq!(data, vec![1, 2, 2, 3, 3, 3]); 110 | }) 111 | } 112 | -------------------------------------------------------------------------------- /crates/fluxus-api/tests/filter_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use fluxus_api::{CollectionSink, CollectionSource, DataStream}; 4 | 5 | #[test] 6 | fn test_filter() { 7 | tokio_test::block_on(async { 8 | let numbers = vec![1, 2, 3, 4, 5]; 9 | // TransformSourceWithOperator::new(); 10 | let source = CollectionSource::new(numbers); 11 | let sink = CollectionSink::new(); 12 | 13 | DataStream::new(source) 14 | .filter(|x| x % 2 == 0) 15 | .sink(sink.clone()) 16 | .await 17 | .unwrap(); 18 | 19 | let data = sink.get_data(); 20 | println!("data: {:?}", data); 21 | assert_eq!(data.len(), 2); 22 | assert_eq!(data[0], 2); 23 | assert_eq!(data[1], 4); 24 | }) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /crates/fluxus-api/tests/windowed_stream_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use async_trait::async_trait; 4 | use fluxus_api::operators::SortOrder; 5 | use fluxus_api::{CollectionSink, CollectionSource, DataStream}; 6 | use fluxus_sources::Source; 7 | use fluxus_utils::models::Record; 8 | use fluxus_utils::{models::StreamResult, window::WindowConfig}; 9 | 10 | #[test] 11 | fn test_any() { 12 | tokio_test::block_on(async { 13 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 14 | let sink = CollectionSink::new(); 15 | DataStream::new(source) 16 | .window(WindowConfig::global()) 17 | .any(|x| x % 2 == 0) 18 | .sink(sink.clone()) 19 | .await 20 | .unwrap(); 21 | let data = sink.get_data(); 22 | assert_eq!(data[0], false); 23 | assert_eq!(data[1], true); 24 | assert_eq!(data[2], true); 25 | assert_eq!(data[3], true); 26 | assert_eq!(data[4], true); 27 | }) 28 | } 29 | 30 | #[test] 31 | fn test_all() { 32 | tokio_test::block_on(async { 33 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 34 | let sink = CollectionSink::new(); 35 | DataStream::new(source) 36 | .window(WindowConfig::global()) 37 | .all(|x| x % 2 == 0) 38 | .sink(sink.clone()) 39 | .await 40 | .unwrap(); 41 | let data = sink.get_data(); 42 | assert_eq!(data[0], false); 43 | assert_eq!(data[1], false); 44 | assert_eq!(data[2], false); 45 | assert_eq!(data[3], false); 46 | assert_eq!(data[4], false); 47 | }) 48 | } 49 | 50 | #[test] 51 | fn test_sort_by() 
{ 52 | tokio_test::block_on(async { 53 | let source = CollectionSource::new(vec!["1", "4444", "55555", "22", "333"]); 54 | let sink = CollectionSink::new(); 55 | DataStream::new(source) 56 | .window(WindowConfig::global()) 57 | .sort_by(|a, b| a.len().cmp(&b.len())) 58 | .sink(sink.clone()) 59 | .await 60 | .unwrap(); 61 | let data = sink.get_data(); 62 | assert_eq!(data.len(), 5); 63 | assert_eq!(data[0], vec!["1"]); 64 | assert_eq!(data[1], vec!["1", "4444"]); 65 | assert_eq!(data[2], vec!["1", "4444", "55555"]); 66 | assert_eq!(data[3], vec!["1", "22", "4444", "55555"]); 67 | assert_eq!(data[4], vec!["1", "22", "333", "4444", "55555"]); 68 | }) 69 | } 70 | 71 | #[test] 72 | fn test_sort() { 73 | tokio_test::block_on(async { 74 | let source = CollectionSource::new(vec!["1", "4444", "55555", "22", "333"]); 75 | let sink = CollectionSink::new(); 76 | DataStream::new(source) 77 | .window(WindowConfig::global()) 78 | .sort(SortOrder::Asc) 79 | .sink(sink.clone()) 80 | .await 81 | .unwrap(); 82 | let data = sink.get_data(); 83 | assert_eq!(data.len(), 5); 84 | assert_eq!(data[0], vec!["1"]); 85 | assert_eq!(data[1], vec!["1", "4444"]); 86 | assert_eq!(data[2], vec!["1", "4444", "55555"]); 87 | assert_eq!(data[3], vec!["1", "22", "4444", "55555"]); 88 | assert_eq!(data[4], vec!["1", "22", "333", "4444", "55555"]); 89 | }) 90 | } 91 | 92 | struct SlowSource { 93 | inner: CollectionSource, 94 | counter: i64, 95 | } 96 | #[async_trait] 97 | impl Source for SlowSource 98 | where 99 | T: Clone + Send + Sync + 'static, 100 | { 101 | async fn init(&mut self) -> StreamResult<()> { 102 | Ok(()) 103 | } 104 | 105 | async fn next(&mut self) -> StreamResult>> { 106 | self.inner.next().await.map(|op| { 107 | op.map(|mut r| { 108 | self.counter += 1; 109 | r.timestamp += self.counter; 110 | r 111 | }) 112 | }) 113 | } 114 | 115 | async fn close(&mut self) -> StreamResult<()> { 116 | Ok(()) 117 | } 118 | } 119 | #[test] 120 | fn test_sort_by_ts() { 121 | tokio_test::block_on(async { 122 | let source = CollectionSource::new(vec!["1st", "2nd", "3rd", "4th", "5th"]); 123 | let source = SlowSource { 124 | inner: source, 125 | counter: 0, 126 | }; 127 | let sink = CollectionSink::new(); 128 | DataStream::new(source) 129 | .window(WindowConfig::global()) 130 | .sort_by_ts(SortOrder::Asc) 131 | .sink(sink.clone()) 132 | .await 133 | .unwrap(); 134 | let data = sink.get_data(); 135 | assert_eq!(data.len(), 5); 136 | assert_eq!( 137 | data, 138 | vec![ 139 | vec!["1st"], 140 | vec!["1st", "2nd"], 141 | vec!["1st", "2nd", "3rd"], 142 | vec!["1st", "2nd", "3rd", "4th"], 143 | vec!["1st", "2nd", "3rd", "4th", "5th"], 144 | ] 145 | ); 146 | let source = CollectionSource::new(vec!["1st", "2nd", "3rd", "4th", "5th"]); 147 | let source = SlowSource { 148 | inner: source, 149 | counter: 0, 150 | }; 151 | let sink = CollectionSink::new(); 152 | DataStream::new(source) 153 | .window(WindowConfig::global()) 154 | .sort_by_ts(SortOrder::Desc) 155 | .sink(sink.clone()) 156 | .await 157 | .unwrap(); 158 | let data = sink.get_data(); 159 | assert_eq!(data.len(), 5); 160 | let rev = |mut v: Vec<_>| { 161 | v.reverse(); 162 | v 163 | }; 164 | assert_eq!( 165 | data, 166 | vec![ 167 | rev(vec!["1st"]), 168 | rev(vec!["1st", "2nd"]), 169 | rev(vec!["1st", "2nd", "3rd"]), 170 | rev(vec!["1st", "2nd", "3rd", "4th"]), 171 | rev(vec!["1st", "2nd", "3rd", "4th", "5th"]), 172 | ] 173 | ); 174 | }) 175 | } 176 | 177 | #[test] 178 | fn test_distinct() { 179 | tokio_test::block_on(async { 180 | let source = CollectionSource::new(vec!["1", "22", 
"1", "22", "333", "333"]); 181 | let sink = CollectionSink::new(); 182 | DataStream::new(source) 183 | .window(WindowConfig::global()) 184 | .distinct() 185 | .sink(sink.clone()) 186 | .await 187 | .unwrap(); 188 | let data = sink.get_data(); 189 | assert_eq!(data.len(), 6); 190 | assert_eq!(data[5].len(), 3); 191 | assert!(data[5].contains("1")); 192 | assert!(data[5].contains("22")); 193 | assert!(data[5].contains("333")); 194 | 195 | let source = CollectionSource::new(vec!["1", "11", "111", "111"]); 196 | let sink = CollectionSink::new(); 197 | DataStream::new(source) 198 | .window(WindowConfig::global()) 199 | .distinct_by_key(|s| s.as_bytes()[0]) 200 | .sink(sink.clone()) 201 | .await 202 | .unwrap(); 203 | let data = sink.get_data(); 204 | assert_eq!(data.len(), 4); 205 | assert_eq!(data[3].len(), 1); 206 | assert!(data[3].contains(&"1")); 207 | }) 208 | } 209 | 210 | #[test] 211 | fn test_top_k() { 212 | tokio_test::block_on(async { 213 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 214 | let sink = CollectionSink::new(); 215 | DataStream::new(source) 216 | .window(WindowConfig::global()) 217 | .top_k(3) 218 | .sink(sink.clone()) 219 | .await 220 | .unwrap(); 221 | let data = sink.get_data(); 222 | assert_eq!(data.len(), 5); 223 | assert_eq!(data[0], vec![1]); 224 | assert_eq!(data[1], vec![2, 1]); 225 | assert_eq!(data[2], vec![3, 2, 1]); 226 | assert_eq!(data[3], vec![4, 3, 2]); 227 | assert_eq!(data[4], vec![5, 4, 3]); 228 | 229 | let source = CollectionSource::new(vec!["1", "2", "3", "3", "3"]); 230 | let sink = CollectionSink::new(); 231 | DataStream::new(source) 232 | .window(WindowConfig::global()) 233 | .top_k_by_key(3, |s| s.as_bytes()[0]) 234 | .sink(sink.clone()) 235 | .await 236 | .unwrap(); 237 | let data = sink.get_data(); 238 | dbg!(&data); 239 | assert_eq!(data.len(), 5); 240 | assert_eq!(data[0], vec!["1"]); 241 | assert_eq!(data[1], vec!["2", "1"]); 242 | assert_eq!(data[2], vec!["3", "2", "1"]); 243 | assert_eq!(data[3], vec!["3", "3", "2"]); 244 | assert_eq!(data[4], vec!["3", "3", "3"]); 245 | }) 246 | } 247 | 248 | #[test] 249 | fn test_skip() { 250 | tokio_test::block_on(async { 251 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 252 | let sink = CollectionSink::new(); 253 | DataStream::new(source) 254 | .window(WindowConfig::global()) 255 | .skip(2) 256 | .sink(sink.clone()) 257 | .await 258 | .unwrap(); 259 | let data = sink.get_data(); 260 | assert_eq!(data.len(), 5); 261 | assert_eq!(data[0], Vec::::new()); 262 | assert_eq!(data[1], Vec::::new()); 263 | assert_eq!(data[2], vec![3]); 264 | assert_eq!(data[3], vec![3, 4]); 265 | assert_eq!(data[4], vec![3, 4, 5]); 266 | }) 267 | } 268 | } 269 | -------------------------------------------------------------------------------- /crates/fluxus-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-core" 3 | description = "Core components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 13 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2" } 14 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 15 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2" } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | futures = "0.3" 19 
| serde = { version = "1.0", features = ["derive"] } 20 | serde_json = "1.0" 21 | anyhow = "1.0" 22 | thiserror = "1.0" 23 | async-trait = "0.1" 24 | tracing = "0.1" 25 | num_cpus = "1.16" 26 | csv = "1.3" 27 | 28 | [dev-dependencies] 29 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 30 | -------------------------------------------------------------------------------- /crates/fluxus-core/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Core 2 | 3 | Core implementations and data structures for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides the fundamental building blocks and implementations for the Fluxus stream processing engine: 8 | 9 | - Window implementations 10 | - State management 11 | - Data partitioning 12 | - Runtime configurations 13 | - Core data structures 14 | 15 | ## Key Components 16 | 17 | ### Windows 18 | 19 | Core window implementations: 20 | - `TumblingWindow` - Fixed-size, non-overlapping windows 21 | - `SlidingWindow` - Overlapping windows with slide interval 22 | - `SessionWindow` - Dynamic windows based on event timing 23 | 24 | ### State Management 25 | 26 | State handling for stream operations: 27 | - In-memory state storage 28 | - State backends 29 | - Checkpointing (planned) 30 | 31 | ### Partitioning 32 | 33 | Data partitioning strategies: 34 | - Key-based partitioning 35 | - Round-robin partitioning 36 | - Custom partitioners 37 | 38 | ## Usage 39 | 40 | Add this to your `Cargo.toml`: 41 | 42 | ```toml 43 | [dependencies] 44 | fluxus-core = "0.2" 45 | ``` 46 | 47 | This crate is usually not used directly but through the `fluxus-api` crate. -------------------------------------------------------------------------------- /crates/fluxus-core/src/config.rs: -------------------------------------------------------------------------------- 1 | /// Configuration for parallel processing 2 | #[derive(Debug, Clone)] 3 | pub struct ParallelConfig { 4 | /// Number of parallel tasks 5 | pub parallelism: usize, 6 | /// Maximum buffer size per task 7 | pub buffer_size: usize, 8 | /// Whether to preserve ordering in parallel processing 9 | pub preserve_order: bool, 10 | } 11 | 12 | impl Default for ParallelConfig { 13 | fn default() -> Self { 14 | Self { 15 | parallelism: num_cpus::get(), 16 | buffer_size: 1000, 17 | preserve_order: true, 18 | } 19 | } 20 | } 21 | 22 | impl ParallelConfig { 23 | /// Create a new parallel configuration 24 | pub fn new(parallelism: usize, buffer_size: usize, preserve_order: bool) -> Self { 25 | Self { 26 | parallelism, 27 | buffer_size, 28 | preserve_order, 29 | } 30 | } 31 | 32 | /// Set the number of parallel tasks 33 | pub fn with_parallelism(mut self, parallelism: usize) -> Self { 34 | self.parallelism = parallelism; 35 | self 36 | } 37 | 38 | /// Set the buffer size per task 39 | pub fn with_buffer_size(mut self, buffer_size: usize) -> Self { 40 | self.buffer_size = buffer_size; 41 | self 42 | } 43 | 44 | /// Set whether to preserve ordering 45 | pub fn with_preserve_order(mut self, preserve_order: bool) -> Self { 46 | self.preserve_order = preserve_order; 47 | self 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/error_handling/backpressure.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | /// Backpressure strategy for handling 
overload 4 | #[derive(Debug, Clone)] 5 | pub enum BackpressureStrategy { 6 | /// Block when buffer is full 7 | Block, 8 | /// Drop oldest items when buffer is full 9 | DropOldest, 10 | /// Drop newest items when buffer is full 11 | DropNewest, 12 | /// Apply backpressure with custom threshold 13 | Throttle { 14 | high_watermark: usize, 15 | low_watermark: usize, 16 | backoff: Duration, 17 | }, 18 | } 19 | 20 | /// Backpressure controller for managing load 21 | pub struct BackpressureController { 22 | strategy: BackpressureStrategy, 23 | current_load: usize, 24 | } 25 | 26 | impl BackpressureController { 27 | /// Create a new backpressure controller with the given strategy 28 | pub fn new(strategy: BackpressureStrategy) -> Self { 29 | Self { 30 | strategy, 31 | current_load: 0, 32 | } 33 | } 34 | 35 | /// Check if we should apply backpressure 36 | pub fn should_apply_backpressure(&self) -> bool { 37 | match &self.strategy { 38 | BackpressureStrategy::Block => self.current_load > 0, 39 | BackpressureStrategy::DropOldest | BackpressureStrategy::DropNewest => false, 40 | BackpressureStrategy::Throttle { high_watermark, .. } => { 41 | self.current_load >= *high_watermark 42 | } 43 | } 44 | } 45 | 46 | /// Get the backoff duration if throttling is needed 47 | pub fn get_backoff(&self) -> Option { 48 | match &self.strategy { 49 | BackpressureStrategy::Throttle { backoff, .. } => Some(*backoff), 50 | _ => None, 51 | } 52 | } 53 | 54 | /// Update the current load 55 | pub fn update_load(&mut self, load: usize) { 56 | self.current_load = load; 57 | } 58 | 59 | /// Check if we can accept more items based on the strategy 60 | pub fn can_accept(&self) -> bool { 61 | !self.should_apply_backpressure() 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/error_handling/mod.rs: -------------------------------------------------------------------------------- 1 | mod backpressure; 2 | mod retry_strategy; 3 | 4 | pub use backpressure::{BackpressureController, BackpressureStrategy}; 5 | use fluxus_utils::models::StreamResult; 6 | pub use retry_strategy::RetryStrategy; 7 | use tokio::time::sleep; 8 | 9 | /// Error handler for retrying operations 10 | pub struct ErrorHandler { 11 | strategy: RetryStrategy, 12 | } 13 | 14 | impl ErrorHandler { 15 | /// Create a new error handler with the given retry strategy 16 | pub fn new(strategy: RetryStrategy) -> Self { 17 | Self { strategy } 18 | } 19 | 20 | /// Retry an operation with the configured strategy 21 | pub async fn retry(&self, mut operation: F) -> StreamResult 22 | where 23 | F: FnMut() -> StreamResult, 24 | { 25 | let mut attempt = 0; 26 | loop { 27 | match operation() { 28 | Ok(value) => return Ok(value), 29 | Err(error) => { 30 | if let Some(delay) = self.strategy.get_delay(attempt) { 31 | tracing::warn!( 32 | "Operation failed (attempt {}/{}): {}. Retrying after {:?}", 33 | attempt + 1, 34 | match &self.strategy { 35 | RetryStrategy::NoRetry => 1, 36 | RetryStrategy::Fixed { max_attempts, .. } => *max_attempts, 37 | RetryStrategy::ExponentialBackoff { max_attempts, .. 
} => 38 | *max_attempts, 39 | }, 40 | error, 41 | delay 42 | ); 43 | sleep(delay).await; 44 | attempt += 1; 45 | } else { 46 | return Err(error); 47 | } 48 | } 49 | } 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/error_handling/retry_strategy.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | /// Error recovery strategy 4 | #[derive(Debug, Clone)] 5 | pub enum RetryStrategy { 6 | /// No retry, fail immediately 7 | NoRetry, 8 | /// Retry with fixed delay 9 | Fixed { 10 | delay: Duration, 11 | max_attempts: usize, 12 | }, 13 | /// Retry with exponential backoff 14 | ExponentialBackoff { 15 | initial_delay: Duration, 16 | max_delay: Duration, 17 | max_attempts: usize, 18 | multiplier: f64, 19 | }, 20 | } 21 | 22 | impl RetryStrategy { 23 | /// Create a fixed delay retry strategy 24 | pub fn fixed(delay: Duration, max_attempts: usize) -> Self { 25 | Self::Fixed { 26 | delay, 27 | max_attempts, 28 | } 29 | } 30 | 31 | /// Create an exponential backoff retry strategy 32 | pub fn exponential( 33 | initial_delay: Duration, 34 | max_delay: Duration, 35 | max_attempts: usize, 36 | multiplier: f64, 37 | ) -> Self { 38 | Self::ExponentialBackoff { 39 | initial_delay, 40 | max_delay, 41 | max_attempts, 42 | multiplier, 43 | } 44 | } 45 | 46 | /// Calculate delay for a given attempt 47 | pub fn get_delay(&self, attempt: usize) -> Option { 48 | match self { 49 | Self::NoRetry => None, 50 | Self::Fixed { 51 | delay, 52 | max_attempts, 53 | } => { 54 | if attempt < *max_attempts { 55 | Some(*delay) 56 | } else { 57 | None 58 | } 59 | } 60 | Self::ExponentialBackoff { 61 | initial_delay, 62 | max_delay, 63 | max_attempts, 64 | multiplier, 65 | } => { 66 | if attempt < *max_attempts { 67 | let delay = Duration::from_secs_f64( 68 | initial_delay.as_secs_f64() * multiplier.powi(attempt as i32), 69 | ); 70 | Some(delay.min(*max_delay)) 71 | } else { 72 | None 73 | } 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus Core - A Flink-like stream processing engine in Rust 2 | //! 3 | //! This module contains the core abstractions and data types for stream processing. 
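//!
//! As an illustrative sketch (not part of the original sources), the configuration
//! types re-exported below can be built like this; how they are attached to a
//! `Pipeline` is shown in `pipeline::processor` and may differ in detail:
//!
//! ```ignore
//! use std::time::Duration;
//! use fluxus_core::{BackpressureStrategy, ParallelConfig, RetryStrategy};
//!
//! // Retry failed operations up to 3 times, doubling the delay each attempt.
//! let retry = RetryStrategy::exponential(
//!     Duration::from_millis(100),
//!     Duration::from_secs(10),
//!     3,
//!     2.0,
//! );
//!
//! // Throttle once 1000 records are in flight; resume below 100.
//! let backpressure = BackpressureStrategy::Throttle {
//!     high_watermark: 1000,
//!     low_watermark: 100,
//!     backoff: Duration::from_millis(50),
//! };
//!
//! // Run 4 parallel tasks while preserving record order.
//! let parallel = ParallelConfig::default()
//!     .with_parallelism(4)
//!     .with_preserve_order(true);
//! ```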
4 | 5 | pub mod config; 6 | pub mod error_handling; 7 | pub mod metrics; 8 | pub mod pipeline; 9 | 10 | // Re-export commonly used items 11 | pub use config::ParallelConfig; 12 | pub use error_handling::{ 13 | BackpressureController, BackpressureStrategy, ErrorHandler, RetryStrategy, 14 | }; 15 | pub use metrics::{Counter, Gauge, MetricValue, Metrics, Timer}; 16 | pub use pipeline::Pipeline; 17 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/metrics.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::sync::Arc; 3 | use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; 4 | use std::time::{Duration, Instant}; 5 | 6 | /// Counter for accumulating values 7 | #[derive(Debug, Default)] 8 | pub struct Counter { 9 | value: AtomicU64, 10 | } 11 | 12 | impl Counter { 13 | pub fn new() -> Self { 14 | Self { 15 | value: AtomicU64::new(0), 16 | } 17 | } 18 | 19 | pub fn increment(&self) { 20 | self.value.fetch_add(1, Ordering::Relaxed); 21 | } 22 | 23 | pub fn add(&self, value: u64) { 24 | self.value.fetch_add(value, Ordering::Relaxed); 25 | } 26 | 27 | pub fn value(&self) -> u64 { 28 | self.value.load(Ordering::Relaxed) 29 | } 30 | } 31 | 32 | /// Gauge for tracking current value 33 | #[derive(Debug, Default)] 34 | pub struct Gauge { 35 | value: AtomicI64, 36 | } 37 | 38 | impl Gauge { 39 | pub fn new() -> Self { 40 | Self { 41 | value: AtomicI64::new(0), 42 | } 43 | } 44 | 45 | pub fn set(&self, value: i64) { 46 | self.value.store(value, Ordering::Relaxed); 47 | } 48 | 49 | pub fn value(&self) -> i64 { 50 | self.value.load(Ordering::Relaxed) 51 | } 52 | } 53 | 54 | /// Timer for measuring durations 55 | #[derive(Debug)] 56 | pub struct Timer { 57 | start: Instant, 58 | duration_counter: Counter, 59 | count_counter: Counter, 60 | } 61 | 62 | impl Default for Timer { 63 | fn default() -> Self { 64 | Self::new() 65 | } 66 | } 67 | 68 | impl Timer { 69 | pub fn new() -> Self { 70 | Self { 71 | start: Instant::now(), 72 | duration_counter: Counter::new(), 73 | count_counter: Counter::new(), 74 | } 75 | } 76 | 77 | pub fn start(&mut self) { 78 | self.start = Instant::now(); 79 | } 80 | 81 | pub fn stop(&mut self) { 82 | let duration = self.start.elapsed(); 83 | self.duration_counter.add(duration.as_micros() as u64); 84 | self.count_counter.increment(); 85 | } 86 | 87 | /// Record a duration directly 88 | pub fn record(&mut self, duration: Duration) { 89 | self.duration_counter.add(duration.as_micros() as u64); 90 | self.count_counter.increment(); 91 | } 92 | 93 | pub fn average_duration_micros(&self) -> u64 { 94 | let total = self.duration_counter.value(); 95 | let count = self.count_counter.value(); 96 | if count == 0 { 0 } else { total / count } 97 | } 98 | } 99 | 100 | /// Metrics collection for pipeline monitoring 101 | #[derive(Debug, Default)] 102 | pub struct Metrics { 103 | counters: HashMap>, 104 | gauges: HashMap>, 105 | timers: HashMap>, 106 | } 107 | 108 | impl Metrics { 109 | pub fn new() -> Self { 110 | Self::default() 111 | } 112 | 113 | pub fn counter(&mut self, name: &str) -> Arc { 114 | self.counters 115 | .entry(name.to_string()) 116 | .or_insert_with(|| Arc::new(Counter::new())) 117 | .clone() 118 | } 119 | 120 | pub fn gauge(&mut self, name: &str) -> Arc { 121 | self.gauges 122 | .entry(name.to_string()) 123 | .or_insert_with(|| Arc::new(Gauge::new())) 124 | .clone() 125 | } 126 | 127 | pub fn timer(&mut self, name: &str) -> Arc { 128 | 
self.timers 129 | .entry(name.to_string()) 130 | .or_insert_with(|| Arc::new(Timer::new())) 131 | .clone() 132 | } 133 | 134 | pub fn snapshot(&self) -> HashMap { 135 | let mut snapshot = HashMap::new(); 136 | 137 | for (name, counter) in &self.counters { 138 | snapshot.insert(name.clone(), MetricValue::Counter(counter.value())); 139 | } 140 | 141 | for (name, gauge) in &self.gauges { 142 | snapshot.insert(name.clone(), MetricValue::Gauge(gauge.value())); 143 | } 144 | 145 | for (name, timer) in &self.timers { 146 | snapshot.insert( 147 | name.clone(), 148 | MetricValue::Timer { 149 | avg_micros: timer.average_duration_micros(), 150 | count: timer.count_counter.value(), 151 | }, 152 | ); 153 | } 154 | 155 | snapshot 156 | } 157 | } 158 | 159 | #[derive(Debug, Clone)] 160 | pub enum MetricValue { 161 | Counter(u64), 162 | Gauge(i64), 163 | Timer { avg_micros: u64, count: u64 }, 164 | } 165 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/pipeline/mod.rs: -------------------------------------------------------------------------------- 1 | mod processor; 2 | mod status; 3 | 4 | pub use processor::Pipeline; 5 | pub use status::PipelineStatus; 6 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/pipeline/processor.rs: -------------------------------------------------------------------------------- 1 | use super::status::PipelineStatus; 2 | use crate::BackpressureStrategy; 3 | use crate::Counter; 4 | use crate::ParallelConfig; 5 | use crate::RetryStrategy; 6 | use crate::Timer; 7 | use crate::error_handling::BackpressureController; 8 | use crate::error_handling::ErrorHandler; 9 | use crate::metrics::Metrics; 10 | use fluxus_sinks::Sink; 11 | use fluxus_sinks::dummy_sink::DummySink; 12 | use fluxus_sources::Source; 13 | use fluxus_transformers::operator::Operator; 14 | use fluxus_utils::models::Record; 15 | use fluxus_utils::models::StreamResult; 16 | use fluxus_utils::time::current_time; 17 | use fluxus_utils::window::WindowConfig; 18 | use std::sync::Arc; 19 | use std::time::{Duration, Instant}; 20 | use tokio::runtime::Handle; 21 | use tokio::time; 22 | use tracing; 23 | 24 | /// Represents a stream processing pipeline 25 | pub struct Pipeline { 26 | /// The data source 27 | source: Box>, 28 | /// The sequence of operators 29 | operators: Vec>>, 30 | /// The data sink 31 | sink: Box>, 32 | /// Window configuration (optional) 33 | window_config: Option, 34 | /// Parallel processing configuration 35 | parallel_config: ParallelConfig, 36 | /// Current pipeline status 37 | status: PipelineStatus, 38 | /// Last watermark timestamp 39 | last_watermark: i64, 40 | /// Metrics tracking 41 | metrics: Arc, 42 | process_timer: Arc, 43 | records_processed: Arc, 44 | records_failed: Arc, 45 | /// Error handling 46 | error_handler: ErrorHandler, 47 | /// Backpressure controller 48 | backpressure: BackpressureController, 49 | } 50 | 51 | impl Pipeline { 52 | /// Create a new pipeline with a source 53 | pub fn source + 'static>(source: S) -> Self { 54 | let mut metrics = Metrics::new(); 55 | let process_timer = metrics.timer("process_time"); 56 | let records_processed = metrics.counter("records_processed"); 57 | let records_failed = metrics.counter("records_failed"); 58 | 59 | Self { 60 | source: Box::new(source), 61 | operators: Vec::new(), 62 | sink: Box::new(DummySink::new()), 63 | window_config: None, 64 | parallel_config: ParallelConfig::default(), 65 | status: PipelineStatus::Ready, 66 | 
last_watermark: 0, 67 | metrics: Arc::new(metrics), 68 | process_timer, 69 | records_processed, 70 | records_failed, 71 | error_handler: ErrorHandler::new(RetryStrategy::exponential( 72 | Duration::from_millis(100), 73 | Duration::from_secs(10), 74 | 3, 75 | 2.0, 76 | )), 77 | backpressure: BackpressureController::new(BackpressureStrategy::Throttle { 78 | high_watermark: 1000, 79 | low_watermark: 100, 80 | backoff: Duration::from_millis(50), 81 | }), 82 | } 83 | } 84 | 85 | /// Add an operator to the pipeline 86 | pub fn add_operator + 'static>(mut self, operator: O) -> Self { 87 | self.operators.push(Box::new(operator)); 88 | self 89 | } 90 | 91 | /// Set the sink for the pipeline 92 | pub fn sink + 'static>(mut self, sink: S) -> Self { 93 | self.sink = Box::new(sink); 94 | self 95 | } 96 | 97 | /// Configure windowing for the pipeline 98 | pub fn window(mut self, config: WindowConfig) -> Self { 99 | self.window_config = Some(config); 100 | self 101 | } 102 | 103 | /// Configure parallel processing for the pipeline 104 | pub fn parallel(mut self, config: ParallelConfig) -> Self { 105 | self.parallel_config = config; 106 | self 107 | } 108 | 109 | /// Configure error handling strategy 110 | pub fn with_retry_strategy(mut self, strategy: RetryStrategy) -> Self { 111 | self.error_handler = ErrorHandler::new(strategy); 112 | self 113 | } 114 | 115 | /// Configure backpressure strategy 116 | pub fn with_backpressure_strategy(mut self, strategy: BackpressureStrategy) -> Self { 117 | self.backpressure = BackpressureController::new(strategy); 118 | self 119 | } 120 | 121 | /// Get current pipeline status 122 | pub fn status(&self) -> PipelineStatus { 123 | self.status 124 | } 125 | 126 | /// Get a snapshot of current metrics 127 | pub fn metrics(&self) -> &Arc { 128 | &self.metrics 129 | } 130 | 131 | /// Update watermark and trigger windows if needed 132 | async fn process_watermark(&mut self) -> StreamResult<()> { 133 | if let Some(window_config) = &self.window_config { 134 | let now = current_time() as i64; 135 | 136 | // Check if we should advance the watermark 137 | if now - self.last_watermark >= window_config.watermark_delay.as_millis() as i64 { 138 | self.last_watermark = now; 139 | 140 | // Trigger windows in all operators 141 | for op in &mut self.operators { 142 | let results = op.on_window_trigger().await?; 143 | for record in results { 144 | self.sink.write(record).await?; 145 | } 146 | } 147 | } 148 | } 149 | Ok(()) 150 | } 151 | 152 | /// Process a record through a single operator with retries 153 | async fn process_with_retry( 154 | error_handler: &ErrorHandler, 155 | op: &mut Box>, 156 | record: Record, 157 | ) -> StreamResult>> { 158 | let record = record.clone(); 159 | let op_ref = &mut **op; 160 | 161 | error_handler 162 | .retry(|| { 163 | let rt = Handle::current(); 164 | rt.block_on(op_ref.process(record.clone())) 165 | }) 166 | .await 167 | } 168 | 169 | /// Write a record to the sink with retries 170 | async fn write_with_retry( 171 | error_handler: &ErrorHandler, 172 | sink: &mut Box>, 173 | record: Record, 174 | ) -> StreamResult<()> { 175 | let record = record.clone(); 176 | let sink_ref = &mut **sink; 177 | 178 | error_handler 179 | .retry(|| { 180 | let rt = Handle::current(); 181 | rt.block_on(sink_ref.write(record.clone())) 182 | }) 183 | .await 184 | } 185 | 186 | /// Execute the pipeline with error handling and backpressure 187 | pub async fn execute(mut self) -> StreamResult<()> { 188 | self.status = PipelineStatus::Running; 189 | 190 | // Initialize components 
191 | self.source.init().await?; 192 | for op in &mut self.operators { 193 | op.init().await?; 194 | } 195 | self.sink.init().await?; 196 | 197 | let mut watermark_interval = time::interval(Duration::from_millis(100)); 198 | 199 | loop { 200 | if self.backpressure.should_apply_backpressure() { 201 | if let Some(backoff) = self.backpressure.get_backoff() { 202 | tracing::debug!("Applying backpressure, waiting for {:?}", backoff); 203 | time::sleep(backoff).await; 204 | continue; 205 | } 206 | } 207 | 208 | tokio::select! { 209 | result = self.source.next() => { 210 | match result { 211 | Ok(Some(record)) => { 212 | let start = Instant::now(); 213 | let mut records = vec![record]; 214 | let mut success = true; 215 | 216 | // Process through operators with retry 217 | for op in &mut self.operators { 218 | let mut next = Vec::new(); 219 | let current_records = std::mem::take(&mut records); 220 | 221 | for record in current_records { 222 | match Self::process_with_retry(&self.error_handler, op, record).await { 223 | Ok(mut results) => next.append(&mut results), 224 | Err(e) => { 225 | self.records_failed.increment(); 226 | success = false; 227 | tracing::error!("Operator error after retries: {}", e); 228 | break; 229 | } 230 | } 231 | } 232 | 233 | if !success { 234 | break; 235 | } 236 | records = next; 237 | } 238 | 239 | // Use the length before consuming records 240 | let record_count = records.len(); 241 | self.backpressure.update_load(record_count); 242 | 243 | if success { 244 | while let Some(record) = records.pop() { 245 | match Self::write_with_retry(&self.error_handler, &mut self.sink, record).await { 246 | Ok(_) => { 247 | self.records_processed.increment(); 248 | } 249 | Err(e) => { 250 | self.records_failed.increment(); 251 | tracing::error!("Sink error after retries: {}", e); 252 | } 253 | } 254 | } 255 | } 256 | 257 | if let Some(timer) = Arc::get_mut(&mut self.process_timer) { 258 | timer.record(start.elapsed()); 259 | } 260 | } 261 | Ok(None) => break, 262 | Err(e) => { 263 | self.records_failed.increment(); 264 | tracing::error!("Source error: {}", e); 265 | return Err(e); 266 | } 267 | } 268 | } 269 | 270 | _ = watermark_interval.tick() => { 271 | if let Err(e) = self.process_watermark().await { 272 | tracing::error!("Watermark error: {}", e); 273 | } 274 | } 275 | } 276 | } 277 | 278 | self.sink.flush().await?; 279 | self.sink.close().await?; 280 | self.status = PipelineStatus::Completed; 281 | Ok(()) 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/pipeline/status.rs: -------------------------------------------------------------------------------- 1 | /// Status of a pipeline execution 2 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 3 | pub enum PipelineStatus { 4 | /// Pipeline is initialized but not running 5 | Ready, 6 | /// Pipeline is currently running 7 | Running, 8 | /// Pipeline has completed successfully 9 | Completed, 10 | /// Pipeline has failed 11 | Failed, 12 | } 13 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-runtime" 3 | description = "Runtime implementation for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | 
fluxus-core = { path = "../fluxus-core", version="0.2" } 13 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 14 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2" } 15 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 16 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2" } 17 | 18 | tokio = { version = "1", features = ["full"] } 19 | futures = "0.3" 20 | async-trait = "0.1" 21 | tracing = "0.1" 22 | parking_lot = "0.12" 23 | dashmap = "5.5" 24 | bytes = "1.5" 25 | uuid = { version = "1.7", features = ["v4"] } 26 | 27 | [dev-dependencies] 28 | criterion = "0.6" 29 | 30 | [[bench]] 31 | name = "runtime_benchmark" 32 | path = "../../benches/runtime_benchmark.rs" 33 | harness = false 34 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Runtime 2 | 3 | Runtime engine and execution environment for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides the execution environment and runtime components for Fluxus: 8 | 9 | - Task execution and scheduling 10 | - Memory management 11 | - Threading and concurrency 12 | - Performance optimization 13 | - Resource management 14 | 15 | ## Key Components 16 | 17 | ### Task Execution 18 | 19 | - Parallel task execution 20 | - Work stealing scheduler 21 | - Back-pressure handling 22 | - Resource-aware scheduling 23 | 24 | ### Threading Model 25 | 26 | - Thread pool management 27 | - Thread-safe data structures 28 | - Lock-free algorithms 29 | - Efficient inter-thread communication 30 | 31 | ### Memory Management 32 | 33 | - Buffer management 34 | - Memory pooling 35 | - Efficient data serialization 36 | - Zero-copy optimizations 37 | 38 | ### Monitoring 39 | 40 | - Performance metrics 41 | - Resource usage tracking 42 | - Runtime statistics 43 | - Diagnostics (planned) 44 | 45 | ## Usage 46 | 47 | Add this to your `Cargo.toml`: 48 | 49 | ```toml 50 | [dependencies] 51 | fluxus-runtime = "0.2" 52 | ``` 53 | 54 | This crate is usually not used directly but through the `fluxus-api` crate. -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus Runtime - Execution engine for stream processing 2 | //! 3 | //! This module implements the runtime execution environment for Fluxus pipelines. 
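//!
//! Illustrative sketch (not part of the original sources) of the two auxiliary
//! building blocks in this crate, `state::KeyedStateBackend` and
//! `watermark::WatermarkTracker`; the generic parameters are written out here even
//! though this listing elides them elsewhere. Full pipeline execution goes through
//! `runtime::RuntimeContext::execute_pipeline`.
//!
//! ```ignore
//! use std::time::SystemTime;
//! use fluxus_runtime::state::KeyedStateBackend;
//! use fluxus_runtime::watermark::WatermarkTracker;
//!
//! // Keyed state: a lock-protected key/value store for stateful operators.
//! let state: KeyedStateBackend<String, u64> = KeyedStateBackend::new();
//! state.set("clicks".to_string(), 42);
//! assert_eq!(state.get(&"clicks".to_string()), Some(42));
//!
//! // Watermarks only move forward in event time.
//! let watermarks = WatermarkTracker::new();
//! watermarks.update(SystemTime::now());
//! let _current = watermarks.get_current();
//! ```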
4 | mod runtime; 5 | pub use runtime::RuntimeContext; 6 | 7 | /// State management for stateful operators 8 | pub mod state; 9 | 10 | /// Watermark tracking and propagation 11 | pub mod watermark; 12 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/runtime.rs: -------------------------------------------------------------------------------- 1 | use dashmap::DashMap; 2 | use fluxus_core::ParallelConfig; 3 | use fluxus_sinks::Sink; 4 | use fluxus_sources::Source; 5 | use fluxus_transformers::Operator; 6 | use fluxus_utils::models::{Record, StreamResult}; 7 | use std::sync::Arc; 8 | use tokio::sync::{Mutex, mpsc}; 9 | use tokio::task::JoinHandle; 10 | use uuid::Uuid; 11 | 12 | /// Runtime context for managing stream processing execution 13 | pub struct RuntimeContext { 14 | /// Task parallelism configuration 15 | parallel_config: ParallelConfig, 16 | /// Active task handles 17 | task_handles: Arc>>>, 18 | } 19 | 20 | impl RuntimeContext { 21 | pub fn new(parallel_config: ParallelConfig) -> Self { 22 | Self { 23 | parallel_config, 24 | task_handles: Arc::new(DashMap::new()), 25 | } 26 | } 27 | 28 | /// Execute a source-to-sink pipeline with operators 29 | pub async fn execute_pipeline( 30 | &self, 31 | source: S, 32 | operators: Vec + Send + Sync>>>, 33 | sink: K, 34 | ) -> StreamResult<()> 35 | where 36 | T: Clone + Send + Sync + 'static, 37 | S: Source + Send + Sync + 'static, 38 | K: Sink + Send + Sync + 'static, 39 | { 40 | let (tx, rx) = mpsc::channel(self.parallel_config.buffer_size); 41 | let source = Arc::new(Mutex::new(source)); 42 | let sink = Arc::new(Mutex::new(sink)); 43 | 44 | // Spawn source task 45 | let source_handle = self.spawn_source_task(source.clone(), tx.clone()); 46 | 47 | // Create channels for operator pipeline 48 | let mut curr_rx = rx; 49 | let mut handles = vec![source_handle]; 50 | 51 | // Spawn operator tasks 52 | for operator in operators { 53 | let (new_tx, new_rx) = mpsc::channel(self.parallel_config.buffer_size); 54 | let operator_handles = self.spawn_operator_tasks(operator, curr_rx, new_tx); 55 | handles.extend(operator_handles); 56 | curr_rx = new_rx; 57 | } 58 | 59 | // Spawn sink task 60 | let sink_handle = self.spawn_sink_task(sink.clone(), curr_rx); 61 | handles.push(sink_handle); 62 | 63 | // Store handles 64 | self.task_handles 65 | .insert(Uuid::new_v4().to_string(), handles); 66 | 67 | Ok(()) 68 | } 69 | 70 | fn spawn_source_task( 71 | &self, 72 | source: Arc>, 73 | tx: mpsc::Sender>, 74 | ) -> JoinHandle<()> 75 | where 76 | T: Clone + Send + 'static, 77 | S: Source + Send + 'static, 78 | { 79 | tokio::spawn(async move { 80 | loop { 81 | let mut source_guard = source.lock().await; 82 | match source_guard.next().await { 83 | Ok(Some(record)) => { 84 | if tx.send(record).await.is_err() { 85 | break; 86 | } 87 | } 88 | _ => break, 89 | } 90 | } 91 | let mut source_guard = source.lock().await; 92 | if let Err(e) = source_guard.close().await { 93 | tracing::error!("Error closing source: {:?}", e); 94 | } 95 | }) 96 | } 97 | 98 | fn spawn_operator_tasks( 99 | &self, 100 | operator: Arc + Send + Sync>>, 101 | rx: mpsc::Receiver>, 102 | tx: mpsc::Sender>, 103 | ) -> Vec> 104 | where 105 | T: Clone + Send + 'static, 106 | { 107 | let mut handles = Vec::new(); 108 | let rx = Arc::new(Mutex::new(rx)); 109 | 110 | for _ in 0..self.parallel_config.parallelism { 111 | let operator = Arc::clone(&operator); 112 | let rx = Arc::clone(&rx); 113 | let tx = tx.clone(); 114 | 115 | let handle = 
tokio::spawn(async move { 116 | loop { 117 | let record = { 118 | let mut rx = rx.lock().await; 119 | match rx.recv().await { 120 | Some(r) => r, 121 | None => break, 122 | } 123 | }; 124 | 125 | let mut op = operator.lock().await; 126 | if let Ok(results) = op.process(record).await { 127 | for result in results { 128 | if tx.send(result).await.is_err() { 129 | return; 130 | } 131 | } 132 | } 133 | } 134 | }); 135 | handles.push(handle); 136 | } 137 | 138 | handles 139 | } 140 | 141 | fn spawn_sink_task( 142 | &self, 143 | sink: Arc>, 144 | mut rx: mpsc::Receiver>, 145 | ) -> JoinHandle<()> 146 | where 147 | T: Clone + Send + 'static, 148 | K: Sink + Send + 'static, 149 | { 150 | tokio::spawn(async move { 151 | while let Some(record) = rx.recv().await { 152 | let mut sink_guard = sink.lock().await; 153 | if let Err(e) = sink_guard.write(record).await { 154 | tracing::error!("Error writing to sink: {:?}", e); 155 | } 156 | } 157 | 158 | let mut sink_guard = sink.lock().await; 159 | if let Err(e) = sink_guard.flush().await { 160 | tracing::error!("Error flushing sink: {:?}", e); 161 | } 162 | 163 | if let Err(e) = sink_guard.close().await { 164 | tracing::error!("Error closing sink: {:?}", e); 165 | } 166 | }) 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/state.rs: -------------------------------------------------------------------------------- 1 | use parking_lot::RwLock; 2 | use std::collections::HashMap; 3 | use std::hash::Hash; 4 | use std::sync::Arc; 5 | 6 | /// Simple key-value state backend 7 | #[derive(Default)] 8 | pub struct KeyedStateBackend { 9 | state: Arc>>, 10 | } 11 | 12 | impl KeyedStateBackend 13 | where 14 | K: Eq + Hash, 15 | { 16 | pub fn new() -> Self { 17 | Self { 18 | state: Arc::new(RwLock::new(HashMap::new())), 19 | } 20 | } 21 | 22 | pub fn get(&self, key: &K) -> Option 23 | where 24 | V: Clone, 25 | { 26 | self.state.read().get(key).cloned() 27 | } 28 | 29 | pub fn set(&self, key: K, value: V) { 30 | self.state.write().insert(key, value); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/watermark.rs: -------------------------------------------------------------------------------- 1 | use parking_lot::RwLock; 2 | use std::sync::Arc; 3 | use std::time::SystemTime; 4 | 5 | /// Watermark tracker for managing event time progress 6 | pub struct WatermarkTracker { 7 | current_watermark: Arc>, 8 | } 9 | 10 | impl Default for WatermarkTracker { 11 | fn default() -> Self { 12 | Self::new() 13 | } 14 | } 15 | 16 | impl WatermarkTracker { 17 | pub fn new() -> Self { 18 | Self { 19 | current_watermark: Arc::new(RwLock::new(SystemTime::now())), 20 | } 21 | } 22 | 23 | pub fn update(&self, watermark: SystemTime) { 24 | let mut current = self.current_watermark.write(); 25 | if watermark > *current { 26 | *current = watermark; 27 | } 28 | } 29 | 30 | pub fn get_current(&self) -> SystemTime { 31 | *self.current_watermark.read() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-sinks" 3 | description = "Sink components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 
10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | futures = "0.3" 19 | serde = { version = "1.0", features = ["derive"] } 20 | serde_json = "1.0" 21 | anyhow = "1.0" 22 | thiserror = "1.0" 23 | async-trait = "0.1" 24 | tracing = "0.1" 25 | num_cpus = "1.16" 26 | csv = "1.3" 27 | 28 | [dev-dependencies] 29 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 30 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Sinks 2 | 3 | Sink components for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides various sink implementations for the Fluxus stream processing engine, allowing processed data to be output to different destinations. 8 | 9 | ### Key Sinks 10 | - `BufferedSink` - Buffered output for efficient writes. 11 | - `ConsoleSink` - Output data to the console for debugging. 12 | - `DummySink` - A placeholder sink for testing. 13 | - `FileSink` - Write data to files. 14 | 15 | ## Usage 16 | 17 | Add this to your `Cargo.toml`: 18 | 19 | ```toml 20 | [dependencies] 21 | fluxus-sinks = "0.2" 22 | ``` -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/buffered.rs: -------------------------------------------------------------------------------- 1 | use crate::Sink; 2 | use async_trait::async_trait; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::time::{Duration, Instant}; 5 | 6 | /// A sink wrapper that provides buffering capabilities 7 | pub struct BufferedSink> { 8 | inner: S, 9 | buffer: Vec>, 10 | buffer_size: usize, 11 | flush_interval: Duration, 12 | last_flush: Instant, 13 | } 14 | 15 | impl> BufferedSink { 16 | /// Create a new buffered sink with the specified buffer size and flush interval 17 | pub fn new(inner: S, buffer_size: usize, flush_interval: Duration) -> Self { 18 | Self { 19 | inner, 20 | buffer: Vec::with_capacity(buffer_size), 21 | buffer_size, 22 | flush_interval, 23 | last_flush: Instant::now(), 24 | } 25 | } 26 | 27 | /// Force flush the buffer 28 | pub async fn force_flush(&mut self) -> StreamResult<()> { 29 | for record in self.buffer.drain(..) 
{ 30 | self.inner.write(record).await?; 31 | } 32 | self.inner.flush().await?; 33 | self.last_flush = Instant::now(); 34 | Ok(()) 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl + Send> Sink for BufferedSink { 40 | async fn init(&mut self) -> StreamResult<()> { 41 | self.inner.init().await 42 | } 43 | 44 | async fn write(&mut self, record: Record) -> StreamResult<()> { 45 | self.buffer.push(record); 46 | 47 | let should_flush = self.buffer.len() >= self.buffer_size 48 | || self.last_flush.elapsed() >= self.flush_interval; 49 | 50 | if should_flush { 51 | self.force_flush().await?; 52 | } 53 | 54 | Ok(()) 55 | } 56 | 57 | async fn flush(&mut self) -> StreamResult<()> { 58 | self.force_flush().await 59 | } 60 | 61 | async fn close(&mut self) -> StreamResult<()> { 62 | self.force_flush().await?; 63 | self.inner.close().await 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/console.rs: -------------------------------------------------------------------------------- 1 | use crate::{ConsoleFormatter, DefaultFormatter, Sink}; 2 | use async_trait::async_trait; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | /// A sink that writes to console 7 | #[derive(Default)] 8 | pub struct ConsoleSink { 9 | formatter: F, 10 | _phantom: PhantomData, 11 | } 12 | 13 | impl ConsoleSink { 14 | /// Create a new console sink with default formatter 15 | pub fn new() -> Self { 16 | Self { 17 | formatter: DefaultFormatter, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | impl ConsoleSink { 24 | /// Create a new console sink with custom formatter 25 | pub fn with_formatter(formatter: F) -> Self { 26 | Self { 27 | formatter, 28 | _phantom: PhantomData, 29 | } 30 | } 31 | } 32 | 33 | #[async_trait] 34 | impl Sink for ConsoleSink 35 | where 36 | T: Send, 37 | F: ConsoleFormatter + Send + Sync, 38 | { 39 | async fn init(&mut self) -> StreamResult<()> { 40 | Ok(()) 41 | } 42 | 43 | async fn write(&mut self, record: Record) -> StreamResult<()> { 44 | tracing::info!("{}", self.formatter.format(&record)); 45 | Ok(()) 46 | } 47 | 48 | async fn flush(&mut self) -> StreamResult<()> { 49 | Ok(()) 50 | } 51 | 52 | async fn close(&mut self) -> StreamResult<()> { 53 | Ok(()) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/dummy_sink.rs: -------------------------------------------------------------------------------- 1 | use crate::Sink; 2 | use fluxus_utils::models::Record; 3 | use fluxus_utils::models::StreamResult; 4 | use std::marker::PhantomData; 5 | 6 | /// A dummy sink that discards all records 7 | #[derive(Default)] 8 | pub struct DummySink { 9 | _phantom: PhantomData, 10 | } 11 | 12 | impl DummySink { 13 | pub fn new() -> Self { 14 | Self { 15 | _phantom: PhantomData, 16 | } 17 | } 18 | } 19 | 20 | #[async_trait::async_trait] 21 | impl Sink for DummySink { 22 | async fn init(&mut self) -> StreamResult<()> { 23 | Ok(()) 24 | } 25 | 26 | async fn write(&mut self, _record: Record) -> StreamResult<()> { 27 | Ok(()) 28 | } 29 | 30 | async fn flush(&mut self) -> StreamResult<()> { 31 | Ok(()) 32 | } 33 | 34 | async fn close(&mut self) -> StreamResult<()> { 35 | Ok(()) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/file.rs: -------------------------------------------------------------------------------- 1 | use crate::Sink; 2 | use 
async_trait::async_trait; 3 | use csv; 4 | use fluxus_utils::models::{Record, StreamResult}; 5 | use serde::Serialize; 6 | use serde_json; 7 | use std::marker::PhantomData; 8 | use std::path::PathBuf; 9 | use tokio::fs::File; 10 | use tokio::io::AsyncWriteExt; 11 | 12 | /// Output format for file sink 13 | #[derive(Clone, Debug)] 14 | pub enum FileFormat { 15 | /// Plain text format (one line per record) 16 | Text, 17 | /// CSV format 18 | Csv, 19 | /// JSON format (one JSON object per line) 20 | JsonLines, 21 | } 22 | 23 | /// A sink that writes to a file 24 | pub struct FileSink { 25 | path: PathBuf, 26 | format: FileFormat, 27 | file: Option, 28 | _phantom: PhantomData, 29 | } 30 | 31 | impl FileSink { 32 | /// Create a new file sink 33 | pub fn new>(path: P, format: FileFormat) -> Self { 34 | Self { 35 | path: path.into(), 36 | format, 37 | file: None, 38 | _phantom: PhantomData, 39 | } 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Sink for FileSink { 45 | async fn init(&mut self) -> StreamResult<()> { 46 | self.file = Some(File::create(&self.path).await?); 47 | Ok(()) 48 | } 49 | 50 | async fn write(&mut self, record: Record) -> StreamResult<()> { 51 | if let Some(file) = &mut self.file { 52 | match self.format { 53 | FileFormat::Text => { 54 | let line = format!("{}\n", serde_json::to_string(&record.data)?); 55 | file.write_all(line.as_bytes()).await?; 56 | } 57 | FileFormat::Csv => { 58 | let mut wtr = csv::Writer::from_writer(Vec::new()); 59 | wtr.serialize(&record.data)?; 60 | let inner = wtr.into_inner()?; 61 | let data = String::from_utf8(inner)?; 62 | file.write_all(data.as_bytes()).await?; 63 | } 64 | FileFormat::JsonLines => { 65 | let line = format!("{}\n", serde_json::to_string(&record.data)?); 66 | file.write_all(line.as_bytes()).await?; 67 | } 68 | } 69 | } 70 | Ok(()) 71 | } 72 | 73 | async fn flush(&mut self) -> StreamResult<()> { 74 | if let Some(file) = &mut self.file { 75 | file.flush().await?; 76 | } 77 | Ok(()) 78 | } 79 | 80 | async fn close(&mut self) -> StreamResult<()> { 81 | if let Some(mut file) = self.file.take() { 82 | file.flush().await?; 83 | } 84 | Ok(()) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod buffered; 2 | pub mod console; 3 | pub mod dummy_sink; 4 | pub mod file; 5 | 6 | pub use buffered::BufferedSink; 7 | pub use console::ConsoleSink; 8 | pub use file::FileSink; 9 | 10 | use async_trait::async_trait; 11 | use fluxus_utils::models::{Record, StreamResult}; 12 | use std::fmt::Display; 13 | 14 | /// Sink trait defines the interface for data output 15 | #[async_trait] 16 | pub trait Sink { 17 | /// Initialize the sink 18 | async fn init(&mut self) -> StreamResult<()>; 19 | 20 | /// Write a record to the sink 21 | async fn write(&mut self, record: Record) -> StreamResult<()>; 22 | 23 | /// Flush any buffered data 24 | async fn flush(&mut self) -> StreamResult<()>; 25 | 26 | /// Close the sink and release resources 27 | async fn close(&mut self) -> StreamResult<()>; 28 | } 29 | 30 | /// Formatter for console output 31 | pub trait ConsoleFormatter { 32 | fn format(&self, record: &Record) -> String; 33 | } 34 | 35 | /// Default formatter that uses Display 36 | pub struct DefaultFormatter; 37 | 38 | impl ConsoleFormatter for DefaultFormatter { 39 | fn format(&self, record: &Record) -> String { 40 | format!("[{}] {}", record.timestamp, record.data) 41 | } 42 | } 43 | 
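// Illustrative sketch (not part of the original file): plugging a custom formatter
// into `ConsoleSink::with_formatter`. `PrefixedFormatter` is a hypothetical name,
// and the generic parameter on `ConsoleFormatter` is an assumption, since this
// listing elides type parameters.
//
// struct PrefixedFormatter;
//
// impl ConsoleFormatter<String> for PrefixedFormatter {
//     fn format(&self, record: &Record<String>) -> String {
//         // Same shape as DefaultFormatter, with a static prefix added.
//         format!("fluxus [{}] {}", record.timestamp, record.data)
//     }
// }
//
// let sink = ConsoleSink::with_formatter(PrefixedFormatter);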
-------------------------------------------------------------------------------- /crates/fluxus-sources/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-sources" 3 | description = "Source components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | futures = "0.3" 19 | serde = { version = "1.0", features = ["derive"] } 20 | serde_json = "1.0" 21 | anyhow = "1.0" 22 | thiserror = "1.0" 23 | async-trait = "0.1" 24 | tracing = "0.1" 25 | num_cpus = "1.16" 26 | csv = "1.3" 27 | tokio-util = { version = "0.7.15", features = ["io"] } 28 | reqwest = { version = "0.12.15", features = ["stream"] } 29 | 30 | [dev-dependencies] 31 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 32 | tempfile = "3" -------------------------------------------------------------------------------- /crates/fluxus-sources/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Sources 2 | 3 | Source components for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides various source implementations for the Fluxus stream processing engine, allowing data to be ingested from different sources. 8 | 9 | ### Key Sources 10 | - `CsvSource` - Read data from CSV files. 11 | - `GeneratorSource` - Generate data for testing purposes. 
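
Both sources implement the async `Source` trait (`init` / `next` / `close`). As an
illustrative sketch (the file path is a placeholder and error handling is kept minimal):

```rust
use fluxus_sources::{CsvSource, Source};
use fluxus_utils::models::StreamResult;

#[tokio::main]
async fn main() -> StreamResult<()> {
    // Read a local CSV file line by line; `CsvSource::from_url` works the same
    // way for a remote file.
    let mut source = CsvSource::new("data/events.csv");
    source.init().await?;

    // Each record wraps one trimmed line of the file as a `String`.
    while let Some(record) = source.next().await? {
        println!("{}", record.data);
    }

    source.close().await?;
    Ok(())
}
```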
12 | 13 | ## Usage 14 | 15 | Add this to your `Cargo.toml`: 16 | 17 | ```toml 18 | [dependencies] 19 | fluxus-sources = "0.2" 20 | ``` -------------------------------------------------------------------------------- /crates/fluxus-sources/src/csv.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamError, StreamResult}; 3 | use futures::TryStreamExt; 4 | use reqwest; 5 | use std::io::{self, Error}; 6 | use std::path::PathBuf; 7 | use std::time::Duration; 8 | use tokio::fs::File; 9 | use tokio::io::{AsyncBufReadExt, BufReader}; 10 | use tokio_util::io::StreamReader; 11 | 12 | use super::Source; 13 | 14 | /// A source that reads CSV files 15 | pub struct CsvSource { 16 | source: CsvSourceType, 17 | reader: Option>, 18 | } 19 | 20 | enum CsvSourceType { 21 | LocalFile(PathBuf), 22 | RemoteUrl(String), 23 | } 24 | 25 | impl CsvSource { 26 | /// Create a new CSV source from a local file path 27 | pub fn new>(path: P) -> Self { 28 | Self { 29 | source: CsvSourceType::LocalFile(path.into()), 30 | reader: None, 31 | } 32 | } 33 | 34 | /// Create a new CSV source from a remote URL 35 | pub fn from_url>(url: S) -> Self { 36 | Self { 37 | source: CsvSourceType::RemoteUrl(url.into()), 38 | reader: None, 39 | } 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Source for CsvSource { 45 | async fn init(&mut self) -> StreamResult<()> { 46 | match &self.source { 47 | CsvSourceType::LocalFile(path) => { 48 | let file = File::open(path) 49 | .await 50 | .map_err(|e| StreamError::Io(Error::other(format!("{}", e))))?; 51 | self.reader = Some(Box::new(BufReader::new(file))); 52 | } 53 | CsvSourceType::RemoteUrl(url) => { 54 | let client = reqwest::Client::builder() 55 | .timeout(Duration::from_secs(30)) 56 | .build() 57 | .map_err(|_e| StreamError::Io(io::Error::other("create http client error")))?; 58 | let response = client.get(url).send().await.map_err(|e| { 59 | StreamError::Io(Error::other(format!("Failed to fetch URL: {}", e))) 60 | })?; 61 | 62 | if !response.status().is_success() { 63 | return Err(StreamError::Io(Error::other(format!( 64 | "HTTP error: {}", 65 | response.status() 66 | )))); 67 | } 68 | 69 | let byte_stream = response 70 | .bytes_stream() 71 | .map_err(|e| Error::other(format!("{}", e))); 72 | 73 | let reader = StreamReader::new(byte_stream); 74 | self.reader = Some(Box::new(BufReader::new(reader))); 75 | } 76 | } 77 | Ok(()) 78 | } 79 | 80 | async fn next(&mut self) -> StreamResult>> { 81 | if let Some(reader) = &mut self.reader { 82 | let mut line = String::new(); 83 | match reader.read_line(&mut line).await { 84 | Ok(0) => Ok(None), // EOF 85 | Ok(_) => { 86 | let line = line.trim().to_string(); 87 | Ok(Some(Record::new(line))) 88 | } 89 | Err(e) => Err(e.into()), 90 | } 91 | } else { 92 | Ok(None) 93 | } 94 | } 95 | 96 | async fn close(&mut self) -> StreamResult<()> { 97 | self.reader = None; 98 | Ok(()) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /crates/fluxus-sources/src/generator.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | use std::marker::PhantomData; 4 | 5 | use super::Source; 6 | 7 | /// A source that generates test data 8 | pub struct GeneratorSource 9 | where 10 | F: FnMut() -> Option + Send, 11 | { 12 | generator: F, 13 | _phantom: PhantomData, 14 | } 15 | 16 | impl 
GeneratorSource 17 | where 18 | F: FnMut() -> Option + Send, 19 | { 20 | /// Create a new generator source 21 | pub fn new(generator: F) -> Self { 22 | Self { 23 | generator, 24 | _phantom: PhantomData, 25 | } 26 | } 27 | 28 | /// Create a counting source that generates numbers from start to end (inclusive) 29 | pub fn counter(start: i64, end: i64) -> GeneratorSource<i64, impl FnMut() -> Option<i64>> { 30 | // The cursor is captured by the closure, so each counter instance counts independently. 31 | let mut current = start; 32 | GeneratorSource::new(move || { 33 | if current <= end { 34 | let value = current; 35 | current += 1; 36 | Some(value) 37 | } else { 38 | None 39 | } 40 | }) 41 | } 42 | } 43 | 44 | #[async_trait] 45 | impl Source for GeneratorSource 46 | where 47 | T: Send, 48 | F: FnMut() -> Option + Send + Sync, 49 | { 50 | async fn init(&mut self) -> StreamResult<()> { 51 | Ok(()) 52 | } 53 | 54 | async fn next(&mut self) -> StreamResult>> { 55 | Ok((self.generator)().map(Record::new)) 56 | } 57 | 58 | async fn close(&mut self) -> StreamResult<()> { 59 | Ok(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/fluxus-sources/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod csv; 2 | pub mod generator; 3 | 4 | pub use csv::CsvSource; 5 | 6 | use fluxus_utils::models::{Record, StreamResult}; 7 | pub use generator::GeneratorSource; 8 | 9 | use async_trait::async_trait; 10 | 11 | /// Source trait defines the interface for data sources 12 | #[async_trait] 13 | pub trait Source { 14 | /// Initialize the source 15 | async fn init(&mut self) -> StreamResult<()>; 16 | 17 | /// Read the next record from the source 18 | async fn next(&mut self) -> StreamResult>>; 19 | 20 | /// Close the source and release resources 21 | async fn close(&mut self) -> StreamResult<()>; 22 | } 23 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-transformers" 3 | description = "Transformer components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 16 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2" } 17 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 18 | 19 | tokio = { version = "1", features = ["full"] } 20 | futures = "0.3" 21 | serde = { version = "1.0", features = ["derive"] } 22 | serde_json = "1.0" 23 | anyhow = "1.0" 24 | thiserror = "1.0" 25 | async-trait = "0.1" 26 | tracing = "0.1" 27 | num_cpus = "1.16" 28 | csv = "1.3" 29 | 30 | [dev-dependencies] 31 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 32 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Transformers 2 | 3 | Transformer components for the Fluxus stream processing engine.
4 | 5 | ## Overview 6 | 7 | This crate provides various transformation implementations for the Fluxus stream processing engine, allowing data to be processed and transformed in different ways. 8 | 9 | ### Key Transformers 10 | - `TransformSource` - Basic data transformation. 11 | - `TransformSourceWithOperator` - Data transformation with custom operators. 12 | 13 | ## Usage 14 | 15 | Add this to your `Cargo.toml`: 16 | 17 | ```toml 18 | [dependencies] 19 | fluxus-transformers = "0.2" 20 | ``` -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod operator; 2 | mod transform_base; 3 | mod transform_source; 4 | mod transform_source_with_operator; 5 | 6 | pub use operator::{Operator, OperatorBuilder}; 7 | pub use transform_base::TransformBase; 8 | pub use transform_source::TransformSource; 9 | pub use transform_source_with_operator::TransformSourceWithOperator; 10 | 11 | use fluxus_sources::Source; 12 | 13 | pub type InnerSource = dyn Source + Send + Sync; 14 | pub type InnerOperator = dyn Operator + Send + Sync; 15 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/builder.rs: -------------------------------------------------------------------------------- 1 | use super::{FilterOperator, MapOperator, WindowReduceOperator}; 2 | use fluxus_utils::window::WindowConfig; 3 | 4 | /// Builder for creating stream operators 5 | pub struct OperatorBuilder; 6 | 7 | // Add type aliases at the module level 8 | type AveragePair = (T, usize); 9 | type AverageReduceFn = 10 | Box, AveragePair) -> AveragePair + Send + Sync>; 11 | 12 | impl OperatorBuilder { 13 | /// Create a new map operator 14 | pub fn map(func: F) -> MapOperator 15 | where 16 | F: Fn(In) -> Out + Send + Sync, 17 | { 18 | MapOperator::new(func) 19 | } 20 | 21 | /// Create a new filter operator 22 | pub fn filter(predicate: F) -> FilterOperator 23 | where 24 | F: Fn(&T) -> bool + Send + Sync, 25 | { 26 | FilterOperator::new(predicate) 27 | } 28 | 29 | /// Create a new window reduce operator 30 | pub fn window_reduce(func: F, window: WindowConfig) -> WindowReduceOperator 31 | where 32 | F: Fn(T, T) -> T + Send + Sync, 33 | T: Clone, 34 | { 35 | WindowReduceOperator::new(func, window) 36 | } 37 | 38 | /// Helper to create a sum operator with a window 39 | pub fn sum_window(window: WindowConfig) -> WindowReduceOperator T> 40 | where 41 | T: std::ops::Add + Clone + Send, 42 | { 43 | Self::window_reduce(|a, b| a + b, window) 44 | } 45 | 46 | /// Helper to create a count operator with a window 47 | pub fn count_window( 48 | window: WindowConfig, 49 | ) -> WindowReduceOperator usize> { 50 | Self::window_reduce(|count, _| count + 1, window) 51 | } 52 | 53 | /// Helper to create an average operator with a window 54 | pub fn avg_window( 55 | window: WindowConfig, 56 | ) -> WindowReduceOperator, AverageReduceFn> 57 | where 58 | T: std::ops::Add + Clone + Send + 'static, 59 | { 60 | Self::window_reduce( 61 | Box::new(|(sum1, count1), (sum2, count2)| (sum1 + sum2, count1 + count2)), 62 | window, 63 | ) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/filter.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | use 
std::marker::PhantomData; 4 | 5 | /// Built-in filter operator 6 | pub struct FilterOperator 7 | where 8 | F: Fn(&T) -> bool + Send + Sync, 9 | { 10 | func: F, 11 | _phantom: PhantomData, 12 | } 13 | 14 | impl FilterOperator 15 | where 16 | F: Fn(&T) -> bool + Send + Sync, 17 | { 18 | pub fn new(func: F) -> Self { 19 | Self { 20 | func, 21 | _phantom: PhantomData, 22 | } 23 | } 24 | } 25 | 26 | #[async_trait] 27 | impl super::Operator for FilterOperator 28 | where 29 | T: Send, 30 | F: Fn(&T) -> bool + Send + Sync, 31 | { 32 | async fn process(&mut self, record: Record) -> StreamResult>> { 33 | if (self.func)(&record.data) { 34 | Ok(vec![record]) 35 | } else { 36 | Ok(vec![]) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/map.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | use std::marker::PhantomData; 4 | 5 | /// Built-in map operator 6 | pub struct MapOperator 7 | where 8 | F: Fn(In) -> Out + Send + Sync, 9 | { 10 | func: F, 11 | _phantom_in: PhantomData, 12 | _phantom_out: PhantomData, 13 | } 14 | 15 | impl MapOperator 16 | where 17 | F: Fn(In) -> Out + Send + Sync, 18 | { 19 | pub fn new(func: F) -> Self { 20 | Self { 21 | func, 22 | _phantom_in: PhantomData, 23 | _phantom_out: PhantomData, 24 | } 25 | } 26 | } 27 | 28 | #[async_trait] 29 | impl super::Operator for MapOperator 30 | where 31 | In: Send, 32 | Out: Send, 33 | F: Fn(In) -> Out + Send + Sync, 34 | { 35 | async fn process(&mut self, record: Record) -> StreamResult>> { 36 | let output = (self.func)(record.data); 37 | Ok(vec![Record::with_timestamp(output, record.timestamp)]) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/mod.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | 4 | mod builder; 5 | mod filter; 6 | mod map; 7 | mod window_match; 8 | mod window_reduce; 9 | 10 | pub use builder::OperatorBuilder; 11 | pub use filter::FilterOperator; 12 | pub use map::MapOperator; 13 | pub use window_match::{WindowAllOperator, WindowAnyOperator}; 14 | pub use window_reduce::WindowReduceOperator; 15 | 16 | /// Operator trait defines the interface for stream processing operators 17 | #[async_trait] 18 | pub trait Operator: Send { 19 | /// Initialize the operator 20 | async fn init(&mut self) -> StreamResult<()> { 21 | Ok(()) 22 | } 23 | 24 | /// Process a single record and return zero or more output records 25 | async fn process(&mut self, record: Record) -> StreamResult>>; 26 | 27 | /// Called when a window is triggered (if windowing is enabled) 28 | async fn on_window_trigger(&mut self) -> StreamResult>> { 29 | Ok(Vec::new()) 30 | } 31 | 32 | /// Close the operator and release resources 33 | async fn close(&mut self) -> StreamResult<()> { 34 | Ok(()) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/window_match.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, marker::PhantomData}; 2 | 3 | use async_trait::async_trait; 4 | use fluxus_utils::{ 5 | models::{Record, StreamResult}, 6 | window::WindowConfig, 7 | }; 8 | 9 | use 
super::Operator; 10 | 11 | pub struct WindowAnyOperator { 12 | func: F, 13 | window: WindowConfig, 14 | buffer: HashMap>>, 15 | _phantom: PhantomData, 16 | } 17 | 18 | impl WindowAnyOperator 19 | where 20 | T: Clone, 21 | F: Fn(&T) -> bool + Send + Sync, 22 | { 23 | pub fn new(func: F, window: WindowConfig) -> Self { 24 | Self { 25 | func, 26 | window, 27 | buffer: HashMap::new(), 28 | _phantom: PhantomData, 29 | } 30 | } 31 | 32 | fn get_affected_windows(&self, timestamp: i64) -> Vec { 33 | self.window.window_type.get_affected_windows(timestamp) 34 | } 35 | 36 | fn process_window(&self, records: &[Record]) -> Option> { 37 | records.first().map(|first| Record { 38 | data: records.iter().any(|record| (self.func)(&record.data)), 39 | timestamp: first.timestamp, 40 | }) 41 | } 42 | } 43 | 44 | #[async_trait] 45 | impl Operator for WindowAnyOperator 46 | where 47 | T: Clone + Send + 'static, 48 | F: Fn(&T) -> bool + Send + Sync, 49 | { 50 | async fn process(&mut self, record: Record) -> StreamResult>> { 51 | let mut results = Vec::new(); 52 | 53 | // Get all windows that this record belongs to 54 | let window_keys = self.get_affected_windows(record.timestamp); 55 | 56 | // Add the record to all relevant windows 57 | for window_key in window_keys { 58 | let records = self.buffer.entry(window_key).or_default(); 59 | records.push(record.clone()); 60 | 61 | // Process each affected window 62 | let window_records = records.clone(); 63 | if let Some(result) = self.process_window(&window_records) { 64 | results.push(result); 65 | } 66 | } 67 | 68 | Ok(results) 69 | } 70 | } 71 | 72 | pub struct WindowAllOperator { 73 | func: F, 74 | window: WindowConfig, 75 | buffer: HashMap>>, 76 | _phantom: PhantomData, 77 | } 78 | 79 | impl WindowAllOperator 80 | where 81 | T: Clone, 82 | F: Fn(&T) -> bool + Send + Sync, 83 | { 84 | pub fn new(func: F, window: WindowConfig) -> Self { 85 | Self { 86 | func, 87 | window, 88 | buffer: HashMap::new(), 89 | _phantom: PhantomData, 90 | } 91 | } 92 | 93 | fn get_affected_windows(&self, timestamp: i64) -> Vec { 94 | self.window.window_type.get_affected_windows(timestamp) 95 | } 96 | 97 | fn process_window(&self, records: &[Record]) -> Option> { 98 | // records is guaranteed to be non-empty at this point, so first() can be used safely 99 | records.first().map(|first| Record { 100 | data: records.iter().all(|record| (self.func)(&record.data)), 101 | timestamp: first.timestamp, 102 | }) 103 | } 104 | } 105 | 106 | #[async_trait] 107 | impl Operator for WindowAllOperator 108 | where 109 | T: Clone + Send + 'static, 110 | F: Fn(&T) -> bool + Send + Sync, 111 | { 112 | async fn process(&mut self, record: Record) -> StreamResult>> { 113 | let mut results = Vec::new(); 114 | 115 | // Get all windows that this record belongs to 116 | let window_keys = self.get_affected_windows(record.timestamp); 117 | 118 | // Add the record to all relevant windows 119 | for window_key in window_keys { 120 | let records = self.buffer.entry(window_key).or_default(); 121 | records.push(record.clone()); 122 | 123 | // Process each affected window 124 | let window_records = records.clone(); 125 | if let Some(result) = self.process_window(&window_records) { 126 | results.push(result); 127 | } 128 | } 129 | 130 | Ok(results) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/window_reduce.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record,
StreamResult}; 3 | use fluxus_utils::time::current_time; 4 | use fluxus_utils::window::{WindowConfig, WindowType}; 5 | use std::collections::HashMap; 6 | use std::marker::PhantomData; 7 | 8 | /// Built-in window reduce operator 9 | pub struct WindowReduceOperator 10 | where 11 | T: Clone, 12 | F: Fn(T, T) -> T + Send + Sync, 13 | { 14 | func: F, 15 | window: WindowConfig, 16 | buffer: HashMap>>, 17 | _phantom: PhantomData, 18 | } 19 | 20 | impl WindowReduceOperator 21 | where 22 | T: Clone, 23 | F: Fn(T, T) -> T + Send + Sync, 24 | { 25 | pub fn new(func: F, window: WindowConfig) -> Self { 26 | Self { 27 | func, 28 | window, 29 | buffer: HashMap::new(), 30 | _phantom: PhantomData, 31 | } 32 | } 33 | 34 | fn get_affected_windows(&self, timestamp: i64) -> Vec { 35 | self.window.window_type.get_affected_windows(timestamp) 36 | } 37 | 38 | fn process_window(&self, records: &[Record]) -> Option> { 39 | records.first().map(|first| { 40 | let result = records[1..].iter().fold(first.data.clone(), |acc, record| { 41 | (self.func)(acc, record.data.clone()) 42 | }); 43 | Record { 44 | data: result, 45 | timestamp: first.timestamp, 46 | } 47 | }) 48 | } 49 | } 50 | 51 | #[async_trait] 52 | impl super::Operator for WindowReduceOperator 53 | where 54 | T: Clone + Send, 55 | F: Fn(T, T) -> T + Send + Sync, 56 | { 57 | async fn process(&mut self, record: Record) -> StreamResult>> { 58 | let mut results = Vec::new(); 59 | 60 | // Get all windows that this record belongs to 61 | let window_keys = self.get_affected_windows(record.timestamp); 62 | 63 | // Add the record to all relevant windows 64 | for window_key in window_keys { 65 | let records = self.buffer.entry(window_key).or_default(); 66 | records.push(record.clone()); 67 | 68 | // Process each affected window 69 | let window_records = records.clone(); 70 | if let Some(result) = self.process_window(&window_records) { 71 | results.push(result); 72 | } 73 | } 74 | 75 | Ok(results) 76 | } 77 | 78 | async fn on_window_trigger(&mut self) -> StreamResult>> { 79 | let mut results = Vec::new(); 80 | let now = current_time() as i64; 81 | 82 | // Process and remove expired windows 83 | let expired_keys: Vec<_> = self 84 | .buffer 85 | .keys() 86 | .filter(|&&key| match &self.window.window_type { 87 | WindowType::Tumbling(duration) => { 88 | key + duration.as_millis() as i64 89 | + self.window.allow_lateness.as_millis() as i64 90 | <= now 91 | } 92 | WindowType::Sliding(size, _) => { 93 | key + size.as_millis() as i64 + self.window.allow_lateness.as_millis() as i64 94 | <= now 95 | } 96 | WindowType::Session(gap) => { 97 | key + gap.as_millis() as i64 + self.window.allow_lateness.as_millis() as i64 98 | <= now 99 | } 100 | WindowType::Global => { 101 | // Global window doesn't expire based on time, so it's never considered expired here 102 | false 103 | } 104 | }) 105 | .cloned() 106 | .collect(); 107 | 108 | for key in expired_keys { 109 | if let Some(records) = self.buffer.remove(&key) { 110 | if let Some(result) = self.process_window(&records) { 111 | results.push(result); 112 | } 113 | } 114 | } 115 | 116 | Ok(results) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/transform_base.rs: -------------------------------------------------------------------------------- 1 | use fluxus_utils::models::{Record, StreamResult}; 2 | use std::sync::Arc; 3 | 4 | use crate::{InnerOperator, InnerSource}; 5 | 6 | #[derive(Clone)] 7 | pub struct TransformBase { 8 | inner: Arc>, 9 | 
operators: Vec>>, 10 | } 11 | 12 | impl TransformBase { 13 | pub fn new(inner: Arc>) -> Self { 14 | Self { 15 | inner, 16 | operators: Vec::new(), 17 | } 18 | } 19 | 20 | pub fn set_operators(&mut self, operators: Vec>>) { 21 | self.operators = operators; 22 | } 23 | 24 | pub async fn process_operators(&mut self, record: Record) -> StreamResult>> { 25 | let mut records = vec![record]; 26 | 27 | for op in &self.operators { 28 | let mut processed = Vec::new(); 29 | 30 | for rec in records { 31 | let operator = Arc::clone(op); 32 | let results = unsafe { 33 | // Safe because we have exclusive access through &mut self 34 | let op = &mut *(Arc::as_ptr(&operator) as *mut InnerOperator); 35 | op.process(rec).await? 36 | }; 37 | 38 | processed.extend(results); 39 | } 40 | 41 | if processed.is_empty() { 42 | return Ok(Vec::new()); 43 | } 44 | 45 | records = processed; 46 | } 47 | 48 | Ok(records) 49 | } 50 | 51 | pub async fn get_next_record(&mut self) -> StreamResult>> { 52 | let inner = Arc::clone(&self.inner); 53 | unsafe { 54 | // Safe because we have exclusive access through &mut self 55 | let source = &mut *(Arc::as_ptr(&inner) as *mut InnerSource); 56 | source.next().await 57 | } 58 | } 59 | 60 | pub async fn close_inner(&mut self) -> StreamResult<()> { 61 | let inner = Arc::clone(&self.inner); 62 | unsafe { 63 | // Safe because we have exclusive access through &mut self 64 | let source = &mut *(Arc::as_ptr(&inner) as *mut InnerSource); 65 | source.close().await 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/transform_source.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sources::Source; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::sync::Arc; 5 | 6 | use crate::{InnerOperator, InnerSource, TransformBase}; 7 | 8 | #[derive(Clone)] 9 | pub struct TransformSource { 10 | base: TransformBase, 11 | buffer: Vec>, 12 | } 13 | 14 | impl TransformSource { 15 | pub fn new(inner: Arc>) -> Self { 16 | Self { 17 | base: TransformBase::new(inner), 18 | buffer: Vec::new(), 19 | } 20 | } 21 | 22 | pub fn set_operators(&mut self, operators: Vec>>) { 23 | self.base.set_operators(operators); 24 | } 25 | } 26 | 27 | #[async_trait] 28 | impl Source for TransformSource { 29 | async fn init(&mut self) -> StreamResult<()> { 30 | Ok(()) 31 | } 32 | 33 | async fn next(&mut self) -> StreamResult>> { 34 | // If we have records in the buffer, return one 35 | if !self.buffer.is_empty() { 36 | return Ok(self.buffer.pop()); 37 | } 38 | 39 | let record = self.base.get_next_record().await?; 40 | 41 | // If there's no next record, return None 42 | let Some(record) = record else { 43 | return Ok(None); 44 | }; 45 | 46 | let records = self.base.process_operators(record).await?; 47 | 48 | if records.is_empty() { 49 | return self.next().await; 50 | } 51 | 52 | self.buffer = records; 53 | self.buffer.reverse(); 54 | 55 | Ok(self.buffer.pop()) 56 | } 57 | 58 | async fn close(&mut self) -> StreamResult<()> { 59 | self.base.close_inner().await 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/transform_source_with_operator.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sources::Source; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::sync::Arc; 5 | 6 | 
use crate::{InnerOperator, InnerSource, Operator, TransformBase}; 7 | 8 | /// A source that applies a single operator transformation 9 | #[derive(Clone)] 10 | pub struct TransformSourceWithOperator 11 | where 12 | T: Clone, 13 | R: Clone, 14 | { 15 | base: TransformBase, 16 | operator: Arc>, 17 | buffer: Vec>, 18 | } 19 | 20 | impl TransformSourceWithOperator 21 | where 22 | T: Clone + Send + Sync + 'static, 23 | R: Clone + Send + Sync + 'static, 24 | { 25 | pub fn new( 26 | inner: Arc>, 27 | operator: O, 28 | operators: Vec>>, 29 | ) -> Self 30 | where 31 | O: Operator + Send + Sync + 'static, 32 | { 33 | let mut base = TransformBase::new(inner); 34 | base.set_operators(operators); 35 | Self { 36 | base, 37 | operator: Arc::new(operator), 38 | buffer: Vec::new(), 39 | } 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Source for TransformSourceWithOperator 45 | where 46 | T: Clone + Send + Sync + 'static, 47 | R: Clone + Send + Sync + 'static, 48 | { 49 | async fn init(&mut self) -> StreamResult<()> { 50 | Ok(()) 51 | } 52 | 53 | async fn next(&mut self) -> StreamResult>> { 54 | if !self.buffer.is_empty() { 55 | return Ok(self.buffer.pop()); 56 | } 57 | let record = self.base.get_next_record().await?; 58 | 59 | // If there's no next record, return None 60 | let Some(record) = record else { 61 | return Ok(None); 62 | }; 63 | 64 | let records = self.base.process_operators(record).await?; 65 | 66 | if records.is_empty() { 67 | return self.next().await; 68 | } 69 | 70 | let mut final_results = Vec::new(); 71 | for rec in records { 72 | final_results.extend(unsafe { 73 | let op = &mut *(Arc::as_ptr(&self.operator) as *mut InnerOperator); 74 | op.process(rec).await? 75 | }); 76 | } 77 | self.buffer = final_results; 78 | self.buffer.reverse(); 79 | 80 | Ok(self.buffer.pop()) 81 | } 82 | 83 | async fn close(&mut self) -> StreamResult<()> { 84 | self.base.close_inner().await 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /crates/fluxus-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-utils" 3 | description = "Utility components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | tokio = { version = "1", features = ["full"] } 16 | futures = "0.3" 17 | serde = { version = "1.0", features = ["derive"] } 18 | serde_json = "1.0" 19 | anyhow = "1.0" 20 | thiserror = "1.0" 21 | async-trait = "0.1" 22 | tracing = "0.1" 23 | num_cpus = "1.16" 24 | csv = "1.3" 25 | 26 | [dev-dependencies] 27 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 28 | -------------------------------------------------------------------------------- /crates/fluxus-utils/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Utils 2 | 3 | Utility components for the Fluxus stream processing engine. This crate provides a set of useful tools and helper functions to support the development and operation of Fluxus. 4 | 5 | ## Overview 6 | 7 | The `fluxus-utils` crate exposes three core modules: `error_converters`, `models`, and `window`. 
These modules can be utilized across different parts of the Fluxus ecosystem to streamline common tasks and improve overall system functionality. 8 | 9 | ### `error_converters` 10 | The `error_converters` module contains utility functions for converting between different error types. This is particularly useful when dealing with errors that may be encountered in different parts of the Fluxus system. 11 | 12 | ### `models` 13 | The `models` module defines various data structures used throughout the Fluxus ecosystem. These models include configuration settings, event data, and other essential components. 14 | 15 | ### `window` 16 | The `window` module provides functionality for managing time-based windows in Fluxus. This is particularly useful for tasks such as aggregating data over time intervals. 17 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/error_converters.rs: -------------------------------------------------------------------------------- 1 | use crate::models::StreamError; 2 | use csv; 3 | use serde_json; 4 | 5 | /// Error converter for CSV errors 6 | impl From for StreamError { 7 | fn from(err: csv::Error) -> Self { 8 | StreamError::Serialization(err.to_string()) 9 | } 10 | } 11 | 12 | /// Error converter for UTF-8 errors 13 | impl From for StreamError { 14 | fn from(err: std::string::FromUtf8Error) -> Self { 15 | StreamError::Serialization(err.to_string()) 16 | } 17 | } 18 | 19 | /// Error converter for serde_json errors 20 | impl From for StreamError { 21 | fn from(err: serde_json::Error) -> Self { 22 | StreamError::Serialization(err.to_string()) 23 | } 24 | } 25 | 26 | /// Error converter for CSV writer's IntoInnerError 27 | impl From> for StreamError { 28 | fn from(err: csv::IntoInnerError) -> Self { 29 | StreamError::Serialization(err.to_string()) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod error_converters; 2 | pub mod models; 3 | pub mod time; 4 | pub mod window; 5 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/models.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | use crate::time::current_time; 4 | 5 | /// Record represents a single data record in the stream 6 | #[derive(Debug, Clone)] 7 | pub struct Record { 8 | /// The actual data payload 9 | pub data: T, 10 | /// Timestamp of the record (in milliseconds) 11 | pub timestamp: i64, 12 | } 13 | 14 | impl Record { 15 | /// Create a new record with the current timestamp 16 | pub fn new(data: T) -> Self { 17 | let timestamp = current_time() as i64; 18 | Record { data, timestamp } 19 | } 20 | 21 | /// Create a new record with a specific timestamp 22 | pub fn with_timestamp(data: T, timestamp: i64) -> Self { 23 | Record { data, timestamp } 24 | } 25 | } 26 | 27 | /// Error types that can occur during stream processing 28 | #[derive(Error, Debug)] 29 | pub enum StreamError { 30 | #[error("IO error: {0}")] 31 | Io(#[from] std::io::Error), 32 | 33 | #[error("Serialization error: {0}")] 34 | Serialization(String), 35 | 36 | #[error("Configuration error: {0}")] 37 | Config(String), 38 | 39 | #[error("Runtime error: {0}")] 40 | Runtime(String), 41 | 42 | #[error("EOF")] 43 | EOF, 44 | 45 | #[error("Wait for {0} milliseconds")] 46 | Wait(u64), 47 | } 48 | 49 | /// A 
Result type specialized for stream processing operations 50 | pub type StreamResult = Result; 51 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/time.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | 3 | pub fn current_time() -> u128 { 4 | SystemTime::now() 5 | .duration_since(UNIX_EPOCH) 6 | .unwrap_or_default() 7 | .as_millis() 8 | } 9 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/window.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | /// Window type for stream processing 4 | #[derive(Debug, Clone)] 5 | pub enum WindowType { 6 | /// Tumbling window with fixed size 7 | Tumbling(Duration), 8 | /// Sliding window with size and slide interval 9 | Sliding(Duration, Duration), 10 | /// Session window with gap timeout 11 | Session(Duration), 12 | /// Global window, no window boundaries 13 | Global, 14 | } 15 | 16 | /// Configuration for windowed operations 17 | #[derive(Debug, Clone)] 18 | pub struct WindowConfig { 19 | /// Type of the window 20 | pub window_type: WindowType, 21 | /// Whether to allow late arrivals 22 | pub allow_lateness: Duration, 23 | /// Watermark strategy (time to wait before processing) 24 | pub watermark_delay: Duration, 25 | } 26 | 27 | impl WindowConfig { 28 | /// Create a new tumbling window configuration 29 | pub fn tumbling(size: Duration) -> Self { 30 | Self { 31 | window_type: WindowType::Tumbling(size), 32 | allow_lateness: Duration::from_secs(0), 33 | watermark_delay: Duration::from_secs(0), 34 | } 35 | } 36 | 37 | /// Create a new sliding window configuration 38 | pub fn sliding(size: Duration, slide: Duration) -> Self { 39 | Self { 40 | window_type: WindowType::Sliding(size, slide), 41 | allow_lateness: Duration::from_secs(0), 42 | watermark_delay: Duration::from_secs(0), 43 | } 44 | } 45 | 46 | /// Create a new session window configuration 47 | pub fn session(gap: Duration) -> Self { 48 | Self { 49 | window_type: WindowType::Session(gap), 50 | allow_lateness: Duration::from_secs(0), 51 | watermark_delay: Duration::from_secs(0), 52 | } 53 | } 54 | 55 | /// Create a new global window configuration 56 | pub fn global() -> Self { 57 | Self { 58 | window_type: WindowType::Global, 59 | allow_lateness: Duration::from_secs(0), 60 | watermark_delay: Duration::from_secs(0), 61 | } 62 | } 63 | 64 | /// Set the allowed lateness for this window 65 | pub fn with_lateness(mut self, lateness: Duration) -> Self { 66 | self.allow_lateness = lateness; 67 | self 68 | } 69 | 70 | /// Set the watermark delay for this window 71 | pub fn with_watermark_delay(mut self, delay: Duration) -> Self { 72 | self.watermark_delay = delay; 73 | self 74 | } 75 | } 76 | 77 | impl WindowType { 78 | fn get_common_windows(&self, timestamp: i64) -> Vec { 79 | match self { 80 | WindowType::Tumbling(duration) => { 81 | let duration_ms = duration.as_millis() as i64; 82 | vec![(timestamp / duration_ms) * duration_ms] 83 | } 84 | WindowType::Sliding(size, slide) => { 85 | let slide_ms = slide.as_millis() as i64; 86 | let size_ms = size.as_millis() as i64; 87 | let earliest_window = ((timestamp - size_ms) / slide_ms) * slide_ms; 88 | let latest_window = (timestamp / slide_ms) * slide_ms; 89 | 90 | (earliest_window..=latest_window) 91 | .step_by(slide.as_millis() as usize) 92 | .filter(|&start| timestamp - start < size_ms) 93 | 
.collect() 94 | } 95 | WindowType::Session(gap) => { 96 | let gap_ms = gap.as_millis() as i64; 97 | vec![timestamp / gap_ms] 98 | } 99 | WindowType::Global => { 100 | vec![0] 101 | } 102 | } 103 | } 104 | 105 | pub fn get_affected_windows(&self, timestamp: i64) -> Vec { 106 | self.get_common_windows(timestamp) 107 | } 108 | 109 | pub fn get_window_keys(&self, timestamp: i64) -> Vec { 110 | self.get_common_windows(timestamp) 111 | .iter() 112 | .map(|&ts| ts as u64) 113 | .collect() 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /crates/fluxus/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus" 3 | description = "Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real-time data processing and analysis." 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | fluxus-api = { path = "../fluxus-api", version="0.2", optional = true } 13 | fluxus-core = { path = "../fluxus-core", version="0.2", optional = true } 14 | fluxus-runtime = { path = "../fluxus-runtime", version="0.2", optional = true } 15 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2", optional = true } 16 | fluxus-sources = { path = "../fluxus-sources", version="0.2", optional = true } 17 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2", optional = true } 18 | fluxus-utils = { path = "../fluxus-utils", version="0.2", optional = true } 19 | 20 | tokio = { version = "1", features = ["full"] } 21 | futures = "0.3" 22 | serde = { version = "1.0", features = ["derive"] } 23 | serde_json = "1.0" 24 | anyhow = "1.0" 25 | thiserror = "1.0" 26 | async-trait = "0.1" 27 | tracing = "0.1" 28 | num_cpus = "1.16" 29 | csv = "1.3" 30 | 31 | [features] 32 | # Include nothing by default 33 | default = [] 34 | 35 | # enable everything 36 | full = [ 37 | "fluxus-api", 38 | "fluxus-core", 39 | "fluxus-runtime", 40 | "fluxus-sinks", 41 | "fluxus-sources", 42 | "fluxus-transformers", 43 | "fluxus-utils" 44 | ] 45 | -------------------------------------------------------------------------------- /crates/fluxus/README.md: -------------------------------------------------------------------------------- 1 |

2 | ![Fluxus Logo](../../docs/images/fluxus-logo.png) 3 |

4 | 5 | # Fluxus Stream Processing Engine 6 | 7 | [![Crates.io](https://img.shields.io/crates/v/fluxus-core.svg)](https://crates.io/crates/fluxus-core) 8 | [![Documentation](https://docs.rs/fluxus-core/badge.svg)](https://docs.rs/fluxus-core) 9 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache2.0-yellow.svg)](https://opensource.org/license/apache-2-0) 10 | [Build Status](https://github.com/lispking/fluxus/actions?query=branch%3Amain) 11 | 12 | 13 | Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real-time data processing and analysis. 14 | 15 | ![Fluxus Architecture](../../docs/architecture.png) 16 | 17 | ## Features 18 | 19 | - High-performance stream processing 20 | - Flexible windowing operations (Tumbling, Sliding, Session windows) 21 | - Parallel processing support 22 | - Rich set of stream operations (map, filter, aggregate) 23 | - Type-safe API 24 | - Easy to use and extend 25 | 26 | ## Project Structure 27 | 28 | - `crates/fluxus` - Main crate containing the Fluxus engine and its dependencies 29 | - `crates/fluxus-api` - Core API definitions and interfaces 30 | - `crates/fluxus-core` - Core implementations and data structures 31 | - `crates/fluxus-runtime` - Runtime engine and execution environment 32 | - `crates/fluxus-sinks` - Sink implementations for different data sinks (e.g., Kafka, Console) 33 | - `crates/fluxus-sources` - Source implementations for different data sources (e.g., Kafka, Console) 34 | - `crates/fluxus-transformers` - Transformations for stream processing (e.g., map, filter, aggregate) 35 | - `crates/fluxus-utils` - Utility functions and helpers 36 | - `examples` - Example applications demonstrating usage 37 | 38 | ## Examples 39 | 40 | The project includes several example applications that demonstrate different use cases: 41 | 42 | ### Word Count 43 | 44 | Simple word frequency analysis in text streams using tumbling windows. 45 | 46 | ```bash 47 | cargo run --example word-count 48 | ``` 49 | 50 | ### Temperature Sensor Analysis 51 | 52 | Processing and analyzing temperature sensor data with sliding windows. 53 | 54 | ```bash 55 | cargo run --example temperature-sensor 56 | ``` 57 | 58 | ### Click Stream Analysis 59 | 60 | Analyzing user click streams with session windows. 61 | 62 | ```bash 63 | cargo run --example click-stream 64 | ``` 65 | 66 | ### Network Log Analysis 67 | 68 | Processing network logs with sliding windows and aggregations. 69 | 70 | ```bash 71 | cargo run --example network-log 72 | ``` 73 | 74 | ### View Available Examples 75 | 76 | To see all available examples and options: 77 | 78 | ```bash 79 | cargo run --example 80 | ``` 81 | 82 | ## Getting Started 83 | 84 | 1. Clone the repository: 85 | 86 | ```bash 87 | git clone https://github.com/lispking/fluxus.git 88 | cd fluxus 89 | ``` 90 | 91 | 2. Build the project: 92 | 93 | ```bash 94 | cargo build 95 | ``` 96 | 97 | 3. Run the examples: 98 | 99 | ```bash 100 | cargo run --example [example-name] 101 | ``` 102 | 103 | ## Development 104 | 105 | ### Prerequisites 106 | 107 | - Rust 1.75+ 108 | - Cargo 109 | 110 | ### Building 111 | 112 | ```bash 113 | cargo build 114 | ``` 115 | 116 | ### Testing 117 | 118 | ```bash 119 | cargo test 120 | ``` 121 | 122 | ## License 123 | 124 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
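
## Window Configuration Sketch

As a supplement to the windowing features listed above, the sketch below shows how the three window types can be configured through `WindowConfig` (re-exported as `fluxus::utils::window` when the `full` or `fluxus-utils` feature is enabled). It is a minimal illustration only: the helper name `example_window_configs` and all durations are placeholders, not recommended defaults.

```rust
use fluxus::utils::window::WindowConfig;
use std::time::Duration;

fn example_window_configs() -> (WindowConfig, WindowConfig, WindowConfig) {
    // Tumbling: fixed-size, non-overlapping 10-second windows
    let tumbling = WindowConfig::tumbling(Duration::from_secs(10));

    // Sliding: 60-second windows that advance every 10 seconds
    let sliding = WindowConfig::sliding(Duration::from_secs(60), Duration::from_secs(10));

    // Session: a window closes after 30 seconds of inactivity,
    // here also tolerating records that arrive up to 5 seconds late
    let session = WindowConfig::session(Duration::from_secs(30))
        .with_lateness(Duration::from_secs(5));

    (tumbling, sliding, session)
}
```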
-------------------------------------------------------------------------------- /crates/fluxus/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus - A powerful stream processing framework in Rust 2 | //! 3 | //! Fluxus is a high-performance stream processing framework inspired by Flink, 4 | //! designed to provide a seamless experience for building and running data processing pipelines in Rust. 5 | //! It offers a rich set of APIs and components to handle various data sources, sinks, and transformations. 6 | //! 7 | //! ## Add Dependencies 8 | //! To use Fluxus, you need to add the following dependencies to your `Cargo.toml`: 9 | //! ```shell 10 | //! cargo add fluxus --features full 11 | //! ``` 12 | //! 13 | //! ## Word Count Example 14 | //! Here is a word count example using Fluxus: 15 | //! ```rust 16 | //! use anyhow::Result; 17 | //! use fluxus::api::{ 18 | //! DataStream, 19 | //! io::{CollectionSink, CollectionSource}, 20 | //! }; 21 | //! use fluxus::utils::window::WindowConfig; 22 | //! use std::collections::HashMap; 23 | //! use std::time::Duration; 24 | //! 25 | //! pub type WordCount = HashMap; 26 | //! 27 | //! #[tokio::main] 28 | //! async fn main() -> Result<()> { 29 | //! // Sample input text 30 | //! let text = vec![ 31 | //! "hello world", 32 | //! "hello stream processing", 33 | //! "world of streaming", 34 | //! "hello streaming world", 35 | //! ]; 36 | //! 37 | //! // Create a source from the text collection 38 | //! let source = CollectionSource::new(text); 39 | //! let sink: CollectionSink = CollectionSink::new(); 40 | //! 41 | //! // Build and execute the streaming pipeline 42 | //! DataStream::new(source) 43 | //! // Split text into words 44 | //! .map(|line| { 45 | //! line.split_whitespace() 46 | //! .map(|s| s.to_lowercase()) 47 | //! .collect::>() 48 | //! }) 49 | //! // Parallelize the processing 50 | //! .parallel(2) 51 | //! // Create tumbling windows of 1 second 52 | //! .window(WindowConfig::tumbling(Duration::from_millis(1000))) 53 | //! // Count words in each window 54 | //! .aggregate(HashMap::new(), |mut counts, words| { 55 | //! for word in words { 56 | //! *counts.entry(word).or_insert(0) += 1; 57 | //! } 58 | //! counts 59 | //! }) 60 | //! // Write results to sink 61 | //! .sink(sink.clone()) 62 | //! .await?; 63 | //! 64 | //! // Print the results 65 | //! println!("\nWord count results:"); 66 | //! for result in sink.get_data() { 67 | //! println!("\nWindow results:"); 68 | //! let mut words: Vec<_> = result.iter().collect(); 69 | //! words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 70 | //! for (word, count) in words { 71 | //! println!(" {}: {}", word, count); 72 | //! } 73 | //! } 74 | //! 75 | //! Ok(()) 76 | //! } 77 | //! 
78 | 79 | #[cfg(feature = "fluxus-api")] 80 | pub mod api { 81 | pub use fluxus_api::*; 82 | } 83 | 84 | #[cfg(feature = "fluxus-core")] 85 | pub mod core { 86 | pub use fluxus_core::*; 87 | } 88 | 89 | #[cfg(feature = "fluxus-runtime")] 90 | pub mod runtime { 91 | pub use fluxus_runtime::*; 92 | } 93 | 94 | #[cfg(feature = "fluxus-sinks")] 95 | pub mod sinks { 96 | pub use fluxus_sinks::*; 97 | } 98 | 99 | #[cfg(feature = "fluxus-sources")] 100 | pub mod sources { 101 | pub use fluxus_sources::*; 102 | } 103 | 104 | #[cfg(feature = "fluxus-transformers")] 105 | pub mod transformers { 106 | pub use fluxus_transformers::*; 107 | } 108 | 109 | #[cfg(feature = "fluxus-utils")] 110 | pub mod utils { 111 | pub use fluxus_utils::*; 112 | } 113 | -------------------------------------------------------------------------------- /docs/DESIGN.md: -------------------------------------------------------------------------------- 1 | # Fluxus Stream Processing Engine Design Document 2 | 3 | ## 1. Introduction 4 | 5 | Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real - time data processing and analysis. It provides high - performance stream processing capabilities with a type - safe API, making it easy to use and extend. 6 | 7 | ## 2. Features 8 | 9 | ### 2.1 High - Performance Stream Processing 10 | 11 | Fluxus is optimized for real - time data processing, leveraging Rust's performance characteristics to handle high - volume data streams efficiently. 12 | 13 | ### 2.2 Flexible Windowing Operations 14 | 15 | - **Tumbling Windows**: Fixed - size, non - overlapping windows. 16 | - **Sliding Windows**: Fixed - size, overlapping windows. 17 | - **Session Windows**: Variable - size windows based on inactivity gaps. 18 | 19 | ### 2.3 Parallel Processing Support 20 | 21 | The engine supports parallel processing of data streams, allowing for better utilization of multi - core processors. 22 | 23 | ### 2.4 Rich Set of Stream Operations 24 | 25 | Fluxus provides a variety of stream operations, including `map`, `filter`, and `aggregate`, enabling users to perform complex data transformations. 26 | 27 | ### 2.5 Type - Safe API 28 | 29 | The API is type - safe, reducing the likelihood of runtime errors and providing better developer experience. 30 | 31 | ## 3. Architecture 32 | 33 | ### 3.1 Core Components 34 | 35 | - **`fluxus`**: The main crate that serves as the entry point for the Fluxus engine. It provides the API for creating and managing stream processing tasks. 36 | - **`fluxus-api`**: Defines the core API and interfaces for the Fluxus engine. It serves as the contract between different components of the engine and user applications. 37 | - **`fluxus-core`**: Contains the core implementations and data structures. This component is responsible for handling the internal logic of stream processing, such as windowing and operation execution. 38 | - **`fluxus-runtime`**: Provides the runtime engine and execution environment. It manages the execution of stream processing tasks, including resource allocation and task scheduling. 39 | - **`fluxus-sinks`**: Provides sinks for outputting processed data. Sinks can be used to write data to various destinations, such as databases, files, or external services. 40 | - **`fluxus-sources`**: Provides sources for ingesting data streams. Sources can be used to read data from various sources, such as Kafka, RabbitMQ, or files. 41 | - ** `fluxus-transformers`**: Contains transformers for performing data transformations. 
Transformers can be used to perform operations on data streams, such as mapping, filtering, or aggregating. 42 | - ** `fluxus-utils`**: Contains utility functions and helper classes. These utilities provide common functionality, such as serialization and deserialization, error handling, and configuration management. 43 | 44 | ### 3.2 Data Flow 45 | 46 | 1. **Data Ingestion**: Data streams are ingested into the engine. 47 | 2. **Stream Processing**: The data streams are processed using the defined operations and windowing strategies. 48 | 3. **Result Output**: The processed results are outputted to the specified destinations. 49 | 50 | ## 4. Design Principles 51 | 52 | ### 4.1 Performance - Oriented 53 | 54 | The engine is designed with performance in mind. Rust's memory management and concurrency features are fully utilized to achieve high throughput and low latency. 55 | 56 | ### 4.2 Flexibility 57 | 58 | Fluxus provides flexible windowing operations and a rich set of stream operations, allowing users to adapt the engine to different use cases. 59 | 60 | ### 4.3 Ease of Use 61 | 62 | The type - safe API and well - structured project make it easy for developers to use and extend the engine. 63 | 64 | ## 5. Example Applications 65 | 66 | ### 5.1 Word Count 67 | 68 | A simple word frequency analysis in text streams using tumbling windows. 69 | 70 | ```bash 71 | cargo run --example word-count 72 | ``` 73 | 74 | ### 5.2 Temperature Sensor Analysis 75 | 76 | Processing and analyzing temperature sensor data with sliding windows. 77 | 78 | ```bash 79 | cargo run --example temperature-sensor 80 | ``` 81 | 82 | ### 5.3 Click Stream Analysis 83 | 84 | Analyzing user click streams with session windows. 85 | 86 | ```bash 87 | cargo run --example click-stream 88 | ``` 89 | 90 | ### 5.4 Network Log Analysis 91 | 92 | Processing network logs with sliding windows and aggregations. 93 | 94 | ```bash 95 | cargo run --example network-log 96 | ``` 97 | 98 | ### 5.5 IoT Device Analysis 99 | 100 | Processing IoT device data with tumbling windows and aggregations. 101 | 102 | ```bash 103 | cargo run --example iot-devices 104 | ``` 105 | 106 | ### 5.6 Log Anomaly Detection 107 | 108 | Detecting anomalies in log streams using sliding windows and aggregations. 109 | 110 | ```bash 111 | cargo run --example log-anomaly 112 | ``` 113 | 114 | ### 5.7 Stock Analysis 115 | 116 | Analyzing stock price data with tumbling windows and aggregations. 117 | 118 | ```bash 119 | cargo run --example stock-market 120 | ``` 121 | 122 | 123 | ## 6. Development 124 | 125 | ### 6.1 Prerequisites 126 | 127 | - Rust 1.75+ 128 | - Cargo 129 | 130 | ### 6.2 Building 131 | 132 | ```bash 133 | cargo build 134 | ``` 135 | 136 | ### 6.3 Testing 137 | 138 | ```bash 139 | cargo test 140 | ``` 141 | 142 | ## 7. Conclusion 143 | 144 | Fluxus is a powerful and flexible stream processing engine. Its design emphasizes performance, flexibility, and ease of use. With a rich set of features and a well - structured architecture, it can be used in various real - time data processing scenarios. 
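
## 8. Appendix: Illustrative Data Flow Sketch

To make the data flow described in Section 3.2 concrete, the sketch below wires the three stages together using the public API that the bundled examples rely on: a `CollectionSource` for ingestion, `DataStream` transformations with a window for processing, and a `CollectionSink` for output. It is a minimal illustration with made-up data and an arbitrary window size, not a prescribed configuration.

```rust
use anyhow::Result;
use fluxus::api::{
    DataStream,
    io::{CollectionSink, CollectionSource},
};
use fluxus::utils::window::WindowConfig;
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<()> {
    // 1. Data ingestion: wrap an in-memory collection as a source
    let source = CollectionSource::new(vec![1u32, 2, 3, 4, 5]);
    let sink: CollectionSink<u32> = CollectionSink::new();

    // 2. Stream processing: transform, window, and aggregate the records
    DataStream::new(source)
        .map(|n| n * 2)
        .window(WindowConfig::tumbling(Duration::from_millis(100)))
        .aggregate(0u32, |sum, n| sum + n)
        // 3. Result output: write each window's result to the sink
        .sink(sink.clone())
        .await?;

    println!("Window sums: {:?}", sink.get_data());
    Ok(())
}
```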
145 | -------------------------------------------------------------------------------- /docs/Logo.md: -------------------------------------------------------------------------------- 1 | # Mascot Name: Fluko 2 | 3 | ### Image Setting: 4 | - Species: Anthropomorphic otter (The otter is associated with "flow" and "flexibility", and has a strong affinity) 5 | - Color: The main color is a gradient of blue and green, symbolizing flowing data and calm calculation 6 | 7 | - Body Shape: 8 | - Streamlined body, and the tail is shaped like the icon of a data flow diagram 9 | - There are lightning-shaped pupils in the eyes (representing high-speed processing) 10 | - There is a "node" device hanging on the belt, similar to a small chip (symbolizing node processing) 11 | 12 | - Clothing: 13 | - Wearing a simple high-tech style jacket, with a small "λ" (Lambda, representing functional calculation) printed on the sleeves 14 | - There is a small energy backpack on the back, emitting a halo, symbolizing the continuous flow of computing energy 15 | 16 | ### Personality Setting: 17 | - Smart but not arrogant: Represents the intelligence of Fluxus, yet is open and friendly 18 | - Quick to react: Always wears a headset, ready to respond to event or message streams at any time 19 | - Likes water and light: Prefers flowing environments and often shuttles through the digital river or the ocean of information 20 | 21 | ## Application Scenarios: 22 | - Fluko can appear in the official website animation, symbolizing event processing by running or swimming 23 | - It can be made into Slack stickers, the mascot on the startup screen, a small assistant in documents, etc. 24 | - When attending open-source events, as a peripheral doll, it can also attract the attention of developers 25 | -------------------------------------------------------------------------------- /docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lispking/fluxus/7bbbb2f83a21add8b118aea69f0fb01249b280c6/docs/architecture.png -------------------------------------------------------------------------------- /docs/images/fluxus-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lispking/fluxus/7bbbb2f83a21add8b118aea69f0fb01249b280c6/docs/images/fluxus-logo.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Examples 2 | 3 | A collection of example applications demonstrating the usage of the Fluxus stream processing engine. 4 | 5 | ## Available Examples 6 | 7 | ### 1. Word Count (`word-count`) 8 | 9 | Demonstrates basic stream processing with tumbling windows: 10 | - Splits text into words 11 | - Counts word frequencies in time windows 12 | - Shows parallel processing capabilities 13 | 14 | ```bash 15 | cargo run --example word-count 16 | ``` 17 | 18 | ### 2. Temperature Sensor Analysis (`temperature-sensor`) 19 | 20 | Shows how to process IoT sensor data: 21 | - Processes multiple sensor readings 22 | - Calculates min/max/average temperatures 23 | - Uses sliding windows for continuous monitoring 24 | 25 | ```bash 26 | cargo run --example temperature-sensor 27 | ``` 28 | 29 | ### 3. 
Click Stream Analysis (`click-stream`) 30 | 31 | Demonstrates session window usage for user behavior analysis: 32 | - Tracks user navigation patterns 33 | - Groups events into sessions 34 | - Analyzes user engagement metrics 35 | 36 | ```bash 37 | cargo run --example click-stream 38 | ``` 39 | 40 | ### 4. Network Log Analysis (`network-log`) 41 | 42 | Shows advanced stream processing features: 43 | - Processes HTTP access logs 44 | - Calculates request statistics 45 | - Uses sliding windows with custom aggregations 46 | 47 | ```bash 48 | cargo run --example network-log 49 | ``` 50 | 51 | ### 5. IoT Device Analysis (`iot-devices`) 52 | 53 | Demonstrates how to process various IoT device data: 54 | - Processes sensor data from different devices 55 | - Calculates device status statistics 56 | - Uses tumbling windows for real-time monitoring 57 | 58 | ```bash 59 | cargo run --example iot-devices 60 | ``` 61 | 62 | ### 6. Log Anomaly Detection (`log-anomaly`) 63 | 64 | Demonstrates log anomaly detection capabilities: 65 | - Processes system log data 66 | - Detects abnormal log patterns 67 | - Uses custom windows for anomaly analysis 68 | 69 | ```bash 70 | cargo run --example log-anomaly 71 | ``` 72 | 73 | ### 7. Stock Market Analysis (`stock-market`) 74 | 75 | Demonstrates stock market data processing: 76 | - Processes real-time stock price data 77 | - Calculates stock price indicators 78 | - Uses session windows to analyze trading patterns 79 | 80 | ```bash 81 | cargo run --example stock-market 82 | ``` 83 | 84 | ## Example Structure 85 | 86 | Each example follows a similar pattern: 87 | 1. Define data structures 88 | 2. Create a data source 89 | 3. Build processing pipeline 90 | 4. Configure windows 91 | 5. Define aggregations 92 | 6. Output results 93 | 94 | ## Learning Path 95 | 96 | We recommend going through the examples in this order: 97 | 1. Word Count - Basic concepts 98 | 2. Temperature Sensor - Time-based windows 99 | 3. Click Stream - Session windows 100 | 4. Network Log - Advanced features 101 | 5. IoT Devices - Multiple data sources 102 | 6. Log Anomaly - Custom windows 103 | 7. Stock Market - Real-time monitoring 104 | 8. [GitHub Archive](https://github.com/fluxus-labs/fluxus-source-gharchive/tree/main/examples) - Count event type from GitHub archive file -------------------------------------------------------------------------------- /examples/click-stream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "click-stream" 3 | description = "Click stream analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "click-stream" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/click-stream/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Click Stream Analysis Example 3 | 4 | This example demonstrates how to use Fluxus to process click - stream data and implement user session analysis. 
It simulates the browsing behavior of users on an e - commerce website, including the processing of page visits and click events. 5 | 6 | ## Features 7 | 8 | - User session tracking 9 | - Page browsing path analysis 10 | - Session duration statistics 11 | - Event aggregation processing 12 | 13 | ## Running the Example 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## Implementation Details 20 | 21 | - Use session windows (30 - second timeout) to group user behaviors 22 | - Filter and process page visit events 23 | - Calculate session duration and total number of events 24 | - Record the user's page visit sequence 25 | 26 | ## Output Example 27 | 28 | ``` 29 | Click stream analysis results: 30 | 31 | Session window results: 32 | User user1: 4 events over 30s, Pages: home -> products -> cart -> checkout 33 | ``` 34 | 35 | ## Dependencies 36 | 37 | - fluxus - core 38 | - fluxus - runtime 39 | - fluxus - api 40 | - tokio 41 | - anyhow 42 | -------------------------------------------------------------------------------- /examples/click-stream/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | pub struct ClickEvent { 12 | user_id: String, 13 | page_id: String, 14 | event_type: String, 15 | timestamp: SystemTime, 16 | } 17 | 18 | #[derive(Clone)] 19 | pub struct UserSession { 20 | user_id: String, 21 | page_views: Vec, 22 | start_time: SystemTime, 23 | duration_secs: u64, 24 | total_events: usize, 25 | } 26 | 27 | #[tokio::main] 28 | async fn main() -> Result<()> { 29 | // Generate sample click events 30 | let events = generate_sample_clicks(); 31 | let source = CollectionSource::new(events); 32 | let sink = CollectionSink::new(); 33 | 34 | // Build and execute the streaming pipeline 35 | DataStream::new(source) 36 | // Filter only page view events 37 | .filter(|event| event.event_type == "page_view") 38 | // Group by user_id 39 | .map(|event| { 40 | ( 41 | event.user_id.clone(), 42 | (event.page_id.clone(), event.timestamp), 43 | ) 44 | }) 45 | // Create session windows with 30-second timeout 46 | .window(WindowConfig::session(Duration::from_millis(30000))) 47 | // Aggregate user sessions 48 | .aggregate( 49 | HashMap::new(), 50 | |mut sessions, (user_id, (page_id, timestamp))| { 51 | let session = sessions 52 | .entry(user_id.clone()) 53 | .or_insert_with(|| UserSession { 54 | user_id, 55 | page_views: Vec::new(), 56 | start_time: timestamp, 57 | duration_secs: 0, 58 | total_events: 0, 59 | }); 60 | 61 | session.page_views.push(page_id); 62 | session.duration_secs = timestamp 63 | .duration_since(session.start_time) 64 | .unwrap_or(Duration::from_secs(0)) 65 | .as_secs(); 66 | session.total_events += 1; 67 | 68 | sessions 69 | }, 70 | ) 71 | .sink(sink.clone()) 72 | .await?; 73 | 74 | // Print results 75 | println!("\nClick stream analysis results:"); 76 | for session_data in sink.get_data() { 77 | println!("\nSession window results:"); 78 | for (_, session) in session_data { 79 | println!( 80 | "User {}: {} events over {}s, Pages: {}", 81 | session.user_id, 82 | session.total_events, 83 | session.duration_secs, 84 | session.page_views.join(" -> ") 85 | ); 86 | } 87 | } 88 | 89 | Ok(()) 90 | } 91 | 92 | // Helper function to generate sample data 93 | fn generate_sample_clicks() -> Vec 
{ 94 | let start_time = SystemTime::now(); 95 | let mut events = Vec::new(); 96 | let pages = ["home", "products", "cart", "checkout"]; 97 | let users = ["user1", "user2", "user3"]; 98 | 99 | for (user_idx, user_id) in users.iter().enumerate() { 100 | let user_start = start_time + Duration::from_secs(user_idx as u64 * 5); 101 | 102 | // Simulate a user session with page views and some other events 103 | for (i, &page) in pages.iter().enumerate() { 104 | // Add page view 105 | events.push(ClickEvent { 106 | user_id: user_id.to_string(), 107 | page_id: page.to_string(), 108 | event_type: "page_view".to_string(), 109 | timestamp: user_start + Duration::from_secs(i as u64 * 10), 110 | }); 111 | 112 | // Add some click events 113 | events.push(ClickEvent { 114 | user_id: user_id.to_string(), 115 | page_id: page.to_string(), 116 | event_type: "click".to_string(), 117 | timestamp: user_start + Duration::from_secs(i as u64 * 10 + 2), 118 | }); 119 | } 120 | } 121 | 122 | events 123 | } 124 | -------------------------------------------------------------------------------- /examples/event-timestamp/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "event-timestamp" 3 | description = "Example of how to add timestamps to events" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "event-timestamp" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | anyhow = "1.0" 17 | tokio = { version = "1.0", features = ["full"] } 18 | -------------------------------------------------------------------------------- /examples/event-timestamp/README.md: -------------------------------------------------------------------------------- 1 | # Event Timestamp Example 2 | 3 | This example demonstrates how to use Fluxus for processing timestamped event streams with windowing operations. It shows how to count events within specific time windows and aggregate the results. 
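
To make "specific time windows" concrete: with tumbling windows, every record timestamp is mapped to the start of the window that contains it, mirroring the key calculation used by `WindowType::Tumbling` in `fluxus-utils` (`(timestamp / window_size) * window_size`). The standalone sketch below is illustrative only; the helper `tumbling_window_start` and the millisecond timestamps are made up and not part of the crate.

```rust
use std::time::Duration;

/// Start of the tumbling window containing `timestamp_ms`,
/// mirroring the window-key computation in fluxus-utils.
fn tumbling_window_start(timestamp_ms: i64, size: Duration) -> i64 {
    let size_ms = size.as_millis() as i64;
    (timestamp_ms / size_ms) * size_ms
}

fn main() {
    let size = Duration::from_millis(1000);
    // Events at 10_200 ms and 10_500 ms fall into the same 1-second window...
    assert_eq!(tumbling_window_start(10_200, size), 10_000);
    assert_eq!(tumbling_window_start(10_500, size), 10_000);
    // ...while an event at 11_050 ms belongs to the next window.
    assert_eq!(tumbling_window_start(11_050, size), 11_000);
}
```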
4 | 5 | ## Features 6 | 7 | - Event stream processing with timestamps 8 | - Tumbling window implementation 9 | - Event counting and aggregation 10 | - Time-based event analysis 11 | - Real-time event processing 12 | 13 | ## Running the Example 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## Implementation Details 20 | 21 | - Uses tumbling windows with 1-millisecond duration 22 | - Processes various event types (login, click, purchase) 23 | - Groups and counts events by type and timestamp 24 | - Demonstrates window-based aggregation 25 | - Sorts and displays results by event count 26 | 27 | ## Output Example 28 | 29 | ``` 30 | Event counts by timestamp: 31 | 32 | Time window results: 33 | ("click", timestamp): 3 34 | ("login", timestamp): 1 35 | ("purchase", timestamp): 1 36 | ``` 37 | 38 | ## Dependencies 39 | 40 | - fluxus-core 41 | - fluxus-runtime 42 | - fluxus-api 43 | - tokio 44 | - anyhow -------------------------------------------------------------------------------- /examples/event-timestamp/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::utils::{models::Record, window::WindowConfig}; 3 | use fluxus::{ 4 | api::{ 5 | DataStream, 6 | io::{CollectionSink, CollectionSource}, 7 | }, 8 | utils::time::current_time, 9 | }; 10 | use std::{collections::HashMap, time::Duration}; 11 | 12 | pub type EventCount = HashMap<(String, i64), usize>; 13 | 14 | #[tokio::main] 15 | async fn main() -> Result<()> { 16 | // Create timestamped event data 17 | let events = vec![ 18 | Record::with_timestamp("login".to_string(), get_timestamp(0)), 19 | Record::with_timestamp("click".to_string(), get_timestamp(100)), 20 | Record::with_timestamp("click".to_string(), get_timestamp(100)), 21 | Record::with_timestamp("click".to_string(), get_timestamp(100)), 22 | Record::with_timestamp("login".to_string(), get_timestamp(200)), 23 | Record::with_timestamp("purchase".to_string(), get_timestamp(300)), 24 | Record::with_timestamp("click".to_string(), get_timestamp(400)), 25 | Record::with_timestamp("click".to_string(), get_timestamp(400)), 26 | Record::with_timestamp("click".to_string(), get_timestamp(600)), 27 | ]; 28 | 29 | // Create data source and sink 30 | let source = CollectionSource::new(events); 31 | let sink: CollectionSink = CollectionSink::new(); 32 | 33 | // Build and execute stream processing pipeline 34 | DataStream::new(source) 35 | // Create tumbling windows of 1 milliseconds 36 | .window(WindowConfig::tumbling(Duration::from_millis(1))) 37 | // Count events in each time window 38 | .aggregate(HashMap::new(), |mut counts, event| { 39 | *counts.entry((event.data, event.timestamp)).or_insert(0) += 1; 40 | counts 41 | }) 42 | // Write results to sink 43 | .sink(sink.clone()) 44 | .await?; 45 | 46 | // Print results 47 | println!("\nEvent counts by timestamp:"); 48 | if let Some(last_result) = sink.get_last_element() { 49 | println!("\nTime window results:"); 50 | let mut events: Vec<_> = last_result.iter().collect(); 51 | events.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 52 | for (event, count) in events { 53 | println!(" {event:?}: {count}"); 54 | } 55 | } 56 | 57 | Ok(()) 58 | } 59 | 60 | // Helper function: Generate timestamp relative to current time 61 | fn get_timestamp(offset_ms: u64) -> i64 { 62 | let now = current_time() as i64; 63 | now + offset_ms as i64 64 | } 65 | -------------------------------------------------------------------------------- /examples/iot-devices/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "iot-devices" 3 | description = "IoT devices monitoring example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "iot-devices" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/iot-devices/README.md: -------------------------------------------------------------------------------- 1 | 2 | # IoT Device Monitoring Example 3 | 4 | This example demonstrates how to use Fluxus to process and analyze IoT device data streams. It implements real - time monitoring of multiple IoT devices, including data aggregation, statistical analysis, and alarm detection. 5 | 6 | ## Features 7 | 8 | - Multi - device data stream processing 9 | - Sliding window statistical analysis 10 | - Device status monitoring 11 | - Low battery and weak signal alarms 12 | - Real - time data aggregation 13 | 14 | ## Running the Example 15 | 16 | ```bash 17 | cargo run 18 | ``` 19 | 20 | ## Implementation Details 21 | 22 | - Use a 2 - minute sliding window with a 30 - second sliding interval 23 | - Calculate the average value of device data 24 | - Monitor battery level and signal strength 25 | - Count alarm events 26 | - Track the latest update time of devices 27 | 28 | ## Output Example 29 | 30 | ``` 31 | IoT Device Statistics: 32 | Device ID: DEV_001, Type: Temperature Sensor, Average Value: 25.50, Min Battery: 80%, Average Signal: -85dBm, Alert Count: 2 33 | ``` 34 | 35 | ## Dependencies 36 | 37 | - fluxus - core 38 | - fluxus - runtime 39 | - fluxus - api 40 | - tokio 41 | - anyhow 42 | -------------------------------------------------------------------------------- /examples/iot-devices/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | pub struct IoTData { 12 | device_id: String, 13 | device_type: String, 14 | value: f64, 15 | battery_level: u8, 16 | signal_strength: i32, 17 | timestamp: SystemTime, 18 | } 19 | 20 | #[derive(Clone)] 21 | pub struct DeviceStats { 22 | device_id: String, 23 | device_type: String, 24 | avg_value: f64, 25 | min_battery: u8, 26 | avg_signal: i32, 27 | alert_count: u32, 28 | last_update: SystemTime, 29 | } 30 | 31 | #[tokio::main] 32 | async fn main() -> Result<()> { 33 | // Generate sample IoT device data 34 | let iot_data = generate_sample_data(); 35 | let source = CollectionSource::new(iot_data); 36 | let sink = CollectionSink::new(); 37 | 38 | // Build and execute stream processing pipeline 39 | DataStream::new(source) 40 | // Group by device ID 41 | .map(|data| (data.device_id.clone(), data)) 42 | // Create 2-minute sliding window with 30-second slide 43 | .window(WindowConfig::sliding( 44 | Duration::from_secs(120), // 2 minutes 45 | Duration::from_secs(30), // 30 seconds 46 | )) 47 | // Aggregate 
device statistics 48 | .aggregate(HashMap::new(), |mut stats, (device_id, data)| { 49 | let entry = stats 50 | .entry(device_id.clone()) 51 | .or_insert_with(|| DeviceStats { 52 | device_id, 53 | device_type: data.device_type.clone(), 54 | avg_value: 0.0, 55 | min_battery: data.battery_level, 56 | avg_signal: 0, 57 | alert_count: 0, 58 | last_update: data.timestamp, 59 | }); 60 | 61 | // Update statistics 62 | entry.avg_value = (entry.avg_value + data.value) / 2.0; 63 | entry.min_battery = entry.min_battery.min(data.battery_level); 64 | entry.avg_signal = (entry.avg_signal + data.signal_strength) / 2; 65 | entry.last_update = data.timestamp; 66 | 67 | // Check alert conditions 68 | if data.battery_level < 20 || data.signal_strength < -90 { 69 | entry.alert_count += 1; 70 | } 71 | 72 | stats 73 | }) 74 | // Output results to sink 75 | .sink(sink.clone()) 76 | .await?; 77 | 78 | // Print results 79 | println!("\nIoT Device Statistics:"); 80 | for result in sink.get_data() { 81 | for (_, stats) in result { 82 | println!( 83 | "Device ID: {}, Type: {}, Average Value: {:.2}, Min Battery: {}%, Average Signal: {}dBm, Alert Count: {}", 84 | stats.device_id, 85 | stats.device_type, 86 | stats.avg_value, 87 | stats.min_battery, 88 | stats.avg_signal, 89 | stats.alert_count 90 | ); 91 | } 92 | } 93 | 94 | Ok(()) 95 | } 96 | 97 | // Generate sample IoT device data 98 | fn generate_sample_data() -> Vec { 99 | let device_types = [ 100 | "Temperature Sensor", 101 | "Humidity Sensor", 102 | "Pressure Sensor", 103 | "Light Sensor", 104 | ]; 105 | let mut data = Vec::new(); 106 | let start_time = SystemTime::now(); 107 | 108 | for i in 0..100 { 109 | for j in 1..=5 { 110 | let device_type = device_types[j % device_types.len()]; 111 | let base_value = match device_type { 112 | "Temperature Sensor" => 25.0, 113 | "Humidity Sensor" => 60.0, 114 | "Pressure Sensor" => 1013.0, 115 | "Light Sensor" => 500.0, 116 | _ => 0.0, 117 | }; 118 | 119 | // Simulate data fluctuation 120 | let value_variation = (i as f64 * 0.1).sin() * 5.0; 121 | let battery_drain = (i / 20) as u8; // Simulate battery consumption 122 | 123 | let reading = IoTData { 124 | device_id: format!("DEV_{j:03}"), 125 | device_type: device_type.to_string(), 126 | value: base_value + value_variation, 127 | battery_level: 100 - battery_drain, 128 | signal_strength: -70 - (i % 30), // Simulate signal strength fluctuation 129 | timestamp: start_time + Duration::from_secs(i as u64 * 15), // One data point every 15 seconds 130 | }; 131 | data.push(reading); 132 | } 133 | } 134 | 135 | data 136 | } 137 | -------------------------------------------------------------------------------- /examples/log-anomaly/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "log-anomaly" 3 | description = "Log anomaly detection example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "log-anomaly" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/log-anomaly/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Log Anomaly Detection Example 3 | 4 | This example demonstrates how to use Fluxus for real - time log anomaly detection. It analyzes the log streams of multiple microservices to detect error rates and performance anomalies. 5 | 6 | ## Features 7 | 8 | - Multi - service log stream processing 9 | - Error rate statistics 10 | - Latency anomaly detection 11 | - Real - time statistical analysis 12 | - Sliding window aggregation 13 | 14 | ## Running the Example 15 | 16 | ```bash 17 | cargo run 18 | ``` 19 | 20 | ## Implementation Details 21 | 22 | - Use a 1 - minute sliding window with a 10 - second sliding interval. 23 | - Group and count by service name. 24 | - Calculate the error rate and average latency. 25 | - Detect high - latency events (>1 second). 26 | - Update service status statistics in real - time. 27 | 28 | ## Output Example 29 | 30 | ``` 31 | Log Anomaly Detection Statistics: 32 | Service: api - gateway, Error Rate: 5.00%, Avg Latency: 150.25ms, Error Count: 10, High Latency Events: 5, Total Events: 200 33 | ``` 34 | 35 | ## Dependencies 36 | 37 | - fluxus - core 38 | - fluxus - runtime 39 | - fluxus - api 40 | - tokio 41 | - anyhow 42 | -------------------------------------------------------------------------------- /examples/log-anomaly/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::{ 8 | collections::HashMap, 9 | time::{Duration, SystemTime}, 10 | }; 11 | 12 | #[derive(Clone)] 13 | #[allow(dead_code)] 14 | pub struct LogEvent { 15 | service: String, 16 | level: String, 17 | message: String, 18 | latency_ms: u64, 19 | timestamp: SystemTime, 20 | } 21 | 22 | #[derive(Clone)] 23 | pub struct AnomalyStats { 24 | service: String, 25 | error_rate: f64, 26 | avg_latency: f64, 27 | error_count: u32, 28 | high_latency_count: u32, 29 | total_events: u32, 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | // Generate sample log events 35 | let events = generate_sample_events(); 36 | let source = CollectionSource::new(events); 37 | let sink = CollectionSink::new(); 38 | 39 | // Build and execute stream processing pipeline 40 | DataStream::new(source) 41 | // Group by service name 42 | .map(|event| (event.service.clone(), event)) 43 | // Create 1-minute sliding window with 10-second slide 44 | .window(WindowConfig::sliding( 45 | Duration::from_secs(60), // 1 minute 46 | Duration::from_secs(10), // 10 seconds 47 | )) 48 | // Aggregate anomaly statistics 49 | .aggregate(HashMap::new(), |mut stats, (service, event)| { 50 | let entry = stats 51 | .entry(service.clone()) 52 | .or_insert_with(|| AnomalyStats { 53 | service, 54 | error_rate: 0.0, 55 | avg_latency: 0.0, 56 | error_count: 0, 57 | high_latency_count: 0, 58 | total_events: 0, 59 | }); 60 | 61 | // Update statistics 62 | entry.total_events += 1; 63 | entry.avg_latency = (entry.avg_latency * (entry.total_events - 1) as f64 64 | + event.latency_ms as f64) 65 | / entry.total_events as f64; 66 | 67 | // Detect errors and high latency 68 | if event.level == "ERROR" { 69 | entry.error_count += 1; 70 | } 71 | if event.latency_ms > 1000 { 72 | // Latency over 1 second 73 | entry.high_latency_count += 1; 74 | } 75 | 76 | // Calculate error rate 77 | entry.error_rate = entry.error_count as f64 / entry.total_events as 
f64; 78 | 79 | stats 80 | }) 81 | // Output results to sink 82 | .sink(sink.clone()) 83 | .await?; 84 | 85 | // Print results 86 | println!("\nLog Anomaly Detection Statistics:"); 87 | for result in sink.get_data() { 88 | for (_, stats) in result { 89 | println!( 90 | "Service: {}, Error Rate: {:.2}%, Avg Latency: {:.2}ms, Error Count: {}, High Latency Events: {}, Total Events: {}", 91 | stats.service, 92 | stats.error_rate * 100.0, 93 | stats.avg_latency, 94 | stats.error_count, 95 | stats.high_latency_count, 96 | stats.total_events 97 | ); 98 | } 99 | } 100 | 101 | Ok(()) 102 | } 103 | 104 | // Generate sample log events 105 | fn generate_sample_events() -> Vec { 106 | let services = vec![ 107 | "api-gateway", 108 | "user-service", 109 | "order-service", 110 | "payment-service", 111 | ]; 112 | let mut events = Vec::new(); 113 | let start_time = SystemTime::now(); 114 | 115 | for i in 0..200 { 116 | for service in &services { 117 | // Simulate different error probabilities for services 118 | let error_prob = match *service { 119 | "api-gateway" => 0.05, 120 | "user-service" => 0.02, 121 | "order-service" => 0.08, 122 | "payment-service" => 0.03, 123 | _ => 0.01, 124 | }; 125 | 126 | // Randomly select log level 127 | let level = if rand_float() < error_prob { 128 | "ERROR" 129 | } else if rand_float() < 0.15 { 130 | "WARN" 131 | } else { 132 | "INFO" 133 | }; 134 | 135 | // Simulate latency 136 | let base_latency = match *service { 137 | "api-gateway" => 50, 138 | "user-service" => 100, 139 | "order-service" => 150, 140 | "payment-service" => 200, 141 | _ => 100, 142 | }; 143 | 144 | let latency = base_latency + (rand_float() * 1000.0) as u64; 145 | let message = format!("Processing request #{i}"); 146 | 147 | let event = LogEvent { 148 | service: service.to_string(), 149 | level: level.to_string(), 150 | message, 151 | latency_ms: latency, 152 | timestamp: start_time + Duration::from_secs(i as u64 / 2), // One event every 0.5 seconds 153 | }; 154 | events.push(event); 155 | } 156 | } 157 | 158 | events 159 | } 160 | 161 | // Generate random float between 0 and 1 162 | fn rand_float() -> f64 { 163 | use std::time::SystemTime; 164 | let nanos = SystemTime::now() 165 | .duration_since(SystemTime::UNIX_EPOCH) 166 | .expect("System time cannot be earlier than UNIX epoch") 167 | .subsec_nanos() as f64; 168 | (nanos % 1000.0) / 1000.0 169 | } 170 | -------------------------------------------------------------------------------- /examples/network-log/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "network-log" 3 | description = "Network log analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "network-log" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/network-log/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Network Log Analysis Example 3 | 4 | This example demonstrates how to use Fluxus for real - time network log analysis. 
It implements real-time processing of HTTP request logs, including request statistics, error rate analysis, and response size monitoring. 5 | 6 | ## Features 7 | 8 | - Real-time processing of HTTP request logs 9 | - Grouped statistics by path 10 | - Error rate monitoring 11 | - Response size analysis 12 | - Sliding window aggregation 13 | 14 | ## Running the Example 15 | 16 | ```bash 17 | cargo run 18 | ``` 19 | 20 | ## Implementation Details 21 | 22 | - Use a 60-second sliding window with a 10-second sliding interval. 23 | - Group and count requests by API path. 24 | - Calculate the number of requests and errors for each path. 25 | - Monitor changes in response size. 26 | - Calculate the error rate in real time. 27 | 28 | ## Output Example 29 | 30 | ``` 31 | Network log analysis results: 32 | Path: /api/users 33 | Requests: 50 34 | Errors: 5 35 | Avg Size: 1250.50 bytes 36 | Error Rate: 10.0% 37 | ``` 38 | 39 | ## Dependencies 40 | 41 | - fluxus-core 42 | - fluxus-runtime 43 | - fluxus-api 44 | - tokio 45 | -------------------------------------------------------------------------------- /examples/network-log/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | #[allow(dead_code)] 12 | pub struct LogEntry { 13 | ip: String, 14 | method: String, 15 | path: String, 16 | status: u16, 17 | bytes: u64, 18 | timestamp: SystemTime, 19 | } 20 | 21 | #[derive(Clone)] 22 | pub struct PathStats { 23 | path: String, 24 | total_requests: usize, 25 | error_count: usize, 26 | total_bytes: u64, 27 | avg_response_size: f64, 28 | } 29 | 30 | #[tokio::main] 31 | async fn main() -> Result<()> { 32 | // Generate sample log entries 33 | let logs = generate_sample_logs(); 34 | let source = CollectionSource::new(logs); 35 | let sink = CollectionSink::new(); 36 | 37 | // Build and execute the streaming pipeline 38 | DataStream::new(source) 39 | // Group by path 40 | .map(|log| (log.path.clone(), log)) 41 | // Create 60-second sliding windows with 10-second slide 42 | .window(WindowConfig::sliding( 43 | Duration::from_millis(60000), 44 | Duration::from_millis(10000), 45 | )) 46 | // Aggregate path statistics 47 | .aggregate(HashMap::new(), |mut stats, (path, log)| { 48 | let entry = stats.entry(path).or_insert_with(|| PathStats { 49 | path: String::new(), 50 | total_requests: 0, 51 | error_count: 0, 52 | total_bytes: 0, 53 | avg_response_size: 0.0, 54 | }); 55 | 56 | entry.path = log.path; 57 | entry.total_requests += 1; 58 | if log.status >= 400 { 59 | entry.error_count += 1; 60 | } 61 | entry.total_bytes += log.bytes; 62 | entry.avg_response_size = entry.total_bytes as f64 / entry.total_requests as f64; 63 | 64 | stats 65 | }) 66 | .sink(sink.clone()) 67 | .await?; 68 | 69 | // Print results 70 | println!("\nNetwork log analysis results:"); 71 | for window_stats in sink.get_data() { 72 | println!("\nWindow results:"); 73 | for (_, stats) in window_stats { 74 | println!( 75 | "Path: {}\n Requests: {}\n Errors: {}\n Avg Size: {:.2} bytes\n Error Rate: {:.1}%", 76 | stats.path, 77 | stats.total_requests, 78 | stats.error_count, 79 | stats.avg_response_size, 80 | (stats.error_count as f64 / stats.total_requests as f64) * 100.0 81 | ); 82 | } 83 | } 84 | 85 | Ok(()) 86 | } 87 | 88 | // Helper
function to generate sample data 89 | fn generate_sample_logs() -> Vec<LogEntry> { 90 | let start_time = SystemTime::now(); 91 | let mut logs = Vec::new(); 92 | let paths = ["/api/users", "/api/products", "/api/orders", "/health"]; 93 | let methods = ["GET", "POST", "PUT", "DELETE"]; 94 | 95 | for i in 0..200 { 96 | let timestamp = start_time + Duration::from_secs(i as u64 / 4); 97 | let path = paths[i % paths.len()]; 98 | let method = methods[i % methods.len()]; 99 | 100 | // Generate a mix of successful and error responses 101 | let status = if i % 10 == 0 { 102 | 500 // Occasional server errors 103 | } else if i % 7 == 0 { 104 | 404 // Some not found errors 105 | } else { 106 | 200 // Mostly successful 107 | }; 108 | 109 | // Simulate variable response sizes 110 | let bytes = if status == 200 { 111 | 1000 + (i % 5) * 500 // Successful responses have larger sizes 112 | } else { 113 | 100 + (i % 3) * 50 // Error responses are smaller 114 | } as u64; 115 | 116 | logs.push(LogEntry { 117 | ip: format!("192.168.1.{}", i % 256), 118 | method: method.to_string(), 119 | path: path.to_string(), 120 | status, 121 | bytes, 122 | timestamp, 123 | }); 124 | } 125 | 126 | logs 127 | } 128 | -------------------------------------------------------------------------------- /examples/remote-csv/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "remote_csv" 3 | description = "Example of a remote CSV source" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [dependencies] 11 | fluxus-sources = { path = "../../crates/fluxus-sources" } 12 | tokio = { version = "1", features = ["full"] } -------------------------------------------------------------------------------- /examples/remote-csv/README.md: -------------------------------------------------------------------------------- 1 | # Remote CSV Example 2 | 3 | This example demonstrates how to use Fluxus for reading and processing remote CSV data streams. It shows how to connect to a remote CSV file and process its contents line by line. 4 | 5 | ## Features 6 | 7 | - Remote CSV file streaming 8 | - URL-based data source 9 | - Line-by-line processing 10 | - Asynchronous data reading 11 | - Source initialization and cleanup 12 | 13 | ## Running the Example 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## Implementation Details 20 | 21 | - Connects to a remote CSV file via URL 22 | - Initializes a CSV source stream 23 | - Processes records asynchronously 24 | - Demonstrates proper source cleanup 25 | - Handles streaming termination 26 | 27 | ## Output Example 28 | 29 | ``` 30 | Reading CSV data from: [URL] 31 | Line 1: [CSV record data] 32 | Line 2: [CSV record data] 33 | ... 34 | Done! 35 | ``` 36 | 37 | ## Dependencies 38 | 39 | - fluxus-core 40 | - fluxus-runtime 41 | - fluxus-sources 42 | - tokio -------------------------------------------------------------------------------- /examples/remote-csv/src/main.rs: -------------------------------------------------------------------------------- 1 | use fluxus_sources::{CsvSource, Source}; 2 | 3 | #[tokio::main] 4 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 5 | let url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"; 6 | 7 | println!("Reading CSV data from: {}", url); 8 | 9 | let mut source = CsvSource::from_url(url); 10 | 11 | source.init().await?; 12 | 13 | for i in 0..10 { 14 | match source.next().await?
{ 15 | Some(record) => println!("Line {}: {}", i + 1, record.data), 16 | None => { 17 | println!("End of file reached"); 18 | break; 19 | } 20 | } 21 | } 22 | 23 | source.close().await?; 24 | 25 | println!("Done!"); 26 | 27 | Ok(()) 28 | } 29 | -------------------------------------------------------------------------------- /examples/stock-market/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stock-market" 3 | description = "Stock market analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "stock-market" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/stock-market/README.md: -------------------------------------------------------------------------------- 1 | # Stock Analysis Example 2 | 3 | This example demonstrates how to use Fluxus to monitor real-time stock price fluctuations, analyze trading volume, and generate stock price trend predictions. 4 | 5 | ## Features 6 | 7 | - Monitor real-time stock price fluctuations. 8 | - Analyze trading volume. 9 | - Generate stock price trend predictions. 10 | 11 | ## Running the Example 12 | 13 | ```bash 14 | cargo run 15 | ``` 16 | 17 | ## Implementation Details 18 | 19 | - Use a streaming processing framework to process stock data. 20 | - Filter and aggregate real-time data. 21 | - Apply machine learning models for trend prediction. 22 | 23 | ## Output Example 24 | 25 | ``` 26 | Stock analysis results: 27 | Price trend: Upward 28 | Trading volume: 100000 29 | ... 
30 | ``` 31 | 32 | ## Dependencies 33 | 34 | - fluxus-core 35 | - fluxus-runtime 36 | - fluxus-api 37 | - tokio 38 | - anyhow -------------------------------------------------------------------------------- /examples/stock-market/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::{ 8 | collections::HashMap, 9 | time::{Duration, SystemTime}, 10 | }; 11 | 12 | #[derive(Clone)] 13 | #[allow(dead_code)] 14 | pub struct StockTrade { 15 | symbol: String, 16 | price: f64, 17 | volume: u64, 18 | timestamp: SystemTime, 19 | } 20 | 21 | #[derive(Clone)] 22 | #[allow(dead_code)] 23 | pub struct StockStats { 24 | symbol: String, 25 | vwap: f64, // Volume Weighted Average Price 26 | total_volume: u64, 27 | price_change: f64, 28 | high: f64, 29 | low: f64, 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | // Generate sample stock trading data 35 | let trades = generate_sample_trades(); 36 | let source = CollectionSource::new(trades); 37 | let sink = CollectionSink::new(); 38 | 39 | // Build and execute stream processing pipeline 40 | DataStream::new(source) 41 | // Group by stock symbol 42 | .map(|trade| (trade.symbol.clone(), trade)) 43 | // Create 5-minute sliding window with 1-minute slide 44 | .window(WindowConfig::sliding( 45 | Duration::from_secs(300), // 5 minutes 46 | Duration::from_secs(60), // 1 minute 47 | )) 48 | // Aggregate stock statistics within each window 49 | .aggregate(HashMap::new(), |mut stats, (symbol, trade)| { 50 | let entry = stats.entry(symbol.clone()).or_insert_with(|| StockStats { 51 | symbol, 52 | vwap: 0.0, 53 | total_volume: 0, 54 | price_change: 0.0, 55 | high: trade.price, 56 | low: trade.price, 57 | }); 58 | 59 | // Update statistics 60 | let volume_price = 61 | (entry.vwap * entry.total_volume as f64) + (trade.price * trade.volume as f64); 62 | entry.total_volume += trade.volume; 63 | entry.vwap = volume_price / entry.total_volume as f64; 64 | entry.high = entry.high.max(trade.price); 65 | entry.low = entry.low.min(trade.price); 66 | entry.price_change = entry.high - entry.low; 67 | 68 | stats 69 | }) 70 | // Output results to sink 71 | .sink(sink.clone()) 72 | .await?; 73 | 74 | // Print results 75 | println!("\nStock Market Statistics:"); 76 | for result in sink.get_data() { 77 | for (symbol, stats) in result { 78 | println!( 79 | "Stock: {}, VWAP: {:.2}, Volume: {}, Price Change: {:.2}, High: {:.2}, Low: {:.2}", 80 | symbol, stats.vwap, stats.total_volume, stats.price_change, stats.high, stats.low 81 | ); 82 | } 83 | } 84 | 85 | Ok(()) 86 | } 87 | 88 | // Generate sample trading data 89 | fn generate_sample_trades() -> Vec<StockTrade> { 90 | let symbols = vec!["AAPL", "GOOGL", "MSFT", "AMZN"]; 91 | let mut trades = Vec::new(); 92 | let start_time = SystemTime::now(); 93 | 94 | for i in 0..100 { 95 | for symbol in &symbols { 96 | let base_price = match *symbol { 97 | "AAPL" => 150.0, 98 | "GOOGL" => 2800.0, 99 | "MSFT" => 300.0, 100 | "AMZN" => 3300.0, 101 | _ => 100.0, 102 | }; 103 | 104 | // Simulate price fluctuation 105 | let price_variation = (i as f64 * 0.1).sin() * 5.0; 106 | let trade = StockTrade { 107 | symbol: symbol.to_string(), 108 | price: base_price + price_variation, 109 | volume: 100 + (i as u64 % 900), 110 | timestamp: start_time + Duration::from_secs(i as u64 * 30), // Data point every 30 seconds 111 | }; 112 |
trades.push(trade); 113 | } 114 | } 115 | 116 | trades 117 | } 118 | -------------------------------------------------------------------------------- /examples/temperature-sensor/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "temperature-sensor" 3 | description = "Temperature sensor analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "temperature-sensor" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/temperature-sensor/README.md: -------------------------------------------------------------------------------- 1 | # Temperature Analysis Example 2 | 3 | This example demonstrates how to use Fluxus to monitor real-time temperature data, analyze temperature change trends, and detect abnormal temperature values. 4 | 5 | ## Features 6 | 7 | - Monitor real-time temperature data. 8 | - Analyze temperature change trends. 9 | - Detect abnormal temperature values. 10 | 11 | ## Running the Example 12 | 13 | ## Implementation Details 14 | 15 | - Use a streaming processing framework to process temperature data. 16 | - Filter and aggregate real-time data. 17 | - Apply a threshold detection algorithm to identify abnormal temperatures. 18 | 19 | ## Output Example 20 | 21 | ``` 22 | Temperature analysis results: 23 | 24 | Window results: 25 | Sensor sensor3: 100 readings, Avg: 25.0°C, Min: 22.0°C, Max: 28.0°C, Avg Humidity: 60.0% 26 | Sensor sensor1: 100 readings, Avg: 20.4°C, Min: 18.0°C, Max: 22.0°C, Avg Humidity: 49.8% 27 | Sensor sensor2: 100 readings, Avg: 26.9°C, Min: 22.0°C, Max: 31.9°C, Avg Humidity: 64.9% 28 | ``` 29 | 30 | ## Dependencies 31 | 32 | - fluxus-core 33 | - fluxus-runtime 34 | - fluxus-api 35 | - tokio 36 | - anyhow 37 | -------------------------------------------------------------------------------- /examples/temperature-sensor/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | pub struct SensorReading { 12 | sensor_id: String, 13 | temperature: f64, 14 | humidity: f64, 15 | timestamp: SystemTime, 16 | } 17 | 18 | #[derive(Clone)] 19 | pub struct SensorStats { 20 | sensor_id: String, 21 | avg_temperature: f64, 22 | avg_humidity: f64, 23 | min_temperature: f64, 24 | max_temperature: f64, 25 | reading_count: usize, 26 | } 27 | 28 | #[tokio::main] 29 | async fn main() -> Result<()> { 30 | // Generate sample temperature readings 31 | let readings = generate_sample_readings(); 32 | let source = CollectionSource::new(readings); 33 | let sink = CollectionSink::new(); 34 | 35 | // Build and execute the streaming pipeline 36 | DataStream::new(source) 37 | // Group by sensor_id 38 | .map(|reading| { 39 | ( 40 | reading.sensor_id.clone(), 41 | (reading.temperature, reading.humidity, reading.timestamp), 42 | ) 43 | })
44 | // Create 10-second tumbling windows 45 | .window(WindowConfig::tumbling(Duration::from_millis(10000))) 46 | // Aggregate temperatures in each window 47 | .aggregate( 48 | HashMap::new(), 49 | |mut stats, (sensor_id, (temp, humidity, _))| { 50 | let entry = stats 51 | .entry(sensor_id.clone()) 52 | .or_insert_with(|| SensorStats { 53 | sensor_id: String::new(), 54 | avg_temperature: 0.0, 55 | avg_humidity: 0.0, 56 | min_temperature: f64::MAX, 57 | max_temperature: f64::MIN, 58 | reading_count: 0, 59 | }); 60 | 61 | entry.sensor_id = sensor_id; 62 | entry.min_temperature = entry.min_temperature.min(temp); 63 | entry.max_temperature = entry.max_temperature.max(temp); 64 | entry.avg_temperature = (entry.avg_temperature * entry.reading_count as f64 + temp) 65 | / (entry.reading_count + 1) as f64; 66 | entry.avg_humidity = (entry.avg_humidity * entry.reading_count as f64 + humidity) 67 | / (entry.reading_count + 1) as f64; 68 | entry.reading_count += 1; 69 | 70 | stats 71 | }, 72 | ) 73 | .sink(sink.clone()) 74 | .await?; 75 | 76 | // Print results 77 | println!("\nTemperature analysis results:"); 78 | for window_stats in sink.get_data() { 79 | println!("\nWindow results:"); 80 | for (_, stats) in window_stats { 81 | println!( 82 | "Sensor {}: {} readings, Avg: {:.1}°C, Min: {:.1}°C, Max: {:.1}°C, Avg Humidity: {:.1}%", 83 | stats.sensor_id, 84 | stats.reading_count, 85 | stats.avg_temperature, 86 | stats.min_temperature, 87 | stats.max_temperature, 88 | stats.avg_humidity, 89 | ); 90 | } 91 | } 92 | 93 | Ok(()) 94 | } 95 | 96 | // Helper function to generate sample data 97 | fn generate_sample_readings() -> Vec<SensorReading> { 98 | let start_time = SystemTime::now(); 99 | let mut readings = Vec::new(); 100 | 101 | for i in 0..100 { 102 | let timestamp = start_time + Duration::from_secs(i as u64 / 10); 103 | 104 | // Sensor 1: Normal temperature variations 105 | readings.push(SensorReading { 106 | sensor_id: "sensor1".to_string(), 107 | temperature: 20.0 + (i as f64 / 10.0).sin() * 2.0, 108 | humidity: 50.0 + (i as f64 / 10.0).cos() * 5.0, 109 | timestamp, 110 | }); 111 | 112 | // Sensor 2: Gradually increasing temperature 113 | readings.push(SensorReading { 114 | sensor_id: "sensor2".to_string(), 115 | temperature: 22.0 + i as f64 * 0.1, 116 | humidity: 55.0 + i as f64 * 0.2, 117 | timestamp, 118 | }); 119 | 120 | // Sensor 3: Random fluctuations 121 | readings.push(SensorReading { 122 | sensor_id: "sensor3".to_string(), 123 | temperature: 25.0 + (i as f64 * 0.7).cos() * 3.0, 124 | humidity: 60.0 + (i as f64 * 0.5).sin() * 4.0, 125 | timestamp, 126 | }); 127 | } 128 | 129 | readings 130 | } 131 | -------------------------------------------------------------------------------- /examples/word-count/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "word-count" 3 | description = "Word count example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "word-count" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" 22 | -------------------------------------------------------------------------------- /examples/word-count/README.md:
-------------------------------------------------------------------------------- 1 | # Word Count Example 2 | 3 | This example demonstrates how to use Fluxus to count the number of words in input text, filter common stop words, and sort the results by word frequency. 4 | 5 | ## Features 6 | 7 | - Count the number of words in input text. 8 | - Support filtering common stop words. 9 | - Output results sorted by word frequency. 10 | 11 | ## Running the Example 12 | 13 | ```bash 14 | cargo run 15 | ``` 16 | 17 | ## Implementation Details 18 | 19 | - Use a streaming processing framework to process text data. 20 | - Tokenize the input text. 21 | - Count the occurrence of each word. 22 | - Filter stop words and sort the output. 23 | 24 | ## Output Example 25 | 26 | ``` 27 | Word count results: 28 | The: 10 29 | And: 8 30 | ... 31 | ``` 32 | 33 | ## Dependencies 34 | 35 | - fluxus-core 36 | - fluxus-runtime 37 | - fluxus-api 38 | - tokio 39 | - anyhow -------------------------------------------------------------------------------- /examples/word-count/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::Duration; 9 | 10 | pub type WordCount = HashMap<String, usize>; 11 | 12 | #[tokio::main] 13 | async fn main() -> Result<()> { 14 | // Sample input text 15 | let text = vec![ 16 | "hello world", 17 | "hello stream processing", 18 | "world of streaming", 19 | "hello streaming world", 20 | ]; 21 | 22 | // Create a source from the text collection 23 | let source = CollectionSource::new(text); 24 | let sink: CollectionSink<WordCount> = CollectionSink::new(); 25 | 26 | // Build and execute the streaming pipeline 27 | DataStream::new(source) 28 | .filter(|line| line.starts_with("hello")) 29 | // Split text into words 30 | .map(|line| { 31 | line.split_whitespace() 32 | .map(|s| s.to_lowercase()) 33 | .collect::<Vec<String>>() 34 | }) 35 | // Parallelize the processing 36 | .parallel(2) 37 | // Create tumbling windows of 1 second 38 | .window(WindowConfig::tumbling(Duration::from_millis(1000))) 39 | // Count words in each window 40 | .aggregate(HashMap::new(), |mut counts, words| { 41 | for word in words { 42 | *counts.entry(word).or_insert(0) += 1; 43 | } 44 | counts 45 | }) 46 | // Write results to sink 47 | .sink(sink.clone()) 48 | .await?; 49 | 50 | // Print the results 51 | println!("\nWord count last result:"); 52 | let last_result = sink.get_last_element().unwrap(); 53 | let mut words: Vec<_> = last_result.iter().collect(); 54 | words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 55 | for (word, count) in words { 56 | println!(" {word}: {count}"); 57 | } 58 | 59 | println!("\nWord count results:"); 60 | for result in sink.get_data() { 61 | println!("\nWindow results:"); 62 | let mut words: Vec<_> = result.iter().collect(); 63 | words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 64 | for (word, count) in words { 65 | println!(" {word}: {count}"); 66 | } 67 | } 68 | 69 | Ok(()) 70 | } 71 | --------------------------------------------------------------------------------