├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── runtime_benchmark.rs ├── crates ├── fluxus-api │ ├── Cargo.toml │ ├── README.md │ ├── src │ │ ├── io │ │ │ ├── collection_sink.rs │ │ │ ├── collection_source.rs │ │ │ └── mod.rs │ │ ├── lib.rs │ │ ├── operators │ │ │ ├── filter.rs │ │ │ ├── flat_map.rs │ │ │ ├── map.rs │ │ │ ├── mod.rs │ │ │ ├── window_aggregator.rs │ │ │ ├── window_skipper.rs │ │ │ └── window_sorter.rs │ │ └── stream │ │ │ ├── datastream.rs │ │ │ ├── mod.rs │ │ │ └── windowed_stream.rs │ └── tests │ │ ├── datastreams_test.rs │ │ ├── filter_test.rs │ │ └── windowed_stream_test.rs ├── fluxus-core │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── config.rs │ │ ├── error_handling │ │ ├── backpressure.rs │ │ ├── mod.rs │ │ └── retry_strategy.rs │ │ ├── lib.rs │ │ ├── metrics.rs │ │ └── pipeline │ │ ├── mod.rs │ │ ├── processor.rs │ │ └── status.rs ├── fluxus-runtime │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── lib.rs │ │ ├── runtime.rs │ │ ├── state.rs │ │ └── watermark.rs ├── fluxus-sinks │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── buffered.rs │ │ ├── console.rs │ │ ├── dummy_sink.rs │ │ ├── file.rs │ │ └── lib.rs ├── fluxus-sources │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── csv.rs │ │ ├── generator.rs │ │ └── lib.rs ├── fluxus-transformers │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── lib.rs │ │ ├── operator │ │ ├── builder.rs │ │ ├── filter.rs │ │ ├── map.rs │ │ ├── mod.rs │ │ ├── window_match.rs │ │ └── window_reduce.rs │ │ ├── transform_base.rs │ │ ├── transform_source.rs │ │ └── transform_source_with_operator.rs ├── fluxus-utils │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── error_converters.rs │ │ ├── lib.rs │ │ ├── models.rs │ │ ├── time.rs │ │ └── window.rs └── fluxus │ ├── Cargo.toml │ ├── README.md │ └── src │ └── lib.rs ├── docs ├── DESIGN.md ├── Logo.md ├── architecture.png └── images │ └── fluxus-logo.png └── examples ├── README.md ├── click-stream ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── event-timestamp ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── iot-devices ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── log-anomaly ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── network-log ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── remote-csv ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── stock-market ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── temperature-sensor ├── Cargo.toml ├── README.md └── src │ └── main.rs └── word-count ├── Cargo.toml ├── README.md └── src └── main.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | permissions: 3 | contents: read 4 | pull-requests: write 5 | 6 | on: 7 | push: 8 | branches: [ "main", "develop" ] 9 | pull_request: 10 | branches: [ "main", "develop" ] 11 | release: 12 | types: [ published ] 13 | 14 | env: 15 | CARGO_TERM_COLOR: always 16 | 17 | jobs: 18 | build: 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Install Rust toolchain 24 | uses: actions-rs/toolchain@v1 25 | with: 26 | toolchain: stable 27 | components: rustfmt, clippy 28 | override: true 29 | - name: Check formatting 30 | run: cargo fmt -- --check 31 | - name: Run clippy 32 | run: cargo clippy -- -D warnings 33 | - name: Build 34 | run: cargo build --verbose 35 | - name: Run tests 36 | run: cargo test --verbose 37 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .DS_Store 3 | .idea/ 4 | 5 | # Added by cargo 6 | # 7 | # already existing elements were commented out 8 | 9 | #/target 10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["crates/*", "examples/*"] 3 | 4 | resolver = "2" 5 | 6 | [workspace.package] 7 | version = "0.2.0" 8 | edition = "2024" 9 | license = "Apache-2.0" 10 | authors = ["Fluxus Team"] 11 | description = "Fluxus is a stream processing engine that provides a declarative and efficient way to process and analyze data streams." 12 | homepage = "https://github.com/lispking/fluxus" 13 | repository = "https://github.com/lispking/fluxus" 14 | readme = "README.md" 15 | categories = ["database", "development-tools", "asynchronous", "science"] 16 | keywords = [ 17 | "stream-processing", 18 | "real-time", 19 | "data-processing", 20 | "analytics", 21 | "async", 22 | ] 23 | 24 | [workspace.metadata.release] 25 | publish = true 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Fluxus Logo 3 |

4 | 5 | # Fluxus Stream Processing Engine 6 | 7 | [![Crates.io](https://img.shields.io/crates/v/fluxus-core.svg)](https://crates.io/crates/fluxus-core) 8 | [![Documentation](https://docs.rs/fluxus-core/badge.svg)](https://docs.rs/fluxus-core) 9 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache2.0-yellow.svg)](https://opensource.org/license/apache-2-0) 10 | [![Build Status](https://github.com/lispking/fluxus/actions/workflows/ci.yml/badge.svg)](https://github.com/lispking/fluxus/actions?query=branch%3Amain) 11 | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/lispking/fluxus) 12 | 13 | 14 | Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real-time data processing and analysis. 15 | 16 | ![Fluxus Architecture](docs/architecture.png) 17 | 18 | ## Features 19 | 20 | - High-performance stream processing 21 | - Flexible windowing operations (Tumbling, Sliding, Session windows) 22 | - Parallel processing support 23 | - Rich set of stream operations (map, filter, aggregate) 24 | - Type-safe API 25 | - Easy to use and extend 26 | 27 | ## Project Structure 28 | 29 | - `crates/fluxus` - Main crate containing the Fluxus engine and its dependencies 30 | - `crates/fluxus-api` - Core API definitions and interfaces 31 | - `crates/fluxus-core` - Core implementations and data structures 32 | - `crates/fluxus-runtime` - Runtime engine and execution environment 33 | - `crates/fluxus-sinks` - Sink implementations for different data sinks (e.g., Console, File) 34 | - `crates/fluxus-sources` - Source implementations for different data sources (e.g., CSV, Generator) 35 | - `crates/fluxus-transformers` - Transformations for stream processing (e.g., map, filter, aggregate) 36 | - `crates/fluxus-utils` - Utility functions and helpers 37 | - `examples` - Example applications demonstrating usage 38 |
39 | ## Examples 40 | 41 | The project includes several example applications that demonstrate different use cases: 42 | 43 | ### Word Count 44 | 45 | Simple word frequency analysis in text streams using tumbling windows. 46 | 47 | ```bash 48 | cargo run --example word-count 49 | ``` 50 | 51 | ### Temperature Sensor Analysis 52 | 53 | Processing and analyzing temperature sensor data with sliding windows. 54 | 55 | ```bash 56 | cargo run --example temperature-sensor 57 | ``` 58 | 59 | ### Click Stream Analysis 60 | 61 | Analyzing user click streams with session windows. 62 | 63 | ```bash 64 | cargo run --example click-stream 65 | ``` 66 | 67 | ### Network Log Analysis 68 | 69 | Processing network logs with sliding windows and aggregations. 70 | 71 | ```bash 72 | cargo run --example network-log 73 | ``` 74 | 75 | ### Remote CSV Data Processing 76 | 77 | Processing CSV data from remote sources like GitHub. 78 | 79 | ```bash 80 | cargo run --example remote-csv 81 | ``` 82 | 83 | ### View Available Examples 84 | 85 | To see all available examples and options: 86 | 87 | ```bash 88 | cargo run --example 89 | ``` 90 | 91 | ## Using Fluxus in Your Project 92 | 93 | To use Fluxus in your project, add it as a dependency using cargo: 94 | 95 | ```bash 96 | cargo add fluxus --features full 97 | ``` 98 | 99 | This will add Fluxus with all available features to your project. After adding the dependency, you can start using Fluxus in your code. Check out the examples section above and the quick example at the end of this README for usage. 100 |
101 | ## Getting Started 102 | 103 | 1. Clone the repository: 104 | 105 | ```bash 106 | git clone https://github.com/lispking/fluxus.git 107 | cd fluxus 108 | ``` 109 | 110 | 2. Build the project: 111 | 112 | ```bash 113 | cargo build 114 | ``` 115 | 116 | 3. Run the examples: 117 | 118 | ```bash 119 | cargo run --example [example-name] 120 | ``` 121 | 122 | ## Development 123 | 124 | ### Prerequisites 125 | 126 | - Rust 1.85+ (required by the Rust 2024 edition used in this workspace) 127 | - Cargo 128 | 129 | ### Building 130 | 131 | ```bash 132 | cargo build 133 | ``` 134 | 135 | ### Testing 136 | 137 | ```bash 138 | cargo test 139 | ``` 140 | 141 | ## License 142 | 143 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 144 | 145 | ## Star History 146 | 147 | [![Star History Chart](https://api.star-history.com/svg?repos=lispking/fluxus&type=Date)](https://www.star-history.com/#lispking/fluxus&Date) 148 | 149 | ### Thank you for your support and participation ❤️ 150 | 151 | 
152 | 153 | 154 | 155 |
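## Quick Example

A minimal pipeline sketch using the in-memory `CollectionSource` and `CollectionSink` helpers (adapted from the `fluxus-api` test suite). It assumes a Tokio runtime and that you depend on `fluxus-api` directly; if you use the `fluxus` facade crate, adjust the import paths to its re-exports.

```rust
use fluxus_api::{CollectionSink, CollectionSource, DataStream};

#[tokio::main]
async fn main() {
    // In-memory source; swap in a CSV or generator source for real workloads.
    let source = CollectionSource::new(vec![1, 2, 3, 4, 5]);
    let sink = CollectionSink::new();

    DataStream::new(source)
        .filter(|x| x % 2 == 0) // keep even numbers
        .map(|x| x * 10)        // transform each record
        .sink(sink.clone())     // drive the pipeline and collect the results
        .await
        .unwrap();

    println!("{:?}", sink.get_data()); // [20, 40]
}
```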
156 | -------------------------------------------------------------------------------- /benches/runtime_benchmark.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use criterion::{Criterion, criterion_group, criterion_main}; 3 | use fluxus_core::ParallelConfig; 4 | use fluxus_runtime::RuntimeContext; 5 | use fluxus_sinks::Sink; 6 | use fluxus_sources::Source; 7 | use fluxus_transformers::Operator; 8 | use fluxus_utils::models::{Record, StreamResult}; 9 | use std::sync::Arc; 10 | use tokio::sync::Mutex; 11 | 12 | // Dummy Source for benchmarking 13 | pub struct DummySource { 14 | data: Vec, 15 | index: usize, 16 | } 17 | 18 | #[async_trait] 19 | impl Source for DummySource { 20 | async fn init(&mut self) -> StreamResult<()> { 21 | Ok(()) 22 | } 23 | 24 | async fn next(&mut self) -> StreamResult>> { 25 | if self.index < self.data.len() { 26 | let record = Record::new(self.data[self.index]); 27 | self.index += 1; 28 | Ok(Some(record)) 29 | } else { 30 | Ok(None) 31 | } 32 | } 33 | 34 | async fn close(&mut self) -> StreamResult<()> { 35 | Ok(()) 36 | } 37 | } 38 | 39 | // Dummy Operator for benchmarking 40 | pub struct DummyOperator; 41 | 42 | #[async_trait] 43 | impl Operator for DummyOperator { 44 | async fn process(&mut self, record: Record) -> StreamResult>> { 45 | Ok(vec![record]) 46 | } 47 | 48 | async fn close(&mut self) -> StreamResult<()> { 49 | Ok(()) 50 | } 51 | } 52 | 53 | // Dummy Sink for benchmarking 54 | pub struct DummySink; 55 | 56 | #[async_trait] 57 | impl Sink for DummySink { 58 | async fn init(&mut self) -> StreamResult<()> { 59 | Ok(()) 60 | } 61 | async fn write(&mut self, _record: Record) -> StreamResult<()> { 62 | Ok(()) 63 | } 64 | async fn flush(&mut self) -> StreamResult<()> { 65 | Ok(()) 66 | } 67 | async fn close(&mut self) -> StreamResult<()> { 68 | Ok(()) 69 | } 70 | } 71 | 72 | fn criterion_benchmark(c: &mut Criterion) { 73 | let runtime = tokio::runtime::Runtime::new().unwrap(); 74 | let data_size = 10_000; 75 | let data: Vec = (0..data_size).collect(); 76 | 77 | c.bench_function("pipeline_execution", |b| { 78 | b.iter(|| { 79 | runtime.block_on(async { 80 | let parallel_config = ParallelConfig::default(); 81 | let runtime_context = RuntimeContext::new(parallel_config); 82 | 83 | let source = DummySource { 84 | data: data.clone(), 85 | index: 0, 86 | }; 87 | let operators: Vec + Send + Sync>>> = vec![ 88 | Arc::new(Mutex::new(DummyOperator)), 89 | Arc::new(Mutex::new(DummyOperator)), 90 | ]; 91 | let sink = DummySink; 92 | 93 | runtime_context 94 | .execute_pipeline(source, operators, sink) 95 | .await 96 | .unwrap(); 97 | }) 98 | }); 99 | }); 100 | } 101 | 102 | criterion_group!(benches, criterion_benchmark); 103 | criterion_main!(benches); 104 | -------------------------------------------------------------------------------- /crates/fluxus-api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-api" 3 | description = "High-level API for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | fluxus-core = { path = "../fluxus-core", version="0.2" } 13 | fluxus-runtime = { path = "../fluxus-runtime", version="0.2" } 14 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 15 | fluxus-sinks = { path = "../fluxus-sinks", 
version="0.2" } 16 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 17 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2" } 18 | 19 | tokio = { version = "1", features = ["full"] } 20 | futures = "0.3" 21 | async-trait = "0.1" 22 | anyhow = "1.0" 23 | tracing = "0.1" 24 | serde = { version = "1.0", features = ["derive"] } 25 | serde_json = "1.0" 26 | 27 | [dev-dependencies] 28 | tokio-test = "0.4.4" 29 | -------------------------------------------------------------------------------- /crates/fluxus-api/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus API 2 | 3 | Core API definitions and interfaces for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides the public API for building stream processing applications with Fluxus. It includes: 8 | 9 | - `DataStream` - The main abstraction for working with data streams 10 | - Source and Sink interfaces 11 | - Stream operations (map, filter, aggregate, etc.) 12 | - Window configurations 13 | - I/O utilities 14 | 15 | ## Key Components 16 | 17 | ### DataStream 18 | 19 | The `DataStream` type is the main entry point for building stream processing pipelines: 20 | 21 | ```rust 22 | DataStream::new(source) 23 | .map(|x| x * 2) 24 | .filter(|x| x > 0) 25 | .window(WindowConfig::Tumbling { size_ms: 1000 }) 26 | .aggregate(initial_state, |state, value| /* aggregation logic */) 27 | .sink(sink) 28 | ``` 29 | 30 | ### Windows 31 | 32 | Supported window types: 33 | - Tumbling Windows - Fixed-size, non-overlapping windows 34 | - Sliding Windows - Fixed-size windows that slide by a specified interval 35 | - Session Windows - Dynamic windows based on activity timeouts 36 | 37 | ### I/O 38 | 39 | Pre-built source and sink implementations: 40 | - `CollectionSource` - Create a stream from a collection 41 | - `CollectionSink` - Collect stream results into a collection 42 | - Additional I/O implementations for files, networks, etc. 43 | 44 | ## Usage 45 | 46 | Add this to your `Cargo.toml`: 47 | 48 | ```toml 49 | [dependencies] 50 | fluxus-api = "0.2" 51 | ``` 52 | 53 | See the `fluxus-examples` crate for complete usage examples. 
-------------------------------------------------------------------------------- /crates/fluxus-api/src/io/collection_sink.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sinks::Sink; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::sync::{Arc, Mutex}; 5 | 6 | /// A sink that collects elements into a Vec 7 | #[derive(Default, Clone)] 8 | pub struct CollectionSink { 9 | data: Arc>>, 10 | } 11 | 12 | impl CollectionSink { 13 | pub fn new() -> Self { 14 | Self { 15 | data: Arc::new(Mutex::new(Vec::new())), 16 | } 17 | } 18 | 19 | pub fn get_data(&self) -> Vec 20 | where 21 | T: Clone, 22 | { 23 | self.data 24 | .lock() 25 | .map_or_else(|p| p.into_inner().clone(), |d| d.clone()) 26 | } 27 | 28 | pub fn get_last_element(&self) -> Option 29 | where 30 | T: Clone, 31 | { 32 | self.data 33 | .lock() 34 | .map_or_else(|p| p.into_inner().last().cloned(), |d| d.last().cloned()) 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl Sink for CollectionSink 40 | where 41 | T: Clone + Send + Sync + 'static, 42 | { 43 | async fn init(&mut self) -> StreamResult<()> { 44 | Ok(()) 45 | } 46 | 47 | async fn write(&mut self, record: Record) -> StreamResult<()> { 48 | if let Ok(mut data) = self.data.lock() { 49 | data.push(record.data) 50 | } 51 | Ok(()) 52 | } 53 | 54 | async fn flush(&mut self) -> StreamResult<()> { 55 | Ok(()) 56 | } 57 | 58 | async fn close(&mut self) -> StreamResult<()> { 59 | Ok(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/io/collection_source.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sources::Source; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use fluxus_utils::time::current_time; 5 | use std::collections::VecDeque; 6 | 7 | /// A source that produces elements from a collection 8 | pub struct CollectionSource { 9 | data: VecDeque, 10 | } 11 | 12 | impl CollectionSource { 13 | pub fn new(data: impl IntoIterator) -> Self { 14 | Self { 15 | data: data.into_iter().collect(), 16 | } 17 | } 18 | } 19 | 20 | #[async_trait] 21 | impl Source for CollectionSource 22 | where 23 | T: Clone + Send + Sync + 'static, 24 | { 25 | async fn init(&mut self) -> StreamResult<()> { 26 | Ok(()) 27 | } 28 | 29 | async fn next(&mut self) -> StreamResult>> { 30 | let value = self.data.pop_front(); 31 | Ok(value.map(|data| Record { 32 | data, 33 | timestamp: current_time() as i64, 34 | })) 35 | } 36 | 37 | async fn close(&mut self) -> StreamResult<()> { 38 | Ok(()) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/io/mod.rs: -------------------------------------------------------------------------------- 1 | mod collection_sink; 2 | mod collection_source; 3 | 4 | pub use collection_sink::CollectionSink; 5 | pub use collection_source::CollectionSource; 6 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus API - High-level interface for stream processing 2 | //! 3 | //! This module provides a user-friendly API for building stream processing applications. 
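//!
//! # Example
//!
//! A minimal sketch (marked `ignore` so rustdoc does not try to execute it),
//! assuming the in-memory collection helpers and an async runtime; see the
//! crate tests for more complete pipelines:
//!
//! ```ignore
//! use fluxus_api::{CollectionSink, CollectionSource, DataStream};
//!
//! async fn run() {
//!     let source = CollectionSource::new(vec![1usize, 2, 3]);
//!     let sink = CollectionSink::new();
//!
//!     DataStream::new(source)
//!         .flat_map(|v| vec![v; v]) // repeat each value v times: 1, 2, 2, 3, 3, 3
//!         .sink(sink.clone())
//!         .await
//!         .unwrap();
//!
//!     assert_eq!(sink.get_data(), vec![1, 2, 2, 3, 3, 3]);
//! }
//! ```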
4 | 5 | pub mod io; 6 | pub mod operators; 7 | pub mod stream; 8 | 9 | pub use io::{CollectionSink, CollectionSource}; 10 | pub use stream::{DataStream, WindowedStream}; 11 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/filter.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | pub struct FilterOperator { 7 | f: F, 8 | _phantom: PhantomData, 9 | } 10 | 11 | impl FilterOperator 12 | where 13 | F: Fn(&T) -> bool, 14 | { 15 | pub fn new(f: F) -> Self { 16 | Self { 17 | f, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | #[async_trait] 24 | impl Operator for FilterOperator 25 | where 26 | T: Clone + Send + Sync + 'static, 27 | F: Fn(&T) -> bool + Send + Sync, 28 | { 29 | async fn process(&mut self, record: Record) -> StreamResult>> { 30 | if (self.f)(&record.data) { 31 | Ok(vec![record]) 32 | } else { 33 | Ok(vec![]) 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/flat_map.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | pub struct FlatMapOperator 7 | where 8 | I: IntoIterator, 9 | F: Fn(T) -> I, 10 | { 11 | f: F, 12 | _phantom: PhantomData<(T, R)>, 13 | } 14 | 15 | impl FlatMapOperator 16 | where 17 | I: IntoIterator, 18 | F: Fn(T) -> I, 19 | { 20 | pub fn new(f: F) -> Self { 21 | Self { 22 | f, 23 | _phantom: PhantomData, 24 | } 25 | } 26 | } 27 | 28 | #[async_trait] 29 | impl Operator for FlatMapOperator 30 | where 31 | T: Clone + Send + Sync + 'static, 32 | R: Clone + Send + Sync + 'static, 33 | F: Fn(T) -> I + Send + Sync, 34 | I: IntoIterator, 35 | { 36 | async fn process(&mut self, record: Record) -> StreamResult>> { 37 | let Record { data, timestamp } = record; 38 | let result = (self.f)(data); 39 | Ok(result 40 | .into_iter() 41 | .map(|r| Record::with_timestamp(r, timestamp)) 42 | .collect()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/map.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | pub struct MapOperator { 7 | f: F, 8 | _phantom: PhantomData<(T, R)>, 9 | } 10 | 11 | impl MapOperator 12 | where 13 | F: Fn(T) -> R, 14 | { 15 | pub fn new(f: F) -> Self { 16 | Self { 17 | f, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | #[async_trait] 24 | impl Operator for MapOperator 25 | where 26 | T: Clone + Send + Sync + 'static, 27 | R: Clone + Send + Sync + 'static, 28 | F: Fn(T) -> R + Send + Sync, 29 | { 30 | async fn process(&mut self, record: Record) -> StreamResult>> { 31 | let result = (self.f)(record.data); 32 | Ok(vec![Record::with_timestamp(result, record.timestamp)]) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/mod.rs: -------------------------------------------------------------------------------- 1 | mod filter; 2 | mod 
flat_map; 3 | mod map; 4 | mod window_aggregator; 5 | mod window_skipper; 6 | mod window_sorter; 7 | 8 | pub use filter::FilterOperator; 9 | pub use flat_map::FlatMapOperator; 10 | pub use map::MapOperator; 11 | pub use window_aggregator::WindowAggregator; 12 | pub use window_skipper::WindowSkipper; 13 | pub use window_sorter::SortOrder; 14 | pub use window_sorter::WindowSorter; 15 | pub use window_sorter::WindowTimestampSorter; 16 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/window_aggregator.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_runtime::state::KeyedStateBackend; 3 | use fluxus_transformers::Operator; 4 | use fluxus_utils::{ 5 | models::{Record, StreamResult}, 6 | window::WindowConfig, 7 | }; 8 | use std::marker::PhantomData; 9 | 10 | pub struct WindowAggregator { 11 | window_config: WindowConfig, 12 | init: A, 13 | f: F, 14 | state: KeyedStateBackend, 15 | _phantom: PhantomData, 16 | } 17 | 18 | impl WindowAggregator 19 | where 20 | A: Clone, 21 | F: Fn(A, T) -> A, 22 | { 23 | pub fn new(window_config: WindowConfig, init: A, f: F) -> Self { 24 | Self { 25 | window_config, 26 | init, 27 | f, 28 | state: KeyedStateBackend::new(), 29 | _phantom: PhantomData, 30 | } 31 | } 32 | 33 | fn get_window_keys(&self, timestamp: i64) -> Vec { 34 | self.window_config.window_type.get_window_keys(timestamp) 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl Operator for WindowAggregator 40 | where 41 | T: Clone + Send + Sync + 'static, 42 | A: Clone + Send + Sync + 'static, 43 | F: Fn(A, T) -> A + Send + Sync, 44 | { 45 | async fn process(&mut self, record: Record) -> StreamResult>> { 46 | let mut results = Vec::new(); 47 | 48 | for window_key in self.get_window_keys(record.timestamp) { 49 | let current = self 50 | .state 51 | .get(&window_key) 52 | .unwrap_or_else(|| self.init.clone()); 53 | let new_value = (self.f)(current, record.data.clone()); 54 | self.state.set(window_key, new_value.clone()); 55 | 56 | results.push(Record { 57 | data: new_value, 58 | timestamp: record.timestamp, 59 | }); 60 | } 61 | 62 | Ok(results) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/window_skipper.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_transformers::Operator; 3 | use fluxus_utils::{ 4 | models::{Record, StreamResult}, 5 | window::WindowConfig, 6 | }; 7 | use std::{collections::HashMap, marker::PhantomData}; 8 | 9 | pub struct WindowSkipper { 10 | window_config: WindowConfig, 11 | n: usize, 12 | buffer: HashMap>, 13 | _phantom: PhantomData, 14 | } 15 | 16 | impl WindowSkipper 17 | where 18 | T: Clone, 19 | { 20 | pub fn new(window_config: WindowConfig, n: usize) -> Self { 21 | Self { 22 | window_config, 23 | n, 24 | buffer: HashMap::new(), 25 | _phantom: PhantomData, 26 | } 27 | } 28 | 29 | fn get_window_keys(&self, timestamp: i64) -> Vec { 30 | self.window_config.window_type.get_window_keys(timestamp) 31 | } 32 | } 33 | 34 | #[async_trait] 35 | impl Operator> for WindowSkipper 36 | where 37 | T: Clone + Send + Sync + 'static, 38 | { 39 | async fn process(&mut self, record: Record) -> StreamResult>>> { 40 | let mut results = Vec::new(); 41 | 42 | for window_key in self.get_window_keys(record.timestamp) { 43 | let records = self.buffer.entry(window_key).or_default(); 44 | 
records.push(record.data.clone()); 45 | let new_records = records.iter().skip(self.n).cloned().collect::>(); 46 | results.push(Record { 47 | data: new_records, 48 | timestamp: record.timestamp, 49 | }); 50 | } 51 | 52 | Ok(results) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/operators/window_sorter.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_runtime::state::KeyedStateBackend; 3 | use fluxus_transformers::Operator; 4 | use fluxus_utils::{ 5 | models::{Record, StreamResult}, 6 | window::WindowConfig, 7 | }; 8 | use std::{cmp::Ordering, marker::PhantomData}; 9 | 10 | /// sort_by operator for windowed stream. 11 | pub struct WindowSorter { 12 | window_config: WindowConfig, 13 | f: F, 14 | state: KeyedStateBackend>, 15 | _phantom: PhantomData, 16 | } 17 | 18 | impl WindowSorter 19 | where 20 | F: FnMut(&T, &T) -> Ordering, 21 | { 22 | pub fn new(window_config: WindowConfig, f: F) -> Self { 23 | Self { 24 | window_config, 25 | f, 26 | state: KeyedStateBackend::new(), 27 | _phantom: PhantomData, 28 | } 29 | } 30 | 31 | fn get_window_keys(&self, timestamp: i64) -> Vec { 32 | self.window_config.window_type.get_window_keys(timestamp) 33 | } 34 | } 35 | 36 | #[async_trait] 37 | impl Operator> for WindowSorter 38 | where 39 | T: Clone + Send + Sync + 'static, 40 | F: FnMut(&T, &T) -> Ordering + Send + Sync, 41 | { 42 | async fn process(&mut self, record: Record) -> StreamResult>>> { 43 | let mut results = Vec::new(); 44 | 45 | for window_key in self.get_window_keys(record.timestamp) { 46 | let mut current = self.state.get(&window_key).unwrap_or_default(); 47 | let index = current 48 | .binary_search_by(|prob| (self.f)(prob, &record.data)) 49 | .unwrap_or_else(|i| i); 50 | current.insert(index, record.data.clone()); 51 | 52 | self.state.set(window_key, current.clone()); 53 | results.push(Record { 54 | data: current, 55 | timestamp: record.timestamp, 56 | }); 57 | } 58 | 59 | Ok(results) 60 | } 61 | } 62 | 63 | /// Specify sorting method of sort_by_ts 64 | #[derive(Debug, Clone, Copy)] 65 | pub enum SortOrder { 66 | Asc, 67 | Desc, 68 | } 69 | 70 | /// sort_by_ts operator for windowed stream. 
71 | pub struct WindowTimestampSorter { 72 | window_config: WindowConfig, 73 | method: SortOrder, 74 | state: KeyedStateBackend>>, 75 | _phantom: PhantomData, 76 | } 77 | 78 | impl WindowTimestampSorter { 79 | pub fn new(window_config: WindowConfig, method: SortOrder) -> Self { 80 | Self { 81 | window_config, 82 | method, 83 | state: KeyedStateBackend::new(), 84 | _phantom: PhantomData, 85 | } 86 | } 87 | 88 | fn get_window_keys(&self, timestamp: i64) -> Vec { 89 | self.window_config.window_type.get_window_keys(timestamp) 90 | } 91 | } 92 | 93 | #[async_trait] 94 | impl Operator> for WindowTimestampSorter 95 | where 96 | T: Clone + Send + Sync + 'static, 97 | { 98 | async fn process(&mut self, record: Record) -> StreamResult>>> { 99 | let mut raw_results = Vec::new(); 100 | for window_key in self.get_window_keys(record.timestamp) { 101 | let mut current = self.state.get(&window_key).unwrap_or_default(); 102 | let index = current 103 | .binary_search_by(|prob| match self.method { 104 | SortOrder::Asc => prob.timestamp.cmp(&record.timestamp), 105 | SortOrder::Desc => record.timestamp.cmp(&prob.timestamp), 106 | }) 107 | .unwrap_or_else(|i| i); 108 | current.insert(index, record.clone()); 109 | 110 | self.state.set(window_key, current.clone()); 111 | raw_results.push(Record { 112 | data: current, 113 | timestamp: record.timestamp, 114 | }); 115 | } 116 | let results = raw_results 117 | .into_iter() 118 | .map(|Record { data, timestamp }| { 119 | let data = data.into_iter().map(|rec| rec.data).collect(); 120 | Record { data, timestamp } 121 | }) 122 | .collect(); 123 | Ok(results) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/stream/datastream.rs: -------------------------------------------------------------------------------- 1 | use crate::operators::{FilterOperator, FlatMapOperator, MapOperator}; 2 | use fluxus_core::ParallelConfig; 3 | use fluxus_sinks::Sink; 4 | use fluxus_sources::Source; 5 | use fluxus_transformers::{ 6 | InnerOperator, InnerSource, Operator, TransformSource, TransformSourceWithOperator, 7 | }; 8 | use fluxus_utils::{ 9 | models::{StreamError, StreamResult}, 10 | window::WindowConfig, 11 | }; 12 | use std::sync::{ 13 | Arc, 14 | atomic::{AtomicUsize, Ordering}, 15 | }; 16 | 17 | use super::WindowedStream; 18 | 19 | /// DataStream represents a stream of data elements 20 | pub struct DataStream { 21 | pub(crate) source: Arc>, 22 | pub(crate) operators: Vec>>, 23 | pub(crate) parallel_config: Option, 24 | } 25 | 26 | impl DataStream 27 | where 28 | T: Clone + Send + Sync + 'static, 29 | { 30 | /// Create a new DataStream from a source 31 | pub fn new(source: S) -> Self 32 | where 33 | S: Source + Send + Sync + 'static, 34 | { 35 | Self { 36 | source: Arc::new(source), 37 | operators: Vec::new(), 38 | parallel_config: None, 39 | } 40 | } 41 | 42 | /// Set parallelism for the stream processing 43 | pub fn parallel(mut self, parallelism: usize) -> Self { 44 | self.parallel_config = Some(ParallelConfig { 45 | parallelism, 46 | buffer_size: 1024, 47 | preserve_order: true, 48 | }); 49 | self 50 | } 51 | 52 | /// Apply a map transformation 53 | pub fn map(self, f: F) -> DataStream 54 | where 55 | F: Fn(T) -> R + Send + Sync + 'static, 56 | R: Clone + Send + Sync + 'static, 57 | { 58 | let mapper = MapOperator::new(f); 59 | self.transform(mapper) 60 | } 61 | 62 | /// Apply a filter transformation 63 | pub fn filter(mut self, f: F) -> Self 64 | where 65 | F: Fn(&T) -> bool + Send + Sync + 'static, 66 | 
{ 67 | let filter = FilterOperator::new(f); 68 | self.operators.push(Arc::new(filter)); 69 | self 70 | } 71 | 72 | /// Apply a flat map transformation 73 | pub fn flat_map(self, f: F) -> DataStream 74 | where 75 | F: Fn(T) -> I + Send + Sync + 'static, 76 | R: Clone + Send + Sync + 'static, 77 | I: IntoIterator + Send + Sync + 'static, 78 | { 79 | self.transform(FlatMapOperator::new(f)) 80 | } 81 | 82 | /// Apply a limit transformation that keeps the first n elements 83 | pub fn limit(self, n: usize) -> Self { 84 | let n = AtomicUsize::new(n); 85 | self.filter(move |_| { 86 | if n.load(Ordering::SeqCst) > 0 { 87 | n.fetch_sub(1, Ordering::SeqCst); 88 | true 89 | } else { 90 | false 91 | } 92 | }) 93 | } 94 | 95 | /// Transform the stream using a custom operator 96 | pub fn transform(self, operator: O) -> DataStream 97 | where 98 | O: Operator + Send + Sync + 'static, 99 | R: Clone + Send + Sync + 'static, 100 | { 101 | let source = TransformSourceWithOperator::new(self.source, operator, self.operators); 102 | DataStream { 103 | source: Arc::new(source), 104 | operators: Vec::new(), 105 | parallel_config: self.parallel_config, 106 | } 107 | } 108 | 109 | /// Apply windowing to the stream 110 | pub fn window(self, config: WindowConfig) -> WindowedStream { 111 | WindowedStream { 112 | stream: self, 113 | window_config: config, 114 | } 115 | } 116 | 117 | /// Write the stream to a sink 118 | pub async fn sink(self, mut sink: K) -> StreamResult<()> 119 | where 120 | K: Sink + Send + Sync + 'static, 121 | { 122 | let mut source = TransformSource::new(self.source); 123 | source.set_operators(self.operators); 124 | 125 | loop { 126 | match source.next().await { 127 | Ok(Some(record)) => sink.write(record).await?, 128 | Ok(None) => break, 129 | Err(e) => match e { 130 | StreamError::EOF => break, 131 | StreamError::Wait(ms) => { 132 | tokio::time::sleep(std::time::Duration::from_millis(ms)).await 133 | } 134 | _ => return Err(e), 135 | }, 136 | } 137 | } 138 | 139 | sink.flush().await?; 140 | sink.close().await 141 | } 142 | } 143 | 144 | impl DataStream> 145 | where 146 | T: Clone + Send + Sync + 'static, 147 | { 148 | /// Flatten the stream 149 | pub fn flatten(self) -> DataStream { 150 | self.transform(FlatMapOperator::new(|v| v)) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/stream/mod.rs: -------------------------------------------------------------------------------- 1 | mod datastream; 2 | mod windowed_stream; 3 | 4 | pub use datastream::DataStream; 5 | pub use windowed_stream::WindowedStream; 6 | -------------------------------------------------------------------------------- /crates/fluxus-api/src/stream/windowed_stream.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::{Ordering, Reverse}; 2 | use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque}; 3 | use std::hash::Hash; 4 | 5 | use fluxus_transformers::operator::{WindowAllOperator, WindowAnyOperator}; 6 | use fluxus_utils::window::WindowConfig; 7 | 8 | use crate::operators::{ 9 | SortOrder, WindowAggregator, WindowSkipper, WindowSorter, WindowTimestampSorter, 10 | }; 11 | use crate::stream::datastream::DataStream; 12 | 13 | /// Represents a windowed stream for aggregation operations 14 | pub struct WindowedStream { 15 | pub(crate) stream: DataStream, 16 | pub(crate) window_config: WindowConfig, 17 | } 18 | 19 | impl WindowedStream 20 | where 21 | T: Clone + Send + Sync + 'static, 22 | { 23 | /// 
Aggregate values in the window 24 | pub fn aggregate(self, init: A, f: F) -> DataStream 25 | where 26 | A: Clone + Send + Sync + 'static, 27 | F: Fn(A, T) -> A + Send + Sync + 'static, 28 | { 29 | let aggregator = WindowAggregator::new(self.window_config, init, f); 30 | self.stream.transform(aggregator) 31 | } 32 | 33 | pub fn any(self, f: F) -> DataStream 34 | where 35 | F: Fn(&T) -> bool + Send + Sync + 'static, 36 | { 37 | let anyer = WindowAnyOperator::new(f, self.window_config); 38 | self.stream.transform(anyer) 39 | } 40 | 41 | pub fn all(self, f: F) -> DataStream 42 | where 43 | F: Fn(&T) -> bool + Send + Sync + 'static, 44 | { 45 | let aller = WindowAllOperator::new(f, self.window_config); 46 | self.stream.transform(aller) 47 | } 48 | 49 | /// Limit the number of values in the window 50 | pub fn limit(self, n: usize) -> DataStream> { 51 | let limiter = WindowAggregator::new(self.window_config, vec![], move |mut acc, value| { 52 | if acc.len() < n { 53 | acc.push(value); 54 | } 55 | acc 56 | }); 57 | self.stream.transform(limiter) 58 | } 59 | 60 | /// Retain last n values in the window 61 | pub fn tail(self, n: usize) -> DataStream> { 62 | let init = VecDeque::with_capacity(n); 63 | let limiter = WindowAggregator::new(self.window_config, init, move |mut acc, value| { 64 | acc.push_back(value); 65 | if acc.len() > n { 66 | acc.pop_front(); 67 | } 68 | acc 69 | }); 70 | self.stream 71 | .transform(limiter) 72 | .map(|d| d.into_iter().collect()) 73 | } 74 | 75 | /// Sort values in the window 76 | pub fn sort_by(self, f: F) -> DataStream> 77 | where 78 | F: FnMut(&T, &T) -> Ordering + Send + Sync + 'static, 79 | { 80 | let sorter = WindowSorter::new(self.window_config, f); 81 | self.stream.transform(sorter) 82 | } 83 | 84 | /// Sort values in the window by timestamp 85 | pub fn sort_by_ts(self, order: SortOrder) -> DataStream> { 86 | let sorter = WindowTimestampSorter::new(self.window_config, order); 87 | self.stream.transform(sorter) 88 | } 89 | 90 | /// Sort values in the window by timestamp in ascending order 91 | pub fn sort_by_ts_asc(self) -> DataStream> { 92 | let sorter = WindowTimestampSorter::new(self.window_config, SortOrder::Asc); 93 | self.stream.transform(sorter) 94 | } 95 | 96 | /// Sort values in the window by timestamp in descending order 97 | pub fn sort_by_ts_desc(self) -> DataStream> { 98 | let sorter = WindowTimestampSorter::new(self.window_config, SortOrder::Desc); 99 | self.stream.transform(sorter) 100 | } 101 | 102 | /// Skip 103 | pub fn skip(self, n: usize) -> DataStream> { 104 | let skipper = WindowSkipper::new(self.window_config, n); 105 | self.stream.transform(skipper) 106 | } 107 | } 108 | 109 | impl WindowedStream 110 | where 111 | T: Ord + Clone + Send + Sync + 'static, 112 | { 113 | /// Sort values in specified order 114 | pub fn sort(self, ord: SortOrder) -> DataStream> { 115 | self.sort_by(move |v1, v2| match ord { 116 | SortOrder::Asc => v1.cmp(v2), 117 | SortOrder::Desc => v2.cmp(v1), 118 | }) 119 | } 120 | 121 | /// Get the top k values in the window, the values are sorted in descending order 122 | pub fn top_k(self, k: usize) -> DataStream> { 123 | let init = BinaryHeap::>::new(); 124 | let res = self.aggregate(init, move |mut heap, v| { 125 | heap.push(Reverse(v)); 126 | if heap.len() > k { 127 | heap.pop(); 128 | } 129 | heap 130 | }); 131 | res.map(|heap| { 132 | heap.into_sorted_vec() 133 | .into_iter() 134 | .map(|Reverse(v)| v) 135 | .collect() 136 | }) 137 | } 138 | } 139 | 140 | impl WindowedStream 141 | where 142 | T: Eq + Hash + Clone + 
Send + Sync + 'static, 143 | { 144 | /// Distinct values 145 | pub fn distinct(self) -> DataStream> { 146 | self.aggregate(HashSet::new(), |mut set, value| { 147 | set.insert(value); 148 | set 149 | }) 150 | } 151 | } 152 | 153 | impl WindowedStream 154 | where 155 | T: Clone + Send + Sync + 'static, 156 | { 157 | /// Distinct values by key. When the same key is encountered, the first occurrence of the value is retained 158 | pub fn distinct_by_key(self, f: F) -> DataStream> 159 | where 160 | F: Fn(&T) -> K + Sync + Send + 'static, 161 | K: Eq + Hash + Clone + Sync + Send + 'static, 162 | { 163 | let keys = HashSet::new(); 164 | let data = vec![]; 165 | self.aggregate((keys, data), move |(mut keys, mut data), value| { 166 | let k = f(&value); 167 | if !keys.contains(&k) { 168 | keys.insert(k); 169 | data.push(value); 170 | } 171 | (keys, data) 172 | }) 173 | .map(|(_, data)| data) 174 | } 175 | 176 | /// Get top k values by key. The values are sorted by key in descending order 177 | pub fn top_k_by_key(self, n: usize, f: F) -> DataStream> 178 | where 179 | F: Fn(&T) -> K + Sync + Send + 'static, 180 | K: Ord + Eq + Hash + Clone + Sync + Send + 'static, 181 | { 182 | // Store the top k keys 183 | let keys = BinaryHeap::>::new(); 184 | // Store the values by key 185 | let kvs: HashMap> = HashMap::new(); 186 | self.aggregate((keys, kvs), move |(mut keys, mut kvs), value| { 187 | let k = f(&value); 188 | 189 | keys.push(Reverse(k.clone())); 190 | kvs.entry(k).or_default().push(value); 191 | 192 | if keys.len() > n { 193 | if let Some(Reverse(min_k)) = keys.pop() { 194 | kvs.get_mut(&min_k).map(|v| v.pop()); 195 | } 196 | } 197 | (keys, kvs) 198 | }) 199 | .map(|(top_keys, mut kvs)| { 200 | top_keys 201 | .into_sorted_vec() 202 | .into_iter() 203 | .fold(vec![], move |mut acc, Reverse(k)| { 204 | let values = kvs.remove(&k).unwrap_or_default(); 205 | acc.extend(values); 206 | acc 207 | }) 208 | }) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /crates/fluxus-api/tests/datastreams_test.rs: -------------------------------------------------------------------------------- 1 | use fluxus_api::{CollectionSink, CollectionSource, DataStream}; 2 | 3 | #[test] 4 | fn test_limit() { 5 | tokio_test::block_on(async { 6 | let numbers = vec![1, 2, 3, 4, 5]; 7 | let source = CollectionSource::new(numbers); 8 | let sink = CollectionSink::new(); 9 | 10 | DataStream::new(source) 11 | .limit(2) 12 | .sink(sink.clone()) 13 | .await 14 | .unwrap(); 15 | 16 | let data = sink.get_data(); 17 | assert_eq!(data, vec![1, 2]); 18 | }) 19 | } 20 | 21 | #[test] 22 | fn test_windowed_limit() { 23 | tokio_test::block_on(async { 24 | let numbers = vec![1, 2, 3, 4, 5]; 25 | let source = CollectionSource::new(numbers); 26 | let sink = CollectionSink::new(); 27 | 28 | DataStream::new(source) 29 | .window(fluxus_utils::window::WindowConfig::global()) 30 | .limit(3) 31 | .sink(sink.clone()) 32 | .await 33 | .unwrap(); 34 | 35 | let data = sink.get_data(); 36 | assert_eq!( 37 | data, 38 | vec![ 39 | vec![1], 40 | vec![1, 2], 41 | vec![1, 2, 3], 42 | vec![1, 2, 3], 43 | vec![1, 2, 3], 44 | ] 45 | ); 46 | }) 47 | } 48 | 49 | #[test] 50 | fn test_tail() { 51 | tokio_test::block_on(async { 52 | let numbers = vec![1, 2, 3, 4, 5, 6]; 53 | let source = CollectionSource::new(numbers); 54 | let sink = CollectionSink::new(); 55 | DataStream::new(source) 56 | .window(fluxus_utils::window::WindowConfig::global()) 57 | .tail(3) 58 | .sink(sink.clone()) 59 | .await 60 | .unwrap(); 61 | 62 | 
let data = sink.get_data(); 63 | assert_eq!( 64 | data, 65 | vec![ 66 | vec![1], 67 | vec![1, 2], 68 | vec![1, 2, 3], 69 | vec![2, 3, 4], 70 | vec![3, 4, 5], 71 | vec![4, 5, 6], 72 | ] 73 | ); 74 | }) 75 | } 76 | 77 | #[test] 78 | fn test_flatten() { 79 | tokio_test::block_on(async { 80 | let numbers: Vec> = vec![vec![1, 2], vec![3, 4, 5]]; 81 | let source = CollectionSource::new(numbers); 82 | let sink = CollectionSink::new(); 83 | 84 | DataStream::new(source) 85 | .flatten() 86 | .sink(sink.clone()) 87 | .await 88 | .unwrap(); 89 | 90 | let data = sink.get_data(); 91 | assert_eq!(data, vec![1, 2, 3, 4, 5]); 92 | }) 93 | } 94 | 95 | #[test] 96 | fn test_flat_map() { 97 | tokio_test::block_on(async { 98 | let numbers: Vec = vec![1, 2, 3]; 99 | let source = CollectionSource::new(numbers); 100 | let sink = CollectionSink::new(); 101 | 102 | DataStream::new(source) 103 | .flat_map(|v| vec![v; v]) 104 | .sink(sink.clone()) 105 | .await 106 | .unwrap(); 107 | 108 | let data = sink.get_data(); 109 | assert_eq!(data, vec![1, 2, 2, 3, 3, 3]); 110 | }) 111 | } 112 | -------------------------------------------------------------------------------- /crates/fluxus-api/tests/filter_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use fluxus_api::{CollectionSink, CollectionSource, DataStream}; 4 | 5 | #[test] 6 | fn test_filter() { 7 | tokio_test::block_on(async { 8 | let numbers = vec![1, 2, 3, 4, 5]; 9 | // TransformSourceWithOperator::new(); 10 | let source = CollectionSource::new(numbers); 11 | let sink = CollectionSink::new(); 12 | 13 | DataStream::new(source) 14 | .filter(|x| x % 2 == 0) 15 | .sink(sink.clone()) 16 | .await 17 | .unwrap(); 18 | 19 | let data = sink.get_data(); 20 | println!("data: {:?}", data); 21 | assert_eq!(data.len(), 2); 22 | assert_eq!(data[0], 2); 23 | assert_eq!(data[1], 4); 24 | }) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /crates/fluxus-api/tests/windowed_stream_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use async_trait::async_trait; 4 | use fluxus_api::operators::SortOrder; 5 | use fluxus_api::{CollectionSink, CollectionSource, DataStream}; 6 | use fluxus_sources::Source; 7 | use fluxus_utils::models::Record; 8 | use fluxus_utils::{models::StreamResult, window::WindowConfig}; 9 | 10 | #[test] 11 | fn test_any() { 12 | tokio_test::block_on(async { 13 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 14 | let sink = CollectionSink::new(); 15 | DataStream::new(source) 16 | .window(WindowConfig::global()) 17 | .any(|x| x % 2 == 0) 18 | .sink(sink.clone()) 19 | .await 20 | .unwrap(); 21 | let data = sink.get_data(); 22 | assert_eq!(data[0], false); 23 | assert_eq!(data[1], true); 24 | assert_eq!(data[2], true); 25 | assert_eq!(data[3], true); 26 | assert_eq!(data[4], true); 27 | }) 28 | } 29 | 30 | #[test] 31 | fn test_all() { 32 | tokio_test::block_on(async { 33 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 34 | let sink = CollectionSink::new(); 35 | DataStream::new(source) 36 | .window(WindowConfig::global()) 37 | .all(|x| x % 2 == 0) 38 | .sink(sink.clone()) 39 | .await 40 | .unwrap(); 41 | let data = sink.get_data(); 42 | assert_eq!(data[0], false); 43 | assert_eq!(data[1], false); 44 | assert_eq!(data[2], false); 45 | assert_eq!(data[3], false); 46 | assert_eq!(data[4], false); 47 | }) 48 | } 49 | 50 | #[test] 51 | fn test_sort_by() 
{ 52 | tokio_test::block_on(async { 53 | let source = CollectionSource::new(vec!["1", "4444", "55555", "22", "333"]); 54 | let sink = CollectionSink::new(); 55 | DataStream::new(source) 56 | .window(WindowConfig::global()) 57 | .sort_by(|a, b| a.len().cmp(&b.len())) 58 | .sink(sink.clone()) 59 | .await 60 | .unwrap(); 61 | let data = sink.get_data(); 62 | assert_eq!(data.len(), 5); 63 | assert_eq!(data[0], vec!["1"]); 64 | assert_eq!(data[1], vec!["1", "4444"]); 65 | assert_eq!(data[2], vec!["1", "4444", "55555"]); 66 | assert_eq!(data[3], vec!["1", "22", "4444", "55555"]); 67 | assert_eq!(data[4], vec!["1", "22", "333", "4444", "55555"]); 68 | }) 69 | } 70 | 71 | #[test] 72 | fn test_sort() { 73 | tokio_test::block_on(async { 74 | let source = CollectionSource::new(vec!["1", "4444", "55555", "22", "333"]); 75 | let sink = CollectionSink::new(); 76 | DataStream::new(source) 77 | .window(WindowConfig::global()) 78 | .sort(SortOrder::Asc) 79 | .sink(sink.clone()) 80 | .await 81 | .unwrap(); 82 | let data = sink.get_data(); 83 | assert_eq!(data.len(), 5); 84 | assert_eq!(data[0], vec!["1"]); 85 | assert_eq!(data[1], vec!["1", "4444"]); 86 | assert_eq!(data[2], vec!["1", "4444", "55555"]); 87 | assert_eq!(data[3], vec!["1", "22", "4444", "55555"]); 88 | assert_eq!(data[4], vec!["1", "22", "333", "4444", "55555"]); 89 | }) 90 | } 91 | 92 | struct SlowSource { 93 | inner: CollectionSource, 94 | counter: i64, 95 | } 96 | #[async_trait] 97 | impl Source for SlowSource 98 | where 99 | T: Clone + Send + Sync + 'static, 100 | { 101 | async fn init(&mut self) -> StreamResult<()> { 102 | Ok(()) 103 | } 104 | 105 | async fn next(&mut self) -> StreamResult>> { 106 | self.inner.next().await.map(|op| { 107 | op.map(|mut r| { 108 | self.counter += 1; 109 | r.timestamp += self.counter; 110 | r 111 | }) 112 | }) 113 | } 114 | 115 | async fn close(&mut self) -> StreamResult<()> { 116 | Ok(()) 117 | } 118 | } 119 | #[test] 120 | fn test_sort_by_ts() { 121 | tokio_test::block_on(async { 122 | let source = CollectionSource::new(vec!["1st", "2nd", "3rd", "4th", "5th"]); 123 | let source = SlowSource { 124 | inner: source, 125 | counter: 0, 126 | }; 127 | let sink = CollectionSink::new(); 128 | DataStream::new(source) 129 | .window(WindowConfig::global()) 130 | .sort_by_ts(SortOrder::Asc) 131 | .sink(sink.clone()) 132 | .await 133 | .unwrap(); 134 | let data = sink.get_data(); 135 | assert_eq!(data.len(), 5); 136 | assert_eq!( 137 | data, 138 | vec![ 139 | vec!["1st"], 140 | vec!["1st", "2nd"], 141 | vec!["1st", "2nd", "3rd"], 142 | vec!["1st", "2nd", "3rd", "4th"], 143 | vec!["1st", "2nd", "3rd", "4th", "5th"], 144 | ] 145 | ); 146 | let source = CollectionSource::new(vec!["1st", "2nd", "3rd", "4th", "5th"]); 147 | let source = SlowSource { 148 | inner: source, 149 | counter: 0, 150 | }; 151 | let sink = CollectionSink::new(); 152 | DataStream::new(source) 153 | .window(WindowConfig::global()) 154 | .sort_by_ts(SortOrder::Desc) 155 | .sink(sink.clone()) 156 | .await 157 | .unwrap(); 158 | let data = sink.get_data(); 159 | assert_eq!(data.len(), 5); 160 | let rev = |mut v: Vec<_>| { 161 | v.reverse(); 162 | v 163 | }; 164 | assert_eq!( 165 | data, 166 | vec![ 167 | rev(vec!["1st"]), 168 | rev(vec!["1st", "2nd"]), 169 | rev(vec!["1st", "2nd", "3rd"]), 170 | rev(vec!["1st", "2nd", "3rd", "4th"]), 171 | rev(vec!["1st", "2nd", "3rd", "4th", "5th"]), 172 | ] 173 | ); 174 | }) 175 | } 176 | 177 | #[test] 178 | fn test_distinct() { 179 | tokio_test::block_on(async { 180 | let source = CollectionSource::new(vec!["1", "22", 
"1", "22", "333", "333"]); 181 | let sink = CollectionSink::new(); 182 | DataStream::new(source) 183 | .window(WindowConfig::global()) 184 | .distinct() 185 | .sink(sink.clone()) 186 | .await 187 | .unwrap(); 188 | let data = sink.get_data(); 189 | assert_eq!(data.len(), 6); 190 | assert_eq!(data[5].len(), 3); 191 | assert!(data[5].contains("1")); 192 | assert!(data[5].contains("22")); 193 | assert!(data[5].contains("333")); 194 | 195 | let source = CollectionSource::new(vec!["1", "11", "111", "111"]); 196 | let sink = CollectionSink::new(); 197 | DataStream::new(source) 198 | .window(WindowConfig::global()) 199 | .distinct_by_key(|s| s.as_bytes()[0]) 200 | .sink(sink.clone()) 201 | .await 202 | .unwrap(); 203 | let data = sink.get_data(); 204 | assert_eq!(data.len(), 4); 205 | assert_eq!(data[3].len(), 1); 206 | assert!(data[3].contains(&"1")); 207 | }) 208 | } 209 | 210 | #[test] 211 | fn test_top_k() { 212 | tokio_test::block_on(async { 213 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 214 | let sink = CollectionSink::new(); 215 | DataStream::new(source) 216 | .window(WindowConfig::global()) 217 | .top_k(3) 218 | .sink(sink.clone()) 219 | .await 220 | .unwrap(); 221 | let data = sink.get_data(); 222 | assert_eq!(data.len(), 5); 223 | assert_eq!(data[0], vec![1]); 224 | assert_eq!(data[1], vec![2, 1]); 225 | assert_eq!(data[2], vec![3, 2, 1]); 226 | assert_eq!(data[3], vec![4, 3, 2]); 227 | assert_eq!(data[4], vec![5, 4, 3]); 228 | 229 | let source = CollectionSource::new(vec!["1", "2", "3", "3", "3"]); 230 | let sink = CollectionSink::new(); 231 | DataStream::new(source) 232 | .window(WindowConfig::global()) 233 | .top_k_by_key(3, |s| s.as_bytes()[0]) 234 | .sink(sink.clone()) 235 | .await 236 | .unwrap(); 237 | let data = sink.get_data(); 238 | dbg!(&data); 239 | assert_eq!(data.len(), 5); 240 | assert_eq!(data[0], vec!["1"]); 241 | assert_eq!(data[1], vec!["2", "1"]); 242 | assert_eq!(data[2], vec!["3", "2", "1"]); 243 | assert_eq!(data[3], vec!["3", "3", "2"]); 244 | assert_eq!(data[4], vec!["3", "3", "3"]); 245 | }) 246 | } 247 | 248 | #[test] 249 | fn test_skip() { 250 | tokio_test::block_on(async { 251 | let source = CollectionSource::new(vec![1, 2, 3, 4, 5]); 252 | let sink = CollectionSink::new(); 253 | DataStream::new(source) 254 | .window(WindowConfig::global()) 255 | .skip(2) 256 | .sink(sink.clone()) 257 | .await 258 | .unwrap(); 259 | let data = sink.get_data(); 260 | assert_eq!(data.len(), 5); 261 | assert_eq!(data[0], Vec::::new()); 262 | assert_eq!(data[1], Vec::::new()); 263 | assert_eq!(data[2], vec![3]); 264 | assert_eq!(data[3], vec![3, 4]); 265 | assert_eq!(data[4], vec![3, 4, 5]); 266 | }) 267 | } 268 | } 269 | -------------------------------------------------------------------------------- /crates/fluxus-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-core" 3 | description = "Core components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 13 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2" } 14 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 15 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2" } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | futures = "0.3" 19 
| serde = { version = "1.0", features = ["derive"] } 20 | serde_json = "1.0" 21 | anyhow = "1.0" 22 | thiserror = "1.0" 23 | async-trait = "0.1" 24 | tracing = "0.1" 25 | num_cpus = "1.16" 26 | csv = "1.3" 27 | 28 | [dev-dependencies] 29 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 30 | -------------------------------------------------------------------------------- /crates/fluxus-core/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Core 2 | 3 | Core implementations and data structures for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides the fundamental building blocks and implementations for the Fluxus stream processing engine: 8 | 9 | - Window implementations 10 | - State management 11 | - Data partitioning 12 | - Runtime configurations 13 | - Core data structures 14 | 15 | ## Key Components 16 | 17 | ### Windows 18 | 19 | Core window implementations: 20 | - `TumblingWindow` - Fixed-size, non-overlapping windows 21 | - `SlidingWindow` - Overlapping windows with slide interval 22 | - `SessionWindow` - Dynamic windows based on event timing 23 | 24 | ### State Management 25 | 26 | State handling for stream operations: 27 | - In-memory state storage 28 | - State backends 29 | - Checkpointing (planned) 30 | 31 | ### Partitioning 32 | 33 | Data partitioning strategies: 34 | - Key-based partitioning 35 | - Round-robin partitioning 36 | - Custom partitioners 37 | 38 | ## Usage 39 | 40 | Add this to your `Cargo.toml`: 41 | 42 | ```toml 43 | [dependencies] 44 | fluxus-core = "0.2" 45 | ``` 46 | 47 | This crate is usually not used directly but through the `fluxus-api` crate. -------------------------------------------------------------------------------- /crates/fluxus-core/src/config.rs: -------------------------------------------------------------------------------- 1 | /// Configuration for parallel processing 2 | #[derive(Debug, Clone)] 3 | pub struct ParallelConfig { 4 | /// Number of parallel tasks 5 | pub parallelism: usize, 6 | /// Maximum buffer size per task 7 | pub buffer_size: usize, 8 | /// Whether to preserve ordering in parallel processing 9 | pub preserve_order: bool, 10 | } 11 | 12 | impl Default for ParallelConfig { 13 | fn default() -> Self { 14 | Self { 15 | parallelism: num_cpus::get(), 16 | buffer_size: 1000, 17 | preserve_order: true, 18 | } 19 | } 20 | } 21 | 22 | impl ParallelConfig { 23 | /// Create a new parallel configuration 24 | pub fn new(parallelism: usize, buffer_size: usize, preserve_order: bool) -> Self { 25 | Self { 26 | parallelism, 27 | buffer_size, 28 | preserve_order, 29 | } 30 | } 31 | 32 | /// Set the number of parallel tasks 33 | pub fn with_parallelism(mut self, parallelism: usize) -> Self { 34 | self.parallelism = parallelism; 35 | self 36 | } 37 | 38 | /// Set the buffer size per task 39 | pub fn with_buffer_size(mut self, buffer_size: usize) -> Self { 40 | self.buffer_size = buffer_size; 41 | self 42 | } 43 | 44 | /// Set whether to preserve ordering 45 | pub fn with_preserve_order(mut self, preserve_order: bool) -> Self { 46 | self.preserve_order = preserve_order; 47 | self 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/error_handling/backpressure.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | /// Backpressure strategy for handling 
overload 4 | #[derive(Debug, Clone)] 5 | pub enum BackpressureStrategy { 6 | /// Block when buffer is full 7 | Block, 8 | /// Drop oldest items when buffer is full 9 | DropOldest, 10 | /// Drop newest items when buffer is full 11 | DropNewest, 12 | /// Apply backpressure with custom threshold 13 | Throttle { 14 | high_watermark: usize, 15 | low_watermark: usize, 16 | backoff: Duration, 17 | }, 18 | } 19 | 20 | /// Backpressure controller for managing load 21 | pub struct BackpressureController { 22 | strategy: BackpressureStrategy, 23 | current_load: usize, 24 | } 25 | 26 | impl BackpressureController { 27 | /// Create a new backpressure controller with the given strategy 28 | pub fn new(strategy: BackpressureStrategy) -> Self { 29 | Self { 30 | strategy, 31 | current_load: 0, 32 | } 33 | } 34 | 35 | /// Check if we should apply backpressure 36 | pub fn should_apply_backpressure(&self) -> bool { 37 | match &self.strategy { 38 | BackpressureStrategy::Block => self.current_load > 0, 39 | BackpressureStrategy::DropOldest | BackpressureStrategy::DropNewest => false, 40 | BackpressureStrategy::Throttle { high_watermark, .. } => { 41 | self.current_load >= *high_watermark 42 | } 43 | } 44 | } 45 | 46 | /// Get the backoff duration if throttling is needed 47 | pub fn get_backoff(&self) -> Option { 48 | match &self.strategy { 49 | BackpressureStrategy::Throttle { backoff, .. } => Some(*backoff), 50 | _ => None, 51 | } 52 | } 53 | 54 | /// Update the current load 55 | pub fn update_load(&mut self, load: usize) { 56 | self.current_load = load; 57 | } 58 | 59 | /// Check if we can accept more items based on the strategy 60 | pub fn can_accept(&self) -> bool { 61 | !self.should_apply_backpressure() 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/error_handling/mod.rs: -------------------------------------------------------------------------------- 1 | mod backpressure; 2 | mod retry_strategy; 3 | 4 | pub use backpressure::{BackpressureController, BackpressureStrategy}; 5 | use fluxus_utils::models::StreamResult; 6 | pub use retry_strategy::RetryStrategy; 7 | use tokio::time::sleep; 8 | 9 | /// Error handler for retrying operations 10 | pub struct ErrorHandler { 11 | strategy: RetryStrategy, 12 | } 13 | 14 | impl ErrorHandler { 15 | /// Create a new error handler with the given retry strategy 16 | pub fn new(strategy: RetryStrategy) -> Self { 17 | Self { strategy } 18 | } 19 | 20 | /// Retry an operation with the configured strategy 21 | pub async fn retry(&self, mut operation: F) -> StreamResult 22 | where 23 | F: FnMut() -> StreamResult, 24 | { 25 | let mut attempt = 0; 26 | loop { 27 | match operation() { 28 | Ok(value) => return Ok(value), 29 | Err(error) => { 30 | if let Some(delay) = self.strategy.get_delay(attempt) { 31 | tracing::warn!( 32 | "Operation failed (attempt {}/{}): {}. Retrying after {:?}", 33 | attempt + 1, 34 | match &self.strategy { 35 | RetryStrategy::NoRetry => 1, 36 | RetryStrategy::Fixed { max_attempts, .. } => *max_attempts, 37 | RetryStrategy::ExponentialBackoff { max_attempts, .. 
} => 38 | *max_attempts, 39 | }, 40 | error, 41 | delay 42 | ); 43 | sleep(delay).await; 44 | attempt += 1; 45 | } else { 46 | return Err(error); 47 | } 48 | } 49 | } 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/error_handling/retry_strategy.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | /// Error recovery strategy 4 | #[derive(Debug, Clone)] 5 | pub enum RetryStrategy { 6 | /// No retry, fail immediately 7 | NoRetry, 8 | /// Retry with fixed delay 9 | Fixed { 10 | delay: Duration, 11 | max_attempts: usize, 12 | }, 13 | /// Retry with exponential backoff 14 | ExponentialBackoff { 15 | initial_delay: Duration, 16 | max_delay: Duration, 17 | max_attempts: usize, 18 | multiplier: f64, 19 | }, 20 | } 21 | 22 | impl RetryStrategy { 23 | /// Create a fixed delay retry strategy 24 | pub fn fixed(delay: Duration, max_attempts: usize) -> Self { 25 | Self::Fixed { 26 | delay, 27 | max_attempts, 28 | } 29 | } 30 | 31 | /// Create an exponential backoff retry strategy 32 | pub fn exponential( 33 | initial_delay: Duration, 34 | max_delay: Duration, 35 | max_attempts: usize, 36 | multiplier: f64, 37 | ) -> Self { 38 | Self::ExponentialBackoff { 39 | initial_delay, 40 | max_delay, 41 | max_attempts, 42 | multiplier, 43 | } 44 | } 45 | 46 | /// Calculate delay for a given attempt 47 | pub fn get_delay(&self, attempt: usize) -> Option { 48 | match self { 49 | Self::NoRetry => None, 50 | Self::Fixed { 51 | delay, 52 | max_attempts, 53 | } => { 54 | if attempt < *max_attempts { 55 | Some(*delay) 56 | } else { 57 | None 58 | } 59 | } 60 | Self::ExponentialBackoff { 61 | initial_delay, 62 | max_delay, 63 | max_attempts, 64 | multiplier, 65 | } => { 66 | if attempt < *max_attempts { 67 | let delay = Duration::from_secs_f64( 68 | initial_delay.as_secs_f64() * multiplier.powi(attempt as i32), 69 | ); 70 | Some(delay.min(*max_delay)) 71 | } else { 72 | None 73 | } 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus Core - A Flink-like stream processing engine in Rust 2 | //! 3 | //! This module contains the core abstractions and data types for stream processing. 
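//!
//! As an illustrative sketch (not part of the original sources), the configuration
//! types re-exported below can be built like this; how they are attached to a
//! `Pipeline` is shown in `pipeline::processor` and may differ in detail:
//!
//! ```ignore
//! use std::time::Duration;
//! use fluxus_core::{BackpressureStrategy, ParallelConfig, RetryStrategy};
//!
//! // Retry failed operations up to 3 times, doubling the delay each attempt.
//! let retry = RetryStrategy::exponential(
//!     Duration::from_millis(100),
//!     Duration::from_secs(10),
//!     3,
//!     2.0,
//! );
//!
//! // Throttle once 1000 records are in flight; resume below 100.
//! let backpressure = BackpressureStrategy::Throttle {
//!     high_watermark: 1000,
//!     low_watermark: 100,
//!     backoff: Duration::from_millis(50),
//! };
//!
//! // Run 4 parallel tasks while preserving record order.
//! let parallel = ParallelConfig::default()
//!     .with_parallelism(4)
//!     .with_preserve_order(true);
//! ```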
4 | 5 | pub mod config; 6 | pub mod error_handling; 7 | pub mod metrics; 8 | pub mod pipeline; 9 | 10 | // Re-export commonly used items 11 | pub use config::ParallelConfig; 12 | pub use error_handling::{ 13 | BackpressureController, BackpressureStrategy, ErrorHandler, RetryStrategy, 14 | }; 15 | pub use metrics::{Counter, Gauge, MetricValue, Metrics, Timer}; 16 | pub use pipeline::Pipeline; 17 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/metrics.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::sync::Arc; 3 | use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; 4 | use std::time::{Duration, Instant}; 5 | 6 | /// Counter for accumulating values 7 | #[derive(Debug, Default)] 8 | pub struct Counter { 9 | value: AtomicU64, 10 | } 11 | 12 | impl Counter { 13 | pub fn new() -> Self { 14 | Self { 15 | value: AtomicU64::new(0), 16 | } 17 | } 18 | 19 | pub fn increment(&self) { 20 | self.value.fetch_add(1, Ordering::Relaxed); 21 | } 22 | 23 | pub fn add(&self, value: u64) { 24 | self.value.fetch_add(value, Ordering::Relaxed); 25 | } 26 | 27 | pub fn value(&self) -> u64 { 28 | self.value.load(Ordering::Relaxed) 29 | } 30 | } 31 | 32 | /// Gauge for tracking current value 33 | #[derive(Debug, Default)] 34 | pub struct Gauge { 35 | value: AtomicI64, 36 | } 37 | 38 | impl Gauge { 39 | pub fn new() -> Self { 40 | Self { 41 | value: AtomicI64::new(0), 42 | } 43 | } 44 | 45 | pub fn set(&self, value: i64) { 46 | self.value.store(value, Ordering::Relaxed); 47 | } 48 | 49 | pub fn value(&self) -> i64 { 50 | self.value.load(Ordering::Relaxed) 51 | } 52 | } 53 | 54 | /// Timer for measuring durations 55 | #[derive(Debug)] 56 | pub struct Timer { 57 | start: Instant, 58 | duration_counter: Counter, 59 | count_counter: Counter, 60 | } 61 | 62 | impl Default for Timer { 63 | fn default() -> Self { 64 | Self::new() 65 | } 66 | } 67 | 68 | impl Timer { 69 | pub fn new() -> Self { 70 | Self { 71 | start: Instant::now(), 72 | duration_counter: Counter::new(), 73 | count_counter: Counter::new(), 74 | } 75 | } 76 | 77 | pub fn start(&mut self) { 78 | self.start = Instant::now(); 79 | } 80 | 81 | pub fn stop(&mut self) { 82 | let duration = self.start.elapsed(); 83 | self.duration_counter.add(duration.as_micros() as u64); 84 | self.count_counter.increment(); 85 | } 86 | 87 | /// Record a duration directly 88 | pub fn record(&mut self, duration: Duration) { 89 | self.duration_counter.add(duration.as_micros() as u64); 90 | self.count_counter.increment(); 91 | } 92 | 93 | pub fn average_duration_micros(&self) -> u64 { 94 | let total = self.duration_counter.value(); 95 | let count = self.count_counter.value(); 96 | if count == 0 { 0 } else { total / count } 97 | } 98 | } 99 | 100 | /// Metrics collection for pipeline monitoring 101 | #[derive(Debug, Default)] 102 | pub struct Metrics { 103 | counters: HashMap>, 104 | gauges: HashMap>, 105 | timers: HashMap>, 106 | } 107 | 108 | impl Metrics { 109 | pub fn new() -> Self { 110 | Self::default() 111 | } 112 | 113 | pub fn counter(&mut self, name: &str) -> Arc { 114 | self.counters 115 | .entry(name.to_string()) 116 | .or_insert_with(|| Arc::new(Counter::new())) 117 | .clone() 118 | } 119 | 120 | pub fn gauge(&mut self, name: &str) -> Arc { 121 | self.gauges 122 | .entry(name.to_string()) 123 | .or_insert_with(|| Arc::new(Gauge::new())) 124 | .clone() 125 | } 126 | 127 | pub fn timer(&mut self, name: &str) -> Arc { 128 | 
self.timers 129 | .entry(name.to_string()) 130 | .or_insert_with(|| Arc::new(Timer::new())) 131 | .clone() 132 | } 133 | 134 | pub fn snapshot(&self) -> HashMap { 135 | let mut snapshot = HashMap::new(); 136 | 137 | for (name, counter) in &self.counters { 138 | snapshot.insert(name.clone(), MetricValue::Counter(counter.value())); 139 | } 140 | 141 | for (name, gauge) in &self.gauges { 142 | snapshot.insert(name.clone(), MetricValue::Gauge(gauge.value())); 143 | } 144 | 145 | for (name, timer) in &self.timers { 146 | snapshot.insert( 147 | name.clone(), 148 | MetricValue::Timer { 149 | avg_micros: timer.average_duration_micros(), 150 | count: timer.count_counter.value(), 151 | }, 152 | ); 153 | } 154 | 155 | snapshot 156 | } 157 | } 158 | 159 | #[derive(Debug, Clone)] 160 | pub enum MetricValue { 161 | Counter(u64), 162 | Gauge(i64), 163 | Timer { avg_micros: u64, count: u64 }, 164 | } 165 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/pipeline/mod.rs: -------------------------------------------------------------------------------- 1 | mod processor; 2 | mod status; 3 | 4 | pub use processor::Pipeline; 5 | pub use status::PipelineStatus; 6 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/pipeline/processor.rs: -------------------------------------------------------------------------------- 1 | use super::status::PipelineStatus; 2 | use crate::BackpressureStrategy; 3 | use crate::Counter; 4 | use crate::ParallelConfig; 5 | use crate::RetryStrategy; 6 | use crate::Timer; 7 | use crate::error_handling::BackpressureController; 8 | use crate::error_handling::ErrorHandler; 9 | use crate::metrics::Metrics; 10 | use fluxus_sinks::Sink; 11 | use fluxus_sinks::dummy_sink::DummySink; 12 | use fluxus_sources::Source; 13 | use fluxus_transformers::operator::Operator; 14 | use fluxus_utils::models::Record; 15 | use fluxus_utils::models::StreamResult; 16 | use fluxus_utils::time::current_time; 17 | use fluxus_utils::window::WindowConfig; 18 | use std::sync::Arc; 19 | use std::time::{Duration, Instant}; 20 | use tokio::runtime::Handle; 21 | use tokio::time; 22 | use tracing; 23 | 24 | /// Represents a stream processing pipeline 25 | pub struct Pipeline { 26 | /// The data source 27 | source: Box>, 28 | /// The sequence of operators 29 | operators: Vec>>, 30 | /// The data sink 31 | sink: Box>, 32 | /// Window configuration (optional) 33 | window_config: Option, 34 | /// Parallel processing configuration 35 | parallel_config: ParallelConfig, 36 | /// Current pipeline status 37 | status: PipelineStatus, 38 | /// Last watermark timestamp 39 | last_watermark: i64, 40 | /// Metrics tracking 41 | metrics: Arc, 42 | process_timer: Arc, 43 | records_processed: Arc, 44 | records_failed: Arc, 45 | /// Error handling 46 | error_handler: ErrorHandler, 47 | /// Backpressure controller 48 | backpressure: BackpressureController, 49 | } 50 | 51 | impl Pipeline { 52 | /// Create a new pipeline with a source 53 | pub fn source + 'static>(source: S) -> Self { 54 | let mut metrics = Metrics::new(); 55 | let process_timer = metrics.timer("process_time"); 56 | let records_processed = metrics.counter("records_processed"); 57 | let records_failed = metrics.counter("records_failed"); 58 | 59 | Self { 60 | source: Box::new(source), 61 | operators: Vec::new(), 62 | sink: Box::new(DummySink::new()), 63 | window_config: None, 64 | parallel_config: ParallelConfig::default(), 65 | status: PipelineStatus::Ready, 66 | 
last_watermark: 0, 67 | metrics: Arc::new(metrics), 68 | process_timer, 69 | records_processed, 70 | records_failed, 71 | error_handler: ErrorHandler::new(RetryStrategy::exponential( 72 | Duration::from_millis(100), 73 | Duration::from_secs(10), 74 | 3, 75 | 2.0, 76 | )), 77 | backpressure: BackpressureController::new(BackpressureStrategy::Throttle { 78 | high_watermark: 1000, 79 | low_watermark: 100, 80 | backoff: Duration::from_millis(50), 81 | }), 82 | } 83 | } 84 | 85 | /// Add an operator to the pipeline 86 | pub fn add_operator + 'static>(mut self, operator: O) -> Self { 87 | self.operators.push(Box::new(operator)); 88 | self 89 | } 90 | 91 | /// Set the sink for the pipeline 92 | pub fn sink + 'static>(mut self, sink: S) -> Self { 93 | self.sink = Box::new(sink); 94 | self 95 | } 96 | 97 | /// Configure windowing for the pipeline 98 | pub fn window(mut self, config: WindowConfig) -> Self { 99 | self.window_config = Some(config); 100 | self 101 | } 102 | 103 | /// Configure parallel processing for the pipeline 104 | pub fn parallel(mut self, config: ParallelConfig) -> Self { 105 | self.parallel_config = config; 106 | self 107 | } 108 | 109 | /// Configure error handling strategy 110 | pub fn with_retry_strategy(mut self, strategy: RetryStrategy) -> Self { 111 | self.error_handler = ErrorHandler::new(strategy); 112 | self 113 | } 114 | 115 | /// Configure backpressure strategy 116 | pub fn with_backpressure_strategy(mut self, strategy: BackpressureStrategy) -> Self { 117 | self.backpressure = BackpressureController::new(strategy); 118 | self 119 | } 120 | 121 | /// Get current pipeline status 122 | pub fn status(&self) -> PipelineStatus { 123 | self.status 124 | } 125 | 126 | /// Get a snapshot of current metrics 127 | pub fn metrics(&self) -> &Arc { 128 | &self.metrics 129 | } 130 | 131 | /// Update watermark and trigger windows if needed 132 | async fn process_watermark(&mut self) -> StreamResult<()> { 133 | if let Some(window_config) = &self.window_config { 134 | let now = current_time() as i64; 135 | 136 | // Check if we should advance the watermark 137 | if now - self.last_watermark >= window_config.watermark_delay.as_millis() as i64 { 138 | self.last_watermark = now; 139 | 140 | // Trigger windows in all operators 141 | for op in &mut self.operators { 142 | let results = op.on_window_trigger().await?; 143 | for record in results { 144 | self.sink.write(record).await?; 145 | } 146 | } 147 | } 148 | } 149 | Ok(()) 150 | } 151 | 152 | /// Process a record through a single operator with retries 153 | async fn process_with_retry( 154 | error_handler: &ErrorHandler, 155 | op: &mut Box>, 156 | record: Record, 157 | ) -> StreamResult>> { 158 | let record = record.clone(); 159 | let op_ref = &mut **op; 160 | 161 | error_handler 162 | .retry(|| { 163 | let rt = Handle::current(); 164 | rt.block_on(op_ref.process(record.clone())) 165 | }) 166 | .await 167 | } 168 | 169 | /// Write a record to the sink with retries 170 | async fn write_with_retry( 171 | error_handler: &ErrorHandler, 172 | sink: &mut Box>, 173 | record: Record, 174 | ) -> StreamResult<()> { 175 | let record = record.clone(); 176 | let sink_ref = &mut **sink; 177 | 178 | error_handler 179 | .retry(|| { 180 | let rt = Handle::current(); 181 | rt.block_on(sink_ref.write(record.clone())) 182 | }) 183 | .await 184 | } 185 | 186 | /// Execute the pipeline with error handling and backpressure 187 | pub async fn execute(mut self) -> StreamResult<()> { 188 | self.status = PipelineStatus::Running; 189 | 190 | // Initialize components 
191 | self.source.init().await?; 192 | for op in &mut self.operators { 193 | op.init().await?; 194 | } 195 | self.sink.init().await?; 196 | 197 | let mut watermark_interval = time::interval(Duration::from_millis(100)); 198 | 199 | loop { 200 | if self.backpressure.should_apply_backpressure() { 201 | if let Some(backoff) = self.backpressure.get_backoff() { 202 | tracing::debug!("Applying backpressure, waiting for {:?}", backoff); 203 | time::sleep(backoff).await; 204 | continue; 205 | } 206 | } 207 | 208 | tokio::select! { 209 | result = self.source.next() => { 210 | match result { 211 | Ok(Some(record)) => { 212 | let start = Instant::now(); 213 | let mut records = vec![record]; 214 | let mut success = true; 215 | 216 | // Process through operators with retry 217 | for op in &mut self.operators { 218 | let mut next = Vec::new(); 219 | let current_records = std::mem::take(&mut records); 220 | 221 | for record in current_records { 222 | match Self::process_with_retry(&self.error_handler, op, record).await { 223 | Ok(mut results) => next.append(&mut results), 224 | Err(e) => { 225 | self.records_failed.increment(); 226 | success = false; 227 | tracing::error!("Operator error after retries: {}", e); 228 | break; 229 | } 230 | } 231 | } 232 | 233 | if !success { 234 | break; 235 | } 236 | records = next; 237 | } 238 | 239 | // Use the length before consuming records 240 | let record_count = records.len(); 241 | self.backpressure.update_load(record_count); 242 | 243 | if success { 244 | while let Some(record) = records.pop() { 245 | match Self::write_with_retry(&self.error_handler, &mut self.sink, record).await { 246 | Ok(_) => { 247 | self.records_processed.increment(); 248 | } 249 | Err(e) => { 250 | self.records_failed.increment(); 251 | tracing::error!("Sink error after retries: {}", e); 252 | } 253 | } 254 | } 255 | } 256 | 257 | if let Some(timer) = Arc::get_mut(&mut self.process_timer) { 258 | timer.record(start.elapsed()); 259 | } 260 | } 261 | Ok(None) => break, 262 | Err(e) => { 263 | self.records_failed.increment(); 264 | tracing::error!("Source error: {}", e); 265 | return Err(e); 266 | } 267 | } 268 | } 269 | 270 | _ = watermark_interval.tick() => { 271 | if let Err(e) = self.process_watermark().await { 272 | tracing::error!("Watermark error: {}", e); 273 | } 274 | } 275 | } 276 | } 277 | 278 | self.sink.flush().await?; 279 | self.sink.close().await?; 280 | self.status = PipelineStatus::Completed; 281 | Ok(()) 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /crates/fluxus-core/src/pipeline/status.rs: -------------------------------------------------------------------------------- 1 | /// Status of a pipeline execution 2 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 3 | pub enum PipelineStatus { 4 | /// Pipeline is initialized but not running 5 | Ready, 6 | /// Pipeline is currently running 7 | Running, 8 | /// Pipeline has completed successfully 9 | Completed, 10 | /// Pipeline has failed 11 | Failed, 12 | } 13 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-runtime" 3 | description = "Runtime implementation for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | 
fluxus-core = { path = "../fluxus-core", version="0.2" } 13 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 14 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2" } 15 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 16 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2" } 17 | 18 | tokio = { version = "1", features = ["full"] } 19 | futures = "0.3" 20 | async-trait = "0.1" 21 | tracing = "0.1" 22 | parking_lot = "0.12" 23 | dashmap = "5.5" 24 | bytes = "1.5" 25 | uuid = { version = "1.7", features = ["v4"] } 26 | 27 | [dev-dependencies] 28 | criterion = "0.6" 29 | 30 | [[bench]] 31 | name = "runtime_benchmark" 32 | path = "../../benches/runtime_benchmark.rs" 33 | harness = false 34 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Runtime 2 | 3 | Runtime engine and execution environment for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides the execution environment and runtime components for Fluxus: 8 | 9 | - Task execution and scheduling 10 | - Memory management 11 | - Threading and concurrency 12 | - Performance optimization 13 | - Resource management 14 | 15 | ## Key Components 16 | 17 | ### Task Execution 18 | 19 | - Parallel task execution 20 | - Work stealing scheduler 21 | - Back-pressure handling 22 | - Resource-aware scheduling 23 | 24 | ### Threading Model 25 | 26 | - Thread pool management 27 | - Thread-safe data structures 28 | - Lock-free algorithms 29 | - Efficient inter-thread communication 30 | 31 | ### Memory Management 32 | 33 | - Buffer management 34 | - Memory pooling 35 | - Efficient data serialization 36 | - Zero-copy optimizations 37 | 38 | ### Monitoring 39 | 40 | - Performance metrics 41 | - Resource usage tracking 42 | - Runtime statistics 43 | - Diagnostics (planned) 44 | 45 | ## Usage 46 | 47 | Add this to your `Cargo.toml`: 48 | 49 | ```toml 50 | [dependencies] 51 | fluxus-runtime = "0.2" 52 | ``` 53 | 54 | This crate is usually not used directly but through the `fluxus-api` crate. -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus Runtime - Execution engine for stream processing 2 | //! 3 | //! This module implements the runtime execution environment for Fluxus pipelines. 
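//!
//! Illustrative sketch (not part of the original sources) of the two auxiliary
//! building blocks in this crate, `state::KeyedStateBackend` and
//! `watermark::WatermarkTracker`; the generic parameters are written out here even
//! though this listing elides them elsewhere. Full pipeline execution goes through
//! `runtime::RuntimeContext::execute_pipeline`.
//!
//! ```ignore
//! use std::time::SystemTime;
//! use fluxus_runtime::state::KeyedStateBackend;
//! use fluxus_runtime::watermark::WatermarkTracker;
//!
//! // Keyed state: a lock-protected key/value store for stateful operators.
//! let state: KeyedStateBackend<String, u64> = KeyedStateBackend::new();
//! state.set("clicks".to_string(), 42);
//! assert_eq!(state.get(&"clicks".to_string()), Some(42));
//!
//! // Watermarks only move forward in event time.
//! let watermarks = WatermarkTracker::new();
//! watermarks.update(SystemTime::now());
//! let _current = watermarks.get_current();
//! ```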
4 | mod runtime; 5 | pub use runtime::RuntimeContext; 6 | 7 | /// State management for stateful operators 8 | pub mod state; 9 | 10 | /// Watermark tracking and propagation 11 | pub mod watermark; 12 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/runtime.rs: -------------------------------------------------------------------------------- 1 | use dashmap::DashMap; 2 | use fluxus_core::ParallelConfig; 3 | use fluxus_sinks::Sink; 4 | use fluxus_sources::Source; 5 | use fluxus_transformers::Operator; 6 | use fluxus_utils::models::{Record, StreamResult}; 7 | use std::sync::Arc; 8 | use tokio::sync::{Mutex, mpsc}; 9 | use tokio::task::JoinHandle; 10 | use uuid::Uuid; 11 | 12 | /// Runtime context for managing stream processing execution 13 | pub struct RuntimeContext { 14 | /// Task parallelism configuration 15 | parallel_config: ParallelConfig, 16 | /// Active task handles 17 | task_handles: Arc>>>, 18 | } 19 | 20 | impl RuntimeContext { 21 | pub fn new(parallel_config: ParallelConfig) -> Self { 22 | Self { 23 | parallel_config, 24 | task_handles: Arc::new(DashMap::new()), 25 | } 26 | } 27 | 28 | /// Execute a source-to-sink pipeline with operators 29 | pub async fn execute_pipeline( 30 | &self, 31 | source: S, 32 | operators: Vec + Send + Sync>>>, 33 | sink: K, 34 | ) -> StreamResult<()> 35 | where 36 | T: Clone + Send + Sync + 'static, 37 | S: Source + Send + Sync + 'static, 38 | K: Sink + Send + Sync + 'static, 39 | { 40 | let (tx, rx) = mpsc::channel(self.parallel_config.buffer_size); 41 | let source = Arc::new(Mutex::new(source)); 42 | let sink = Arc::new(Mutex::new(sink)); 43 | 44 | // Spawn source task 45 | let source_handle = self.spawn_source_task(source.clone(), tx.clone()); 46 | 47 | // Create channels for operator pipeline 48 | let mut curr_rx = rx; 49 | let mut handles = vec![source_handle]; 50 | 51 | // Spawn operator tasks 52 | for operator in operators { 53 | let (new_tx, new_rx) = mpsc::channel(self.parallel_config.buffer_size); 54 | let operator_handles = self.spawn_operator_tasks(operator, curr_rx, new_tx); 55 | handles.extend(operator_handles); 56 | curr_rx = new_rx; 57 | } 58 | 59 | // Spawn sink task 60 | let sink_handle = self.spawn_sink_task(sink.clone(), curr_rx); 61 | handles.push(sink_handle); 62 | 63 | // Store handles 64 | self.task_handles 65 | .insert(Uuid::new_v4().to_string(), handles); 66 | 67 | Ok(()) 68 | } 69 | 70 | fn spawn_source_task( 71 | &self, 72 | source: Arc>, 73 | tx: mpsc::Sender>, 74 | ) -> JoinHandle<()> 75 | where 76 | T: Clone + Send + 'static, 77 | S: Source + Send + 'static, 78 | { 79 | tokio::spawn(async move { 80 | loop { 81 | let mut source_guard = source.lock().await; 82 | match source_guard.next().await { 83 | Ok(Some(record)) => { 84 | if tx.send(record).await.is_err() { 85 | break; 86 | } 87 | } 88 | _ => break, 89 | } 90 | } 91 | let mut source_guard = source.lock().await; 92 | if let Err(e) = source_guard.close().await { 93 | tracing::error!("Error closing source: {:?}", e); 94 | } 95 | }) 96 | } 97 | 98 | fn spawn_operator_tasks( 99 | &self, 100 | operator: Arc + Send + Sync>>, 101 | rx: mpsc::Receiver>, 102 | tx: mpsc::Sender>, 103 | ) -> Vec> 104 | where 105 | T: Clone + Send + 'static, 106 | { 107 | let mut handles = Vec::new(); 108 | let rx = Arc::new(Mutex::new(rx)); 109 | 110 | for _ in 0..self.parallel_config.parallelism { 111 | let operator = Arc::clone(&operator); 112 | let rx = Arc::clone(&rx); 113 | let tx = tx.clone(); 114 | 115 | let handle = 
tokio::spawn(async move { 116 | loop { 117 | let record = { 118 | let mut rx = rx.lock().await; 119 | match rx.recv().await { 120 | Some(r) => r, 121 | None => break, 122 | } 123 | }; 124 | 125 | let mut op = operator.lock().await; 126 | if let Ok(results) = op.process(record).await { 127 | for result in results { 128 | if tx.send(result).await.is_err() { 129 | return; 130 | } 131 | } 132 | } 133 | } 134 | }); 135 | handles.push(handle); 136 | } 137 | 138 | handles 139 | } 140 | 141 | fn spawn_sink_task( 142 | &self, 143 | sink: Arc>, 144 | mut rx: mpsc::Receiver>, 145 | ) -> JoinHandle<()> 146 | where 147 | T: Clone + Send + 'static, 148 | K: Sink + Send + 'static, 149 | { 150 | tokio::spawn(async move { 151 | while let Some(record) = rx.recv().await { 152 | let mut sink_guard = sink.lock().await; 153 | if let Err(e) = sink_guard.write(record).await { 154 | tracing::error!("Error writing to sink: {:?}", e); 155 | } 156 | } 157 | 158 | let mut sink_guard = sink.lock().await; 159 | if let Err(e) = sink_guard.flush().await { 160 | tracing::error!("Error flushing sink: {:?}", e); 161 | } 162 | 163 | if let Err(e) = sink_guard.close().await { 164 | tracing::error!("Error closing sink: {:?}", e); 165 | } 166 | }) 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/state.rs: -------------------------------------------------------------------------------- 1 | use parking_lot::RwLock; 2 | use std::collections::HashMap; 3 | use std::hash::Hash; 4 | use std::sync::Arc; 5 | 6 | /// Simple key-value state backend 7 | #[derive(Default)] 8 | pub struct KeyedStateBackend { 9 | state: Arc>>, 10 | } 11 | 12 | impl KeyedStateBackend 13 | where 14 | K: Eq + Hash, 15 | { 16 | pub fn new() -> Self { 17 | Self { 18 | state: Arc::new(RwLock::new(HashMap::new())), 19 | } 20 | } 21 | 22 | pub fn get(&self, key: &K) -> Option 23 | where 24 | V: Clone, 25 | { 26 | self.state.read().get(key).cloned() 27 | } 28 | 29 | pub fn set(&self, key: K, value: V) { 30 | self.state.write().insert(key, value); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /crates/fluxus-runtime/src/watermark.rs: -------------------------------------------------------------------------------- 1 | use parking_lot::RwLock; 2 | use std::sync::Arc; 3 | use std::time::SystemTime; 4 | 5 | /// Watermark tracker for managing event time progress 6 | pub struct WatermarkTracker { 7 | current_watermark: Arc>, 8 | } 9 | 10 | impl Default for WatermarkTracker { 11 | fn default() -> Self { 12 | Self::new() 13 | } 14 | } 15 | 16 | impl WatermarkTracker { 17 | pub fn new() -> Self { 18 | Self { 19 | current_watermark: Arc::new(RwLock::new(SystemTime::now())), 20 | } 21 | } 22 | 23 | pub fn update(&self, watermark: SystemTime) { 24 | let mut current = self.current_watermark.write(); 25 | if watermark > *current { 26 | *current = watermark; 27 | } 28 | } 29 | 30 | pub fn get_current(&self) -> SystemTime { 31 | *self.current_watermark.read() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-sinks" 3 | description = "Sink components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 
10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | futures = "0.3" 19 | serde = { version = "1.0", features = ["derive"] } 20 | serde_json = "1.0" 21 | anyhow = "1.0" 22 | thiserror = "1.0" 23 | async-trait = "0.1" 24 | tracing = "0.1" 25 | num_cpus = "1.16" 26 | csv = "1.3" 27 | 28 | [dev-dependencies] 29 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 30 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Sinks 2 | 3 | Sink components for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides various sink implementations for the Fluxus stream processing engine, allowing processed data to be output to different destinations. 8 | 9 | ### Key Sinks 10 | - `BufferedSink` - Buffered output for efficient writes. 11 | - `ConsoleSink` - Output data to the console for debugging. 12 | - `DummySink` - A placeholder sink for testing. 13 | - `FileSink` - Write data to files. 14 | 15 | ## Usage 16 | 17 | Add this to your `Cargo.toml`: 18 | 19 | ```toml 20 | [dependencies] 21 | fluxus-sinks = "0.2" 22 | ``` -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/buffered.rs: -------------------------------------------------------------------------------- 1 | use crate::Sink; 2 | use async_trait::async_trait; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::time::{Duration, Instant}; 5 | 6 | /// A sink wrapper that provides buffering capabilities 7 | pub struct BufferedSink> { 8 | inner: S, 9 | buffer: Vec>, 10 | buffer_size: usize, 11 | flush_interval: Duration, 12 | last_flush: Instant, 13 | } 14 | 15 | impl> BufferedSink { 16 | /// Create a new buffered sink with the specified buffer size and flush interval 17 | pub fn new(inner: S, buffer_size: usize, flush_interval: Duration) -> Self { 18 | Self { 19 | inner, 20 | buffer: Vec::with_capacity(buffer_size), 21 | buffer_size, 22 | flush_interval, 23 | last_flush: Instant::now(), 24 | } 25 | } 26 | 27 | /// Force flush the buffer 28 | pub async fn force_flush(&mut self) -> StreamResult<()> { 29 | for record in self.buffer.drain(..) 
{ 30 | self.inner.write(record).await?; 31 | } 32 | self.inner.flush().await?; 33 | self.last_flush = Instant::now(); 34 | Ok(()) 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl + Send> Sink for BufferedSink { 40 | async fn init(&mut self) -> StreamResult<()> { 41 | self.inner.init().await 42 | } 43 | 44 | async fn write(&mut self, record: Record) -> StreamResult<()> { 45 | self.buffer.push(record); 46 | 47 | let should_flush = self.buffer.len() >= self.buffer_size 48 | || self.last_flush.elapsed() >= self.flush_interval; 49 | 50 | if should_flush { 51 | self.force_flush().await?; 52 | } 53 | 54 | Ok(()) 55 | } 56 | 57 | async fn flush(&mut self) -> StreamResult<()> { 58 | self.force_flush().await 59 | } 60 | 61 | async fn close(&mut self) -> StreamResult<()> { 62 | self.force_flush().await?; 63 | self.inner.close().await 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/console.rs: -------------------------------------------------------------------------------- 1 | use crate::{ConsoleFormatter, DefaultFormatter, Sink}; 2 | use async_trait::async_trait; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::marker::PhantomData; 5 | 6 | /// A sink that writes to console 7 | #[derive(Default)] 8 | pub struct ConsoleSink { 9 | formatter: F, 10 | _phantom: PhantomData, 11 | } 12 | 13 | impl ConsoleSink { 14 | /// Create a new console sink with default formatter 15 | pub fn new() -> Self { 16 | Self { 17 | formatter: DefaultFormatter, 18 | _phantom: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | impl ConsoleSink { 24 | /// Create a new console sink with custom formatter 25 | pub fn with_formatter(formatter: F) -> Self { 26 | Self { 27 | formatter, 28 | _phantom: PhantomData, 29 | } 30 | } 31 | } 32 | 33 | #[async_trait] 34 | impl Sink for ConsoleSink 35 | where 36 | T: Send, 37 | F: ConsoleFormatter + Send + Sync, 38 | { 39 | async fn init(&mut self) -> StreamResult<()> { 40 | Ok(()) 41 | } 42 | 43 | async fn write(&mut self, record: Record) -> StreamResult<()> { 44 | tracing::info!("{}", self.formatter.format(&record)); 45 | Ok(()) 46 | } 47 | 48 | async fn flush(&mut self) -> StreamResult<()> { 49 | Ok(()) 50 | } 51 | 52 | async fn close(&mut self) -> StreamResult<()> { 53 | Ok(()) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/dummy_sink.rs: -------------------------------------------------------------------------------- 1 | use crate::Sink; 2 | use fluxus_utils::models::Record; 3 | use fluxus_utils::models::StreamResult; 4 | use std::marker::PhantomData; 5 | 6 | /// A dummy sink that discards all records 7 | #[derive(Default)] 8 | pub struct DummySink { 9 | _phantom: PhantomData, 10 | } 11 | 12 | impl DummySink { 13 | pub fn new() -> Self { 14 | Self { 15 | _phantom: PhantomData, 16 | } 17 | } 18 | } 19 | 20 | #[async_trait::async_trait] 21 | impl Sink for DummySink { 22 | async fn init(&mut self) -> StreamResult<()> { 23 | Ok(()) 24 | } 25 | 26 | async fn write(&mut self, _record: Record) -> StreamResult<()> { 27 | Ok(()) 28 | } 29 | 30 | async fn flush(&mut self) -> StreamResult<()> { 31 | Ok(()) 32 | } 33 | 34 | async fn close(&mut self) -> StreamResult<()> { 35 | Ok(()) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/file.rs: -------------------------------------------------------------------------------- 1 | use crate::Sink; 2 | use 
async_trait::async_trait; 3 | use csv; 4 | use fluxus_utils::models::{Record, StreamResult}; 5 | use serde::Serialize; 6 | use serde_json; 7 | use std::marker::PhantomData; 8 | use std::path::PathBuf; 9 | use tokio::fs::File; 10 | use tokio::io::AsyncWriteExt; 11 | 12 | /// Output format for file sink 13 | #[derive(Clone, Debug)] 14 | pub enum FileFormat { 15 | /// Plain text format (one line per record) 16 | Text, 17 | /// CSV format 18 | Csv, 19 | /// JSON format (one JSON object per line) 20 | JsonLines, 21 | } 22 | 23 | /// A sink that writes to a file 24 | pub struct FileSink { 25 | path: PathBuf, 26 | format: FileFormat, 27 | file: Option, 28 | _phantom: PhantomData, 29 | } 30 | 31 | impl FileSink { 32 | /// Create a new file sink 33 | pub fn new>(path: P, format: FileFormat) -> Self { 34 | Self { 35 | path: path.into(), 36 | format, 37 | file: None, 38 | _phantom: PhantomData, 39 | } 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Sink for FileSink { 45 | async fn init(&mut self) -> StreamResult<()> { 46 | self.file = Some(File::create(&self.path).await?); 47 | Ok(()) 48 | } 49 | 50 | async fn write(&mut self, record: Record) -> StreamResult<()> { 51 | if let Some(file) = &mut self.file { 52 | match self.format { 53 | FileFormat::Text => { 54 | let line = format!("{}\n", serde_json::to_string(&record.data)?); 55 | file.write_all(line.as_bytes()).await?; 56 | } 57 | FileFormat::Csv => { 58 | let mut wtr = csv::Writer::from_writer(Vec::new()); 59 | wtr.serialize(&record.data)?; 60 | let inner = wtr.into_inner()?; 61 | let data = String::from_utf8(inner)?; 62 | file.write_all(data.as_bytes()).await?; 63 | } 64 | FileFormat::JsonLines => { 65 | let line = format!("{}\n", serde_json::to_string(&record.data)?); 66 | file.write_all(line.as_bytes()).await?; 67 | } 68 | } 69 | } 70 | Ok(()) 71 | } 72 | 73 | async fn flush(&mut self) -> StreamResult<()> { 74 | if let Some(file) = &mut self.file { 75 | file.flush().await?; 76 | } 77 | Ok(()) 78 | } 79 | 80 | async fn close(&mut self) -> StreamResult<()> { 81 | if let Some(mut file) = self.file.take() { 82 | file.flush().await?; 83 | } 84 | Ok(()) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /crates/fluxus-sinks/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod buffered; 2 | pub mod console; 3 | pub mod dummy_sink; 4 | pub mod file; 5 | 6 | pub use buffered::BufferedSink; 7 | pub use console::ConsoleSink; 8 | pub use file::FileSink; 9 | 10 | use async_trait::async_trait; 11 | use fluxus_utils::models::{Record, StreamResult}; 12 | use std::fmt::Display; 13 | 14 | /// Sink trait defines the interface for data output 15 | #[async_trait] 16 | pub trait Sink { 17 | /// Initialize the sink 18 | async fn init(&mut self) -> StreamResult<()>; 19 | 20 | /// Write a record to the sink 21 | async fn write(&mut self, record: Record) -> StreamResult<()>; 22 | 23 | /// Flush any buffered data 24 | async fn flush(&mut self) -> StreamResult<()>; 25 | 26 | /// Close the sink and release resources 27 | async fn close(&mut self) -> StreamResult<()>; 28 | } 29 | 30 | /// Formatter for console output 31 | pub trait ConsoleFormatter { 32 | fn format(&self, record: &Record) -> String; 33 | } 34 | 35 | /// Default formatter that uses Display 36 | pub struct DefaultFormatter; 37 | 38 | impl ConsoleFormatter for DefaultFormatter { 39 | fn format(&self, record: &Record) -> String { 40 | format!("[{}] {}", record.timestamp, record.data) 41 | } 42 | } 43 | 
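// Illustrative sketch (not part of the original file): plugging a custom formatter
// into `ConsoleSink::with_formatter`. `PrefixedFormatter` is a hypothetical name,
// and the generic parameter on `ConsoleFormatter` is an assumption, since this
// listing elides type parameters.
//
// struct PrefixedFormatter;
//
// impl ConsoleFormatter<String> for PrefixedFormatter {
//     fn format(&self, record: &Record<String>) -> String {
//         // Same shape as DefaultFormatter, with a static prefix added.
//         format!("fluxus [{}] {}", record.timestamp, record.data)
//     }
// }
//
// let sink = ConsoleSink::with_formatter(PrefixedFormatter);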
-------------------------------------------------------------------------------- /crates/fluxus-sources/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-sources" 3 | description = "Source components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | futures = "0.3" 19 | serde = { version = "1.0", features = ["derive"] } 20 | serde_json = "1.0" 21 | anyhow = "1.0" 22 | thiserror = "1.0" 23 | async-trait = "0.1" 24 | tracing = "0.1" 25 | num_cpus = "1.16" 26 | csv = "1.3" 27 | tokio-util = { version = "0.7.15", features = ["io"] } 28 | reqwest = { version = "0.12.15", features = ["stream"] } 29 | 30 | [dev-dependencies] 31 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 32 | tempfile = "3" -------------------------------------------------------------------------------- /crates/fluxus-sources/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Sources 2 | 3 | Source components for the Fluxus stream processing engine. 4 | 5 | ## Overview 6 | 7 | This crate provides various source implementations for the Fluxus stream processing engine, allowing data to be ingested from different sources. 8 | 9 | ### Key Sources 10 | - `CsvSource` - Read data from CSV files. 11 | - `GeneratorSource` - Generate data for testing purposes. 
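
Both sources implement the async `Source` trait (`init` / `next` / `close`). As an
illustrative sketch (the file path is a placeholder and error handling is kept minimal):

```rust
use fluxus_sources::{CsvSource, Source};
use fluxus_utils::models::StreamResult;

#[tokio::main]
async fn main() -> StreamResult<()> {
    // Read a local CSV file line by line; `CsvSource::from_url` works the same
    // way for a remote file.
    let mut source = CsvSource::new("data/events.csv");
    source.init().await?;

    // Each record wraps one trimmed line of the file as a `String`.
    while let Some(record) = source.next().await? {
        println!("{}", record.data);
    }

    source.close().await?;
    Ok(())
}
```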
12 | 13 | ## Usage 14 | 15 | Add this to your `Cargo.toml`: 16 | 17 | ```toml 18 | [dependencies] 19 | fluxus-sources = "0.2" 20 | ``` -------------------------------------------------------------------------------- /crates/fluxus-sources/src/csv.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamError, StreamResult}; 3 | use futures::TryStreamExt; 4 | use reqwest; 5 | use std::io::{self, Error}; 6 | use std::path::PathBuf; 7 | use std::time::Duration; 8 | use tokio::fs::File; 9 | use tokio::io::{AsyncBufReadExt, BufReader}; 10 | use tokio_util::io::StreamReader; 11 | 12 | use super::Source; 13 | 14 | /// A source that reads CSV files 15 | pub struct CsvSource { 16 | source: CsvSourceType, 17 | reader: Option>, 18 | } 19 | 20 | enum CsvSourceType { 21 | LocalFile(PathBuf), 22 | RemoteUrl(String), 23 | } 24 | 25 | impl CsvSource { 26 | /// Create a new CSV source from a local file path 27 | pub fn new>(path: P) -> Self { 28 | Self { 29 | source: CsvSourceType::LocalFile(path.into()), 30 | reader: None, 31 | } 32 | } 33 | 34 | /// Create a new CSV source from a remote URL 35 | pub fn from_url>(url: S) -> Self { 36 | Self { 37 | source: CsvSourceType::RemoteUrl(url.into()), 38 | reader: None, 39 | } 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Source for CsvSource { 45 | async fn init(&mut self) -> StreamResult<()> { 46 | match &self.source { 47 | CsvSourceType::LocalFile(path) => { 48 | let file = File::open(path) 49 | .await 50 | .map_err(|e| StreamError::Io(Error::other(format!("{}", e))))?; 51 | self.reader = Some(Box::new(BufReader::new(file))); 52 | } 53 | CsvSourceType::RemoteUrl(url) => { 54 | let client = reqwest::Client::builder() 55 | .timeout(Duration::from_secs(30)) 56 | .build() 57 | .map_err(|_e| StreamError::Io(io::Error::other("create http client error")))?; 58 | let response = client.get(url).send().await.map_err(|e| { 59 | StreamError::Io(Error::other(format!("Failed to fetch URL: {}", e))) 60 | })?; 61 | 62 | if !response.status().is_success() { 63 | return Err(StreamError::Io(Error::other(format!( 64 | "HTTP error: {}", 65 | response.status() 66 | )))); 67 | } 68 | 69 | let byte_stream = response 70 | .bytes_stream() 71 | .map_err(|e| Error::other(format!("{}", e))); 72 | 73 | let reader = StreamReader::new(byte_stream); 74 | self.reader = Some(Box::new(BufReader::new(reader))); 75 | } 76 | } 77 | Ok(()) 78 | } 79 | 80 | async fn next(&mut self) -> StreamResult>> { 81 | if let Some(reader) = &mut self.reader { 82 | let mut line = String::new(); 83 | match reader.read_line(&mut line).await { 84 | Ok(0) => Ok(None), // EOF 85 | Ok(_) => { 86 | let line = line.trim().to_string(); 87 | Ok(Some(Record::new(line))) 88 | } 89 | Err(e) => Err(e.into()), 90 | } 91 | } else { 92 | Ok(None) 93 | } 94 | } 95 | 96 | async fn close(&mut self) -> StreamResult<()> { 97 | self.reader = None; 98 | Ok(()) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /crates/fluxus-sources/src/generator.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | use std::marker::PhantomData; 4 | 5 | use super::Source; 6 | 7 | /// A source that generates test data 8 | pub struct GeneratorSource 9 | where 10 | F: FnMut() -> Option + Send, 11 | { 12 | generator: F, 13 | _phantom: PhantomData, 14 | } 15 | 16 | impl 
GeneratorSource 17 | where 18 | F: FnMut() -> Option + Send, 19 | { 20 | /// Create a new generator source 21 | pub fn new(generator: F) -> Self { 22 | Self { 23 | generator, 24 | _phantom: PhantomData, 25 | } 26 | } 27 | 28 | /// Create a counting source that generates numbers from start to end (inclusive) 29 | pub fn counter(start: i64, end: i64) -> GeneratorSource<i64, impl FnMut() -> Option<i64>> { 30 | // The cursor is captured by the closure, so each counter instance counts independently. 31 | let mut current = start; 32 | GeneratorSource::new(move || { 33 | if current <= end { 34 | let value = current; 35 | current += 1; 36 | Some(value) 37 | } else { 38 | None 39 | } 40 | }) 41 | } 42 | } 43 | 44 | #[async_trait] 45 | impl Source for GeneratorSource 46 | where 47 | T: Send, 48 | F: FnMut() -> Option + Send + Sync, 49 | { 50 | async fn init(&mut self) -> StreamResult<()> { 51 | Ok(()) 52 | } 53 | 54 | async fn next(&mut self) -> StreamResult>> { 55 | Ok((self.generator)().map(Record::new)) 56 | } 57 | 58 | async fn close(&mut self) -> StreamResult<()> { 59 | Ok(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/fluxus-sources/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod csv; 2 | pub mod generator; 3 | 4 | pub use csv::CsvSource; 5 | 6 | use fluxus_utils::models::{Record, StreamResult}; 7 | pub use generator::GeneratorSource; 8 | 9 | use async_trait::async_trait; 10 | 11 | /// Source trait defines the interface for data sources 12 | #[async_trait] 13 | pub trait Source { 14 | /// Initialize the source 15 | async fn init(&mut self) -> StreamResult<()>; 16 | 17 | /// Read the next record from the source 18 | async fn next(&mut self) -> StreamResult>>; 19 | 20 | /// Close the source and release resources 21 | async fn close(&mut self) -> StreamResult<()>; 22 | } 23 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-transformers" 3 | description = "Transformer components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | fluxus-utils = { path = "../fluxus-utils", version="0.2" } 16 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2" } 17 | fluxus-sources = { path = "../fluxus-sources", version="0.2" } 18 | 19 | tokio = { version = "1", features = ["full"] } 20 | futures = "0.3" 21 | serde = { version = "1.0", features = ["derive"] } 22 | serde_json = "1.0" 23 | anyhow = "1.0" 24 | thiserror = "1.0" 25 | async-trait = "0.1" 26 | tracing = "0.1" 27 | num_cpus = "1.16" 28 | csv = "1.3" 29 | 30 | [dev-dependencies] 31 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 32 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Transformers 2 | 3 | Transformer components for the Fluxus stream processing engine.
4 | 5 | ## Overview 6 | 7 | This crate provides various transformation implementations for the Fluxus stream processing engine, allowing data to be processed and transformed in different ways. 8 | 9 | ### Key Transformers 10 | - `TransformSource` - Basic data transformation. 11 | - `TransformSourceWithOperator` - Data transformation with custom operators. 12 | 13 | ## Usage 14 | 15 | Add this to your `Cargo.toml`: 16 | 17 | ```toml 18 | [dependencies] 19 | fluxus-transformers = "0.2" 20 | ``` -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod operator; 2 | mod transform_base; 3 | mod transform_source; 4 | mod transform_source_with_operator; 5 | 6 | pub use operator::{Operator, OperatorBuilder}; 7 | pub use transform_base::TransformBase; 8 | pub use transform_source::TransformSource; 9 | pub use transform_source_with_operator::TransformSourceWithOperator; 10 | 11 | use fluxus_sources::Source; 12 | 13 | pub type InnerSource = dyn Source + Send + Sync; 14 | pub type InnerOperator = dyn Operator + Send + Sync; 15 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/builder.rs: -------------------------------------------------------------------------------- 1 | use super::{FilterOperator, MapOperator, WindowReduceOperator}; 2 | use fluxus_utils::window::WindowConfig; 3 | 4 | /// Builder for creating stream operators 5 | pub struct OperatorBuilder; 6 | 7 | // Add type aliases at the module level 8 | type AveragePair = (T, usize); 9 | type AverageReduceFn = 10 | Box, AveragePair) -> AveragePair + Send + Sync>; 11 | 12 | impl OperatorBuilder { 13 | /// Create a new map operator 14 | pub fn map(func: F) -> MapOperator 15 | where 16 | F: Fn(In) -> Out + Send + Sync, 17 | { 18 | MapOperator::new(func) 19 | } 20 | 21 | /// Create a new filter operator 22 | pub fn filter(predicate: F) -> FilterOperator 23 | where 24 | F: Fn(&T) -> bool + Send + Sync, 25 | { 26 | FilterOperator::new(predicate) 27 | } 28 | 29 | /// Create a new window reduce operator 30 | pub fn window_reduce(func: F, window: WindowConfig) -> WindowReduceOperator 31 | where 32 | F: Fn(T, T) -> T + Send + Sync, 33 | T: Clone, 34 | { 35 | WindowReduceOperator::new(func, window) 36 | } 37 | 38 | /// Helper to create a sum operator with a window 39 | pub fn sum_window(window: WindowConfig) -> WindowReduceOperator T> 40 | where 41 | T: std::ops::Add + Clone + Send, 42 | { 43 | Self::window_reduce(|a, b| a + b, window) 44 | } 45 | 46 | /// Helper to create a count operator with a window 47 | pub fn count_window( 48 | window: WindowConfig, 49 | ) -> WindowReduceOperator usize> { 50 | Self::window_reduce(|count, _| count + 1, window) 51 | } 52 | 53 | /// Helper to create an average operator with a window 54 | pub fn avg_window( 55 | window: WindowConfig, 56 | ) -> WindowReduceOperator, AverageReduceFn> 57 | where 58 | T: std::ops::Add + Clone + Send + 'static, 59 | { 60 | Self::window_reduce( 61 | Box::new(|(sum1, count1), (sum2, count2)| (sum1 + sum2, count1 + count2)), 62 | window, 63 | ) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/filter.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | use 
std::marker::PhantomData; 4 | 5 | /// Built-in filter operator 6 | pub struct FilterOperator 7 | where 8 | F: Fn(&T) -> bool + Send + Sync, 9 | { 10 | func: F, 11 | _phantom: PhantomData, 12 | } 13 | 14 | impl FilterOperator 15 | where 16 | F: Fn(&T) -> bool + Send + Sync, 17 | { 18 | pub fn new(func: F) -> Self { 19 | Self { 20 | func, 21 | _phantom: PhantomData, 22 | } 23 | } 24 | } 25 | 26 | #[async_trait] 27 | impl super::Operator for FilterOperator 28 | where 29 | T: Send, 30 | F: Fn(&T) -> bool + Send + Sync, 31 | { 32 | async fn process(&mut self, record: Record) -> StreamResult>> { 33 | if (self.func)(&record.data) { 34 | Ok(vec![record]) 35 | } else { 36 | Ok(vec![]) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/map.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | use std::marker::PhantomData; 4 | 5 | /// Built-in map operator 6 | pub struct MapOperator 7 | where 8 | F: Fn(In) -> Out + Send + Sync, 9 | { 10 | func: F, 11 | _phantom_in: PhantomData, 12 | _phantom_out: PhantomData, 13 | } 14 | 15 | impl MapOperator 16 | where 17 | F: Fn(In) -> Out + Send + Sync, 18 | { 19 | pub fn new(func: F) -> Self { 20 | Self { 21 | func, 22 | _phantom_in: PhantomData, 23 | _phantom_out: PhantomData, 24 | } 25 | } 26 | } 27 | 28 | #[async_trait] 29 | impl super::Operator for MapOperator 30 | where 31 | In: Send, 32 | Out: Send, 33 | F: Fn(In) -> Out + Send + Sync, 34 | { 35 | async fn process(&mut self, record: Record) -> StreamResult>> { 36 | let output = (self.func)(record.data); 37 | Ok(vec![Record::with_timestamp(output, record.timestamp)]) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/mod.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record, StreamResult}; 3 | 4 | mod builder; 5 | mod filter; 6 | mod map; 7 | mod window_match; 8 | mod window_reduce; 9 | 10 | pub use builder::OperatorBuilder; 11 | pub use filter::FilterOperator; 12 | pub use map::MapOperator; 13 | pub use window_match::{WindowAllOperator, WindowAnyOperator}; 14 | pub use window_reduce::WindowReduceOperator; 15 | 16 | /// Operator trait defines the interface for stream processing operators 17 | #[async_trait] 18 | pub trait Operator: Send { 19 | /// Initialize the operator 20 | async fn init(&mut self) -> StreamResult<()> { 21 | Ok(()) 22 | } 23 | 24 | /// Process a single record and return zero or more output records 25 | async fn process(&mut self, record: Record) -> StreamResult>>; 26 | 27 | /// Called when a window is triggered (if windowing is enabled) 28 | async fn on_window_trigger(&mut self) -> StreamResult>> { 29 | Ok(Vec::new()) 30 | } 31 | 32 | /// Close the operator and release resources 33 | async fn close(&mut self) -> StreamResult<()> { 34 | Ok(()) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/window_match.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, marker::PhantomData}; 2 | 3 | use async_trait::async_trait; 4 | use fluxus_utils::{ 5 | models::{Record, StreamResult}, 6 | window::WindowConfig, 7 | }; 8 | 9 | use 
super::Operator; 10 | 11 | pub struct WindowAnyOperator { 12 | func: F, 13 | window: WindowConfig, 14 | buffer: HashMap>>, 15 | _phantom: PhantomData, 16 | } 17 | 18 | impl WindowAnyOperator 19 | where 20 | T: Clone, 21 | F: Fn(&T) -> bool + Send + Sync, 22 | { 23 | pub fn new(func: F, window: WindowConfig) -> Self { 24 | Self { 25 | func, 26 | window, 27 | buffer: HashMap::new(), 28 | _phantom: PhantomData, 29 | } 30 | } 31 | 32 | fn get_affected_windows(&self, timestamp: i64) -> Vec { 33 | self.window.window_type.get_affected_windows(timestamp) 34 | } 35 | 36 | fn process_window(&self, records: &[Record]) -> Option> { 37 | records.first().map(|first| Record { 38 | data: records.iter().any(|record| (self.func)(&record.data)), 39 | timestamp: first.timestamp, 40 | }) 41 | } 42 | } 43 | 44 | #[async_trait] 45 | impl Operator for WindowAnyOperator 46 | where 47 | T: Clone + Send + 'static, 48 | F: Fn(&T) -> bool + Send + Sync, 49 | { 50 | async fn process(&mut self, record: Record) -> StreamResult>> { 51 | let mut results = Vec::new(); 52 | 53 | // Get all windows that this record belongs to 54 | let window_keys = self.get_affected_windows(record.timestamp); 55 | 56 | // Add the record to all relevant windows 57 | for window_key in window_keys { 58 | let records = self.buffer.entry(window_key).or_default(); 59 | records.push(record.clone()); 60 | 61 | // Process each affected window 62 | let window_records = records.clone(); 63 | if let Some(result) = self.process_window(&window_records) { 64 | results.push(result); 65 | } 66 | } 67 | 68 | Ok(results) 69 | } 70 | } 71 | 72 | pub struct WindowAllOperator { 73 | func: F, 74 | window: WindowConfig, 75 | buffer: HashMap>>, 76 | _phantom: PhantomData, 77 | } 78 | 79 | impl WindowAllOperator 80 | where 81 | T: Clone, 82 | F: Fn(&T) -> bool + Send + Sync, 83 | { 84 | pub fn new(func: F, window: WindowConfig) -> Self { 85 | Self { 86 | func, 87 | window, 88 | buffer: HashMap::new(), 89 | _phantom: PhantomData, 90 | } 91 | } 92 | 93 | fn get_affected_windows(&self, timestamp: i64) -> Vec { 94 | self.window.window_type.get_affected_windows(timestamp) 95 | } 96 | 97 | fn process_window(&self, records: &[Record]) -> Option> { 98 | // records is guaranteed to be non-empty at this point, so first() can be used safely 99 | records.first().map(|first| Record { 100 | data: records.iter().all(|record| (self.func)(&record.data)), 101 | timestamp: first.timestamp, 102 | }) 103 | } 104 | } 105 | 106 | #[async_trait] 107 | impl Operator for WindowAllOperator 108 | where 109 | T: Clone + Send + 'static, 110 | F: Fn(&T) -> bool + Send + Sync, 111 | { 112 | async fn process(&mut self, record: Record) -> StreamResult>> { 113 | let mut results = Vec::new(); 114 | 115 | // Get all windows that this record belongs to 116 | let window_keys = self.get_affected_windows(record.timestamp); 117 | 118 | // Add the record to all relevant windows 119 | for window_key in window_keys { 120 | let records = self.buffer.entry(window_key).or_default(); 121 | records.push(record.clone()); 122 | 123 | // Process each affected window 124 | let window_records = records.clone(); 125 | if let Some(result) = self.process_window(&window_records) { 126 | results.push(result); 127 | } 128 | } 129 | 130 | Ok(results) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/operator/window_reduce.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_utils::models::{Record,
StreamResult}; 3 | use fluxus_utils::time::current_time; 4 | use fluxus_utils::window::{WindowConfig, WindowType}; 5 | use std::collections::HashMap; 6 | use std::marker::PhantomData; 7 | 8 | /// Built-in window reduce operator 9 | pub struct WindowReduceOperator 10 | where 11 | T: Clone, 12 | F: Fn(T, T) -> T + Send + Sync, 13 | { 14 | func: F, 15 | window: WindowConfig, 16 | buffer: HashMap>>, 17 | _phantom: PhantomData, 18 | } 19 | 20 | impl WindowReduceOperator 21 | where 22 | T: Clone, 23 | F: Fn(T, T) -> T + Send + Sync, 24 | { 25 | pub fn new(func: F, window: WindowConfig) -> Self { 26 | Self { 27 | func, 28 | window, 29 | buffer: HashMap::new(), 30 | _phantom: PhantomData, 31 | } 32 | } 33 | 34 | fn get_affected_windows(&self, timestamp: i64) -> Vec { 35 | self.window.window_type.get_affected_windows(timestamp) 36 | } 37 | 38 | fn process_window(&self, records: &[Record]) -> Option> { 39 | records.first().map(|first| { 40 | let result = records[1..].iter().fold(first.data.clone(), |acc, record| { 41 | (self.func)(acc, record.data.clone()) 42 | }); 43 | Record { 44 | data: result, 45 | timestamp: first.timestamp, 46 | } 47 | }) 48 | } 49 | } 50 | 51 | #[async_trait] 52 | impl super::Operator for WindowReduceOperator 53 | where 54 | T: Clone + Send, 55 | F: Fn(T, T) -> T + Send + Sync, 56 | { 57 | async fn process(&mut self, record: Record) -> StreamResult>> { 58 | let mut results = Vec::new(); 59 | 60 | // Get all windows that this record belongs to 61 | let window_keys = self.get_affected_windows(record.timestamp); 62 | 63 | // Add the record to all relevant windows 64 | for window_key in window_keys { 65 | let records = self.buffer.entry(window_key).or_default(); 66 | records.push(record.clone()); 67 | 68 | // Process each affected window 69 | let window_records = records.clone(); 70 | if let Some(result) = self.process_window(&window_records) { 71 | results.push(result); 72 | } 73 | } 74 | 75 | Ok(results) 76 | } 77 | 78 | async fn on_window_trigger(&mut self) -> StreamResult>> { 79 | let mut results = Vec::new(); 80 | let now = current_time() as i64; 81 | 82 | // Process and remove expired windows 83 | let expired_keys: Vec<_> = self 84 | .buffer 85 | .keys() 86 | .filter(|&&key| match &self.window.window_type { 87 | WindowType::Tumbling(duration) => { 88 | key + duration.as_millis() as i64 89 | + self.window.allow_lateness.as_millis() as i64 90 | <= now 91 | } 92 | WindowType::Sliding(size, _) => { 93 | key + size.as_millis() as i64 + self.window.allow_lateness.as_millis() as i64 94 | <= now 95 | } 96 | WindowType::Session(gap) => { 97 | key + gap.as_millis() as i64 + self.window.allow_lateness.as_millis() as i64 98 | <= now 99 | } 100 | WindowType::Global => { 101 | // Global window doesn't expire based on time, so it's never considered expired here 102 | false 103 | } 104 | }) 105 | .cloned() 106 | .collect(); 107 | 108 | for key in expired_keys { 109 | if let Some(records) = self.buffer.remove(&key) { 110 | if let Some(result) = self.process_window(&records) { 111 | results.push(result); 112 | } 113 | } 114 | } 115 | 116 | Ok(results) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/transform_base.rs: -------------------------------------------------------------------------------- 1 | use fluxus_utils::models::{Record, StreamResult}; 2 | use std::sync::Arc; 3 | 4 | use crate::{InnerOperator, InnerSource}; 5 | 6 | #[derive(Clone)] 7 | pub struct TransformBase { 8 | inner: Arc>, 9 | 
operators: Vec>>, 10 | } 11 | 12 | impl TransformBase { 13 | pub fn new(inner: Arc>) -> Self { 14 | Self { 15 | inner, 16 | operators: Vec::new(), 17 | } 18 | } 19 | 20 | pub fn set_operators(&mut self, operators: Vec>>) { 21 | self.operators = operators; 22 | } 23 | 24 | pub async fn process_operators(&mut self, record: Record) -> StreamResult>> { 25 | let mut records = vec![record]; 26 | 27 | for op in &self.operators { 28 | let mut processed = Vec::new(); 29 | 30 | for rec in records { 31 | let operator = Arc::clone(op); 32 | let results = unsafe { 33 | // Safe because we have exclusive access through &mut self 34 | let op = &mut *(Arc::as_ptr(&operator) as *mut InnerOperator); 35 | op.process(rec).await? 36 | }; 37 | 38 | processed.extend(results); 39 | } 40 | 41 | if processed.is_empty() { 42 | return Ok(Vec::new()); 43 | } 44 | 45 | records = processed; 46 | } 47 | 48 | Ok(records) 49 | } 50 | 51 | pub async fn get_next_record(&mut self) -> StreamResult>> { 52 | let inner = Arc::clone(&self.inner); 53 | unsafe { 54 | // Safe because we have exclusive access through &mut self 55 | let source = &mut *(Arc::as_ptr(&inner) as *mut InnerSource); 56 | source.next().await 57 | } 58 | } 59 | 60 | pub async fn close_inner(&mut self) -> StreamResult<()> { 61 | let inner = Arc::clone(&self.inner); 62 | unsafe { 63 | // Safe because we have exclusive access through &mut self 64 | let source = &mut *(Arc::as_ptr(&inner) as *mut InnerSource); 65 | source.close().await 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/transform_source.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sources::Source; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::sync::Arc; 5 | 6 | use crate::{InnerOperator, InnerSource, TransformBase}; 7 | 8 | #[derive(Clone)] 9 | pub struct TransformSource { 10 | base: TransformBase, 11 | buffer: Vec>, 12 | } 13 | 14 | impl TransformSource { 15 | pub fn new(inner: Arc>) -> Self { 16 | Self { 17 | base: TransformBase::new(inner), 18 | buffer: Vec::new(), 19 | } 20 | } 21 | 22 | pub fn set_operators(&mut self, operators: Vec>>) { 23 | self.base.set_operators(operators); 24 | } 25 | } 26 | 27 | #[async_trait] 28 | impl Source for TransformSource { 29 | async fn init(&mut self) -> StreamResult<()> { 30 | Ok(()) 31 | } 32 | 33 | async fn next(&mut self) -> StreamResult>> { 34 | // If we have records in the buffer, return one 35 | if !self.buffer.is_empty() { 36 | return Ok(self.buffer.pop()); 37 | } 38 | 39 | let record = self.base.get_next_record().await?; 40 | 41 | // If there's no next record, return None 42 | let Some(record) = record else { 43 | return Ok(None); 44 | }; 45 | 46 | let records = self.base.process_operators(record).await?; 47 | 48 | if records.is_empty() { 49 | return self.next().await; 50 | } 51 | 52 | self.buffer = records; 53 | self.buffer.reverse(); 54 | 55 | Ok(self.buffer.pop()) 56 | } 57 | 58 | async fn close(&mut self) -> StreamResult<()> { 59 | self.base.close_inner().await 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /crates/fluxus-transformers/src/transform_source_with_operator.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use fluxus_sources::Source; 3 | use fluxus_utils::models::{Record, StreamResult}; 4 | use std::sync::Arc; 5 | 6 | 
use crate::{InnerOperator, InnerSource, Operator, TransformBase}; 7 | 8 | /// A source that applies a single operator transformation 9 | #[derive(Clone)] 10 | pub struct TransformSourceWithOperator 11 | where 12 | T: Clone, 13 | R: Clone, 14 | { 15 | base: TransformBase, 16 | operator: Arc>, 17 | buffer: Vec>, 18 | } 19 | 20 | impl TransformSourceWithOperator 21 | where 22 | T: Clone + Send + Sync + 'static, 23 | R: Clone + Send + Sync + 'static, 24 | { 25 | pub fn new( 26 | inner: Arc>, 27 | operator: O, 28 | operators: Vec>>, 29 | ) -> Self 30 | where 31 | O: Operator + Send + Sync + 'static, 32 | { 33 | let mut base = TransformBase::new(inner); 34 | base.set_operators(operators); 35 | Self { 36 | base, 37 | operator: Arc::new(operator), 38 | buffer: Vec::new(), 39 | } 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Source for TransformSourceWithOperator 45 | where 46 | T: Clone + Send + Sync + 'static, 47 | R: Clone + Send + Sync + 'static, 48 | { 49 | async fn init(&mut self) -> StreamResult<()> { 50 | Ok(()) 51 | } 52 | 53 | async fn next(&mut self) -> StreamResult>> { 54 | if !self.buffer.is_empty() { 55 | return Ok(self.buffer.pop()); 56 | } 57 | let record = self.base.get_next_record().await?; 58 | 59 | // If there's no next record, return None 60 | let Some(record) = record else { 61 | return Ok(None); 62 | }; 63 | 64 | let records = self.base.process_operators(record).await?; 65 | 66 | if records.is_empty() { 67 | return self.next().await; 68 | } 69 | 70 | let mut final_results = Vec::new(); 71 | for rec in records { 72 | final_results.extend(unsafe { 73 | let op = &mut *(Arc::as_ptr(&self.operator) as *mut InnerOperator); 74 | op.process(rec).await? 75 | }); 76 | } 77 | self.buffer = final_results; 78 | self.buffer.reverse(); 79 | 80 | Ok(self.buffer.pop()) 81 | } 82 | 83 | async fn close(&mut self) -> StreamResult<()> { 84 | self.base.close_inner().await 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /crates/fluxus-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus-utils" 3 | description = "Utility components for Fluxus stream processing engine" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | categories.workspace = true 11 | keywords.workspace = true 12 | readme = "README.md" 13 | 14 | [dependencies] 15 | tokio = { version = "1", features = ["full"] } 16 | futures = "0.3" 17 | serde = { version = "1.0", features = ["derive"] } 18 | serde_json = "1.0" 19 | anyhow = "1.0" 20 | thiserror = "1.0" 21 | async-trait = "0.1" 22 | tracing = "0.1" 23 | num_cpus = "1.16" 24 | csv = "1.3" 25 | 26 | [dev-dependencies] 27 | cargo-husky = { version = "1", features = ["precommit-hook", "run-cargo-test", "run-cargo-clippy", "run-cargo-fmt"] } 28 | -------------------------------------------------------------------------------- /crates/fluxus-utils/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Utils 2 | 3 | Utility components for the Fluxus stream processing engine. This crate provides a set of useful tools and helper functions to support the development and operation of Fluxus. 4 | 5 | ## Overview 6 | 7 | The `fluxus-utils` crate exposes three core modules: `error_converters`, `models`, and `window`. 
These modules can be utilized across different parts of the Fluxus ecosystem to streamline common tasks and improve overall system functionality. 8 | 9 | ### `error_converters` 10 | The `error_converters` module contains utility functions for converting between different error types. This is particularly useful when dealing with errors that may be encountered in different parts of the Fluxus system. 11 | 12 | ### `models` 13 | The `models` module defines various data structures used throughout the Fluxus ecosystem. These models include configuration settings, event data, and other essential components. 14 | 15 | ### `window` 16 | The `window` module provides functionality for managing time-based windows in Fluxus. This is particularly useful for tasks such as aggregating data over time intervals. 17 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/error_converters.rs: -------------------------------------------------------------------------------- 1 | use crate::models::StreamError; 2 | use csv; 3 | use serde_json; 4 | 5 | /// Error converter for CSV errors 6 | impl From for StreamError { 7 | fn from(err: csv::Error) -> Self { 8 | StreamError::Serialization(err.to_string()) 9 | } 10 | } 11 | 12 | /// Error converter for UTF-8 errors 13 | impl From for StreamError { 14 | fn from(err: std::string::FromUtf8Error) -> Self { 15 | StreamError::Serialization(err.to_string()) 16 | } 17 | } 18 | 19 | /// Error converter for serde_json errors 20 | impl From for StreamError { 21 | fn from(err: serde_json::Error) -> Self { 22 | StreamError::Serialization(err.to_string()) 23 | } 24 | } 25 | 26 | /// Error converter for CSV writer's IntoInnerError 27 | impl From> for StreamError { 28 | fn from(err: csv::IntoInnerError) -> Self { 29 | StreamError::Serialization(err.to_string()) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod error_converters; 2 | pub mod models; 3 | pub mod time; 4 | pub mod window; 5 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/models.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | use crate::time::current_time; 4 | 5 | /// Record represents a single data record in the stream 6 | #[derive(Debug, Clone)] 7 | pub struct Record { 8 | /// The actual data payload 9 | pub data: T, 10 | /// Timestamp of the record (in milliseconds) 11 | pub timestamp: i64, 12 | } 13 | 14 | impl Record { 15 | /// Create a new record with the current timestamp 16 | pub fn new(data: T) -> Self { 17 | let timestamp = current_time() as i64; 18 | Record { data, timestamp } 19 | } 20 | 21 | /// Create a new record with a specific timestamp 22 | pub fn with_timestamp(data: T, timestamp: i64) -> Self { 23 | Record { data, timestamp } 24 | } 25 | } 26 | 27 | /// Error types that can occur during stream processing 28 | #[derive(Error, Debug)] 29 | pub enum StreamError { 30 | #[error("IO error: {0}")] 31 | Io(#[from] std::io::Error), 32 | 33 | #[error("Serialization error: {0}")] 34 | Serialization(String), 35 | 36 | #[error("Configuration error: {0}")] 37 | Config(String), 38 | 39 | #[error("Runtime error: {0}")] 40 | Runtime(String), 41 | 42 | #[error("EOF")] 43 | EOF, 44 | 45 | #[error("Wait for {0} milliseconds")] 46 | Wait(u64), 47 | } 48 | 49 | /// A 
Result type specialized for stream processing operations 50 | pub type StreamResult = Result; 51 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/time.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | 3 | pub fn current_time() -> u128 { 4 | SystemTime::now() 5 | .duration_since(UNIX_EPOCH) 6 | .unwrap_or_default() 7 | .as_millis() 8 | } 9 | -------------------------------------------------------------------------------- /crates/fluxus-utils/src/window.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | /// Window type for stream processing 4 | #[derive(Debug, Clone)] 5 | pub enum WindowType { 6 | /// Tumbling window with fixed size 7 | Tumbling(Duration), 8 | /// Sliding window with size and slide interval 9 | Sliding(Duration, Duration), 10 | /// Session window with gap timeout 11 | Session(Duration), 12 | /// Global window, no window boundaries 13 | Global, 14 | } 15 | 16 | /// Configuration for windowed operations 17 | #[derive(Debug, Clone)] 18 | pub struct WindowConfig { 19 | /// Type of the window 20 | pub window_type: WindowType, 21 | /// Whether to allow late arrivals 22 | pub allow_lateness: Duration, 23 | /// Watermark strategy (time to wait before processing) 24 | pub watermark_delay: Duration, 25 | } 26 | 27 | impl WindowConfig { 28 | /// Create a new tumbling window configuration 29 | pub fn tumbling(size: Duration) -> Self { 30 | Self { 31 | window_type: WindowType::Tumbling(size), 32 | allow_lateness: Duration::from_secs(0), 33 | watermark_delay: Duration::from_secs(0), 34 | } 35 | } 36 | 37 | /// Create a new sliding window configuration 38 | pub fn sliding(size: Duration, slide: Duration) -> Self { 39 | Self { 40 | window_type: WindowType::Sliding(size, slide), 41 | allow_lateness: Duration::from_secs(0), 42 | watermark_delay: Duration::from_secs(0), 43 | } 44 | } 45 | 46 | /// Create a new session window configuration 47 | pub fn session(gap: Duration) -> Self { 48 | Self { 49 | window_type: WindowType::Session(gap), 50 | allow_lateness: Duration::from_secs(0), 51 | watermark_delay: Duration::from_secs(0), 52 | } 53 | } 54 | 55 | /// Create a new global window configuration 56 | pub fn global() -> Self { 57 | Self { 58 | window_type: WindowType::Global, 59 | allow_lateness: Duration::from_secs(0), 60 | watermark_delay: Duration::from_secs(0), 61 | } 62 | } 63 | 64 | /// Set the allowed lateness for this window 65 | pub fn with_lateness(mut self, lateness: Duration) -> Self { 66 | self.allow_lateness = lateness; 67 | self 68 | } 69 | 70 | /// Set the watermark delay for this window 71 | pub fn with_watermark_delay(mut self, delay: Duration) -> Self { 72 | self.watermark_delay = delay; 73 | self 74 | } 75 | } 76 | 77 | impl WindowType { 78 | fn get_common_windows(&self, timestamp: i64) -> Vec { 79 | match self { 80 | WindowType::Tumbling(duration) => { 81 | let duration_ms = duration.as_millis() as i64; 82 | vec![(timestamp / duration_ms) * duration_ms] 83 | } 84 | WindowType::Sliding(size, slide) => { 85 | let slide_ms = slide.as_millis() as i64; 86 | let size_ms = size.as_millis() as i64; 87 | let earliest_window = ((timestamp - size_ms) / slide_ms) * slide_ms; 88 | let latest_window = (timestamp / slide_ms) * slide_ms; 89 | 90 | (earliest_window..=latest_window) 91 | .step_by(slide.as_millis() as usize) 92 | .filter(|&start| timestamp - start < size_ms) 93 | 
.collect() 94 | } 95 | WindowType::Session(gap) => { 96 | let gap_ms = gap.as_millis() as i64; 97 | vec![timestamp / gap_ms] 98 | } 99 | WindowType::Global => { 100 | vec![0] 101 | } 102 | } 103 | } 104 | 105 | pub fn get_affected_windows(&self, timestamp: i64) -> Vec { 106 | self.get_common_windows(timestamp) 107 | } 108 | 109 | pub fn get_window_keys(&self, timestamp: i64) -> Vec { 110 | self.get_common_windows(timestamp) 111 | .iter() 112 | .map(|&ts| ts as u64) 113 | .collect() 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /crates/fluxus/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fluxus" 3 | description = "Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real-time data processing and analysis." 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | authors.workspace = true 8 | repository.workspace = true 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | fluxus-api = { path = "../fluxus-api", version="0.2", optional = true } 13 | fluxus-core = { path = "../fluxus-core", version="0.2", optional = true } 14 | fluxus-runtime = { path = "../fluxus-runtime", version="0.2", optional = true } 15 | fluxus-sinks = { path = "../fluxus-sinks", version="0.2", optional = true } 16 | fluxus-sources = { path = "../fluxus-sources", version="0.2", optional = true } 17 | fluxus-transformers = { path = "../fluxus-transformers", version="0.2", optional = true } 18 | fluxus-utils = { path = "../fluxus-utils", version="0.2", optional = true } 19 | 20 | tokio = { version = "1", features = ["full"] } 21 | futures = "0.3" 22 | serde = { version = "1.0", features = ["derive"] } 23 | serde_json = "1.0" 24 | anyhow = "1.0" 25 | thiserror = "1.0" 26 | async-trait = "0.1" 27 | tracing = "0.1" 28 | num_cpus = "1.16" 29 | csv = "1.3" 30 | 31 | [features] 32 | # Include nothing by default 33 | default = [] 34 | 35 | # enable everything 36 | full = [ 37 | "fluxus-api", 38 | "fluxus-core", 39 | "fluxus-runtime", 40 | "fluxus-sinks", 41 | "fluxus-sources", 42 | "fluxus-transformers", 43 | "fluxus-utils" 44 | ] 45 | -------------------------------------------------------------------------------- /crates/fluxus/README.md: -------------------------------------------------------------------------------- 1 |

2 | ![Fluxus Logo](../../docs/images/fluxus-logo.png) 3 |

4 | 5 | # Fluxus Stream Processing Engine 6 | 7 | [![Crates.io](https://img.shields.io/crates/v/fluxus-core.svg)](https://crates.io/crates/fluxus-core) 8 | [![Documentation](https://docs.rs/fluxus-core/badge.svg)](https://docs.rs/fluxus-core) 9 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache2.0-yellow.svg)](https://opensource.org/license/apache-2-0) 10 | [Build Status](https://github.com/lispking/fluxus/actions?query=branch%3Amain) 11 | 12 | 13 | Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real-time data processing and analysis. 14 | 15 | ![Fluxus Architecture](../../docs/architecture.png) 16 | 17 | ## Features 18 | 19 | - High-performance stream processing 20 | - Flexible windowing operations (Tumbling, Sliding, Session windows) 21 | - Parallel processing support 22 | - Rich set of stream operations (map, filter, aggregate) 23 | - Type-safe API 24 | - Easy to use and extend 25 | 26 | ## Project Structure 27 | 28 | - `crates/fluxus` - Main crate containing the Fluxus engine and its dependencies 29 | - `crates/fluxus-api` - Core API definitions and interfaces 30 | - `crates/fluxus-core` - Core implementations and data structures 31 | - `crates/fluxus-runtime` - Runtime engine and execution environment 32 | - `crates/fluxus-sinks` - Sink implementations for different data sinks (e.g., Kafka, Console) 33 | - `crates/fluxus-sources` - Source implementations for different data sources (e.g., Kafka, Console) 34 | - `crates/fluxus-transformers` - Transformations for stream processing (e.g., map, filter, aggregate) 35 | - `crates/fluxus-utils` - Utility functions and helpers 36 | - `examples` - Example applications demonstrating usage 37 | 38 | ## Examples 39 | 40 | The project includes several example applications that demonstrate different use cases: 41 | 42 | ### Word Count 43 | 44 | Simple word frequency analysis in text streams using tumbling windows. 45 | 46 | ```bash 47 | cargo run --example word-count 48 | ``` 49 | 50 | ### Temperature Sensor Analysis 51 | 52 | Processing and analyzing temperature sensor data with sliding windows. 53 | 54 | ```bash 55 | cargo run --example temperature-sensor 56 | ``` 57 | 58 | ### Click Stream Analysis 59 | 60 | Analyzing user click streams with session windows. 61 | 62 | ```bash 63 | cargo run --example click-stream 64 | ``` 65 | 66 | ### Network Log Analysis 67 | 68 | Processing network logs with sliding windows and aggregations. 69 | 70 | ```bash 71 | cargo run --example network-log 72 | ``` 73 | 74 | ### View Available Examples 75 | 76 | To see all available examples and options: 77 | 78 | ```bash 79 | cargo run --example 80 | ``` 81 | 82 | ## Getting Started 83 | 84 | 1. Clone the repository: 85 | 86 | ```bash 87 | git clone https://github.com/lispking/fluxus.git 88 | cd fluxus 89 | ``` 90 | 91 | 2. Build the project: 92 | 93 | ```bash 94 | cargo build 95 | ``` 96 | 97 | 3. Run the examples: 98 | 99 | ```bash 100 | cargo run --example [example-name] 101 | ``` 102 | 103 | ## Development 104 | 105 | ### Prerequisites 106 | 107 | - Rust 1.75+ 108 | - Cargo 109 | 110 | ### Building 111 | 112 | ```bash 113 | cargo build 114 | ``` 115 | 116 | ### Testing 117 | 118 | ```bash 119 | cargo test 120 | ``` 121 | 122 | ## License 123 | 124 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
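
## Window Configuration Sketch

As a supplement to the windowing features listed above, the sketch below shows how the three window types can be configured through `WindowConfig` (re-exported as `fluxus::utils::window` when the `full` or `fluxus-utils` feature is enabled). It is a minimal illustration only: the helper name `example_window_configs` and all durations are placeholders, not recommended defaults.

```rust
use fluxus::utils::window::WindowConfig;
use std::time::Duration;

fn example_window_configs() -> (WindowConfig, WindowConfig, WindowConfig) {
    // Tumbling: fixed-size, non-overlapping 10-second windows
    let tumbling = WindowConfig::tumbling(Duration::from_secs(10));

    // Sliding: 60-second windows that advance every 10 seconds
    let sliding = WindowConfig::sliding(Duration::from_secs(60), Duration::from_secs(10));

    // Session: a window closes after 30 seconds of inactivity,
    // here also tolerating records that arrive up to 5 seconds late
    let session = WindowConfig::session(Duration::from_secs(30))
        .with_lateness(Duration::from_secs(5));

    (tumbling, sliding, session)
}
```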
-------------------------------------------------------------------------------- /crates/fluxus/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fluxus - A powerful stream processing framework in Rust 2 | //! 3 | //! Fluxus is a high-performance stream processing framework inspired by Flink, 4 | //! designed to provide a seamless experience for building and running data processing pipelines in Rust. 5 | //! It offers a rich set of APIs and components to handle various data sources, sinks, and transformations. 6 | //! 7 | //! ## Add Dependencies 8 | //! To use Fluxus, you need to add the following dependencies to your `Cargo.toml`: 9 | //! ```shell 10 | //! cargo add fluxus --features full 11 | //! ``` 12 | //! 13 | //! ## Word Count Example 14 | //! Here is a word count example using Fluxus: 15 | //! ```rust 16 | //! use anyhow::Result; 17 | //! use fluxus::api::{ 18 | //! DataStream, 19 | //! io::{CollectionSink, CollectionSource}, 20 | //! }; 21 | //! use fluxus::utils::window::WindowConfig; 22 | //! use std::collections::HashMap; 23 | //! use std::time::Duration; 24 | //! 25 | //! pub type WordCount = HashMap; 26 | //! 27 | //! #[tokio::main] 28 | //! async fn main() -> Result<()> { 29 | //! // Sample input text 30 | //! let text = vec![ 31 | //! "hello world", 32 | //! "hello stream processing", 33 | //! "world of streaming", 34 | //! "hello streaming world", 35 | //! ]; 36 | //! 37 | //! // Create a source from the text collection 38 | //! let source = CollectionSource::new(text); 39 | //! let sink: CollectionSink = CollectionSink::new(); 40 | //! 41 | //! // Build and execute the streaming pipeline 42 | //! DataStream::new(source) 43 | //! // Split text into words 44 | //! .map(|line| { 45 | //! line.split_whitespace() 46 | //! .map(|s| s.to_lowercase()) 47 | //! .collect::>() 48 | //! }) 49 | //! // Parallelize the processing 50 | //! .parallel(2) 51 | //! // Create tumbling windows of 1 second 52 | //! .window(WindowConfig::tumbling(Duration::from_millis(1000))) 53 | //! // Count words in each window 54 | //! .aggregate(HashMap::new(), |mut counts, words| { 55 | //! for word in words { 56 | //! *counts.entry(word).or_insert(0) += 1; 57 | //! } 58 | //! counts 59 | //! }) 60 | //! // Write results to sink 61 | //! .sink(sink.clone()) 62 | //! .await?; 63 | //! 64 | //! // Print the results 65 | //! println!("\nWord count results:"); 66 | //! for result in sink.get_data() { 67 | //! println!("\nWindow results:"); 68 | //! let mut words: Vec<_> = result.iter().collect(); 69 | //! words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 70 | //! for (word, count) in words { 71 | //! println!(" {}: {}", word, count); 72 | //! } 73 | //! } 74 | //! 75 | //! Ok(()) 76 | //! } 77 | //! 
78 | 79 | #[cfg(feature = "fluxus-api")] 80 | pub mod api { 81 | pub use fluxus_api::*; 82 | } 83 | 84 | #[cfg(feature = "fluxus-core")] 85 | pub mod core { 86 | pub use fluxus_core::*; 87 | } 88 | 89 | #[cfg(feature = "fluxus-runtime")] 90 | pub mod runtime { 91 | pub use fluxus_runtime::*; 92 | } 93 | 94 | #[cfg(feature = "fluxus-sinks")] 95 | pub mod sinks { 96 | pub use fluxus_sinks::*; 97 | } 98 | 99 | #[cfg(feature = "fluxus-sources")] 100 | pub mod sources { 101 | pub use fluxus_sources::*; 102 | } 103 | 104 | #[cfg(feature = "fluxus-transformers")] 105 | pub mod transformers { 106 | pub use fluxus_transformers::*; 107 | } 108 | 109 | #[cfg(feature = "fluxus-utils")] 110 | pub mod utils { 111 | pub use fluxus_utils::*; 112 | } 113 | -------------------------------------------------------------------------------- /docs/DESIGN.md: -------------------------------------------------------------------------------- 1 | # Fluxus Stream Processing Engine Design Document 2 | 3 | ## 1. Introduction 4 | 5 | Fluxus is a lightweight stream processing engine written in Rust, designed for efficient real - time data processing and analysis. It provides high - performance stream processing capabilities with a type - safe API, making it easy to use and extend. 6 | 7 | ## 2. Features 8 | 9 | ### 2.1 High - Performance Stream Processing 10 | 11 | Fluxus is optimized for real - time data processing, leveraging Rust's performance characteristics to handle high - volume data streams efficiently. 12 | 13 | ### 2.2 Flexible Windowing Operations 14 | 15 | - **Tumbling Windows**: Fixed - size, non - overlapping windows. 16 | - **Sliding Windows**: Fixed - size, overlapping windows. 17 | - **Session Windows**: Variable - size windows based on inactivity gaps. 18 | 19 | ### 2.3 Parallel Processing Support 20 | 21 | The engine supports parallel processing of data streams, allowing for better utilization of multi - core processors. 22 | 23 | ### 2.4 Rich Set of Stream Operations 24 | 25 | Fluxus provides a variety of stream operations, including `map`, `filter`, and `aggregate`, enabling users to perform complex data transformations. 26 | 27 | ### 2.5 Type - Safe API 28 | 29 | The API is type - safe, reducing the likelihood of runtime errors and providing better developer experience. 30 | 31 | ## 3. Architecture 32 | 33 | ### 3.1 Core Components 34 | 35 | - **`fluxus`**: The main crate that serves as the entry point for the Fluxus engine. It provides the API for creating and managing stream processing tasks. 36 | - **`fluxus-api`**: Defines the core API and interfaces for the Fluxus engine. It serves as the contract between different components of the engine and user applications. 37 | - **`fluxus-core`**: Contains the core implementations and data structures. This component is responsible for handling the internal logic of stream processing, such as windowing and operation execution. 38 | - **`fluxus-runtime`**: Provides the runtime engine and execution environment. It manages the execution of stream processing tasks, including resource allocation and task scheduling. 39 | - **`fluxus-sinks`**: Provides sinks for outputting processed data. Sinks can be used to write data to various destinations, such as databases, files, or external services. 40 | - **`fluxus-sources`**: Provides sources for ingesting data streams. Sources can be used to read data from various sources, such as Kafka, RabbitMQ, or files. 41 | - ** `fluxus-transformers`**: Contains transformers for performing data transformations. 
Transformers can be used to perform operations on data streams, such as mapping, filtering, or aggregating. 42 | - ** `fluxus-utils`**: Contains utility functions and helper classes. These utilities provide common functionality, such as serialization and deserialization, error handling, and configuration management. 43 | 44 | ### 3.2 Data Flow 45 | 46 | 1. **Data Ingestion**: Data streams are ingested into the engine. 47 | 2. **Stream Processing**: The data streams are processed using the defined operations and windowing strategies. 48 | 3. **Result Output**: The processed results are outputted to the specified destinations. 49 | 50 | ## 4. Design Principles 51 | 52 | ### 4.1 Performance - Oriented 53 | 54 | The engine is designed with performance in mind. Rust's memory management and concurrency features are fully utilized to achieve high throughput and low latency. 55 | 56 | ### 4.2 Flexibility 57 | 58 | Fluxus provides flexible windowing operations and a rich set of stream operations, allowing users to adapt the engine to different use cases. 59 | 60 | ### 4.3 Ease of Use 61 | 62 | The type - safe API and well - structured project make it easy for developers to use and extend the engine. 63 | 64 | ## 5. Example Applications 65 | 66 | ### 5.1 Word Count 67 | 68 | A simple word frequency analysis in text streams using tumbling windows. 69 | 70 | ```bash 71 | cargo run --example word-count 72 | ``` 73 | 74 | ### 5.2 Temperature Sensor Analysis 75 | 76 | Processing and analyzing temperature sensor data with sliding windows. 77 | 78 | ```bash 79 | cargo run --example temperature-sensor 80 | ``` 81 | 82 | ### 5.3 Click Stream Analysis 83 | 84 | Analyzing user click streams with session windows. 85 | 86 | ```bash 87 | cargo run --example click-stream 88 | ``` 89 | 90 | ### 5.4 Network Log Analysis 91 | 92 | Processing network logs with sliding windows and aggregations. 93 | 94 | ```bash 95 | cargo run --example network-log 96 | ``` 97 | 98 | ### 5.5 IoT Device Analysis 99 | 100 | Processing IoT device data with tumbling windows and aggregations. 101 | 102 | ```bash 103 | cargo run --example iot-devices 104 | ``` 105 | 106 | ### 5.6 Log Anomaly Detection 107 | 108 | Detecting anomalies in log streams using sliding windows and aggregations. 109 | 110 | ```bash 111 | cargo run --example log-anomaly 112 | ``` 113 | 114 | ### 5.7 Stock Analysis 115 | 116 | Analyzing stock price data with tumbling windows and aggregations. 117 | 118 | ```bash 119 | cargo run --example stock-market 120 | ``` 121 | 122 | 123 | ## 6. Development 124 | 125 | ### 6.1 Prerequisites 126 | 127 | - Rust 1.75+ 128 | - Cargo 129 | 130 | ### 6.2 Building 131 | 132 | ```bash 133 | cargo build 134 | ``` 135 | 136 | ### 6.3 Testing 137 | 138 | ```bash 139 | cargo test 140 | ``` 141 | 142 | ## 7. Conclusion 143 | 144 | Fluxus is a powerful and flexible stream processing engine. Its design emphasizes performance, flexibility, and ease of use. With a rich set of features and a well - structured architecture, it can be used in various real - time data processing scenarios. 
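
## 8. Appendix: Illustrative Data Flow Sketch

To make the data flow described in Section 3.2 concrete, the sketch below wires the three stages together using the public API that the bundled examples rely on: a `CollectionSource` for ingestion, `DataStream` transformations with a window for processing, and a `CollectionSink` for output. It is a minimal illustration with made-up data and an arbitrary window size, not a prescribed configuration.

```rust
use anyhow::Result;
use fluxus::api::{
    DataStream,
    io::{CollectionSink, CollectionSource},
};
use fluxus::utils::window::WindowConfig;
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<()> {
    // 1. Data ingestion: wrap an in-memory collection as a source
    let source = CollectionSource::new(vec![1u32, 2, 3, 4, 5]);
    let sink: CollectionSink<u32> = CollectionSink::new();

    // 2. Stream processing: transform, window, and aggregate the records
    DataStream::new(source)
        .map(|n| n * 2)
        .window(WindowConfig::tumbling(Duration::from_millis(100)))
        .aggregate(0u32, |sum, n| sum + n)
        // 3. Result output: write each window's result to the sink
        .sink(sink.clone())
        .await?;

    println!("Window sums: {:?}", sink.get_data());
    Ok(())
}
```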
145 | -------------------------------------------------------------------------------- /docs/Logo.md: -------------------------------------------------------------------------------- 1 | # Mascot Name: Fluko 2 | 3 | ### Image Setting: 4 | - Species: Anthropomorphic otter (The otter is associated with "flow" and "flexibility", and has a strong affinity) 5 | - Color: The main color is a gradient of blue and green, symbolizing flowing data and calm calculation 6 | 7 | - Body Shape: 8 | - Streamlined body, and the tail is shaped like the icon of a data flow diagram 9 | - There are lightning-shaped pupils in the eyes (representing high-speed processing) 10 | - There is a "node" device hanging on the belt, similar to a small chip (symbolizing node processing) 11 | 12 | - Clothing: 13 | - Wearing a simple high-tech style jacket, with a small "λ" (Lambda, representing functional calculation) printed on the sleeves 14 | - There is a small energy backpack on the back, emitting a halo, symbolizing the continuous flow of computing energy 15 | 16 | ### Personality Setting: 17 | - Smart but not arrogant: Represents the intelligence of Fluxus, yet is open and friendly 18 | - Quick to react: Always wears a headset, ready to respond to event or message streams at any time 19 | - Likes water and light: Prefers flowing environments and often shuttles through the digital river or the ocean of information 20 | 21 | ## Application Scenarios: 22 | - Fluko can appear in the official website animation, symbolizing event processing by running or swimming 23 | - It can be made into Slack stickers, the mascot on the startup screen, a small assistant in documents, etc. 24 | - When attending open-source events, as a peripheral doll, it can also attract the attention of developers 25 | -------------------------------------------------------------------------------- /docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lispking/fluxus/7bbbb2f83a21add8b118aea69f0fb01249b280c6/docs/architecture.png -------------------------------------------------------------------------------- /docs/images/fluxus-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lispking/fluxus/7bbbb2f83a21add8b118aea69f0fb01249b280c6/docs/images/fluxus-logo.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Fluxus Examples 2 | 3 | A collection of example applications demonstrating the usage of the Fluxus stream processing engine. 4 | 5 | ## Available Examples 6 | 7 | ### 1. Word Count (`word-count`) 8 | 9 | Demonstrates basic stream processing with tumbling windows: 10 | - Splits text into words 11 | - Counts word frequencies in time windows 12 | - Shows parallel processing capabilities 13 | 14 | ```bash 15 | cargo run --example word-count 16 | ``` 17 | 18 | ### 2. Temperature Sensor Analysis (`temperature-sensor`) 19 | 20 | Shows how to process IoT sensor data: 21 | - Processes multiple sensor readings 22 | - Calculates min/max/average temperatures 23 | - Uses sliding windows for continuous monitoring 24 | 25 | ```bash 26 | cargo run --example temperature-sensor 27 | ``` 28 | 29 | ### 3. 
Click Stream Analysis (`click-stream`) 30 | 31 | Demonstrates session window usage for user behavior analysis: 32 | - Tracks user navigation patterns 33 | - Groups events into sessions 34 | - Analyzes user engagement metrics 35 | 36 | ```bash 37 | cargo run --example click-stream 38 | ``` 39 | 40 | ### 4. Network Log Analysis (`network-log`) 41 | 42 | Shows advanced stream processing features: 43 | - Processes HTTP access logs 44 | - Calculates request statistics 45 | - Uses sliding windows with custom aggregations 46 | 47 | ```bash 48 | cargo run --example network-log 49 | ``` 50 | 51 | ### 5. IoT Device Analysis (`iot-devices`) 52 | 53 | Demonstrates how to process various IoT device data: 54 | - Processes sensor data from different devices 55 | - Calculates device status statistics 56 | - Uses tumbling windows for real-time monitoring 57 | 58 | ```bash 59 | cargo run --example iot-devices 60 | ``` 61 | 62 | ### 6. Log Anomaly Detection (`log-anomaly`) 63 | 64 | Demonstrates log anomaly detection capabilities: 65 | - Processes system log data 66 | - Detects abnormal log patterns 67 | - Uses custom windows for anomaly analysis 68 | 69 | ```bash 70 | cargo run --example log-anomaly 71 | ``` 72 | 73 | ### 7. Stock Market Analysis (`stock-market`) 74 | 75 | Demonstrates stock market data processing: 76 | - Processes real-time stock price data 77 | - Calculates stock price indicators 78 | - Uses session windows to analyze trading patterns 79 | 80 | ```bash 81 | cargo run --example stock-market 82 | ``` 83 | 84 | ## Example Structure 85 | 86 | Each example follows a similar pattern: 87 | 1. Define data structures 88 | 2. Create a data source 89 | 3. Build processing pipeline 90 | 4. Configure windows 91 | 5. Define aggregations 92 | 6. Output results 93 | 94 | ## Learning Path 95 | 96 | We recommend going through the examples in this order: 97 | 1. Word Count - Basic concepts 98 | 2. Temperature Sensor - Time-based windows 99 | 3. Click Stream - Session windows 100 | 4. Network Log - Advanced features 101 | 5. IoT Devices - Multiple data sources 102 | 6. Log Anomaly - Custom windows 103 | 7. Stock Market - Real-time monitoring 104 | 8. [GitHub Archive](https://github.com/fluxus-labs/fluxus-source-gharchive/tree/main/examples) - Count event type from GitHub archive file -------------------------------------------------------------------------------- /examples/click-stream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "click-stream" 3 | description = "Click stream analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "click-stream" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/click-stream/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Click Stream Analysis Example 3 | 4 | This example demonstrates how to use Fluxus to process click - stream data and implement user session analysis. 
It simulates the browsing behavior of users on an e - commerce website, including the processing of page visits and click events. 5 | 6 | ## Features 7 | 8 | - User session tracking 9 | - Page browsing path analysis 10 | - Session duration statistics 11 | - Event aggregation processing 12 | 13 | ## Running the Example 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## Implementation Details 20 | 21 | - Use session windows (30 - second timeout) to group user behaviors 22 | - Filter and process page visit events 23 | - Calculate session duration and total number of events 24 | - Record the user's page visit sequence 25 | 26 | ## Output Example 27 | 28 | ``` 29 | Click stream analysis results: 30 | 31 | Session window results: 32 | User user1: 4 events over 30s, Pages: home -> products -> cart -> checkout 33 | ``` 34 | 35 | ## Dependencies 36 | 37 | - fluxus - core 38 | - fluxus - runtime 39 | - fluxus - api 40 | - tokio 41 | - anyhow 42 | -------------------------------------------------------------------------------- /examples/click-stream/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | pub struct ClickEvent { 12 | user_id: String, 13 | page_id: String, 14 | event_type: String, 15 | timestamp: SystemTime, 16 | } 17 | 18 | #[derive(Clone)] 19 | pub struct UserSession { 20 | user_id: String, 21 | page_views: Vec, 22 | start_time: SystemTime, 23 | duration_secs: u64, 24 | total_events: usize, 25 | } 26 | 27 | #[tokio::main] 28 | async fn main() -> Result<()> { 29 | // Generate sample click events 30 | let events = generate_sample_clicks(); 31 | let source = CollectionSource::new(events); 32 | let sink = CollectionSink::new(); 33 | 34 | // Build and execute the streaming pipeline 35 | DataStream::new(source) 36 | // Filter only page view events 37 | .filter(|event| event.event_type == "page_view") 38 | // Group by user_id 39 | .map(|event| { 40 | ( 41 | event.user_id.clone(), 42 | (event.page_id.clone(), event.timestamp), 43 | ) 44 | }) 45 | // Create session windows with 30-second timeout 46 | .window(WindowConfig::session(Duration::from_millis(30000))) 47 | // Aggregate user sessions 48 | .aggregate( 49 | HashMap::new(), 50 | |mut sessions, (user_id, (page_id, timestamp))| { 51 | let session = sessions 52 | .entry(user_id.clone()) 53 | .or_insert_with(|| UserSession { 54 | user_id, 55 | page_views: Vec::new(), 56 | start_time: timestamp, 57 | duration_secs: 0, 58 | total_events: 0, 59 | }); 60 | 61 | session.page_views.push(page_id); 62 | session.duration_secs = timestamp 63 | .duration_since(session.start_time) 64 | .unwrap_or(Duration::from_secs(0)) 65 | .as_secs(); 66 | session.total_events += 1; 67 | 68 | sessions 69 | }, 70 | ) 71 | .sink(sink.clone()) 72 | .await?; 73 | 74 | // Print results 75 | println!("\nClick stream analysis results:"); 76 | for session_data in sink.get_data() { 77 | println!("\nSession window results:"); 78 | for (_, session) in session_data { 79 | println!( 80 | "User {}: {} events over {}s, Pages: {}", 81 | session.user_id, 82 | session.total_events, 83 | session.duration_secs, 84 | session.page_views.join(" -> ") 85 | ); 86 | } 87 | } 88 | 89 | Ok(()) 90 | } 91 | 92 | // Helper function to generate sample data 93 | fn generate_sample_clicks() -> Vec 
{ 94 | let start_time = SystemTime::now(); 95 | let mut events = Vec::new(); 96 | let pages = ["home", "products", "cart", "checkout"]; 97 | let users = ["user1", "user2", "user3"]; 98 | 99 | for (user_idx, user_id) in users.iter().enumerate() { 100 | let user_start = start_time + Duration::from_secs(user_idx as u64 * 5); 101 | 102 | // Simulate a user session with page views and some other events 103 | for (i, &page) in pages.iter().enumerate() { 104 | // Add page view 105 | events.push(ClickEvent { 106 | user_id: user_id.to_string(), 107 | page_id: page.to_string(), 108 | event_type: "page_view".to_string(), 109 | timestamp: user_start + Duration::from_secs(i as u64 * 10), 110 | }); 111 | 112 | // Add some click events 113 | events.push(ClickEvent { 114 | user_id: user_id.to_string(), 115 | page_id: page.to_string(), 116 | event_type: "click".to_string(), 117 | timestamp: user_start + Duration::from_secs(i as u64 * 10 + 2), 118 | }); 119 | } 120 | } 121 | 122 | events 123 | } 124 | -------------------------------------------------------------------------------- /examples/event-timestamp/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "event-timestamp" 3 | description = "Example of how to add timestamps to events" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "event-timestamp" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | anyhow = "1.0" 17 | tokio = { version = "1.0", features = ["full"] } 18 | -------------------------------------------------------------------------------- /examples/event-timestamp/README.md: -------------------------------------------------------------------------------- 1 | # Event Timestamp Example 2 | 3 | This example demonstrates how to use Fluxus for processing timestamped event streams with windowing operations. It shows how to count events within specific time windows and aggregate the results. 
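
To make "specific time windows" concrete: with tumbling windows, every record timestamp is mapped to the start of the window that contains it, mirroring the key calculation used by `WindowType::Tumbling` in `fluxus-utils` (`(timestamp / window_size) * window_size`). The standalone sketch below is illustrative only; the helper `tumbling_window_start` and the millisecond timestamps are made up and not part of the crate.

```rust
use std::time::Duration;

/// Start of the tumbling window containing `timestamp_ms`,
/// mirroring the window-key computation in fluxus-utils.
fn tumbling_window_start(timestamp_ms: i64, size: Duration) -> i64 {
    let size_ms = size.as_millis() as i64;
    (timestamp_ms / size_ms) * size_ms
}

fn main() {
    let size = Duration::from_millis(1000);
    // Events at 10_200 ms and 10_500 ms fall into the same 1-second window...
    assert_eq!(tumbling_window_start(10_200, size), 10_000);
    assert_eq!(tumbling_window_start(10_500, size), 10_000);
    // ...while an event at 11_050 ms belongs to the next window.
    assert_eq!(tumbling_window_start(11_050, size), 11_000);
}
```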
4 | 5 | ## Features 6 | 7 | - Event stream processing with timestamps 8 | - Tumbling window implementation 9 | - Event counting and aggregation 10 | - Time-based event analysis 11 | - Real-time event processing 12 | 13 | ## Running the Example 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## Implementation Details 20 | 21 | - Uses tumbling windows with 1-millisecond duration 22 | - Processes various event types (login, click, purchase) 23 | - Groups and counts events by type and timestamp 24 | - Demonstrates window-based aggregation 25 | - Sorts and displays results by event count 26 | 27 | ## Output Example 28 | 29 | ``` 30 | Event counts by timestamp: 31 | 32 | Time window results: 33 | ("click", timestamp): 3 34 | ("login", timestamp): 1 35 | ("purchase", timestamp): 1 36 | ``` 37 | 38 | ## Dependencies 39 | 40 | - fluxus-core 41 | - fluxus-runtime 42 | - fluxus-api 43 | - tokio 44 | - anyhow -------------------------------------------------------------------------------- /examples/event-timestamp/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::utils::{models::Record, window::WindowConfig}; 3 | use fluxus::{ 4 | api::{ 5 | DataStream, 6 | io::{CollectionSink, CollectionSource}, 7 | }, 8 | utils::time::current_time, 9 | }; 10 | use std::{collections::HashMap, time::Duration}; 11 | 12 | pub type EventCount = HashMap<(String, i64), usize>; 13 | 14 | #[tokio::main] 15 | async fn main() -> Result<()> { 16 | // Create timestamped event data 17 | let events = vec![ 18 | Record::with_timestamp("login".to_string(), get_timestamp(0)), 19 | Record::with_timestamp("click".to_string(), get_timestamp(100)), 20 | Record::with_timestamp("click".to_string(), get_timestamp(100)), 21 | Record::with_timestamp("click".to_string(), get_timestamp(100)), 22 | Record::with_timestamp("login".to_string(), get_timestamp(200)), 23 | Record::with_timestamp("purchase".to_string(), get_timestamp(300)), 24 | Record::with_timestamp("click".to_string(), get_timestamp(400)), 25 | Record::with_timestamp("click".to_string(), get_timestamp(400)), 26 | Record::with_timestamp("click".to_string(), get_timestamp(600)), 27 | ]; 28 | 29 | // Create data source and sink 30 | let source = CollectionSource::new(events); 31 | let sink: CollectionSink = CollectionSink::new(); 32 | 33 | // Build and execute stream processing pipeline 34 | DataStream::new(source) 35 | // Create tumbling windows of 1 milliseconds 36 | .window(WindowConfig::tumbling(Duration::from_millis(1))) 37 | // Count events in each time window 38 | .aggregate(HashMap::new(), |mut counts, event| { 39 | *counts.entry((event.data, event.timestamp)).or_insert(0) += 1; 40 | counts 41 | }) 42 | // Write results to sink 43 | .sink(sink.clone()) 44 | .await?; 45 | 46 | // Print results 47 | println!("\nEvent counts by timestamp:"); 48 | if let Some(last_result) = sink.get_last_element() { 49 | println!("\nTime window results:"); 50 | let mut events: Vec<_> = last_result.iter().collect(); 51 | events.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 52 | for (event, count) in events { 53 | println!(" {event:?}: {count}"); 54 | } 55 | } 56 | 57 | Ok(()) 58 | } 59 | 60 | // Helper function: Generate timestamp relative to current time 61 | fn get_timestamp(offset_ms: u64) -> i64 { 62 | let now = current_time() as i64; 63 | now + offset_ms as i64 64 | } 65 | -------------------------------------------------------------------------------- /examples/iot-devices/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "iot-devices" 3 | description = "IoT devices monitoring example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "iot-devices" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/iot-devices/README.md: -------------------------------------------------------------------------------- 1 | 2 | # IoT Device Monitoring Example 3 | 4 | This example demonstrates how to use Fluxus to process and analyze IoT device data streams. It implements real - time monitoring of multiple IoT devices, including data aggregation, statistical analysis, and alarm detection. 5 | 6 | ## Features 7 | 8 | - Multi - device data stream processing 9 | - Sliding window statistical analysis 10 | - Device status monitoring 11 | - Low battery and weak signal alarms 12 | - Real - time data aggregation 13 | 14 | ## Running the Example 15 | 16 | ```bash 17 | cargo run 18 | ``` 19 | 20 | ## Implementation Details 21 | 22 | - Use a 2 - minute sliding window with a 30 - second sliding interval 23 | - Calculate the average value of device data 24 | - Monitor battery level and signal strength 25 | - Count alarm events 26 | - Track the latest update time of devices 27 | 28 | ## Output Example 29 | 30 | ``` 31 | IoT Device Statistics: 32 | Device ID: DEV_001, Type: Temperature Sensor, Average Value: 25.50, Min Battery: 80%, Average Signal: -85dBm, Alert Count: 2 33 | ``` 34 | 35 | ## Dependencies 36 | 37 | - fluxus - core 38 | - fluxus - runtime 39 | - fluxus - api 40 | - tokio 41 | - anyhow 42 | -------------------------------------------------------------------------------- /examples/iot-devices/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | pub struct IoTData { 12 | device_id: String, 13 | device_type: String, 14 | value: f64, 15 | battery_level: u8, 16 | signal_strength: i32, 17 | timestamp: SystemTime, 18 | } 19 | 20 | #[derive(Clone)] 21 | pub struct DeviceStats { 22 | device_id: String, 23 | device_type: String, 24 | avg_value: f64, 25 | min_battery: u8, 26 | avg_signal: i32, 27 | alert_count: u32, 28 | last_update: SystemTime, 29 | } 30 | 31 | #[tokio::main] 32 | async fn main() -> Result<()> { 33 | // Generate sample IoT device data 34 | let iot_data = generate_sample_data(); 35 | let source = CollectionSource::new(iot_data); 36 | let sink = CollectionSink::new(); 37 | 38 | // Build and execute stream processing pipeline 39 | DataStream::new(source) 40 | // Group by device ID 41 | .map(|data| (data.device_id.clone(), data)) 42 | // Create 2-minute sliding window with 30-second slide 43 | .window(WindowConfig::sliding( 44 | Duration::from_secs(120), // 2 minutes 45 | Duration::from_secs(30), // 30 seconds 46 | )) 47 | // Aggregate 
device statistics 48 | .aggregate(HashMap::new(), |mut stats, (device_id, data)| { 49 | let entry = stats 50 | .entry(device_id.clone()) 51 | .or_insert_with(|| DeviceStats { 52 | device_id, 53 | device_type: data.device_type.clone(), 54 | avg_value: 0.0, 55 | min_battery: data.battery_level, 56 | avg_signal: 0, 57 | alert_count: 0, 58 | last_update: data.timestamp, 59 | }); 60 | 61 | // Update statistics 62 | entry.avg_value = (entry.avg_value + data.value) / 2.0; 63 | entry.min_battery = entry.min_battery.min(data.battery_level); 64 | entry.avg_signal = (entry.avg_signal + data.signal_strength) / 2; 65 | entry.last_update = data.timestamp; 66 | 67 | // Check alert conditions 68 | if data.battery_level < 20 || data.signal_strength < -90 { 69 | entry.alert_count += 1; 70 | } 71 | 72 | stats 73 | }) 74 | // Output results to sink 75 | .sink(sink.clone()) 76 | .await?; 77 | 78 | // Print results 79 | println!("\nIoT Device Statistics:"); 80 | for result in sink.get_data() { 81 | for (_, stats) in result { 82 | println!( 83 | "Device ID: {}, Type: {}, Average Value: {:.2}, Min Battery: {}%, Average Signal: {}dBm, Alert Count: {}", 84 | stats.device_id, 85 | stats.device_type, 86 | stats.avg_value, 87 | stats.min_battery, 88 | stats.avg_signal, 89 | stats.alert_count 90 | ); 91 | } 92 | } 93 | 94 | Ok(()) 95 | } 96 | 97 | // Generate sample IoT device data 98 | fn generate_sample_data() -> Vec { 99 | let device_types = [ 100 | "Temperature Sensor", 101 | "Humidity Sensor", 102 | "Pressure Sensor", 103 | "Light Sensor", 104 | ]; 105 | let mut data = Vec::new(); 106 | let start_time = SystemTime::now(); 107 | 108 | for i in 0..100 { 109 | for j in 1..=5 { 110 | let device_type = device_types[j % device_types.len()]; 111 | let base_value = match device_type { 112 | "Temperature Sensor" => 25.0, 113 | "Humidity Sensor" => 60.0, 114 | "Pressure Sensor" => 1013.0, 115 | "Light Sensor" => 500.0, 116 | _ => 0.0, 117 | }; 118 | 119 | // Simulate data fluctuation 120 | let value_variation = (i as f64 * 0.1).sin() * 5.0; 121 | let battery_drain = (i / 20) as u8; // Simulate battery consumption 122 | 123 | let reading = IoTData { 124 | device_id: format!("DEV_{j:03}"), 125 | device_type: device_type.to_string(), 126 | value: base_value + value_variation, 127 | battery_level: 100 - battery_drain, 128 | signal_strength: -70 - (i % 30), // Simulate signal strength fluctuation 129 | timestamp: start_time + Duration::from_secs(i as u64 * 15), // One data point every 15 seconds 130 | }; 131 | data.push(reading); 132 | } 133 | } 134 | 135 | data 136 | } 137 | -------------------------------------------------------------------------------- /examples/log-anomaly/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "log-anomaly" 3 | description = "Log anomaly detection example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "log-anomaly" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/log-anomaly/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Log Anomaly Detection Example 3 | 4 | This example demonstrates how to use Fluxus for real - time log anomaly detection. It analyzes the log streams of multiple microservices to detect error rates and performance anomalies. 5 | 6 | ## Features 7 | 8 | - Multi - service log stream processing 9 | - Error rate statistics 10 | - Latency anomaly detection 11 | - Real - time statistical analysis 12 | - Sliding window aggregation 13 | 14 | ## Running the Example 15 | 16 | ```bash 17 | cargo run 18 | ``` 19 | 20 | ## Implementation Details 21 | 22 | - Use a 1 - minute sliding window with a 10 - second sliding interval. 23 | - Group and count by service name. 24 | - Calculate the error rate and average latency. 25 | - Detect high - latency events (>1 second). 26 | - Update service status statistics in real - time. 27 | 28 | ## Output Example 29 | 30 | ``` 31 | Log Anomaly Detection Statistics: 32 | Service: api - gateway, Error Rate: 5.00%, Avg Latency: 150.25ms, Error Count: 10, High Latency Events: 5, Total Events: 200 33 | ``` 34 | 35 | ## Dependencies 36 | 37 | - fluxus - core 38 | - fluxus - runtime 39 | - fluxus - api 40 | - tokio 41 | - anyhow 42 | -------------------------------------------------------------------------------- /examples/log-anomaly/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::{ 8 | collections::HashMap, 9 | time::{Duration, SystemTime}, 10 | }; 11 | 12 | #[derive(Clone)] 13 | #[allow(dead_code)] 14 | pub struct LogEvent { 15 | service: String, 16 | level: String, 17 | message: String, 18 | latency_ms: u64, 19 | timestamp: SystemTime, 20 | } 21 | 22 | #[derive(Clone)] 23 | pub struct AnomalyStats { 24 | service: String, 25 | error_rate: f64, 26 | avg_latency: f64, 27 | error_count: u32, 28 | high_latency_count: u32, 29 | total_events: u32, 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | // Generate sample log events 35 | let events = generate_sample_events(); 36 | let source = CollectionSource::new(events); 37 | let sink = CollectionSink::new(); 38 | 39 | // Build and execute stream processing pipeline 40 | DataStream::new(source) 41 | // Group by service name 42 | .map(|event| (event.service.clone(), event)) 43 | // Create 1-minute sliding window with 10-second slide 44 | .window(WindowConfig::sliding( 45 | Duration::from_secs(60), // 1 minute 46 | Duration::from_secs(10), // 10 seconds 47 | )) 48 | // Aggregate anomaly statistics 49 | .aggregate(HashMap::new(), |mut stats, (service, event)| { 50 | let entry = stats 51 | .entry(service.clone()) 52 | .or_insert_with(|| AnomalyStats { 53 | service, 54 | error_rate: 0.0, 55 | avg_latency: 0.0, 56 | error_count: 0, 57 | high_latency_count: 0, 58 | total_events: 0, 59 | }); 60 | 61 | // Update statistics 62 | entry.total_events += 1; 63 | entry.avg_latency = (entry.avg_latency * (entry.total_events - 1) as f64 64 | + event.latency_ms as f64) 65 | / entry.total_events as f64; 66 | 67 | // Detect errors and high latency 68 | if event.level == "ERROR" { 69 | entry.error_count += 1; 70 | } 71 | if event.latency_ms > 1000 { 72 | // Latency over 1 second 73 | entry.high_latency_count += 1; 74 | } 75 | 76 | // Calculate error rate 77 | entry.error_rate = entry.error_count as f64 / entry.total_events as 
f64; 78 | 79 | stats 80 | }) 81 | // Output results to sink 82 | .sink(sink.clone()) 83 | .await?; 84 | 85 | // Print results 86 | println!("\nLog Anomaly Detection Statistics:"); 87 | for result in sink.get_data() { 88 | for (_, stats) in result { 89 | println!( 90 | "Service: {}, Error Rate: {:.2}%, Avg Latency: {:.2}ms, Error Count: {}, High Latency Events: {}, Total Events: {}", 91 | stats.service, 92 | stats.error_rate * 100.0, 93 | stats.avg_latency, 94 | stats.error_count, 95 | stats.high_latency_count, 96 | stats.total_events 97 | ); 98 | } 99 | } 100 | 101 | Ok(()) 102 | } 103 | 104 | // Generate sample log events 105 | fn generate_sample_events() -> Vec { 106 | let services = vec![ 107 | "api-gateway", 108 | "user-service", 109 | "order-service", 110 | "payment-service", 111 | ]; 112 | let mut events = Vec::new(); 113 | let start_time = SystemTime::now(); 114 | 115 | for i in 0..200 { 116 | for service in &services { 117 | // Simulate different error probabilities for services 118 | let error_prob = match *service { 119 | "api-gateway" => 0.05, 120 | "user-service" => 0.02, 121 | "order-service" => 0.08, 122 | "payment-service" => 0.03, 123 | _ => 0.01, 124 | }; 125 | 126 | // Randomly select log level 127 | let level = if rand_float() < error_prob { 128 | "ERROR" 129 | } else if rand_float() < 0.15 { 130 | "WARN" 131 | } else { 132 | "INFO" 133 | }; 134 | 135 | // Simulate latency 136 | let base_latency = match *service { 137 | "api-gateway" => 50, 138 | "user-service" => 100, 139 | "order-service" => 150, 140 | "payment-service" => 200, 141 | _ => 100, 142 | }; 143 | 144 | let latency = base_latency + (rand_float() * 1000.0) as u64; 145 | let message = format!("Processing request #{i}"); 146 | 147 | let event = LogEvent { 148 | service: service.to_string(), 149 | level: level.to_string(), 150 | message, 151 | latency_ms: latency, 152 | timestamp: start_time + Duration::from_secs(i as u64 / 2), // One event every 0.5 seconds 153 | }; 154 | events.push(event); 155 | } 156 | } 157 | 158 | events 159 | } 160 | 161 | // Generate random float between 0 and 1 162 | fn rand_float() -> f64 { 163 | use std::time::SystemTime; 164 | let nanos = SystemTime::now() 165 | .duration_since(SystemTime::UNIX_EPOCH) 166 | .expect("System time cannot be earlier than UNIX epoch") 167 | .subsec_nanos() as f64; 168 | (nanos % 1000.0) / 1000.0 169 | } 170 | -------------------------------------------------------------------------------- /examples/network-log/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "network-log" 3 | description = "Network log analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "network-log" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/network-log/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Network Log Analysis Example 3 | 4 | This example demonstrates how to use Fluxus for real - time network log analysis. 
It implements real-time processing of HTTP request logs, including request statistics, error rate analysis, and response size monitoring. 5 | 6 | ## Features 7 | 8 | - Real-time processing of HTTP request logs 9 | - Grouped statistics by path 10 | - Error rate monitoring 11 | - Response size analysis 12 | - Sliding window aggregation 13 | 14 | ## Running the Example 15 | 16 | ```bash 17 | cargo run 18 | ``` 19 | 20 | ## Implementation Details 21 | 22 | - Use a 60-second sliding window with a 10-second sliding interval. 23 | - Group and count requests by API path. 24 | - Calculate the number of requests and errors for each path. 25 | - Monitor changes in response size. 26 | - Calculate the error rate in real time. 27 | 28 | ## Output Example 29 | 30 | ``` 31 | Network log analysis results: 32 | Path: /api/users 33 | Requests: 50 34 | Errors: 5 35 | Avg Size: 1250.50 bytes 36 | Error Rate: 10.0% 37 | ``` 38 | 39 | ## Dependencies 40 | 41 | - fluxus-core 42 | - fluxus-runtime 43 | - fluxus-api 44 | - tokio 45 | -------------------------------------------------------------------------------- /examples/network-log/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | #[allow(dead_code)] 12 | pub struct LogEntry { 13 | ip: String, 14 | method: String, 15 | path: String, 16 | status: u16, 17 | bytes: u64, 18 | timestamp: SystemTime, 19 | } 20 | 21 | #[derive(Clone)] 22 | pub struct PathStats { 23 | path: String, 24 | total_requests: usize, 25 | error_count: usize, 26 | total_bytes: u64, 27 | avg_response_size: f64, 28 | } 29 | 30 | #[tokio::main] 31 | async fn main() -> Result<()> { 32 | // Generate sample log entries 33 | let logs = generate_sample_logs(); 34 | let source = CollectionSource::new(logs); 35 | let sink = CollectionSink::new(); 36 | 37 | // Build and execute the streaming pipeline 38 | DataStream::new(source) 39 | // Group by path 40 | .map(|log| (log.path.clone(), log)) 41 | // Create 60-second sliding windows with 10-second slide 42 | .window(WindowConfig::sliding( 43 | Duration::from_millis(60000), 44 | Duration::from_millis(10000), 45 | )) 46 | // Aggregate path statistics 47 | .aggregate(HashMap::new(), |mut stats, (path, log)| { 48 | let entry = stats.entry(path).or_insert_with(|| PathStats { 49 | path: String::new(), 50 | total_requests: 0, 51 | error_count: 0, 52 | total_bytes: 0, 53 | avg_response_size: 0.0, 54 | }); 55 | 56 | entry.path = log.path; 57 | entry.total_requests += 1; 58 | if log.status >= 400 { 59 | entry.error_count += 1; 60 | } 61 | entry.total_bytes += log.bytes; 62 | entry.avg_response_size = entry.total_bytes as f64 / entry.total_requests as f64; 63 | 64 | stats 65 | }) 66 | .sink(sink.clone()) 67 | .await?; 68 | 69 | // Print results 70 | println!("\nNetwork log analysis results:"); 71 | for window_stats in sink.get_data() { 72 | println!("\nWindow results:"); 73 | for (_, stats) in window_stats { 74 | println!( 75 | "Path: {}\n Requests: {}\n Errors: {}\n Avg Size: {:.2} bytes\n Error Rate: {:.1}%", 76 | stats.path, 77 | stats.total_requests, 78 | stats.error_count, 79 | stats.avg_response_size, 80 | (stats.error_count as f64 / stats.total_requests as f64) * 100.0 81 | ); 82 | } 83 | } 84 | 85 | Ok(()) 86 | } 87 | 88 | // Helper
function to generate sample data 89 | fn generate_sample_logs() -> Vec<LogEntry> { 90 | let start_time = SystemTime::now(); 91 | let mut logs = Vec::new(); 92 | let paths = ["/api/users", "/api/products", "/api/orders", "/health"]; 93 | let methods = ["GET", "POST", "PUT", "DELETE"]; 94 | 95 | for i in 0..200 { 96 | let timestamp = start_time + Duration::from_secs(i as u64 / 4); 97 | let path = paths[i % paths.len()]; 98 | let method = methods[i % methods.len()]; 99 | 100 | // Generate a mix of successful and error responses 101 | let status = if i % 10 == 0 { 102 | 500 // Occasional server errors 103 | } else if i % 7 == 0 { 104 | 404 // Some not found errors 105 | } else { 106 | 200 // Mostly successful 107 | }; 108 | 109 | // Simulate variable response sizes 110 | let bytes = if status == 200 { 111 | 1000 + (i % 5) * 500 // Successful responses have larger sizes 112 | } else { 113 | 100 + (i % 3) * 50 // Error responses are smaller 114 | } as u64; 115 | 116 | logs.push(LogEntry { 117 | ip: format!("192.168.1.{}", i % 256), 118 | method: method.to_string(), 119 | path: path.to_string(), 120 | status, 121 | bytes, 122 | timestamp, 123 | }); 124 | } 125 | 126 | logs 127 | } 128 | -------------------------------------------------------------------------------- /examples/remote-csv/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "remote_csv" 3 | description = "Example of a remote CSV source" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [dependencies] 11 | fluxus-sources = { path = "../../crates/fluxus-sources" } 12 | tokio = { version = "1", features = ["full"] } -------------------------------------------------------------------------------- /examples/remote-csv/README.md: -------------------------------------------------------------------------------- 1 | # Remote CSV Example 2 | 3 | This example demonstrates how to use Fluxus for reading and processing remote CSV data streams. It shows how to connect to a remote CSV file and process its contents line by line. 4 | 5 | ## Features 6 | 7 | - Remote CSV file streaming 8 | - URL-based data source 9 | - Line-by-line processing 10 | - Asynchronous data reading 11 | - Source initialization and cleanup 12 | 13 | ## Running the Example 14 | 15 | ```bash 16 | cargo run 17 | ``` 18 | 19 | ## Implementation Details 20 | 21 | - Connects to a remote CSV file via URL 22 | - Initializes a CSV source stream 23 | - Processes records asynchronously 24 | - Demonstrates proper source cleanup 25 | - Handles streaming termination 26 | 27 | ## Output Example 28 | 29 | ``` 30 | Reading CSV data from: [URL] 31 | Line 1: [CSV record data] 32 | Line 2: [CSV record data] 33 | ... 34 | Done! 35 | ``` 36 | 37 | ## Dependencies 38 | 39 | - fluxus-core 40 | - fluxus-runtime 41 | - fluxus-sources 42 | - tokio -------------------------------------------------------------------------------- /examples/remote-csv/src/main.rs: -------------------------------------------------------------------------------- 1 | use fluxus_sources::{CsvSource, Source}; 2 | 3 | #[tokio::main] 4 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 5 | let url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"; 6 | 7 | println!("Reading CSV data from: {}", url); 8 | 9 | let mut source = CsvSource::from_url(url); 10 | 11 | source.init().await?; 12 | 13 | for i in 0..10 { 14 | match source.next().await?
{ 15 | Some(record) => println!("Line {}: {}", i + 1, record.data), 16 | None => { 17 | println!("End of file reached"); 18 | break; 19 | } 20 | } 21 | } 22 | 23 | source.close().await?; 24 | 25 | println!("Done!"); 26 | 27 | Ok(()) 28 | } 29 | -------------------------------------------------------------------------------- /examples/stock-market/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stock-market" 3 | description = "Stock market analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "stock-market" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/stock-market/README.md: -------------------------------------------------------------------------------- 1 | # Stock Analysis Example 2 | 3 | This example demonstrates how to use Fluxus to monitor real-time stock price fluctuations, analyze trading volume, and generate stock price trend predictions. 4 | 5 | ## Features 6 | 7 | - Monitor real-time stock price fluctuations. 8 | - Analyze trading volume. 9 | - Generate stock price trend predictions. 10 | 11 | ## Running the Example 12 | 13 | ```bash 14 | cargo run 15 | ``` 16 | 17 | ## Implementation Details 18 | 19 | - Use a streaming processing framework to process stock data. 20 | - Filter and aggregate real-time data. 21 | - Apply machine learning models for trend prediction. 22 | 23 | ## Output Example 24 | 25 | ``` 26 | Stock analysis results: 27 | Price trend: Upward 28 | Trading volume: 100000 29 | ... 
30 | ``` 31 | 32 | ## Dependencies 33 | 34 | - fluxus-core 35 | - fluxus-runtime 36 | - fluxus-api 37 | - tokio 38 | - anyhow -------------------------------------------------------------------------------- /examples/stock-market/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::{ 8 | collections::HashMap, 9 | time::{Duration, SystemTime}, 10 | }; 11 | 12 | #[derive(Clone)] 13 | #[allow(dead_code)] 14 | pub struct StockTrade { 15 | symbol: String, 16 | price: f64, 17 | volume: u64, 18 | timestamp: SystemTime, 19 | } 20 | 21 | #[derive(Clone)] 22 | #[allow(dead_code)] 23 | pub struct StockStats { 24 | symbol: String, 25 | vwap: f64, // Volume Weighted Average Price 26 | total_volume: u64, 27 | price_change: f64, 28 | high: f64, 29 | low: f64, 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | // Generate sample stock trading data 35 | let trades = generate_sample_trades(); 36 | let source = CollectionSource::new(trades); 37 | let sink = CollectionSink::new(); 38 | 39 | // Build and execute stream processing pipeline 40 | DataStream::new(source) 41 | // Group by stock symbol 42 | .map(|trade| (trade.symbol.clone(), trade)) 43 | // Create 5-minute sliding window with 1-minute slide 44 | .window(WindowConfig::sliding( 45 | Duration::from_secs(300), // 5 minutes 46 | Duration::from_secs(60), // 1 minute 47 | )) 48 | // Aggregate stock statistics within each window 49 | .aggregate(HashMap::new(), |mut stats, (symbol, trade)| { 50 | let entry = stats.entry(symbol.clone()).or_insert_with(|| StockStats { 51 | symbol, 52 | vwap: 0.0, 53 | total_volume: 0, 54 | price_change: 0.0, 55 | high: trade.price, 56 | low: trade.price, 57 | }); 58 | 59 | // Update statistics 60 | let volume_price = 61 | (entry.vwap * entry.total_volume as f64) + (trade.price * trade.volume as f64); 62 | entry.total_volume += trade.volume; 63 | entry.vwap = volume_price / entry.total_volume as f64; 64 | entry.high = entry.high.max(trade.price); 65 | entry.low = entry.low.min(trade.price); 66 | entry.price_change = entry.high - entry.low; 67 | 68 | stats 69 | }) 70 | // Output results to sink 71 | .sink(sink.clone()) 72 | .await?; 73 | 74 | // Print results 75 | println!("\nStock Market Statistics:"); 76 | for result in sink.get_data() { 77 | for (symbol, stats) in result { 78 | println!( 79 | "Stock: {}, VWAP: {:.2}, Volume: {}, Price Change: {:.2}, High: {:.2}, Low: {:.2}", 80 | symbol, stats.vwap, stats.total_volume, stats.price_change, stats.high, stats.low 81 | ); 82 | } 83 | } 84 | 85 | Ok(()) 86 | } 87 | 88 | // Generate sample trading data 89 | fn generate_sample_trades() -> Vec<StockTrade> { 90 | let symbols = vec!["AAPL", "GOOGL", "MSFT", "AMZN"]; 91 | let mut trades = Vec::new(); 92 | let start_time = SystemTime::now(); 93 | 94 | for i in 0..100 { 95 | for symbol in &symbols { 96 | let base_price = match *symbol { 97 | "AAPL" => 150.0, 98 | "GOOGL" => 2800.0, 99 | "MSFT" => 300.0, 100 | "AMZN" => 3300.0, 101 | _ => 100.0, 102 | }; 103 | 104 | // Simulate price fluctuation 105 | let price_variation = (i as f64 * 0.1).sin() * 5.0; 106 | let trade = StockTrade { 107 | symbol: symbol.to_string(), 108 | price: base_price + price_variation, 109 | volume: 100 + (i as u64 % 900), 110 | timestamp: start_time + Duration::from_secs(i as u64 * 30), // Data point every 30 seconds 111 | }; 112 |
trades.push(trade); 113 | } 114 | } 115 | 116 | trades 117 | } 118 | -------------------------------------------------------------------------------- /examples/temperature-sensor/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "temperature-sensor" 3 | description = "Temperature sensor analysis example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "temperature-sensor" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" -------------------------------------------------------------------------------- /examples/temperature-sensor/README.md: -------------------------------------------------------------------------------- 1 | # Temperature Analysis Example 2 | 3 | This example demonstrates how to use Fluxus to monitor real-time temperature data, analyze temperature change trends, and detect abnormal temperature values. 4 | 5 | ## Features 6 | 7 | - Monitor real-time temperature data. 8 | - Analyze temperature change trends. 9 | - Detect abnormal temperature values. 10 | 11 | ## Running the Example 12 | 13 | ## Implementation Details 14 | 15 | - Use a streaming processing framework to process temperature data. 16 | - Filter and aggregate real-time data. 17 | - Apply a threshold detection algorithm to identify abnormal temperatures. 18 | 19 | ## Output Example 20 | 21 | ``` 22 | Temperature analysis results: 23 | 24 | Window results: 25 | Sensor sensor3: 100 readings, Avg: 25.0°C, Min: 22.0°C, Max: 28.0°C, Avg Humidity: 60.0% 26 | Sensor sensor1: 100 readings, Avg: 20.4°C, Min: 18.0°C, Max: 22.0°C, Avg Humidity: 49.8% 27 | Sensor sensor2: 100 readings, Avg: 26.9°C, Min: 22.0°C, Max: 31.9°C, Avg Humidity: 64.9% 28 | ``` 29 | 30 | ## Dependencies 31 | 32 | - fluxus-core 33 | - fluxus-runtime 34 | - fluxus-api 35 | - tokio 36 | - anyhow 37 | -------------------------------------------------------------------------------- /examples/temperature-sensor/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::{Duration, SystemTime}; 9 | 10 | #[derive(Clone)] 11 | pub struct SensorReading { 12 | sensor_id: String, 13 | temperature: f64, 14 | humidity: f64, 15 | timestamp: SystemTime, 16 | } 17 | 18 | #[derive(Clone)] 19 | pub struct SensorStats { 20 | sensor_id: String, 21 | avg_temperature: f64, 22 | avg_humidity: f64, 23 | min_temperature: f64, 24 | max_temperature: f64, 25 | reading_count: usize, 26 | } 27 | 28 | #[tokio::main] 29 | async fn main() -> Result<()> { 30 | // Generate sample temperature readings 31 | let readings = generate_sample_readings(); 32 | let source = CollectionSource::new(readings); 33 | let sink = CollectionSink::new(); 34 | 35 | // Build and execute the streaming pipeline 36 | DataStream::new(source) 37 | // Group by sensor_id 38 | .map(|reading| { 39 | ( 40 | reading.sensor_id.clone(), 41 | (reading.temperature, reading.humidity, reading.timestamp), 42 | ) 43 | })
44 | // Create 10-second tumbling windows 45 | .window(WindowConfig::tumbling(Duration::from_millis(10000))) 46 | // Aggregate temperatures in each window 47 | .aggregate( 48 | HashMap::new(), 49 | |mut stats, (sensor_id, (temp, humidity, _))| { 50 | let entry = stats 51 | .entry(sensor_id.clone()) 52 | .or_insert_with(|| SensorStats { 53 | sensor_id: String::new(), 54 | avg_temperature: 0.0, 55 | avg_humidity: 0.0, 56 | min_temperature: f64::MAX, 57 | max_temperature: f64::MIN, 58 | reading_count: 0, 59 | }); 60 | 61 | entry.sensor_id = sensor_id; 62 | entry.min_temperature = entry.min_temperature.min(temp); 63 | entry.max_temperature = entry.max_temperature.max(temp); 64 | entry.avg_temperature = (entry.avg_temperature * entry.reading_count as f64 + temp) 65 | / (entry.reading_count + 1) as f64; 66 | entry.avg_humidity = (entry.avg_humidity * entry.reading_count as f64 + humidity) 67 | / (entry.reading_count + 1) as f64; 68 | entry.reading_count += 1; 69 | 70 | stats 71 | }, 72 | ) 73 | .sink(sink.clone()) 74 | .await?; 75 | 76 | // Print results 77 | println!("\nTemperature analysis results:"); 78 | for window_stats in sink.get_data() { 79 | println!("\nWindow results:"); 80 | for (_, stats) in window_stats { 81 | println!( 82 | "Sensor {}: {} readings, Avg: {:.1}°C, Min: {:.1}°C, Max: {:.1}°C, Avg Humidity: {:.1}%", 83 | stats.sensor_id, 84 | stats.reading_count, 85 | stats.avg_temperature, 86 | stats.min_temperature, 87 | stats.max_temperature, 88 | stats.avg_humidity, 89 | ); 90 | } 91 | } 92 | 93 | Ok(()) 94 | } 95 | 96 | // Helper function to generate sample data 97 | fn generate_sample_readings() -> Vec<SensorReading> { 98 | let start_time = SystemTime::now(); 99 | let mut readings = Vec::new(); 100 | 101 | for i in 0..100 { 102 | let timestamp = start_time + Duration::from_secs(i as u64 / 10); 103 | 104 | // Sensor 1: Normal temperature variations 105 | readings.push(SensorReading { 106 | sensor_id: "sensor1".to_string(), 107 | temperature: 20.0 + (i as f64 / 10.0).sin() * 2.0, 108 | humidity: 50.0 + (i as f64 / 10.0).cos() * 5.0, 109 | timestamp, 110 | }); 111 | 112 | // Sensor 2: Gradually increasing temperature 113 | readings.push(SensorReading { 114 | sensor_id: "sensor2".to_string(), 115 | temperature: 22.0 + i as f64 * 0.1, 116 | humidity: 55.0 + i as f64 * 0.2, 117 | timestamp, 118 | }); 119 | 120 | // Sensor 3: Random fluctuations 121 | readings.push(SensorReading { 122 | sensor_id: "sensor3".to_string(), 123 | temperature: 25.0 + (i as f64 * 0.7).cos() * 3.0, 124 | humidity: 60.0 + (i as f64 * 0.5).sin() * 4.0, 125 | timestamp, 126 | }); 127 | } 128 | 129 | readings 130 | } 131 | -------------------------------------------------------------------------------- /examples/word-count/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "word-count" 3 | description = "Word count example for Fluxus stream processing engine" 4 | publish = false 5 | version = "0.1.0" 6 | edition = "2024" 7 | license = "Apache-2.0" 8 | readme = "./README.md" 9 | 10 | [[example]] 11 | name = "word-count" 12 | path = "src/main.rs" 13 | 14 | [dependencies] 15 | fluxus = { path = "../../crates/fluxus", features = ["full"] } 16 | 17 | tokio = { version = "1", features = ["full"] } 18 | anyhow = "1.0" 19 | clap = { version = "4.0", features = ["derive"] } 20 | tracing-subscriber = "0.3.19" 21 | tracing = "0.1.41" 22 | -------------------------------------------------------------------------------- /examples/word-count/README.md:
-------------------------------------------------------------------------------- 1 | # Word Count Example 2 | 3 | This example demonstrates how to use Fluxus to count the number of words in input text, filter common stop words, and sort the results by word frequency. 4 | 5 | ## Features 6 | 7 | - Count the number of words in input text. 8 | - Support filtering common stop words. 9 | - Output results sorted by word frequency. 10 | 11 | ## Running the Example 12 | 13 | ```bash 14 | cargo run 15 | ``` 16 | 17 | ## Implementation Details 18 | 19 | - Use a streaming processing framework to process text data. 20 | - Tokenize the input text. 21 | - Count the occurrence of each word. 22 | - Filter stop words and sort the output. 23 | 24 | ## Output Example 25 | 26 | ``` 27 | Word count results: 28 | The: 10 29 | And: 8 30 | ... 31 | ``` 32 | 33 | ## Dependencies 34 | 35 | - fluxus-core 36 | - fluxus-runtime 37 | - fluxus-api 38 | - tokio 39 | - anyhow -------------------------------------------------------------------------------- /examples/word-count/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use fluxus::api::{ 3 | DataStream, 4 | io::{CollectionSink, CollectionSource}, 5 | }; 6 | use fluxus::utils::window::WindowConfig; 7 | use std::collections::HashMap; 8 | use std::time::Duration; 9 | 10 | pub type WordCount = HashMap<String, usize>; 11 | 12 | #[tokio::main] 13 | async fn main() -> Result<()> { 14 | // Sample input text 15 | let text = vec![ 16 | "hello world", 17 | "hello stream processing", 18 | "world of streaming", 19 | "hello streaming world", 20 | ]; 21 | 22 | // Create a source from the text collection 23 | let source = CollectionSource::new(text); 24 | let sink: CollectionSink<WordCount> = CollectionSink::new(); 25 | 26 | // Build and execute the streaming pipeline 27 | DataStream::new(source) 28 | .filter(|line| line.starts_with("hello")) 29 | // Split text into words 30 | .map(|line| { 31 | line.split_whitespace() 32 | .map(|s| s.to_lowercase()) 33 | .collect::<Vec<String>>() 34 | }) 35 | // Parallelize the processing 36 | .parallel(2) 37 | // Create tumbling windows of 1 second 38 | .window(WindowConfig::tumbling(Duration::from_millis(1000))) 39 | // Count words in each window 40 | .aggregate(HashMap::new(), |mut counts, words| { 41 | for word in words { 42 | *counts.entry(word).or_insert(0) += 1; 43 | } 44 | counts 45 | }) 46 | // Write results to sink 47 | .sink(sink.clone()) 48 | .await?; 49 | 50 | // Print the results 51 | println!("\nWord count last result:"); 52 | let last_result = sink.get_last_element().unwrap(); 53 | let mut words: Vec<_> = last_result.iter().collect(); 54 | words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 55 | for (word, count) in words { 56 | println!(" {word}: {count}"); 57 | } 58 | 59 | println!("\nWord count results:"); 60 | for result in sink.get_data() { 61 | println!("\nWindow results:"); 62 | let mut words: Vec<_> = result.iter().collect(); 63 | words.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); 64 | for (word, count) in words { 65 | println!(" {word}: {count}"); 66 | } 67 | } 68 | 69 | Ok(()) 70 | } 71 | --------------------------------------------------------------------------------