├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── doc-deploy.yml │ ├── docker.yml │ ├── rust.yml │ └── test-doc-deploy.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── README_zh.md ├── crates ├── arkflow-core │ ├── Cargo.toml │ └── src │ │ ├── buffer │ │ └── mod.rs │ │ ├── cli │ │ └── mod.rs │ │ ├── codec │ │ └── mod.rs │ │ ├── config.rs │ │ ├── engine │ │ └── mod.rs │ │ ├── input │ │ └── mod.rs │ │ ├── lib.rs │ │ ├── output │ │ └── mod.rs │ │ ├── pipeline │ │ └── mod.rs │ │ ├── processor │ │ └── mod.rs │ │ ├── stream │ │ └── mod.rs │ │ └── temporary │ │ └── mod.rs ├── arkflow-plugin │ ├── Cargo.toml │ └── src │ │ ├── buffer │ │ ├── join.rs │ │ ├── memory.rs │ │ ├── mod.rs │ │ ├── session_window.rs │ │ ├── sliding_window.rs │ │ ├── tumbling_window.rs │ │ └── window.rs │ │ ├── codec │ │ ├── json.rs │ │ └── mod.rs │ │ ├── component │ │ ├── json.rs │ │ ├── mod.rs │ │ ├── redis.rs │ │ └── sql.rs │ │ ├── expr │ │ └── mod.rs │ │ ├── input │ │ ├── generate.rs │ │ ├── http.rs │ │ ├── kafka.rs │ │ ├── memory.rs │ │ ├── mod.rs │ │ ├── modbus.rs │ │ ├── mqtt.rs │ │ ├── multiple_inputs.rs │ │ ├── nats.rs │ │ ├── redis.rs │ │ ├── sql.rs │ │ └── websocket.rs │ │ ├── lib.rs │ │ ├── output │ │ ├── drop.rs │ │ ├── http.rs │ │ ├── kafka.rs │ │ ├── mod.rs │ │ ├── mqtt.rs │ │ ├── nats.rs │ │ ├── redis.rs │ │ ├── sql.rs │ │ └── stdout.rs │ │ ├── processor │ │ ├── batch.rs │ │ ├── json.rs │ │ ├── mod.rs │ │ ├── protobuf.rs │ │ ├── sql.rs │ │ └── vrl.rs │ │ ├── temporary │ │ ├── mod.rs │ │ └── redis.rs │ │ ├── time │ │ └── mod.rs │ │ └── udf │ │ ├── aggregate_udf.rs │ │ ├── mod.rs │ │ ├── scalar_udf.rs │ │ └── window_udf.rs └── arkflow │ ├── Cargo.toml │ └── src │ └── main.rs ├── docker └── Dockerfile ├── docs ├── .gitignore ├── README.md ├── blog │ ├── 2025 │ │ ├── 04 │ │ │ ├── 01-v0.2.0-rc1.md │ │ │ └── 15-v0.2.0-release.md │ │ └── 05 │ │ │ └── 09-v0.3.0-release.md │ ├── authors.yml │ └── tags.yml ├── docs │ ├── 0-intro.md │ ├── 999-about-logo.md │ ├── components │ │ ├── 0-inputs │ │ │ ├── _category_.json │ │ │ ├── generate.md │ │ │ ├── http.md │ │ │ ├── kafka.md │ │ │ ├── memory.md │ │ │ ├── modbus.md │ │ │ ├── mqtt.md │ │ │ ├── nats.md │ │ │ ├── redis.md │ │ │ └── sql.md │ │ ├── 1-buffers │ │ │ ├── _category_.json │ │ │ ├── memory.md │ │ │ ├── session_window.md │ │ │ ├── sliding_window.md │ │ │ └── tumbling_window.md │ │ ├── 2-processors │ │ │ ├── _category_.json │ │ │ ├── batch.md │ │ │ ├── json.md │ │ │ ├── protobuf.md │ │ │ ├── sql.md │ │ │ └── vrl.md │ │ ├── 3-outputs │ │ │ ├── _category_.json │ │ │ ├── drop.md │ │ │ ├── http.md │ │ │ ├── kafka.md │ │ │ ├── mqtt.md │ │ │ ├── nats.md │ │ │ └── stdout.md │ │ └── _category_.json │ ├── deploy │ │ ├── _category_.json │ │ └── k8s-deployment.md │ ├── logo.svg │ └── sql │ │ ├── 0-data-types.md │ │ ├── 1-operators.md │ │ ├── 2-select.md │ │ ├── 4-subqueries.md │ │ ├── 5-aggregate_functions.md │ │ ├── 6-window_functions.md │ │ ├── 7-scalar_functions.md │ │ ├── 8-special_functions.md │ │ ├── 9-udf.md │ │ └── _category_.json ├── docusaurus.config.ts ├── package.json ├── pnpm-lock.yaml ├── sidebars.ts ├── src │ ├── components │ │ └── HomepageFeatures │ │ │ ├── index.tsx │ │ │ └── styles.module.css │ ├── css │ │ └── custom.css │ └── pages │ │ ├── index.module.css │ │ ├── index.tsx │ │ └── markdown-page.md ├── static │ ├── .nojekyll │ └── img │ │ ├── docusaurus-social-card.jpg │ │ ├── docusaurus.png │ │ ├── favicon.ico │ │ ├── favicon.svg │ │ ├── home-1.svg │ │ ├── home-2.svg 
│ │ ├── home-3.svg │ │ ├── home-4.svg │ │ ├── logo-1.svg │ │ ├── logo-cp.svg │ │ ├── logo.svg │ │ ├── logo2.svg │ │ ├── undraw_docusaurus_mountain.svg │ │ ├── undraw_docusaurus_react.svg │ │ └── undraw_docusaurus_tree.svg ├── tsconfig.json ├── versioned_docs │ ├── version-0.2.x │ │ ├── 0-intro.md │ │ ├── components │ │ │ ├── 0-inputs │ │ │ │ ├── _category_.json │ │ │ │ ├── generate.md │ │ │ │ ├── http.md │ │ │ │ ├── kafka.md │ │ │ │ ├── memory.md │ │ │ │ ├── mqtt.md │ │ │ │ └── sql.md │ │ │ ├── 1-buffers │ │ │ │ ├── _category_.json │ │ │ │ └── memory.md │ │ │ ├── 2-processors │ │ │ │ ├── _category_.json │ │ │ │ ├── batch.md │ │ │ │ ├── json.md │ │ │ │ ├── protobuf.md │ │ │ │ └── sql.md │ │ │ ├── 3-outputs │ │ │ │ ├── _category_.json │ │ │ │ ├── drop.md │ │ │ │ ├── http.md │ │ │ │ ├── kafka.md │ │ │ │ ├── mqtt.md │ │ │ │ └── stdout.md │ │ │ └── _category_.json │ │ ├── deploy │ │ │ ├── _category_.json │ │ │ └── k8s-deployment.md │ │ └── sql │ │ │ ├── 0-data-types.md │ │ │ ├── 1-operators.md │ │ │ ├── 2-select.md │ │ │ ├── 4-subqueries.md │ │ │ ├── 5-aggregate_functions.md │ │ │ ├── 6-window_functions.md │ │ │ ├── 7-scalar_functions.md │ │ │ ├── 8-special_functions.md │ │ │ └── _category_.json │ └── version-0.3.x │ │ ├── 0-intro.md │ │ ├── 999-about-logo.md │ │ ├── architecture.svg │ │ ├── components │ │ ├── 0-inputs │ │ │ ├── _category_.json │ │ │ ├── generate.md │ │ │ ├── http.md │ │ │ ├── kafka.md │ │ │ ├── memory.md │ │ │ ├── mqtt.md │ │ │ ├── nats.md │ │ │ ├── redis.md │ │ │ └── sql.md │ │ ├── 1-buffers │ │ │ ├── _category_.json │ │ │ ├── memory.md │ │ │ ├── session_window.md │ │ │ ├── sliding_window.md │ │ │ └── tumbling_window.md │ │ ├── 2-processors │ │ │ ├── _category_.json │ │ │ ├── batch.md │ │ │ ├── json.md │ │ │ ├── protobuf.md │ │ │ ├── sql.md │ │ │ └── vrl.md │ │ ├── 3-outputs │ │ │ ├── _category_.json │ │ │ ├── drop.md │ │ │ ├── http.md │ │ │ ├── kafka.md │ │ │ ├── mqtt.md │ │ │ ├── nats.md │ │ │ └── stdout.md │ │ └── _category_.json │ │ ├── deploy │ │ ├── _category_.json │ │ └── k8s-deployment.md │ │ ├── logo.svg │ │ └── sql │ │ ├── 0-data-types.md │ │ ├── 1-operators.md │ │ ├── 2-select.md │ │ ├── 4-subqueries.md │ │ ├── 5-aggregate_functions.md │ │ ├── 6-window_functions.md │ │ ├── 7-scalar_functions.md │ │ ├── 8-special_functions.md │ │ ├── 9-udf.md │ │ └── _category_.json ├── versioned_sidebars │ ├── version-0.2.x-sidebars.json │ └── version-0.3.x-sidebars.json └── versions.json ├── examples ├── drop_output_example.yaml ├── generate_example.yaml ├── http_client_example.yaml ├── http_server_example.yaml ├── input_data.csv ├── join_buffer_example.yaml ├── kafka_example.yaml ├── message.proto ├── mqtt_example.yaml ├── nats_input_example.yaml ├── nats_output_example.yaml ├── protobuf_example.yaml ├── redis_input_example.yaml ├── redis_output_example.yaml ├── redis_temporary_example.yaml ├── sql_input_example.yaml ├── sql_output_example.yaml ├── stream_data.json ├── vrl_example.yaml └── websocket_input_example.yaml └── logo.svg /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. 
See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | allow: 9 | - dependency-type: direct 10 | - dependency-type: indirect -------------------------------------------------------------------------------- /.github/workflows/doc-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | # Review gh actions docs if you want to further define triggers, paths, etc 8 | # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on 9 | 10 | jobs: 11 | build: 12 | name: Build Docusaurus 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | - uses: pnpm/action-setup@v2 19 | with: 20 | version: 6.32.9 21 | - uses: actions/setup-node@v4 22 | with: 23 | node-version: '18' 24 | cache: 'pnpm' 25 | cache-dependency-path: ./docs/pnpm-lock.yaml 26 | 27 | - name: Install dependencies 28 | working-directory: ./docs 29 | run: pnpm install 30 | - name: Build website 31 | working-directory: ./docs 32 | run: pnpm run build 33 | 34 | - name: Upload Build Artifact 35 | id: deployment 36 | uses: actions/upload-pages-artifact@v3 37 | with: 38 | path: ./docs/build 39 | 40 | deploy: 41 | # Add a dependency to the build job 42 | needs: build 43 | 44 | # Grant GITHUB_TOKEN the permissions required to make a Pages deployment 45 | permissions: 46 | pages: write # to deploy to Pages 47 | id-token: write # to verify the deployment originates from an appropriate source 48 | 49 | # Deploy to the github-pages environment 50 | environment: 51 | name: github-pages 52 | url: ${{ steps.deployment.outputs.page_url }} 53 | 54 | # Specify runner + deployment 
step 55 | runs-on: ubuntu-latest 56 | steps: 57 | - name: Deploy to GitHub Pages 58 | id: deployment 59 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | CMAKE_POLICY_VERSION_MINIMUM: 3.5 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Install Protobuf Compiler 20 | run: sudo apt-get update && sudo apt-get install -y protobuf-compiler 21 | - name: Set PROTOC Environment Variable 22 | run: echo "PROTOC=$(which protoc)" >> $GITHUB_ENV 23 | - name: Build 24 | run: cargo build --verbose 25 | - name: Run tests 26 | run: cargo test --verbose 27 | -------------------------------------------------------------------------------- /.github/workflows/test-doc-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Test deployment 2 | 3 | on: 4 | pull_request: 5 | # Review gh actions docs if you want to further define triggers, paths, etc 6 | # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on 7 | 8 | jobs: 9 | build: 10 | name: Build Docusaurus 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | - uses: pnpm/action-setup@v2 17 | with: 18 | version: 6.32.9 19 | - uses: actions/setup-node@v4 20 | with: 21 | node-version: '18' 22 | cache: 'pnpm' 23 | cache-dependency-path: ./docs/pnpm-lock.yaml 24 | 25 | - name: Install dependencies 26 | working-directory: ./docs 27 | run: pnpm install 28 | - name: Build website 29 | working-directory: ./docs 30 | run: pnpm run build 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | .idea 4 | 5 | .DS_Store 6 | 7 | examples/output.txt 8 | 9 | .run -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace.package] 2 | version = "0.3.1" 3 | edition = "2021" 4 | description = "High-performance Rust flow processing engine" 5 | authors = ["chenquan "] 6 | repository = "https://github.com/arkflow-rs/arkflow" 7 | homepage = "https://github.com/arkflow-rs/arkflow" 8 | license = "Apache-2.0" 9 | 10 | [workspace] 11 | members = ["crates/arkflow-plugin", "crates/arkflow-core", "crates/arkflow"] 12 | 13 | resolver = "2" 14 | 15 | [workspace.dependencies] 16 | tokio = { version = "1", features = ["full"] } 17 | tokio-util = "0.7.15" 18 | async-trait = "0.1" 19 | futures = "0.3" 20 | futures-util = "0.3" 21 | serde = { version = "1", features = ["derive"] } 22 | serde_json = "1.0" 23 | serde_yaml = "0.9" 24 | humantime = "2.1.0" 25 | thiserror = "2.0" 26 | anyhow = "1.0" 27 | tracing = "0.1" 28 | tracing-subscriber = { version = "0.3", features = ["std", "fmt", "json"] } 29 | prometheus = "0.13" 30 | datafusion = "46" 31 | datafusion-functions-json = "0.46.0" 32 | arrow-json = "54" 33 | prost-reflect = "0.14.7" 34 | prost-types = "0.13.5" 35 | protobuf-parse = "3.7.2" 36 | protobuf = "3.7.2" 37 | toml = "0.8" 38 | lazy_static = "1.4" 39 | axum = "0.7" 40 | reqwest = { version = 
"0.12", features = ["json"] } 41 | clap = { version = "4.5", features = ["derive"] } 42 | colored = "3.0" 43 | flume = "=0.11" 44 | rumqttc = "0.24.0" 45 | 46 | # Sql 47 | sqlx = { version = "0.8", features = ["mysql", "postgres", "runtime-tokio", "tls-native-tls"] } 48 | 49 | # Kafka 50 | aws-msk-iam-sasl-signer = "1.0.0" 51 | rdkafka = { version = "0.37", features = ["cmake-build", "tracing", "sasl", "ssl-vendored", "zstd"] } 52 | rdkafka-sys = "4.8.0" 53 | sasl2-sys = { version = "0.1.22", features = ["vendored"] } 54 | tempfile = "3.20.0" 55 | 56 | mockall = "0.12" 57 | arkflow-core = { path = "crates/arkflow-core" } 58 | arkflow-plugin = { path = "crates/arkflow-plugin" } 59 | 60 | [profile.release] 61 | codegen-units = 1 62 | lto = true 63 | opt-level = 3 -------------------------------------------------------------------------------- /crates/arkflow-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arkflow-core" 3 | version.workspace = true 4 | edition.workspace = true 5 | description.workspace = true 6 | authors.workspace = true 7 | repository.workspace = true 8 | homepage.workspace = true 9 | license.workspace = true 10 | 11 | [dependencies] 12 | tokio = { workspace = true } 13 | tokio-util = { workspace = true, features = ["rt"] } 14 | async-trait = { workspace = true } 15 | futures = { workspace = true } 16 | serde = { workspace = true } 17 | serde_json = { workspace = true } 18 | serde_yaml = { workspace = true } 19 | toml = { workspace = true } 20 | thiserror = { workspace = true } 21 | anyhow = { workspace = true } 22 | tracing = { workspace = true } 23 | tracing-subscriber = { workspace = true } 24 | datafusion = { workspace = true } 25 | lazy_static = { workspace = true } 26 | clap = { workspace = true } 27 | colored = { workspace = true } 28 | flume = { workspace = true } 29 | axum = { workspace = true } 30 | num_cpus = "1.16.0" -------------------------------------------------------------------------------- /crates/arkflow-core/src/buffer/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | use crate::input::Ack; 16 | use crate::{Error, MessageBatch, Resource}; 17 | use async_trait::async_trait; 18 | use serde::{Deserialize, Serialize}; 19 | use std::collections::HashMap; 20 | use std::sync::{Arc, RwLock}; 21 | 22 | lazy_static::lazy_static! 
{ 23 | static ref BUFFER_BUILDERS: RwLock<HashMap<String, Arc<dyn BufferBuilder>>> = RwLock::new(HashMap::new()); 24 | } 25 | 26 | #[async_trait] 27 | pub trait Buffer: Send + Sync { 28 | async fn write(&self, msg: MessageBatch, arc: Arc<dyn Ack>) -> Result<(), Error>; 29 | 30 | async fn read(&self) -> Result<Option<(MessageBatch, Arc<dyn Ack>)>, Error>; 31 | 32 | async fn flush(&self) -> Result<(), Error>; 33 | 34 | async fn close(&self) -> Result<(), Error>; 35 | } 36 | 37 | /// Buffer builder 38 | pub trait BufferBuilder: Send + Sync { 39 | fn build( 40 | &self, 41 | name: Option<&String>, 42 | config: &Option<serde_json::Value>, 43 | resource: &Resource, 44 | ) -> Result<Arc<dyn Buffer>, Error>; 45 | } 46 | 47 | /// Buffer configuration 48 | #[derive(Debug, Clone, Serialize, Deserialize)] 49 | pub struct BufferConfig { 50 | #[serde(rename = "type")] 51 | pub buffer_type: String, 52 | pub name: Option<String>, 53 | #[serde(flatten)] 54 | pub config: Option<serde_json::Value>, 55 | } 56 | 57 | impl BufferConfig { 58 | /// Building buffer components 59 | pub fn build(&self, resource: &Resource) -> Result<Arc<dyn Buffer>, Error> { 60 | let builders = BUFFER_BUILDERS.read().unwrap(); 61 | 62 | if let Some(builder) = builders.get(&self.buffer_type) { 63 | builder.build(self.name.as_ref(), &self.config, resource) 64 | } else { 65 | Err(Error::Config(format!( 66 | "Unknown buffer type: {}", 67 | self.buffer_type 68 | ))) 69 | } 70 | } 71 | } 72 | 73 | pub fn register_buffer_builder( 74 | type_name: &str, 75 | builder: Arc<dyn BufferBuilder>, 76 | ) -> Result<(), Error> { 77 | let mut builders = BUFFER_BUILDERS.write().unwrap(); 78 | if builders.contains_key(type_name) { 79 | return Err(Error::Config(format!( 80 | "Buffer type already registered: {}", 81 | type_name 82 | ))); 83 | } 84 | builders.insert(type_name.to_string(), builder); 85 | Ok(()) 86 | } 87 | -------------------------------------------------------------------------------- /crates/arkflow-core/src/codec/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use crate::{Error, MessageBatch, Resource}; 15 | use serde::{Deserialize, Serialize}; 16 | use std::collections::HashMap; 17 | use std::sync::{Arc, RwLock}; 18 | 19 | lazy_static::lazy_static!
{ 20 | static ref CODEC_BUILDERS: RwLock>> = RwLock::new(HashMap::new()); 21 | } 22 | 23 | pub trait Encoder: Send + Sync { 24 | fn encode(&self, b: MessageBatch) -> Result; 25 | } 26 | 27 | pub trait Decoder: Send + Sync { 28 | fn decode(&self, b: MessageBatch) -> Result; 29 | } 30 | 31 | pub trait Codec: Encoder + Decoder {} 32 | 33 | // Implement the Codec trait for any type that implements Encoder and Decoder 34 | impl Codec for T where T: Encoder + Decoder {} 35 | 36 | pub trait CodecBuilder: Send + Sync { 37 | fn build( 38 | &self, 39 | name: Option<&String>, 40 | config: &Option, 41 | resource: &Resource, 42 | ) -> Result, Error>; 43 | } 44 | 45 | /// Buffer configuration 46 | #[derive(Debug, Clone, Serialize, Deserialize)] 47 | pub struct CodecConfig { 48 | #[serde(rename = "type")] 49 | pub codec_type: String, 50 | pub name: Option, 51 | #[serde(flatten)] 52 | pub config: Option, 53 | } 54 | 55 | impl CodecConfig { 56 | /// Building codec components 57 | pub fn build(&self, resource: &Resource) -> Result, Error> { 58 | let builders = CODEC_BUILDERS.read().unwrap(); 59 | 60 | if let Some(builder) = builders.get(&self.codec_type) { 61 | builder.build(self.name.as_ref(), &self.config, resource) 62 | } else { 63 | Err(Error::Config(format!( 64 | "Unknown codec type: {}", 65 | self.codec_type 66 | ))) 67 | } 68 | } 69 | } 70 | 71 | pub fn register_codec_builder( 72 | type_name: &str, 73 | builder: Arc, 74 | ) -> Result<(), Error> { 75 | let mut builders = CODEC_BUILDERS.write().unwrap(); 76 | if builders.contains_key(type_name) { 77 | return Err(Error::Config(format!( 78 | "Codec type already registered: {}", 79 | type_name 80 | ))); 81 | } 82 | builders.insert(type_name.to_string(), builder); 83 | Ok(()) 84 | } 85 | -------------------------------------------------------------------------------- /crates/arkflow-core/src/output/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Output component module 16 | //! 17 | //! The output component is responsible for sending the processed data to the target system. 18 | 19 | use async_trait::async_trait; 20 | use serde::{Deserialize, Serialize}; 21 | use std::collections::HashMap; 22 | use std::sync::{Arc, RwLock}; 23 | 24 | use crate::{Error, MessageBatch, Resource}; 25 | 26 | lazy_static::lazy_static! 
{ 27 | static ref OUTPUT_BUILDERS: RwLock>> = RwLock::new(HashMap::new()); 28 | } 29 | /// Feature interface of the output component 30 | #[async_trait] 31 | pub trait Output: Send + Sync { 32 | /// Connect to the output destination 33 | async fn connect(&self) -> Result<(), Error>; 34 | 35 | /// Write a message to the output destination 36 | async fn write(&self, msg: MessageBatch) -> Result<(), Error>; 37 | 38 | /// Close the output destination connection 39 | async fn close(&self) -> Result<(), Error>; 40 | } 41 | 42 | /// Output configuration 43 | #[derive(Debug, Clone, Serialize, Deserialize)] 44 | pub struct OutputConfig { 45 | #[serde(rename = "type")] 46 | pub output_type: String, 47 | pub name: Option, 48 | #[serde(flatten)] 49 | pub config: Option, 50 | } 51 | 52 | impl OutputConfig { 53 | /// Build the output component according to the configuration 54 | pub fn build(&self, resource: &Resource) -> Result, Error> { 55 | let builders = OUTPUT_BUILDERS.read().unwrap(); 56 | 57 | if let Some(builder) = builders.get(&self.output_type) { 58 | builder.build(self.name.as_ref(), &self.config, resource) 59 | } else { 60 | Err(Error::Config(format!( 61 | "Unknown output type: {}", 62 | self.output_type 63 | ))) 64 | } 65 | } 66 | } 67 | 68 | pub trait OutputBuilder: Send + Sync { 69 | fn build( 70 | &self, 71 | name: Option<&String>, 72 | config: &Option, 73 | resource: &Resource, 74 | ) -> Result, Error>; 75 | } 76 | 77 | pub fn register_output_builder( 78 | type_name: &str, 79 | builder: Arc, 80 | ) -> Result<(), Error> { 81 | let mut builders = OUTPUT_BUILDERS.write().unwrap(); 82 | if builders.contains_key(type_name) { 83 | return Err(Error::Config(format!( 84 | "Output type already registered: {}", 85 | type_name 86 | ))); 87 | } 88 | builders.insert(type_name.to_string(), builder); 89 | Ok(()) 90 | } 91 | -------------------------------------------------------------------------------- /crates/arkflow-core/src/pipeline/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Pipeline Component Module 16 | //! 17 | //! A pipeline is an ordered collection of processors that defines how data flows from input to output, through a series of processing steps. 
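//!
//! Illustrative usage sketch (not part of the original source): `pipeline_config`, `resource`,
//! and `batch` are assumed to exist in the caller. It only exercises the `PipelineConfig::build`,
//! `Pipeline::process`, and `Pipeline::close` APIs defined in this module:
//!
//! ```ignore
//! let (pipeline, _thread_num) = pipeline_config.build(&resource)?;
//! // Each processor may turn one batch into zero, one, or many batches.
//! let out_batches = pipeline.process(batch).await?;
//! pipeline.close().await?;
//! ```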
18 | 19 | use serde::{Deserialize, Serialize}; 20 | use std::sync::Arc; 21 | 22 | use crate::{processor::Processor, Error, MessageBatch, Resource}; 23 | 24 | pub struct Pipeline { 25 | processors: Vec>, 26 | } 27 | 28 | impl Pipeline { 29 | /// Create a new pipeline 30 | pub fn new(processors: Vec>) -> Self { 31 | Self { processors } 32 | } 33 | 34 | /// Process messages 35 | pub async fn process(&self, msg: MessageBatch) -> Result, Error> { 36 | let mut msgs = vec![msg]; 37 | for processor in &self.processors { 38 | let mut new_msgs = Vec::with_capacity(msgs.len()); 39 | for msg in msgs { 40 | match processor.process(msg).await { 41 | Ok(processed) => new_msgs.extend(processed), 42 | Err(e) => return Err(e), 43 | } 44 | } 45 | msgs = new_msgs; 46 | } 47 | Ok(msgs) 48 | } 49 | 50 | /// Shut down all processors in the pipeline 51 | pub async fn close(&self) -> Result<(), Error> { 52 | for processor in &self.processors { 53 | processor.close().await? 54 | } 55 | Ok(()) 56 | } 57 | } 58 | 59 | /// Pipeline configuration 60 | #[derive(Debug, Clone, Serialize, Deserialize)] 61 | pub struct PipelineConfig { 62 | #[serde(default = "default_thread_num")] 63 | pub thread_num: u32, 64 | pub processors: Vec, 65 | } 66 | 67 | impl PipelineConfig { 68 | /// Build pipelines based on your configuration 69 | pub fn build(&self, resource: &Resource) -> Result<(Pipeline, u32), Error> { 70 | let mut processors = Vec::with_capacity(self.processors.len()); 71 | for processor_config in &self.processors { 72 | processors.push(processor_config.build(resource)?); 73 | } 74 | Ok((Pipeline::new(processors), self.thread_num)) 75 | } 76 | } 77 | 78 | fn default_thread_num() -> u32 { 79 | num_cpus::get() as u32 80 | } 81 | -------------------------------------------------------------------------------- /crates/arkflow-core/src/processor/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Processor component module 16 | //! 17 | //! The processor component is responsible for transforming, filtering, enriching, and so on. 18 | 19 | use async_trait::async_trait; 20 | use serde::{Deserialize, Serialize}; 21 | use std::collections::HashMap; 22 | use std::sync::{Arc, RwLock}; 23 | 24 | use crate::{Error, MessageBatch, Resource}; 25 | 26 | lazy_static::lazy_static! 
{ 27 | static ref PROCESSOR_BUILDERS: RwLock>> = RwLock::new(HashMap::new()); 28 | } 29 | 30 | /// Characteristic interface of the processor component 31 | #[async_trait] 32 | pub trait Processor: Send + Sync { 33 | /// Process messages 34 | async fn process(&self, batch: MessageBatch) -> Result, Error>; 35 | 36 | /// Turn off the processor 37 | async fn close(&self) -> Result<(), Error>; 38 | } 39 | 40 | /// Processor configuration 41 | #[derive(Debug, Clone, Serialize, Deserialize)] 42 | pub struct ProcessorConfig { 43 | #[serde(rename = "type")] 44 | pub processor_type: String, 45 | pub name: Option, 46 | #[serde(flatten)] 47 | pub config: Option, 48 | } 49 | 50 | impl ProcessorConfig { 51 | /// Build the processor components according to the configuration 52 | pub fn build(&self, resource: &Resource) -> Result, Error> { 53 | let builders = PROCESSOR_BUILDERS.read().unwrap(); 54 | 55 | if let Some(builder) = builders.get(&self.processor_type) { 56 | builder.build(self.name.as_ref(), &self.config, resource) 57 | } else { 58 | Err(Error::Config(format!( 59 | "Unknown processor type: {}", 60 | self.processor_type 61 | ))) 62 | } 63 | } 64 | } 65 | 66 | pub trait ProcessorBuilder: Send + Sync { 67 | fn build( 68 | &self, 69 | name: Option<&String>, 70 | config: &Option, 71 | resource: &Resource, 72 | ) -> Result, Error>; 73 | } 74 | 75 | pub fn register_processor_builder( 76 | type_name: &str, 77 | builder: Arc, 78 | ) -> Result<(), Error> { 79 | let mut builders = PROCESSOR_BUILDERS.write().unwrap(); 80 | if builders.contains_key(type_name) { 81 | return Err(Error::Config(format!( 82 | "Processor type already registered: {}", 83 | type_name 84 | ))); 85 | } 86 | builders.insert(type_name.to_string(), builder); 87 | Ok(()) 88 | } 89 | -------------------------------------------------------------------------------- /crates/arkflow-core/src/temporary/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Temporary component module 16 | //! 17 | //! This module contains the Temporary trait and its associated builder. 18 | 19 | use crate::{Error, MessageBatch, Resource}; 20 | use async_trait::async_trait; 21 | use datafusion::logical_expr::ColumnarValue; 22 | use serde::{Deserialize, Serialize}; 23 | use std::collections::HashMap; 24 | use std::sync::{Arc, RwLock}; 25 | 26 | lazy_static::lazy_static! 
{ 27 | static ref TEMPORARY_BUILDERS: RwLock>> = RwLock::new(HashMap::new()); 28 | } 29 | 30 | #[derive(Debug, Clone, Serialize, Deserialize)] 31 | pub struct TemporaryConfig { 32 | #[serde(rename = "type")] 33 | pub temporary_type: String, 34 | pub name: String, 35 | #[serde(flatten)] 36 | pub config: Option, 37 | } 38 | 39 | #[async_trait] 40 | pub trait Temporary: Send + Sync { 41 | async fn connect(&self) -> Result<(), Error>; 42 | async fn get(&self, keys: &[ColumnarValue]) -> Result, Error>; 43 | async fn close(&self) -> Result<(), Error>; 44 | } 45 | 46 | pub trait TemporaryBuilder: Send + Sync { 47 | fn build( 48 | &self, 49 | config: &Option, 50 | resource: &Resource, 51 | ) -> Result, Error>; 52 | } 53 | 54 | impl TemporaryConfig { 55 | /// Build the temporary component according to the configuration 56 | pub fn build(&self, resource: &Resource) -> Result, Error> { 57 | let builders = TEMPORARY_BUILDERS.read().unwrap(); 58 | 59 | if let Some(builder) = builders.get(&self.temporary_type) { 60 | builder.build(&self.config, resource) 61 | } else { 62 | Err(Error::Config(format!( 63 | "Unknown temporary type: {}", 64 | self.temporary_type 65 | ))) 66 | } 67 | } 68 | } 69 | 70 | pub fn register_temporary_builder( 71 | type_name: &str, 72 | builder: Arc, 73 | ) -> Result<(), Error> { 74 | let mut builders = TEMPORARY_BUILDERS.write().unwrap(); 75 | if builders.contains_key(type_name) { 76 | return Err(Error::Config(format!( 77 | "Temporary type already registered: {}", 78 | type_name 79 | ))); 80 | } 81 | builders.insert(type_name.to_string(), builder); 82 | Ok(()) 83 | } 84 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arkflow-plugin" 3 | version.workspace = true 4 | edition.workspace = true 5 | description.workspace = true 6 | authors.workspace = true 7 | repository.workspace = true 8 | homepage.workspace = true 9 | license.workspace = true 10 | 11 | 12 | [dependencies] 13 | tokio = { workspace = true } 14 | tokio-util = { workspace = true, features = ["rt"] } 15 | async-trait = { workspace = true } 16 | futures-util = { workspace = true } 17 | serde = { workspace = true } 18 | serde_json = { workspace = true } 19 | serde_yaml = { workspace = true } 20 | toml = { workspace = true } 21 | humantime = { workspace = true } 22 | tracing = { workspace = true } 23 | datafusion = { workspace = true } 24 | datafusion-functions-json = { workspace = true } 25 | datafusion-table-providers = { version = "0.4", features = [ 26 | "mysql", 27 | "postgres", 28 | "duckdb", 29 | "sqlite", 30 | ] } 31 | ballista = { version = "46" } 32 | spiceai_duckdb_fork = "=1.2.1" 33 | arrow-json = { workspace = true } 34 | prost-reflect = { workspace = true } 35 | prost-types = { workspace = true } 36 | protobuf-parse = { workspace = true } 37 | protobuf = { workspace = true } 38 | lazy_static = { workspace = true } 39 | axum = { workspace = true } 40 | reqwest = { workspace = true } 41 | tower = "0.5" 42 | tower-http = { version = "0.6.5", features = ["cors", "trace"] } 43 | base64 = "0.22" 44 | colored = { workspace = true } 45 | flume = { workspace = true } 46 | rumqttc = "0.24.0" 47 | # Kafka 48 | aws-msk-iam-sasl-signer = "1.0.0" 49 | rdkafka = { version = "0.37", features = [ 50 | "cmake-build", 51 | "tracing", 52 | "sasl", 53 | "ssl-vendored", 54 | "zstd", 55 | ] } 56 | rdkafka-sys = "4.8.0" 57 | sasl2-sys = { version = "0.1.22", 
features = ["vendored"] } 58 | 59 | # redis 60 | redis = { version = "0.31", features = ["tokio-native-tls-comp", "aio", "connection-manager", "cluster-async"] } 61 | 62 | # vrl https://github.com/vectordotdev/vrl 63 | vrl = { version = "0.24.0", features = ["value", "compiler", "stdlib"] } 64 | 65 | 66 | # arkflow 67 | arkflow-core = { workspace = true } 68 | sqlx = { workspace = true } 69 | 70 | # Websocket 71 | tokio-tungstenite = { version = "0.26", features = ["native-tls"] } 72 | 73 | # NATS 74 | async-nats = "0.41.0" 75 | 76 | 77 | # modbus 78 | tokio-modbus = { version = "0.16", default-features = false, features = ["tcp"] } 79 | 80 | # Object Store 81 | object_store = { version = "0.11.2", features = ["aws", "azure", "gcp"] } 82 | hdfs-native-object-store = "0.13" 83 | 84 | 85 | once_cell = "1.19.0" 86 | futures = { workspace = true } 87 | tokio-stream = "0.1.17" 88 | url = "2.5.4" 89 | num_cpus = "1.16.0" 90 | 91 | [dev-dependencies] 92 | tempfile = { workspace = true } 93 | mockall = { workspace = true } 94 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/buffer/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | mod join; 15 | pub mod memory; 16 | pub mod session_window; 17 | pub mod sliding_window; 18 | pub mod tumbling_window; 19 | pub(crate) mod window; 20 | 21 | use arkflow_core::Error; 22 | 23 | pub fn init() -> Result<(), Error> { 24 | memory::init()?; 25 | tumbling_window::init()?; 26 | sliding_window::init()?; 27 | session_window::init()?; 28 | Ok(()) 29 | } 30 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/codec/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use arkflow_core::Error; 15 | 16 | mod json; 17 | 18 | pub fn init() -> Result<(), Error> { 19 | json::init()?; 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/component/json.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use arkflow_core::Error; 15 | use arrow_json::ReaderBuilder; 16 | use datafusion::arrow; 17 | use datafusion::arrow::record_batch::RecordBatch; 18 | use std::collections::HashSet; 19 | use std::io::Cursor; 20 | use std::sync::Arc; 21 | 22 | pub(crate) fn try_to_arrow( 23 | content: &[u8], 24 | fields_to_include: Option<&HashSet<String>>, 25 | ) -> Result<RecordBatch, Error> { 26 | let mut cursor_for_inference = Cursor::new(content); 27 | let (mut inferred_schema, _) = 28 | arrow_json::reader::infer_json_schema(&mut cursor_for_inference, Some(1)) 29 | .map_err(|e| Error::Process(format!("Schema inference error: {}", e)))?; 30 | if let Some(ref set) = fields_to_include { 31 | inferred_schema = inferred_schema 32 | .project( 33 | &set.iter() 34 | .filter_map(|name| inferred_schema.index_of(name).ok()) 35 | .collect::<Vec<usize>>(), 36 | ) 37 | .map_err(|e| Error::Process(format!("Arrow JSON Projection Error: {}", e)))?; 38 | } 39 | 40 | let inferred_schema = Arc::new(inferred_schema); 41 | let reader = ReaderBuilder::new(inferred_schema.clone()) 42 | .build(Cursor::new(content)) 43 | .map_err(|e| Error::Process(format!("Arrow JSON Reader Builder Error: {}", e)))?; 44 | 45 | let result = reader 46 | .map(|batch| { 47 | Ok(batch.map_err(|e| Error::Process(format!("Arrow JSON Reader Error: {}", e)))?) 48 | }) 49 | .collect::<Result<Vec<RecordBatch>, Error>>()?; 50 | if result.is_empty() { 51 | return Ok(RecordBatch::new_empty(inferred_schema)); 52 | } 53 | 54 | let new_batch = arrow::compute::concat_batches(&inferred_schema, &result) 55 | .map_err(|e| Error::Process(format!("Merge batches failed: {}", e)))?; 56 | 57 | Ok(new_batch) 58 | } 59 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/component/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | pub mod json; 16 | pub mod redis; 17 | pub mod sql; 18 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/component/sql.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use crate::udf; 15 | use arkflow_core::Error; 16 | use datafusion::prelude::SessionContext; 17 | 18 | pub(crate) fn create_session_context() -> Result { 19 | let mut ctx = SessionContext::new(); 20 | udf::init(&mut ctx)?; 21 | datafusion_functions_json::register_all(&mut ctx) 22 | .map_err(|e| Error::Process(format!("Registration JSON function failed: {}", e)))?; 23 | Ok(ctx) 24 | } 25 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/input/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Input component module 16 | //! 17 | //! The input component is responsible for receiving data from various sources such as message queues, file systems, HTTP endpoints, and so on. 18 | 19 | use arkflow_core::Error; 20 | 21 | pub mod generate; 22 | pub mod http; 23 | pub mod kafka; 24 | pub mod memory; 25 | pub mod modbus; 26 | pub mod mqtt; 27 | pub mod multiple_inputs; 28 | pub mod nats; 29 | pub mod redis; 30 | pub mod sql; 31 | pub mod websocket; 32 | 33 | pub fn init() -> Result<(), Error> { 34 | generate::init()?; 35 | http::init()?; 36 | kafka::init()?; 37 | memory::init()?; 38 | mqtt::init()?; 39 | nats::init()?; 40 | redis::init()?; 41 | sql::init()?; 42 | websocket::init()?; 43 | multiple_inputs::init()?; 44 | modbus::init()?; 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/lib.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | pub mod buffer; 16 | pub mod codec; 17 | pub mod component; 18 | pub mod expr; 19 | pub mod input; 20 | pub mod output; 21 | pub mod processor; 22 | pub mod temporary; 23 | pub mod time; 24 | pub mod udf; 25 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/output/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Output component module 16 | //! 17 | //! The output component is responsible for sending the processed data to the target system. 18 | 19 | use arkflow_core::Error; 20 | 21 | pub mod drop; 22 | pub mod http; 23 | pub mod kafka; 24 | pub mod mqtt; 25 | pub mod sql; 26 | pub mod nats; 27 | pub mod redis; 28 | pub mod stdout; 29 | 30 | pub fn init() -> Result<(), Error> { 31 | drop::init()?; 32 | http::init()?; 33 | kafka::init()?; 34 | mqtt::init()?; 35 | stdout::init()?; 36 | sql::init()?; 37 | nats::init()?; 38 | redis::init()?; 39 | Ok(()) 40 | } 41 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/processor/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | //! Processor component module 16 | //! 17 | //! The processor component is responsible for transforming, filtering, enriching, and so on. 18 | 19 | use arkflow_core::Error; 20 | 21 | pub mod batch; 22 | pub mod json; 23 | pub mod protobuf; 24 | pub mod sql; 25 | pub mod vrl; 26 | 27 | pub fn init() -> Result<(), Error> { 28 | batch::init()?; 29 | json::init()?; 30 | protobuf::init()?; 31 | sql::init()?; 32 | vrl::init()?; 33 | Ok(()) 34 | } 35 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/temporary/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | mod redis; 15 | 16 | use arkflow_core::Error; 17 | 18 | pub fn init() -> Result<(), Error> { 19 | redis::init()?; 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/time/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | use serde::de::Unexpected; 16 | use serde::{de, Deserialize, Deserializer}; 17 | use std::time::Duration; 18 | 19 | pub fn deserialize_duration<'de, D>(deserializer: D) -> Result<Duration, D::Error> 20 | where 21 | D: Deserializer<'de>, 22 | { 23 | let s: String = Deserialize::deserialize(deserializer)?; 24 | humantime::parse_duration(&s).map_err(|_| { 25 | de::Error::invalid_value(Unexpected::Str(&s), &"a duration like '10ms' or '1s'") 26 | }) 27 | } 28 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/udf/aggregate_udf.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use arkflow_core::Error; 15 | use datafusion::execution::FunctionRegistry; 16 | use datafusion::logical_expr::AggregateUDF; 17 | use std::collections::HashMap; 18 | use std::sync::{Arc, RwLock}; 19 | use tracing::debug; 20 | 21 | lazy_static::lazy_static! { 22 | static ref UDFS: RwLock<HashMap<String, Arc<AggregateUDF>>> = RwLock::new(HashMap::new()); 23 | } 24 | 25 | /// Register a new aggregate UDF (User Defined Function). 26 | /// 27 | /// This function adds a UDF to the global registry. The UDF will be available for use 28 | /// in SQL queries after the next call to `init`. 29 | /// 30 | /// # Arguments 31 | /// * `udf` - The AggregateUDF instance to register.
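///
/// # Example
///
/// Illustrative sketch only: `build_my_udaf()` is a hypothetical helper that constructs a
/// DataFusion `AggregateUDF` elsewhere in user code; registration must happen before the
/// engine builds its SQL session context.
///
/// ```ignore
/// let udaf = build_my_udaf();
/// arkflow_plugin::udf::aggregate_udf::register(udaf)?;
/// ```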
32 | pub fn register(udf: AggregateUDF) -> Result<(), Error> { 33 | let mut udfs = UDFS.write().map_err(|_| { 34 | Error::Config("Failed to acquire write lock for aggregate UDFS".to_string()) 35 | })?; 36 | let name = udf.name(); 37 | if udfs.contains_key(name) { 38 | return Err(Error::Config(format!( 39 | "Aggregate UDF with name '{}' already registered", 40 | name 41 | ))); 42 | }; 43 | udfs.insert(name.to_string(), Arc::new(udf)); 44 | Ok(()) 45 | } 46 | 47 | pub(crate) fn init(registry: &mut T) -> Result<(), Error> { 48 | let aggregate_udfs = UDFS 49 | .read() 50 | .map_err(|_| Error::Config("Failed to acquire read lock for aggregate UDFS".to_string()))?; 51 | aggregate_udfs 52 | .iter() 53 | .try_for_each(|(_, udf)| { 54 | let existing_udf = registry.register_udaf(Arc::clone(udf))?; 55 | if let Some(existing_udf) = existing_udf { 56 | debug!("Overwrite existing aggregate UDF: {}", existing_udf.name()); 57 | } 58 | Ok(()) as datafusion::common::Result<()> 59 | }) 60 | .map_err(|e| Error::Config(format!("Failed to register aggregate UDFs: {}", e))) 61 | } 62 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/udf/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | /// Module for managing scalar user-defined functions (UDFs) for SQL processing. 16 | /// 17 | /// This module provides functionality to register and initialize UDFs in a thread-safe manner. 18 | /// UDFs are registered globally and then added to the SQL function registry during context initialization. 19 | use arkflow_core::Error; 20 | use datafusion::execution::FunctionRegistry; 21 | 22 | pub mod aggregate_udf; 23 | pub mod scalar_udf; 24 | pub mod window_udf; 25 | 26 | /// Initializes and registers all user-defined functions (UDFs). 27 | /// 28 | /// This function calls the `init` function of each UDF module (aggregate, scalar, window) 29 | /// to register their respective functions with the provided `FunctionRegistry`. 30 | /// 31 | /// # Arguments 32 | /// 33 | /// * `registry` - A mutable reference to a type implementing `FunctionRegistry` where the UDFs will be registered. 34 | /// 35 | /// # Errors 36 | /// 37 | /// Returns an `Error` if any of the underlying `init` calls fail during registration. 38 | pub(crate) fn init(registry: &mut T) -> Result<(), Error> { 39 | aggregate_udf::init(registry)?; 40 | scalar_udf::init(registry)?; 41 | window_udf::init(registry)?; 42 | Ok(()) 43 | } 44 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/udf/scalar_udf.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use arkflow_core::Error; 15 | use datafusion::execution::FunctionRegistry; 16 | use datafusion::logical_expr::ScalarUDF; 17 | use std::collections::HashMap; 18 | use std::sync::{Arc, RwLock}; 19 | use tracing::debug; 20 | 21 | lazy_static::lazy_static! { 22 | static ref UDFS: RwLock>> = RwLock::new(HashMap::new()); 23 | } 24 | 25 | /// Register a new scalar UDF. 26 | /// 27 | /// This function adds a UDF to the global registry. The UDF will be available for use 28 | /// in SQL queries after the next call to `init`. 29 | /// 30 | /// # Arguments 31 | /// 32 | /// * `udf` - The UDF to register, wrapped in an Arc for shared ownership. 33 | pub fn register(udf: ScalarUDF) -> Result<(), Error> { 34 | let mut udfs = UDFS 35 | .write() 36 | .map_err(|_| Error::Config("Failed to acquire write lock for UDFS".to_string()))?; 37 | 38 | let name = udf.name(); 39 | if udfs.contains_key(name) { 40 | return Err(Error::Config(format!( 41 | "Scalar UDF with name '{}' already registered", 42 | name 43 | ))); 44 | }; 45 | udfs.insert(name.to_string(), Arc::new(udf)); 46 | Ok(()) 47 | } 48 | 49 | pub(crate) fn init(registry: &mut T) -> Result<(), Error> { 50 | let scalar_udfs = UDFS 51 | .read() 52 | .expect("Failed to acquire read lock for scalar UDFS"); 53 | scalar_udfs 54 | .iter() 55 | .try_for_each(|(_, udf)| { 56 | let existing_udf = registry.register_udf(Arc::clone(udf))?; 57 | if let Some(existing_udf) = existing_udf { 58 | debug!("Overwrite existing scalar UDF: {}", existing_udf.name()); 59 | } 60 | Ok(()) as datafusion::common::Result<()> 61 | }) 62 | .map_err(|e| Error::Config(format!("Failed to register scalar UDFs: {}", e))) 63 | } 64 | -------------------------------------------------------------------------------- /crates/arkflow-plugin/src/udf/window_udf.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | use arkflow_core::Error; 15 | use datafusion::execution::FunctionRegistry; 16 | use datafusion::logical_expr::WindowUDF; 17 | use std::collections::HashMap; 18 | use std::sync::{Arc, RwLock}; 19 | use tracing::debug; 20 | 21 | lazy_static::lazy_static! { 22 | static ref UDFS: RwLock>> = RwLock::new(HashMap::new()); 23 | } 24 | 25 | /// Register a new window UDF (User Defined Function). 26 | /// 27 | /// This function adds a UDF to the global registry. The UDF will be available for use 28 | /// in SQL queries after the next call to `init`. 
29 | /// 30 | /// # Arguments 31 | /// * `udf` - The WindowUDF instance to register. 32 | pub fn register(udf: WindowUDF) -> Result<(), Error> { 33 | let mut udfs = UDFS 34 | .write() 35 | .map_err(|_| Error::Config("Failed to acquire write lock for window UDFS".to_string()))?; 36 | let name = udf.name(); 37 | if udfs.contains_key(name) { 38 | return Err(Error::Config(format!( 39 | "Window UDF with name '{}' already registered", 40 | name 41 | ))); 42 | }; 43 | udfs.insert(name.to_string(), Arc::new(udf)); 44 | Ok(()) 45 | } 46 | 47 | pub(crate) fn init(registry: &mut T) -> Result<(), Error> { 48 | let window_udfs = UDFS 49 | .read() 50 | .expect("Failed to acquire read lock for window UDFS"); 51 | window_udfs 52 | .iter() 53 | .try_for_each(|(_, udf)| { 54 | let existing_udf = registry.register_udwf(Arc::clone(udf))?; 55 | if let Some(existing_udf) = existing_udf { 56 | debug!("Overwrite existing window UDF: {}", existing_udf.name()); 57 | } 58 | Ok(()) as datafusion::common::Result<()> 59 | }) 60 | .map_err(|e| Error::Config(format!("Failed to register window UDFs: {}", e))) 61 | } 62 | -------------------------------------------------------------------------------- /crates/arkflow/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arkflow" 3 | version.workspace = true 4 | edition.workspace = true 5 | description.workspace = true 6 | authors.workspace = true 7 | repository.workspace = true 8 | homepage.workspace = true 9 | license.workspace = true 10 | 11 | 12 | [dependencies] 13 | tokio = { workspace = true} 14 | 15 | # arkflow 16 | arkflow-core = { workspace = true } 17 | arkflow-plugin = { workspace = true } 18 | -------------------------------------------------------------------------------- /crates/arkflow/src/main.rs: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | use arkflow_core::cli::Cli; 16 | use arkflow_plugin::{buffer, codec, input, output, processor, temporary}; 17 | 18 | #[tokio::main] 19 | async fn main() -> Result<(), Box> { 20 | input::init()?; 21 | output::init()?; 22 | processor::init()?; 23 | buffer::init()?; 24 | temporary::init()?; 25 | codec::init()?; 26 | let mut cli = Cli::default(); 27 | cli.parse()?; 28 | cli.run().await 29 | } 30 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM rust:1.86-slim as builder 3 | 4 | WORKDIR /app 5 | COPY .. . 
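# Install system packages needed to compile native dependencies (TLS, SQLite, protobuf)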
6 | RUN apt-get update && \ 7 | apt-get install -y clang perl libfindbin-libs-perl make cmake gcc libssl-dev pkg-config build-essential libsqlite3-dev protobuf-compiler 8 | 9 | # Build project 10 | RUN cargo build --release 11 | 12 | # Runtime stage 13 | FROM debian:bookworm-slim as arkflow 14 | 15 | WORKDIR /app 16 | 17 | # Install runtime dependencies 18 | RUN apt-get update && apt-get install -y libsqlite3-0 && rm -rf /var/lib/apt/lists/* 19 | 20 | # Copy compiled binary from builder stage 21 | COPY --from=builder /app/target/release/arkflow /app/arkflow 22 | 23 | 24 | # Set environment variables 25 | ENV RUST_LOG=info 26 | 27 | 28 | # Set startup command 29 | CMD ["/app/arkflow", "--config", "/app/etc/config.yaml"] -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER= yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 
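For example, with a hypothetical GitHub username `your-name`:

```
$ GIT_USER=your-name yarn deploy
```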
42 | -------------------------------------------------------------------------------- /docs/blog/authors.yml: -------------------------------------------------------------------------------- 1 | chenquan: 2 | name: Chen quan 3 | title: Software Engineer 4 | url: https://github.com/chenquan 5 | image_url: https://github.com/chenquan.png 6 | page: true 7 | socials: 8 | github: chenquan -------------------------------------------------------------------------------- /docs/blog/tags.yml: -------------------------------------------------------------------------------- 1 | facebook: 2 | label: Facebook 3 | permalink: /facebook 4 | description: Facebook tag description 5 | 6 | hello: 7 | label: Hello 8 | permalink: /hello 9 | description: Hello tag description 10 | 11 | docusaurus: 12 | label: Docusaurus 13 | permalink: /docusaurus 14 | description: Docusaurus tag description 15 | 16 | hola: 17 | label: Hola 18 | permalink: /hola 19 | description: Hola tag description 20 | -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Inputs", 3 | "link": { 4 | "title": "Inputs", 5 | "type": "generated-index", 6 | "description": "Input components are responsible for consuming data from various sources such as Kafka, MQTT, HTTP, and Memory. Each input component has its own configuration options that can be customized according to your needs." 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/generate.md: -------------------------------------------------------------------------------- 1 | # Generate 2 | 3 | Generate is an input component that generates test data. 4 | 5 | ## Configuration 6 | 7 | ### **context** 8 | 9 | The context is a JSON object that will be used to generate the data. The JSON object will be serialized to bytes and sent as message content. 10 | 11 | type: `string` 12 | 13 | optional: `true` 14 | 15 | ### **count** 16 | 17 | The total number of data points to generate. If not specified, the generator will run indefinitely until manually stopped. 18 | 19 | type: `integer` 20 | 21 | optional: `true` 22 | 23 | ### **interval** 24 | 25 | The interval is the time between each data point. 26 | 27 | type: `string` 28 | 29 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 30 | 31 | optional: `false` 32 | 33 | ### **batch_size** 34 | 35 | The batch size is the number of data points to generate at each interval. If the remaining count is less than batch_size, only the remaining messages will be sent. 36 | 37 | type: `integer` 38 | 39 | optional: `false` 40 | 41 | ## Examples 42 | 43 | ```yaml 44 | - input: 45 | type: "generate" 46 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 47 | interval: 1ms 48 | batch_size: 1000 49 | count: 10000 # Optional: generate 10000 messages in total 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/http.md: -------------------------------------------------------------------------------- 1 | # HTTP 2 | 3 | The HTTP input component receives data from HTTP endpoints. 4 | 5 | ## Configuration 6 | 7 | ### **address** 8 | 9 | Listening address for the HTTP server. 10 | 11 | type: `string` 12 | 13 | ### **path** 14 | 15 | The endpoint path to receive data. 
16 | 17 | type: `string` 18 | 19 | ### **cors_enabled** 20 | 21 | Whether to enable CORS (Cross-Origin Resource Sharing). 22 | 23 | type: `boolean` 24 | 25 | default: `false` 26 | 27 | ### **auth** 28 | 29 | Authentication configuration. 30 | 31 | type: `object` 32 | 33 | properties: 34 | - **type**: Authentication type (`basic` or `bearer`) 35 | - **username**: Username for basic authentication 36 | - **password**: Password for basic authentication 37 | - **token**: Token for bearer authentication 38 | 39 | ## Examples 40 | 41 | ### Basic HTTP Server 42 | 43 | ```yaml 44 | - input: 45 | type: "http" 46 | address: "0.0.0.0:8080" 47 | path: "/data" 48 | cors_enabled: true 49 | ``` 50 | 51 | ### With Basic Authentication 52 | 53 | ```yaml 54 | - input: 55 | type: "http" 56 | address: "0.0.0.0:8080" 57 | path: "/data" 58 | auth: 59 | type: "basic" 60 | username: "user" 61 | password: "pass" 62 | ``` 63 | 64 | ### With Bearer Token Authentication 65 | 66 | ```yaml 67 | - input: 68 | type: "http" 69 | address: "0.0.0.0:8080" 70 | path: "/data" 71 | auth: 72 | type: "bearer" 73 | token: "your-token" 74 | ``` -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | The Kafka input component consumes messages from a Kafka topic. It provides reliable message consumption with consumer group support and configurable offset management. 4 | 5 | ## Configuration 6 | 7 | ### **brokers** 8 | 9 | List of Kafka server addresses. 10 | 11 | - Format: `["host1:port1", "host2:port2"]` 12 | - At least one broker address must be specified 13 | - Multiple brokers can be specified for high availability 14 | 15 | type: `array` of `string` 16 | 17 | optional: `false` 18 | 19 | ### **topics** 20 | 21 | Subscribed to topics. 22 | 23 | - Format: `["topic1", "topic2"]` 24 | - Multiple topics can be subscribed 25 | - Topics must exist in the Kafka cluster 26 | - The consumer will receive messages from all specified topics 27 | 28 | type: `array` of `string` 29 | 30 | optional: `false` 31 | 32 | ### **consumer_group** 33 | 34 | Consumer group ID. 35 | 36 | - Consumers within the same consumer group will share message consumption 37 | - Different consumer groups will independently consume the same messages 38 | - It is recommended to set a unique consumer group ID for each application 39 | - Used for distributed message processing and load balancing 40 | 41 | type: `string` 42 | 43 | optional: `false` 44 | 45 | ### **client_id** 46 | 47 | Client ID (optional). 48 | 49 | - If not specified, the system will automatically generate a random ID 50 | - It is recommended to set an explicit client ID for monitoring in production environments 51 | - Used to identify the client in Kafka logs and metrics 52 | 53 | type: `string` 54 | 55 | optional: `true` 56 | 57 | ### **start_from_latest** 58 | 59 | Start with the most recent messages. 
60 | 61 | - When set to true, the consumer will start consuming from the latest messages 62 | - When set to false, the consumer will start from the earliest available messages 63 | - Useful for controlling message replay behavior on consumer startup 64 | 65 | type: `boolean` 66 | 67 | default: `false` 68 | 69 | optional: `true` 70 | 71 | ## Examples 72 | 73 | ```yaml 74 | - input: 75 | type: kafka 76 | brokers: 77 | - localhost:9092 78 | topics: 79 | - my_topic 80 | consumer_group: my_consumer_group 81 | client_id: my_client 82 | start_from_latest: false 83 | ``` 84 | 85 | ```yaml 86 | - input: 87 | type: kafka 88 | brokers: 89 | - kafka1:9092 90 | - kafka2:9092 91 | topics: 92 | - topic1 93 | - topic2 94 | consumer_group: app1_group 95 | start_from_latest: true 96 | ``` -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | 3 | The Memory input component reads data from an in-memory message queue. 4 | 5 | ## Configuration 6 | 7 | ### **messages** 8 | 9 | The initial list of messages in the memory queue (optional). 10 | 11 | type: `array` of `string` 12 | 13 | ## Examples 14 | 15 | ```yaml 16 | - input: 17 | type: "memory" 18 | messages: 19 | - "Hello" 20 | - "World" 21 | ``` -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/modbus.md: -------------------------------------------------------------------------------- 1 | # Modbus 2 | 3 | The Modbus input component receives data from Modbus TCP devices, supporting various register types. 4 | 5 | ## Configuration 6 | 7 | ### **addr** (required) 8 | 9 | Modbus TCP server address in the format `host:port`. 10 | 11 | type: `integer` 12 | 13 | ### **slave_id** (required) 14 | 15 | Modbus slave ID. 16 | 17 | type: `integer` 18 | 19 | ### **points** (required) 20 | 21 | List of Modbus points to read. 22 | 23 | type: `array` of point objects 24 | 25 | #### Point Object 26 | 27 | ##### **type** (required) 28 | 29 | Type of Modbus register to read. Must be one of: `"coils"`, `"discrete_inputs"`, `"holding_registers"`, or `"input_registers"`. 30 | 31 | type: `string` 32 | 33 | ##### **name** (required) 34 | 35 | Name for the data point. This will be used as the field name in the output record. 36 | 37 | type: `string` 38 | 39 | ##### **address** (required) 40 | 41 | Modbus register address to read from. 42 | 43 | type: `integer` 44 | 45 | ##### **quantity** (required) 46 | 47 | Number of registers to read. 48 | 49 | type: `integer` 50 | 51 | ### **read_interval** (required) 52 | 53 | Interval between consecutive reads. 54 | 55 | type: `string` (parsed as duration) 56 | 57 | example: `"1s"`, `"500ms"`, `"1m"` 58 | 59 | ## Examples 60 | 61 | 62 | ```yaml 63 | - input: 64 | type: "modbus" 65 | addr: "192.168.1.100:502" 66 | slave_id: 1 67 | read_interval: "1s" 68 | points: 69 | - type: "holding_registers" 70 | name: "temperature" 71 | address: 100 72 | quantity: 2 73 | - type: "coils" 74 | name: "status_flags" 75 | address: 200 76 | quantity: 2 77 | ``` -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/mqtt.md: -------------------------------------------------------------------------------- 1 | # MQTT 2 | 3 | The MQTT input component receives data from an MQTT broker. 4 | 5 | ## Configuration 6 | 7 | ### **host** 8 | 9 | MQTT broker address. 
10 | 11 | type: `string` 12 | 13 | ### **port** 14 | 15 | MQTT broker port. 16 | 17 | type: `integer` 18 | 19 | ### **client_id** 20 | 21 | Unique identifier for the MQTT client. 22 | 23 | type: `string` 24 | 25 | ### **username** 26 | 27 | Username for authentication (optional). 28 | 29 | type: `string` 30 | 31 | ### **password** 32 | 33 | Password for authentication (optional). 34 | 35 | type: `string` 36 | 37 | ### **topics** 38 | 39 | List of topics to subscribe to. 40 | 41 | type: `array` of `string` 42 | 43 | ### **qos** 44 | 45 | Quality of Service level (0, 1, or 2). 46 | 47 | type: `integer` 48 | 49 | default: `1` 50 | 51 | ### **clean_session** 52 | 53 | Whether to start a clean session. 54 | 55 | type: `boolean` 56 | 57 | default: `true` 58 | 59 | ### **keep_alive** 60 | 61 | Keep alive interval in seconds. 62 | 63 | type: `integer` 64 | 65 | default: `60` 66 | 67 | ## Examples 68 | 69 | ```yaml 70 | - input: 71 | type: "mqtt" 72 | host: "localhost" 73 | port: 1883 74 | client_id: "my_client" 75 | username: "user" 76 | password: "pass" 77 | topics: 78 | - "sensors/temperature" 79 | - "sensors/humidity" 80 | qos: 1 81 | clean_session: true 82 | keep_alive: 60 83 | ``` -------------------------------------------------------------------------------- /docs/docs/components/0-inputs/redis.md: -------------------------------------------------------------------------------- 1 | # Redis 2 | 3 | The Redis input component receives data from a Redis server, supporting both pub/sub and list modes. 4 | 5 | ## Configuration 6 | 7 | #### **url** (required) 8 | 9 | Redis server URL in the format `redis://host:port` or `rediss://host:port` for SSL/TLS connections. 10 | 11 | type: `string` 12 | 13 | #### **redis_type** (required) 14 | 15 | Redis operation mode. Must be specified with a `type` of either `"subscribe"` or `"list"`. 16 | 17 | type: `object` 18 | 19 | ##### Subscribe Mode 20 | 21 | ```yaml 22 | redis_type: 23 | type: "subscribe" 24 | subscribe: 25 | type: "channels" 26 | channels: 27 | - "my_channel" 28 | ``` 29 | 30 | ###### **subscribe** 31 | 32 | Subscription configuration with either channels or patterns. 33 | 34 | type: `object` 35 | 36 | ###### **type** (required) 37 | 38 | Subscription type, must be either `"channels"` or `"patterns"`. 39 | 40 | type: `string` 41 | 42 | ###### **channels** 43 | 44 | List of channels to subscribe to. Required when type is `"channels"`. 45 | 46 | type: `array` of `string` 47 | 48 | ###### **patterns** 49 | 50 | List of patterns to subscribe to. Required when type is `"patterns"`. 51 | 52 | type: `array` of `string` 53 | 54 | ##### List Mode 55 | 56 | ```yaml 57 | redis_type: 58 | type: "list" 59 | list: 60 | - "my_list" 61 | ``` 62 | 63 | ###### **list** (required) 64 | 65 | List of Redis lists to consume messages from. 
66 | 67 | type: `array` of `string` 68 | 69 | ## Examples 70 | 71 | ### Subscribe Mode Example (Channels) 72 | 73 | ```yaml 74 | - input: 75 | type: "redis" 76 | url: "redis://localhost:6379" 77 | redis_type: 78 | type: "subscribe" 79 | subscribe: 80 | type: "channels" 81 | channels: 82 | - "news" 83 | - "events" 84 | ``` 85 | 86 | ### Subscribe Mode Example (Patterns) 87 | 88 | ```yaml 89 | - input: 90 | type: "redis" 91 | url: "redis://localhost:6379" 92 | redis_type: 93 | type: "subscribe" 94 | subscribe: 95 | type: "patterns" 96 | patterns: 97 | - "user.*" 98 | - "notification.*" 99 | ``` 100 | 101 | ### List Mode Example 102 | 103 | ```yaml 104 | - input: 105 | type: "redis" 106 | url: "redis://localhost:6379" 107 | redis_type: 108 | type: "list" 109 | list: 110 | - "tasks" 111 | - "notifications" 112 | ``` -------------------------------------------------------------------------------- /docs/docs/components/1-buffers/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Buffers", 3 | "link": { 4 | "title": "Buffers", 5 | "type": "generated-index", 6 | "description": "A buffer is an optional component type that immediately follows the input layer and can be used as a way to decouple the transaction model from downstream components such as processing layers and outputs. This is considered a high-level component, as most users may not benefit from buffers, but they allow you to group messages using windowing algorithms. " 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/docs/components/1-buffers/memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | 3 | The Memory buffer component provides an in-memory message queue for temporary message storage and buffering. It implements a FIFO (First-In-First-Out) queue with configurable capacity and timeout settings. 4 | 5 | ## Configuration 6 | 7 | ### **capacity** 8 | 9 | The maximum number of messages that can be stored in the memory buffer. When this limit is reached, the buffer will trigger processing of the buffered messages to apply backpressure to upstream components. 10 | 11 | type: `integer` 12 | 13 | required: `true` 14 | 15 | ### **timeout** 16 | 17 | The duration to wait before processing buffered messages, even if the buffer is not full. This ensures messages don't stay in the buffer indefinitely. 
18 | 19 | type: `string` 20 | 21 | required: `true` 22 | 23 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 24 | 25 | ## Internal Mechanism 26 | 27 | - Messages are stored in a thread-safe queue using `RwLock` 28 | - Messages are written to the front of the queue and read from the back (FIFO) 29 | - When the total message count reaches the configured capacity, the buffer triggers message processing 30 | - A background timer periodically checks the timeout condition to process messages 31 | - Messages are batched and concatenated during processing for better performance 32 | - Implements proper backpressure handling to prevent memory overflow 33 | 34 | ## Examples 35 | 36 | ```yaml 37 | buffer: 38 | type: "memory" 39 | capacity: 100 # Process after 100 messages 40 | timeout: "1s" # Or process after 1 second 41 | ``` 42 | 43 | This example configures a memory buffer that will process messages either when: 44 | - The total number of buffered messages reaches 100 45 | - 1 second has elapsed since the last message was received 46 | 47 | The buffer helps smooth out traffic spikes and provides backpressure when downstream components can't keep up with the incoming message rate. -------------------------------------------------------------------------------- /docs/docs/components/1-buffers/session_window.md: -------------------------------------------------------------------------------- 1 | # Session Window 2 | 3 | The Session Window buffer component provides a session-based message grouping mechanism where messages are grouped based on activity gaps. It implements a session window that closes after a configurable period of inactivity. 4 | 5 | ## Configuration 6 | 7 | ### **gap** 8 | 9 | The duration of inactivity that triggers the closing of a session window. When this period elapses without new messages, the buffer will process the messages in the current session. 10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 16 | 17 | ## Internal Mechanism 18 | 19 | - Messages are stored in a thread-safe queue using `RwLock` 20 | - Each message arrival resets an inactivity timer 21 | - When the gap duration elapses without new messages, the session window closes and processes messages 22 | - Messages are batched and concatenated during processing for better performance 23 | - Implements proper backpressure handling to prevent memory overflow 24 | 25 | ## Examples 26 | 27 | ```yaml 28 | buffer: 29 | type: "session_window" 30 | gap: "5s" # Close session after 5 seconds of inactivity 31 | ``` 32 | 33 | This example configures a session window buffer that will: 34 | - Group messages into sessions 35 | - Close the session and process messages when no new messages arrive for 5 seconds 36 | 37 | The buffer helps group related messages that occur close together in time while separating unrelated messages that have gaps between them. -------------------------------------------------------------------------------- /docs/docs/components/1-buffers/sliding_window.md: -------------------------------------------------------------------------------- 1 | # Sliding Window 2 | 3 | The Sliding Window buffer component provides a time-based windowing mechanism for processing message batches. It implements a sliding window algorithm with configurable window size, slide interval and slide size. 4 | 5 | ## Configuration 6 | 7 | ### **window_size** 8 | 9 | The number of messages that define the window size. When this number of messages is collected, the window will slide forward. 
10 | 11 | type: `integer` 12 | 13 | required: `true` 14 | 15 | ### **interval** 16 | 17 | The duration between window slides, even if the window is not full. This ensures messages don't stay in the buffer indefinitely. 18 | 19 | type: `string` 20 | 21 | required: `true` 22 | 23 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 24 | 25 | ### **slide_size** 26 | 27 | The number of messages to slide the window forward by when processing completes. 28 | 29 | type: `integer` 30 | 31 | required: `true` 32 | 33 | ## Internal Mechanism 34 | 35 | - Messages are stored in a thread-safe queue using `RwLock` 36 | - A background timer periodically checks the interval condition to trigger window slides 37 | - When the total message count reaches the configured window_size, the buffer triggers message processing 38 | - After processing, the window slides forward by slide_size messages 39 | - Messages are batched and concatenated during processing for better performance 40 | - Implements proper backpressure handling to prevent memory overflow 41 | 42 | ## Examples 43 | 44 | ```yaml 45 | buffer: 46 | type: "sliding_window" 47 | window_size: 100 # Process after 100 messages 48 | interval: "1s" # Or process after 1 second 49 | slide_size: 10 # Slide forward by 10 messages 50 | ``` 51 | 52 | This example configures a sliding window buffer that will process messages either when: 53 | - The total number of buffered messages reaches 100 54 | - 1 second has elapsed since the last window slide 55 | 56 | The buffer then slides forward by 10 messages for the next window. -------------------------------------------------------------------------------- /docs/docs/components/1-buffers/tumbling_window.md: -------------------------------------------------------------------------------- 1 | # Tumbling Window 2 | 3 | The Tumbling Window buffer component provides a fixed-size, non-overlapping windowing mechanism for processing message batches. It implements a tumbling window algorithm with configurable interval settings. 4 | 5 | ## Configuration 6 | 7 | ### **interval** 8 | 9 | The duration between window slides. This determines how often the window will process messages regardless of message count. 10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 16 | 17 | ## Internal Mechanism 18 | 19 | - Messages are stored in a thread-safe queue using `RwLock` 20 | - A background timer periodically triggers window processing based on the interval 21 | - When the timer fires, all buffered messages are processed as a batch 22 | - Messages are batched and concatenated during processing for better performance 23 | - Implements proper backpressure handling to prevent memory overflow 24 | - Uses Tokio's async runtime for efficient timer handling 25 | 26 | ## Examples 27 | 28 | ```yaml 29 | buffer: 30 | type: "tumbling_window" 31 | interval: "1s" # Process every 1 second 32 | ``` 33 | 34 | This example configures a tumbling window buffer that will process messages every 1 second, regardless of message count. -------------------------------------------------------------------------------- /docs/docs/components/2-processors/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Processors", 3 | "link": { 4 | "title": "Processors", 5 | "type": "generated-index", 6 | "description": "A processor is a function applied to messages passed through a pipeline. 
" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/docs/components/2-processors/batch.md: -------------------------------------------------------------------------------- 1 | # Batch 2 | 3 | The Batch processor component allows you to accumulate messages into batches before they are sent to the next processor in the pipeline. 4 | 5 | ## Configuration 6 | 7 | ### **batch_size** 8 | 9 | The number of messages to accumulate before creating a batch. 10 | 11 | type: `integer` 12 | 13 | default: `1` 14 | 15 | ## Examples 16 | 17 | ```yaml 18 | - processor: 19 | type: "batch" 20 | batch_size: 1000 21 | ``` -------------------------------------------------------------------------------- /docs/docs/components/2-processors/json.md: -------------------------------------------------------------------------------- 1 | # JSON 2 | 3 | The JSON processor component provides two processors for converting between JSON and Arrow formats. 4 | 5 | ## JSON to Arrow 6 | 7 | The `json_to_arrow` processor converts JSON objects to Arrow format. 8 | 9 | ### Configuration 10 | 11 | #### **value_field** 12 | 13 | Specifies the JSON field name to process. 14 | 15 | type: `string` 16 | 17 | optional: `true` 18 | 19 | #### **fields_to_include** 20 | 21 | Specifies a set of field names to include in the output. If not specified, all fields will be included. 22 | 23 | type: `array[string]` 24 | 25 | optional: `true` 26 | 27 | ### Example 28 | 29 | ```yaml 30 | - processor: 31 | type: "json_to_arrow" 32 | value_field: "data" 33 | fields_to_include: 34 | - "field1" 35 | - "field2" 36 | ``` 37 | 38 | ## Arrow to JSON 39 | 40 | The `arrow_to_json` processor converts Arrow format data to JSON format. 41 | 42 | ### Configuration 43 | 44 | #### **fields_to_include** 45 | 46 | Specifies a set of field names to include in the output. If not specified, all fields will be included. 47 | 48 | type: `array[string]` 49 | 50 | optional: `true` 51 | 52 | ### Example 53 | 54 | ```yaml 55 | - processor: 56 | type: "arrow_to_json" 57 | fields_to_include: 58 | - "field1" 59 | - "field2" 60 | ``` 61 | 62 | ## Data Type Mapping 63 | 64 | The processor supports the following JSON to Arrow data type conversions: 65 | 66 | | JSON Type | Arrow Type | Notes | 67 | |-----------|------------|--------| 68 | | null | Null | | 69 | | boolean | Boolean | | 70 | | number (integer) | Int64 | For integer values | 71 | | number (unsigned) | UInt64 | For unsigned integer values | 72 | | number (float) | Float64 | For floating point values | 73 | | string | Utf8 | | 74 | | array | Utf8 | Serialized as JSON string | 75 | | object | Utf8 | Serialized as JSON string | -------------------------------------------------------------------------------- /docs/docs/components/2-processors/protobuf.md: -------------------------------------------------------------------------------- 1 | # Protobuf 2 | 3 | The Protobuf processor component provides functionality for converting between Protobuf and Arrow formats. 4 | 5 | ## Configuration 6 | 7 | ### **type** 8 | 9 | The type of Protobuf conversion to perform. 10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | Available options: 16 | - `arrow_to_protobuf`: Convert Arrow format to Protobuf data 17 | - `protobuf_to_arrow`: Convert Protobuf data to Arrow format 18 | 19 | ### **proto_inputs** 20 | 21 | A list of directories containing Protobuf message type descriptor files (*.proto). 
22 | 23 | type: `array[string]` 24 | 25 | required: `true` 26 | 27 | ### **proto_includes** 28 | 29 | A list of directories to search for imported Protobuf files. 30 | 31 | type: `array[string]` 32 | 33 | optional: `true` 34 | 35 | default: Same as proto_inputs 36 | 37 | ### **message_type** 38 | 39 | The Protobuf message type name (e.g. "example.MyMessage"). 40 | 41 | type: `string` 42 | 43 | required: `true` 44 | 45 | ### **value_field** 46 | 47 | Specifies the field name containing the Protobuf binary data when converting from Protobuf to Arrow. 48 | 49 | type: `string` 50 | 51 | optional: `true` 52 | 53 | ### **fields_to_include** 54 | 55 | Specifies a set of field names to include when converting from Arrow to Protobuf. If not specified, all fields will be included. 56 | 57 | type: `array[string]` 58 | 59 | optional: `true` 60 | 61 | ## Data Type Mapping 62 | 63 | The processor supports the following Protobuf to Arrow data type conversions: 64 | 65 | | Protobuf Type | Arrow Type | Notes | 66 | |--------------|------------|--------| 67 | | bool | Boolean | | 68 | | int32, sint32, sfixed32 | Int32 | | 69 | | int64, sint64, sfixed64 | Int64 | | 70 | | uint32, fixed32 | UInt32 | | 71 | | uint64, fixed64 | UInt64 | | 72 | | float | Float32 | | 73 | | double | Float64 | | 74 | | string | Utf8 | | 75 | | bytes | Binary | | 76 | | enum | Int32 | Stored as enum number | 77 | 78 | ## Examples 79 | 80 | ```yaml 81 | # Convert Arrow to Protobuf 82 | - processor: 83 | type: "arrow_to_protobuf" 84 | proto_inputs: ["./protos/"] 85 | message_type: "example.MyMessage" 86 | fields_to_include: 87 | - "field1" 88 | - "field2" 89 | 90 | # Convert Protobuf to Arrow 91 | - processor: 92 | type: "protobuf_to_arrow" 93 | proto_inputs: ["./protos/"] 94 | proto_includes: ["./includes/"] 95 | message_type: "example.MyMessage" 96 | value_field: "data" 97 | ``` -------------------------------------------------------------------------------- /docs/docs/components/2-processors/sql.md: -------------------------------------------------------------------------------- 1 | # SQL 2 | 3 | The SQL processor component allows you to process data using SQL queries. It uses DataFusion as the query engine to execute SQL statements on the data. 4 | 5 | Reference to [SQL](../../category/sql). 6 | 7 | ## Configuration 8 | 9 | ### **query** 10 | 11 | The SQL query statement to execute on the data. 12 | 13 | type: `string` 14 | 15 | ### **table_name** 16 | 17 | The table name to use in SQL queries. This is the name that will be used to reference the data in your SQL queries. 18 | 19 | type: `string` 20 | 21 | default: `flow` 22 | 23 | ### **ballista (experimental)** 24 | 25 | Optional configuration for distributed computing using Ballista. When configured, SQL queries will be executed in a distributed manner. 26 | 27 | type: `object` 28 | 29 | required: `false` 30 | 31 | properties: 32 | - `remote_url`: Ballista server URL (e.g., "df://localhost:50050") 33 | 34 | type: `string` 35 | 36 | required: `true` 37 | 38 | 39 | ## Examples 40 | 41 | ```yaml 42 | - processor: 43 | type: "sql" 44 | query: "SELECT id, name, age FROM users WHERE age > 18" 45 | table_name: "users" 46 | ``` 47 | -------------------------------------------------------------------------------- /docs/docs/components/2-processors/vrl.md: -------------------------------------------------------------------------------- 1 | # VRL 2 | 3 | The VRL (Vector Remap Language) processor component allows you to process and transform data using the VRL language. 
It supports rich data type conversion and processing operations, enabling you to flexibly modify and transform messages in the data stream. 4 | 5 | ## Configuration 6 | 7 | ### **statement** 8 | 9 | VRL statement used to perform data transformation operations. 10 | 11 | type: `string` 12 | 13 | ## Supported Data Types 14 | 15 | The VRL processor supports the conversion of the following data types: 16 | 17 | - **String** 18 | - **Integer**: Supports Int8, Int16, Int32, Int64 19 | - **Float**: Supports Float32, Float64 20 | - **Boolean** 21 | - **Binary** 22 | - **Timestamp** 23 | - **Null** 24 | 25 | ## Examples 26 | 27 | ```yaml 28 | - processor: 29 | type: "vrl" 30 | statement: ".v2, err = .value * 2; ." 31 | ``` 32 | 33 | In this example, the VRL processor multiplies the `value` field in the input message by 2 and stores the result in a new `v2` field. 34 | 35 | ### Complete Pipeline Example 36 | 37 | ```yaml 38 | streams: 39 | - input: 40 | type: "generate" 41 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 42 | interval: 1s 43 | batch_size: 1 44 | 45 | pipeline: 46 | thread_num: 4 47 | processors: 48 | - type: "json_to_arrow" 49 | - type: "vrl" 50 | statement: ".v2, err = .value * 2; ." 51 | - type: "arrow_to_json" 52 | 53 | output: 54 | type: "stdout" 55 | ``` 56 | 57 | This example demonstrates a complete pipeline where: 58 | 1. First, it generates a JSON message containing timestamp, value, and sensor information 59 | 2. Converts the JSON to Arrow format 60 | 3. Uses the VRL processor to transform the data 61 | 4. Converts the processed data back to JSON format 62 | 5. Finally outputs to standard output -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Outputs", 3 | "link": { 4 | "title": "Outputs", 5 | "type": "generated-index", 6 | "description": "The output is the receiving point where we want to send consumed data after applying an optional processor array. " 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/drop.md: -------------------------------------------------------------------------------- 1 | # Drop 2 | 3 | The Drop output component discards all messages that it receives. 4 | 5 | ## Configuration 6 | 7 | This component has no configuration options. 8 | 9 | ## Examples 10 | 11 | ```yaml 12 | - output: 13 | type: "drop" 14 | ``` -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/http.md: -------------------------------------------------------------------------------- 1 | # HTTP 2 | 3 | The HTTP output component sends messages to an HTTP server. 4 | 5 | ## Configuration 6 | 7 | ### **url** 8 | 9 | The URL to send requests to. 10 | 11 | type: `string` 12 | 13 | ### **method** 14 | 15 | The HTTP method to use. 16 | 17 | type: `string` 18 | 19 | default: `"POST"` 20 | 21 | Supported methods: `GET`, `POST`, `PUT`, `DELETE`, `PATCH` 22 | 23 | ### **timeout_ms** 24 | 25 | The maximum time to wait for a response in milliseconds. 26 | 27 | type: `integer` 28 | 29 | default: `5000` 30 | 31 | ### **retry_count** 32 | 33 | Number of retry attempts for failed requests. 34 | 35 | type: `integer` 36 | 37 | default: `0` 38 | 39 | ### **headers** 40 | 41 | A map of headers to add to the request. 
42 | 43 | type: `object` 44 | 45 | default: `{}` 46 | 47 | ### **body_field** 48 | 49 | Specifies which field from the message to use as the request body. 50 | 51 | type: `string` 52 | 53 | default: `"value"` 54 | 55 | ### **auth** 56 | 57 | Authentication configuration. 58 | 59 | type: `object` 60 | 61 | properties: 62 | - **type**: Authentication type (`basic` or `bearer`) 63 | - **username**: Username for basic authentication 64 | - **password**: Password for basic authentication 65 | - **token**: Token for bearer authentication 66 | 67 | ## Examples 68 | 69 | ### Basic HTTP Request 70 | 71 | ```yaml 72 | - output: 73 | type: "http" 74 | url: "http://example.com/post/data" 75 | method: "POST" 76 | timeout_ms: 5000 77 | retry_count: 3 78 | headers: 79 | Content-Type: "application/json" 80 | ``` 81 | 82 | ### With Basic Authentication 83 | 84 | ```yaml 85 | - output: 86 | type: "http" 87 | url: "http://example.com/data" 88 | method: "POST" 89 | auth: 90 | type: "basic" 91 | username: "user" 92 | password: "pass" 93 | ``` 94 | 95 | ### With Bearer Token 96 | 97 | ```yaml 98 | - output: 99 | type: "http" 100 | url: "http://example.com/api/data" 101 | method: "POST" 102 | auth: 103 | type: "bearer" 104 | token: "your-token" 105 | headers: 106 | Content-Type: "application/json" 107 | ``` -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | The Kafka output component writes messages to a Kafka topic. 4 | 5 | ## Configuration 6 | 7 | ### **brokers** 8 | 9 | A list of broker addresses to connect to. 10 | 11 | type: `array` of `string` 12 | 13 | ### **topic** 14 | 15 | The topic to write messages to. Supports both static values and SQL expressions. 16 | 17 | type: `object` 18 | 19 | One of: 20 | - `type: "value"` with `value: string` - Static topic name 21 | - `type: "expr"` with `expr: string` - SQL expression to evaluate topic name 22 | 23 | ### **key** 24 | 25 | The key to set for each message (optional). Supports both static values and SQL expressions. 26 | 27 | type: `object` 28 | 29 | One of: 30 | - `type: "value"` with `value: string` - Static key value 31 | - `type: "expr"` with `expr: string` - SQL expression to evaluate key 32 | 33 | ### **client_id** 34 | 35 | The client ID to use when connecting to Kafka. 36 | 37 | type: `string` 38 | 39 | ### **compression** 40 | 41 | The compression type to use for messages. 42 | 43 | type: `string` 44 | 45 | One of: 46 | - `none` - No compression 47 | - `gzip` - Gzip compression 48 | - `snappy` - Snappy compression 49 | - `lz4` - LZ4 compression 50 | 51 | ### **acks** 52 | 53 | The number of acknowledgments the producer requires the leader to have received before considering a request complete. 54 | 55 | type: `string` 56 | 57 | One of: 58 | - `0` - No acknowledgment 59 | - `1` - Leader acknowledgment only 60 | - `all` - All replicas acknowledgment 61 | 62 | ### **value_field** 63 | 64 | The field to use as the message value. If not specified, uses the default binary value field. 
65 | 66 | type: `string` 67 | 68 | ## Examples 69 | 70 | ```yaml 71 | output: 72 | type: "kafka" 73 | brokers: 74 | - "localhost:9092" 75 | topic: 76 | type: "expr" 77 | expr: "concat('1','x')" 78 | key: 79 | type: "value" 80 | value: "my-key" 81 | client_id: "my-client" 82 | compression: "gzip" 83 | acks: "all" 84 | value_field: "message" 85 | ``` 86 | 87 | ```yaml 88 | output: 89 | type: "kafka" 90 | brokers: 91 | - "localhost:9092" 92 | topic: 93 | type: "value" 94 | value: "my-topic" 95 | compression: "snappy" 96 | acks: "1" 97 | ``` -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/mqtt.md: -------------------------------------------------------------------------------- 1 | # MQTT 2 | 3 | The MQTT output component publishes messages to an MQTT broker. 4 | 5 | ## Configuration 6 | 7 | ### **host** 8 | 9 | MQTT broker address. 10 | 11 | type: `string` 12 | 13 | ### **port** 14 | 15 | MQTT broker port. 16 | 17 | type: `integer` 18 | 19 | ### **client_id** 20 | 21 | The client ID to use when connecting to the broker. 22 | 23 | type: `string` 24 | 25 | ### **username** 26 | 27 | Username for authentication (optional). 28 | 29 | type: `string` 30 | 31 | ### **password** 32 | 33 | Password for authentication (optional). 34 | 35 | type: `string` 36 | 37 | ### **topic** 38 | 39 | The topic to publish messages to. Supports both static values and SQL expressions. 40 | 41 | type: `object` 42 | 43 | One of: 44 | - `type: "value"` with `value: string` - Static topic name 45 | - `type: "expr"` with `expr: string` - SQL expression to evaluate topic name 46 | 47 | ### **qos** 48 | 49 | The Quality of Service level to use. 50 | 51 | type: `integer` 52 | 53 | One of: 54 | - `0` - At most once delivery 55 | - `1` - At least once delivery 56 | - `2` - Exactly once delivery 57 | 58 | default: `1` 59 | 60 | ### **clean_session** 61 | 62 | Whether to use clean session. 63 | 64 | type: `boolean` 65 | 66 | default: `true` 67 | 68 | ### **keep_alive** 69 | 70 | Keep alive interval in seconds. 71 | 72 | type: `integer` 73 | 74 | default: `60` 75 | 76 | ### **retain** 77 | 78 | Whether to set the retain flag on published messages. 79 | 80 | type: `boolean` 81 | 82 | default: `false` 83 | 84 | ### **value_field** 85 | 86 | The field to use as the message value. If not specified, uses the default binary value field. 87 | 88 | type: `string` 89 | 90 | ## Examples 91 | 92 | ```yaml 93 | output: 94 | type: "mqtt" 95 | host: "localhost" 96 | port: 1883 97 | client_id: "my-client" 98 | username: "user" 99 | password: "pass" 100 | topic: 101 | type: "value" 102 | value: "my-topic" 103 | qos: 2 104 | clean_session: true 105 | keep_alive: 60 106 | retain: true 107 | value_field: "message" 108 | ``` 109 | 110 | ```yaml 111 | output: 112 | type: "mqtt" 113 | host: "localhost" 114 | port: 1883 115 | topic: 116 | type: "expr" 117 | expr: "concat('sensor/', id)" 118 | qos: 1 119 | ``` -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/nats.md: -------------------------------------------------------------------------------- 1 | # NATS 2 | 3 | The NATS output component writes messages to a NATS subject. 4 | 5 | ## Configuration 6 | 7 | ### **url** 8 | 9 | The NATS server URL to connect to. 10 | 11 | type: `string` 12 | 13 | ### **mode** 14 | 15 | The NATS operation mode. 
16 | 17 | type: `object` 18 | 19 | One of: 20 | - `type: "regular"` with: 21 | - `subject: object` - The subject to publish to, with: 22 | - `type: "value"` with `value: string` - Static subject name 23 | - `type: "expr"` with `expr: string` - SQL expression to evaluate subject 24 | - `type: "jetstream"` with: 25 | - `subject: object` - The subject to publish to, with: 26 | - `type: "value"` with `value: string` - Static subject name 27 | - `type: "expr"` with `expr: string` - SQL expression to evaluate subject 28 | 29 | ### **auth** 30 | 31 | Authentication configuration (optional). 32 | 33 | type: `object` 34 | 35 | Fields: 36 | - `username: string` - Username for authentication (optional) 37 | - `password: string` - Password for authentication (optional) 38 | - `token: string` - Authentication token (optional) 39 | 40 | ### **value_field** 41 | 42 | The field to use as the message value. If not specified, uses the default binary value field. 43 | 44 | type: `string` 45 | 46 | ## Examples 47 | 48 | ```yaml 49 | output: 50 | type: "nats" 51 | url: "nats://localhost:4222" 52 | mode: 53 | type: "regular" 54 | subject: 55 | type: "expr" 56 | expr: "concat('orders.', id)" 57 | auth: 58 | username: "user" 59 | password: "pass" 60 | value_field: "message" 61 | ``` 62 | 63 | ```yaml 64 | output: 65 | type: "nats" 66 | url: "nats://localhost:4222" 67 | mode: 68 | type: "jetstream" 69 | subject: 70 | type: "value" 71 | value: "orders.new" 72 | auth: 73 | token: "secret-token" 74 | ``` -------------------------------------------------------------------------------- /docs/docs/components/3-outputs/stdout.md: -------------------------------------------------------------------------------- 1 | # Stdout 2 | 3 | The Stdout output component writes messages to the standard output stream. 4 | 5 | ## Configuration 6 | 7 | ### **append_newline** 8 | 9 | Whether to add a line break after each message (optional). 
10 | 11 | type: `bool` 12 | 13 | default: `true` 14 | 15 | ## Examples 16 | 17 | ```yaml 18 | - output: 19 | type: "stdout" 20 | append_newline: true 21 | ``` -------------------------------------------------------------------------------- /docs/docs/components/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Components", 3 | "position": 1, 4 | "link": { 5 | "title": "Components", 6 | "type": "generated-index", 7 | "description": "Components " 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/docs/deploy/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Deploy", 3 | "position": 4, 4 | "link": { 5 | "title": "Deploy", 6 | "type": "generated-index", 7 | "description": "Deploy ArkFlow to the cloud" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/docs/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/docs/sql/8-special_functions.md: -------------------------------------------------------------------------------- 1 | # Special Functions 2 | 3 | ## Expansion Functions 4 | 5 | - [unnest](#unnest) 6 | - [unnest(struct)](#unnest-struct) 7 | 8 | ### `unnest` 9 | 10 | Expands an array or map into rows. 11 | 12 | #### Arguments 13 | 14 | - **array**: Array expression to unnest. 15 | Can be a constant, column, or function, and any combination of array operators. 16 | 17 | #### Examples 18 | 19 | ```sql 20 | > select unnest(make_array(1, 2, 3, 4, 5)) as unnested; 21 | +----------+ 22 | | unnested | 23 | +----------+ 24 | | 1 | 25 | | 2 | 26 | | 3 | 27 | | 4 | 28 | | 5 | 29 | +----------+ 30 | ``` 31 | 32 | ```sql 33 | > select unnest(range(0, 10)) as unnested_range; 34 | +----------------+ 35 | | unnested_range | 36 | +----------------+ 37 | | 0 | 38 | | 1 | 39 | | 2 | 40 | | 3 | 41 | | 4 | 42 | | 5 | 43 | | 6 | 44 | | 7 | 45 | | 8 | 46 | | 9 | 47 | +----------------+ 48 | ``` 49 | 50 | ### `unnest (struct)` 51 | 52 | Expand a struct fields into individual columns. 53 | 54 | #### Arguments 55 | 56 | - **struct**: Object expression to unnest. 57 | Can be a constant, column, or function, and any combination of object operators. 
58 | 59 | #### Examples 60 | 61 | ```sql 62 | > create table foo as values ({a: 5, b: 'a string'}), ({a:6, b: 'another string'}); 63 | 64 | > create view foov as select column1 as struct_column from foo; 65 | 66 | > select * from foov; 67 | +---------------------------+ 68 | | struct_column | 69 | +---------------------------+ 70 | | {a: 5, b: a string} | 71 | | {a: 6, b: another string} | 72 | +---------------------------+ 73 | 74 | > select unnest(struct_column) from foov; 75 | +------------------------------------------+------------------------------------------+ 76 | | unnest_placeholder(foov.struct_column).a | unnest_placeholder(foov.struct_column).b | 77 | +------------------------------------------+------------------------------------------+ 78 | | 5 | a string | 79 | | 6 | another string | 80 | +------------------------------------------+------------------------------------------+ 81 | ``` -------------------------------------------------------------------------------- /docs/docs/sql/9-udf.md: -------------------------------------------------------------------------------- 1 | # User Defined Functions (UDFs) 2 | 3 | User Defined Functions (UDFs) allow you to extend the functionality of SQL by defining custom functions in Rust and then using them within your SQL queries. 4 | 5 | This project supports three types of UDFs: 6 | 7 | 1. **Scalar UDF**: Operates on a single row and returns a single value for each row. For example, a function that converts a string to uppercase. 8 | 2. **Aggregate UDF**: Operates on a group of rows and returns a single aggregate value. For example, calculating a custom average for a set of values. 9 | 3. **Window UDF**: Operates on a window (a set of rows) related to the current row. For example, calculating a moving average within a window. 10 | 11 | ## Registering UDFs 12 | 13 | To use a custom UDF, you first need to register it with the system. Registration is done by calling the `register` function in the corresponding module: 14 | 15 | - **Scalar UDF**: Use `arkflow_plugin::processor::udf::scalar_udf::register(udf: ScalarUDF)` 16 | - **Aggregate UDF**: Use `arkflow_plugin::processor::udf::aggregate_udf::register(udf: AggregateUDF)` 17 | - **Window UDF**: Use `arkflow_plugin::processor::udf::window_udf::register(udf: WindowUDF)` 18 | 19 | These `register` functions add your UDF to a global list. 20 | 21 | ```rust 22 | use datafusion::logical_expr::{ScalarUDF, AggregateUDF, WindowUDF}; 23 | use arkflow_plugin::processor::udf::{scalar_udf, aggregate_udf, window_udf}; 24 | 25 | // Example: Registering a scalar UDF 26 | // let my_scalar_udf = ScalarUDF::new(...); 27 | // scalar_udf::register(my_scalar_udf); 28 | 29 | // Example: Registering an aggregate UDF 30 | // let my_aggregate_udf = AggregateUDF::new(...); 31 | // aggregate_udf::register(my_aggregate_udf); 32 | 33 | // Example: Registering a window UDF 34 | // let my_window_udf = WindowUDF::new(...); 35 | // window_udf::register(my_window_udf); 36 | ``` 37 | 38 | ## Initialization 39 | 40 | Registered UDFs are not immediately available in SQL queries. They are automatically added to DataFusion's `FunctionRegistry` during the processor's execution context initialization via an internal call to the `arkflow_plugin::processor::udf::init` function. This `init` function iterates through all registered scalar, aggregate, and window UDFs and registers them with the current DataFusion context. 
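As a minimal sketch of the overall flow (assuming the module paths shown above; `build_my_udf` is a hypothetical placeholder, not an ArkFlow API, standing in for however you construct the `ScalarUDF`, e.g. with DataFusion's UDF builders):

```rust
use arkflow_plugin::processor::udf::scalar_udf;
use datafusion::logical_expr::ScalarUDF;

// Hypothetical helper: construct your ScalarUDF here (for example with
// DataFusion's UDF builder APIs). This is a placeholder, not an ArkFlow API.
fn build_my_udf() -> ScalarUDF {
    unimplemented!("build the ScalarUDF")
}

fn register_custom_udfs() -> Result<(), arkflow_core::Error> {
    // Register during startup, before any SQL processor context is initialized.
    // `register` returns an error if a UDF with the same name is already registered.
    scalar_udf::register(build_my_udf())
}
```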
41 | 42 | Once initialization is complete, you can use your registered UDFs in SQL queries just like built-in functions. -------------------------------------------------------------------------------- /docs/docs/sql/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "SQL", 3 | "position": 2, 4 | "link": { 5 | "title": "SQL", 6 | "type": "generated-index", 7 | "description": "SQL query engine documentation, including comprehensive coverage of data types, operators, query syntax, subqueries, aggregate functions, window functions, and more" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "arkflow-doc", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids", 15 | "typecheck": "tsc" 16 | }, 17 | "dependencies": { 18 | "@docusaurus/core": "3.7.0", 19 | "@docusaurus/preset-classic": "3.7.0", 20 | "@easyops-cn/docusaurus-search-local": "^0.49.2", 21 | "@mdx-js/react": "^3.0.0", 22 | "clsx": "^2.0.0", 23 | "prism-react-renderer": "^2.3.0", 24 | "react": "^19.0.0", 25 | "react-dom": "^19.0.0" 26 | }, 27 | "devDependencies": { 28 | "@docusaurus/module-type-aliases": "3.7.0", 29 | "@docusaurus/tsconfig": "3.7.0", 30 | "@docusaurus/types": "3.7.0", 31 | "typescript": "~5.6.2" 32 | }, 33 | "browserslist": { 34 | "production": [ 35 | ">0.5%", 36 | "not dead", 37 | "not op_mini all" 38 | ], 39 | "development": [ 40 | "last 3 chrome version", 41 | "last 3 firefox version", 42 | "last 5 safari version" 43 | ] 44 | }, 45 | "engines": { 46 | "node": ">=18.0" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /docs/sidebars.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | import type {SidebarsConfig} from '@docusaurus/plugin-content-docs'; 16 | 17 | // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...) 18 | 19 | /** 20 | * Creating a sidebar enables you to: 21 | - create an ordered group of docs 22 | - render a sidebar for each doc of that group 23 | - provide next/previous navigation 24 | 25 | The sidebars can be generated from the filesystem, or explicitly defined here. 26 | 27 | Create as many sidebars as you want. 
28 | */ 29 | const sidebars: SidebarsConfig = { 30 | // By default, Docusaurus generates a sidebar from the docs folder structure 31 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 32 | 33 | // But you can create a sidebar manually 34 | /* 35 | tutorialSidebar: [ 36 | 'intro', 37 | 'hello', 38 | { 39 | type: 'category', 40 | label: 'Tutorial', 41 | items: ['tutorial-basics/create-a-document'], 42 | }, 43 | ], 44 | */ 45 | }; 46 | 47 | export default sidebars; 48 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures/index.tsx: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | import type {ReactNode} from 'react'; 16 | import clsx from 'clsx'; 17 | import Heading from '@theme/Heading'; 18 | import styles from './styles.module.css'; 19 | 20 | type FeatureItem = { 21 | title: string; 22 | Svg: React.ComponentType>; 23 | description: ReactNode; 24 | }; 25 | 26 | const FeatureList: FeatureItem[] = [ 27 | { 28 | title: 'High Performance', 29 | Svg: require('@site/static/img/home-1.svg').default, 30 | description: ( 31 | <> 32 | Built on Rust and Tokio async runtime, offering excellent performance and low latency. 33 | 34 | ), 35 | }, 36 | { 37 | title: 'Multiple Data Sources', 38 | Svg: require('@site/static/img/home-2.svg').default, 39 | description: ( 40 | <> 41 | Support for Kafka, MQTT, HTTP, files, and other input/output sources. 42 | 43 | ), 44 | }, 45 | { 46 | title: 'Powerful Processing Capabilities', 47 | Svg: require('@site/static/img/home-3.svg').default, 48 | description: ( 49 | <> 50 | Built-in SQL queries, JSON processing, Protobuf encoding/decoding, 51 | batch, VRL processing, and other processors. 52 | 53 | ), 54 | }, 55 | { 56 | title: 'Extensible', 57 | Svg: require('@site/static/img/home-4.svg').default, 58 | description: ( 59 | <> 60 | Modular design, easy to extend with new input, output, and processor components. 61 | 62 | ), 63 | }, 64 | ]; 65 | 66 | function Feature({title, Svg, description}: FeatureItem) { 67 | return ( 68 |
69 | <div className="text--center"> 70 | <Svg className={styles.featureSvg} role="img" /> 71 | </div> 72 | <div className="text--center padding-horiz--md"> 73 | <Heading as="h3">{title}</Heading> 74 | <p>{description}</p> 75 | </div> 76 | </div>
77 | ); 78 | } 79 | 80 | export default function HomepageFeatures(): ReactNode { 81 | return ( 82 | <section className={styles.features}> 83 | <div className="container"> 84 | <div className="row"> 85 | {FeatureList.map((props, idx) => ( 86 | <Feature key={idx} {...props} /> 87 | ))} 88 | </div> 89 | </div> 90 | </section>
91 | ); 92 | } 93 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | .features { 16 | display: flex; 17 | align-items: center; 18 | padding: 2rem 0; 19 | width: 100%; 20 | } 21 | 22 | .featureSvg { 23 | height: 200px; 24 | width: 200px; 25 | } 26 | -------------------------------------------------------------------------------- /docs/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | /** 16 | * Any CSS included here will be global. The classic template 17 | * bundles Infima by default. Infima is a CSS framework designed to 18 | * work well for content-centric websites. 19 | */ 20 | 21 | /* You can override the default Infima variables here. */ 22 | :root { 23 | --ifm-color-primary: #2563eb; 24 | --ifm-color-primary-dark: #3b82f6; 25 | --ifm-color-primary-darker: #60a5fa; 26 | --ifm-color-primary-darkest: #93c5fd; 27 | --ifm-color-primary-light: #33925d; 28 | --ifm-color-primary-lighter: #359962; 29 | --ifm-color-primary-lightest: #3cad6e; 30 | --ifm-code-font-size: 95%; 31 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 32 | } 33 | 34 | /* For readability concerns, you should choose a lighter palette in dark mode. */ 35 | [data-theme='dark'] { 36 | --ifm-color-primary: #2563eb; 37 | --ifm-color-primary-dark: #3b82f6; 38 | --ifm-color-primary-darker: #60a5fa; 39 | --ifm-color-primary-darkest: #93c5fd; 40 | --ifm-color-primary-light: #29d5b0; 41 | --ifm-color-primary-lighter: #32d8b4; 42 | --ifm-color-primary-lightest: #4fddbf; 43 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 44 | } 45 | -------------------------------------------------------------------------------- /docs/src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | /** 16 | * CSS files with the .module.css suffix will be treated as CSS modules 17 | * and scoped locally. 18 | */ 19 | 20 | .heroBanner { 21 | padding: 4rem 0; 22 | text-align: center; 23 | position: relative; 24 | overflow: hidden; 25 | } 26 | 27 | @media screen and (max-width: 996px) { 28 | .heroBanner { 29 | padding: 2rem; 30 | } 31 | } 32 | 33 | .buttons { 34 | display: flex; 35 | align-items: center; 36 | justify-content: center; 37 | } 38 | -------------------------------------------------------------------------------- /docs/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | import type {ReactNode} from 'react'; 16 | import clsx from 'clsx'; 17 | import Link from '@docusaurus/Link'; 18 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; 19 | import Layout from '@theme/Layout'; 20 | import HomepageFeatures from '@site/src/components/HomepageFeatures'; 21 | import Heading from '@theme/Heading'; 22 | 23 | import styles from './index.module.css'; 24 | 25 | function HomepageHeader() { 26 | const {siteConfig} = useDocusaurusContext(); 27 | return ( 28 |
29 | <div className="container"> 30 | <Heading as="h1" className="hero__title"> 31 | {siteConfig.title} 32 | </Heading> 33 | <p className="hero__subtitle">{siteConfig.tagline}</p> 34 | <div className={styles.buttons}> 35 | <Link 36 | className="button button--secondary button--lg" 37 | to="/docs/intro"> 38 | ArkFlow Tutorial - 5min ⏱️ 39 | </Link> 40 | </div> 41 | </div> 42 | </header>
43 | ); 44 | } 45 | 46 | export default function Home(): ReactNode { 47 | const {siteConfig} = useDocusaurusContext(); 48 | return ( 49 | 52 | 53 |
54 | <HomepageFeatures /> 55 | </main> 56 | </Layout>
57 | ); 58 | } 59 | -------------------------------------------------------------------------------- /docs/src/pages/markdown-page.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Markdown page example 3 | --- 4 | 5 | # Markdown page example 6 | 7 | You don't need React to write simple standalone pages. 8 | -------------------------------------------------------------------------------- /docs/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arkflow-rs/arkflow/db7c18ab41abcc1dae3692e013713db81981bb7c/docs/static/.nojekyll -------------------------------------------------------------------------------- /docs/static/img/docusaurus-social-card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arkflow-rs/arkflow/db7c18ab41abcc1dae3692e013713db81981bb7c/docs/static/img/docusaurus-social-card.jpg -------------------------------------------------------------------------------- /docs/static/img/docusaurus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arkflow-rs/arkflow/db7c18ab41abcc1dae3692e013713db81981bb7c/docs/static/img/docusaurus.png -------------------------------------------------------------------------------- /docs/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arkflow-rs/arkflow/db7c18ab41abcc1dae3692e013713db81981bb7c/docs/static/img/favicon.ico -------------------------------------------------------------------------------- /docs/static/img/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/static/img/logo-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/static/img/logo-cp.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/static/img/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/static/img/logo2.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Layer 1 4 | {/* Group and rotate all elements -90 
degrees around center (75,75) */} 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | // This file is not used in compilation. It is here just for a nice editor experience. 3 | "extends": "@docusaurus/tsconfig", 4 | "compilerOptions": { 5 | "baseUrl": "." 6 | }, 7 | "exclude": [".docusaurus", "build"] 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/0-intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | # Introduction 6 | 7 | ArkFlow is a high-performance Rust stream processing engine that provides powerful data stream processing capabilities, supporting various input/output sources and processors. 8 | 9 | ## Core Features 10 | 11 | - **High Performance**: Built on Rust and Tokio async runtime, delivering exceptional performance and low latency 12 | - **Multiple Data Sources**: Support for Kafka, MQTT, HTTP, files, and other input/output sources 13 | - **Powerful Processing**: Built-in SQL queries, JSON processing, Protobuf encoding/decoding, batch processing, and other processors 14 | - **Extensibility**: Modular design, easy to extend with new input, output, and processor components 15 | 16 | ## Installation 17 | 18 | ### Building from Source 19 | 20 | ```bash 21 | # Clone repository 22 | git clone https://github.com/arkflow-rs/arkflow.git 23 | cd arkflow 24 | 25 | # Build project 26 | cargo build --release 27 | 28 | # Run tests 29 | cargo test 30 | ``` 31 | 32 | ## Quick Start 33 | 34 | 1. Create a configuration file `config.yaml`: 35 | 36 | ```yaml 37 | logging: 38 | level: info 39 | streams: 40 | - input: 41 | type: "generate" 42 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 43 | interval: 1s 44 | batch_size: 10 45 | buffer: 46 | type: "memory" 47 | capacity: 10 48 | timeout: 10s 49 | pipeline: 50 | thread_num: 4 51 | processors: 52 | - type: "json_to_arrow" 53 | - type: "sql" 54 | query: "SELECT * FROM flow WHERE value >= 10" 55 | 56 | output: 57 | type: "stdout" 58 | ``` 59 | 60 | 2. Run ArkFlow: 61 | 62 | ```bash 63 | ./target/release/arkflow --config config.yaml 64 | ``` 65 | 66 | ## Configuration Guide 67 | 68 | ArkFlow uses YAML format configuration files and supports the following main configuration items: 69 | 70 | ### Top-level Configuration 71 | 72 | ```yaml 73 | logging: 74 | level: info # Log levels: debug, info, warn, error 75 | 76 | streams: # Stream definition list 77 | - input: # Input configuration 78 | # ... 79 | pipeline: # Pipeline configuration 80 | # ... 81 | output: # Output configuration 82 | # ... 83 | buffer: # Buffer configuration 84 | # ... 85 | ``` 86 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Inputs", 3 | "link": { 4 | "title": "Inputs", 5 | "type": "generated-index", 6 | "description": "Input components are responsible for consuming data from various sources such as Kafka, MQTT, HTTP, and Memory. Each input component has its own configuration options that can be customized according to your needs." 
7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/generate.md: -------------------------------------------------------------------------------- 1 | # Generate 2 | 3 | Generate is an input component that generates test data. 4 | 5 | ## Configuration 6 | 7 | ### **context** 8 | 9 | The context is a JSON object that will be used to generate the data. The JSON object will be serialized to bytes and sent as message content. 10 | 11 | type: `string` 12 | 13 | optional: `true` 14 | 15 | ### **count** 16 | 17 | The total number of data points to generate. If not specified, the generator will run indefinitely until manually stopped. 18 | 19 | type: `integer` 20 | 21 | optional: `true` 22 | 23 | ### **interval** 24 | 25 | The interval is the time between each data point. 26 | 27 | type: `string` 28 | 29 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 30 | 31 | optional: `false` 32 | 33 | ### **batch_size** 34 | 35 | The batch size is the number of data points to generate at each interval. If the remaining count is less than batch_size, only the remaining messages will be sent. 36 | 37 | type: `integer` 38 | 39 | optional: `false` 40 | 41 | ## Examples 42 | 43 | ```yaml 44 | - input: 45 | type: "generate" 46 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 47 | interval: 1ms 48 | batch_size: 1000 49 | count: 10000 # Optional: generate 10000 messages in total 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/http.md: -------------------------------------------------------------------------------- 1 | # HTTP 2 | 3 | The HTTP input component receives data from HTTP endpoints. 4 | 5 | ## Configuration 6 | 7 | ### **address** 8 | 9 | Listening address for the HTTP server. 10 | 11 | type: `string` 12 | 13 | ### **path** 14 | 15 | The endpoint path to receive data. 16 | 17 | type: `string` 18 | 19 | ### **cors_enabled** 20 | 21 | Whether to enable CORS (Cross-Origin Resource Sharing). 22 | 23 | type: `boolean` 24 | 25 | default: `false` 26 | 27 | ### **auth** 28 | 29 | Authentication configuration. 30 | 31 | type: `object` 32 | 33 | properties: 34 | - **type**: Authentication type (`basic` or `bearer`) 35 | - **username**: Username for basic authentication 36 | - **password**: Password for basic authentication 37 | - **token**: Token for bearer authentication 38 | 39 | ## Examples 40 | 41 | ### Basic HTTP Server 42 | 43 | ```yaml 44 | - input: 45 | type: "http" 46 | address: "0.0.0.0:8080" 47 | path: "/data" 48 | cors_enabled: true 49 | ``` 50 | 51 | ### With Basic Authentication 52 | 53 | ```yaml 54 | - input: 55 | type: "http" 56 | address: "0.0.0.0:8080" 57 | path: "/data" 58 | auth: 59 | type: "basic" 60 | username: "user" 61 | password: "pass" 62 | ``` 63 | 64 | ### With Bearer Token Authentication 65 | 66 | ```yaml 67 | - input: 68 | type: "http" 69 | address: "0.0.0.0:8080" 70 | path: "/data" 71 | auth: 72 | type: "bearer" 73 | token: "your-token" 74 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | The Kafka input component consumes messages from a Kafka topic. It provides reliable message consumption with consumer group support and configurable offset management. 
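For instance, a minimal consumer that joins a consumer group might look like the sketch below; the full option reference and complete examples follow further down this page.

```yaml
- input:
    type: kafka
    brokers:
      - localhost:9092          # at least one reachable broker
    topics:
      - my_topic                # topics must already exist in the cluster
    consumer_group: my_group    # consumers sharing this group ID split the work
```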
4 | 5 | ## Configuration 6 | 7 | ### **brokers** 8 | 9 | List of Kafka server addresses. 10 | 11 | - Format: `["host1:port1", "host2:port2"]` 12 | - At least one broker address must be specified 13 | - Multiple brokers can be specified for high availability 14 | 15 | type: `array` of `string` 16 | 17 | optional: `false` 18 | 19 | ### **topics** 20 | 21 | Subscribed to topics. 22 | 23 | - Format: `["topic1", "topic2"]` 24 | - Multiple topics can be subscribed 25 | - Topics must exist in the Kafka cluster 26 | - The consumer will receive messages from all specified topics 27 | 28 | type: `array` of `string` 29 | 30 | optional: `false` 31 | 32 | ### **consumer_group** 33 | 34 | Consumer group ID. 35 | 36 | - Consumers within the same consumer group will share message consumption 37 | - Different consumer groups will independently consume the same messages 38 | - It is recommended to set a unique consumer group ID for each application 39 | - Used for distributed message processing and load balancing 40 | 41 | type: `string` 42 | 43 | optional: `false` 44 | 45 | ### **client_id** 46 | 47 | Client ID (optional). 48 | 49 | - If not specified, the system will automatically generate a random ID 50 | - It is recommended to set an explicit client ID for monitoring in production environments 51 | - Used to identify the client in Kafka logs and metrics 52 | 53 | type: `string` 54 | 55 | optional: `true` 56 | 57 | ### **start_from_latest** 58 | 59 | Start with the most recent messages. 60 | 61 | - When set to true, the consumer will start consuming from the latest messages 62 | - When set to false, the consumer will start from the earliest available messages 63 | - Useful for controlling message replay behavior on consumer startup 64 | 65 | type: `boolean` 66 | 67 | default: `false` 68 | 69 | optional: `true` 70 | 71 | ## Examples 72 | 73 | ```yaml 74 | - input: 75 | type: kafka 76 | brokers: 77 | - localhost:9092 78 | topics: 79 | - my_topic 80 | consumer_group: my_consumer_group 81 | client_id: my_client 82 | start_from_latest: false 83 | ``` 84 | 85 | ```yaml 86 | - input: 87 | type: kafka 88 | brokers: 89 | - kafka1:9092 90 | - kafka2:9092 91 | topics: 92 | - topic1 93 | - topic2 94 | consumer_group: app1_group 95 | start_from_latest: true 96 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | 3 | The Memory input component reads data from an in-memory message queue. 4 | 5 | ## Configuration 6 | 7 | ### **messages** 8 | 9 | The initial list of messages in the memory queue (optional). 10 | 11 | type: `array` of `string` 12 | 13 | ## Examples 14 | 15 | ```yaml 16 | - input: 17 | type: "memory" 18 | messages: 19 | - "Hello" 20 | - "World" 21 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/mqtt.md: -------------------------------------------------------------------------------- 1 | # MQTT 2 | 3 | The MQTT input component receives data from an MQTT broker. 4 | 5 | ## Configuration 6 | 7 | ### **host** 8 | 9 | MQTT broker address. 10 | 11 | type: `string` 12 | 13 | ### **port** 14 | 15 | MQTT broker port. 16 | 17 | type: `integer` 18 | 19 | ### **client_id** 20 | 21 | Unique identifier for the MQTT client. 22 | 23 | type: `string` 24 | 25 | ### **username** 26 | 27 | Username for authentication (optional). 
28 | 29 | type: `string` 30 | 31 | ### **password** 32 | 33 | Password for authentication (optional). 34 | 35 | type: `string` 36 | 37 | ### **topics** 38 | 39 | List of topics to subscribe to. 40 | 41 | type: `array` of `string` 42 | 43 | ### **qos** 44 | 45 | Quality of Service level (0, 1, or 2). 46 | 47 | type: `integer` 48 | 49 | default: `1` 50 | 51 | ### **clean_session** 52 | 53 | Whether to start a clean session. 54 | 55 | type: `boolean` 56 | 57 | default: `true` 58 | 59 | ### **keep_alive** 60 | 61 | Keep alive interval in seconds. 62 | 63 | type: `integer` 64 | 65 | default: `60` 66 | 67 | ## Examples 68 | 69 | ```yaml 70 | - input: 71 | type: "mqtt" 72 | host: "localhost" 73 | port: 1883 74 | client_id: "my_client" 75 | username: "user" 76 | password: "pass" 77 | topics: 78 | - "sensors/temperature" 79 | - "sensors/humidity" 80 | qos: 1 81 | clean_session: true 82 | keep_alive: 60 83 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/0-inputs/sql.md: -------------------------------------------------------------------------------- 1 | # SQL 2 | 3 | The SQL input component allows you to query data from various input sources using SQL. 4 | 5 | Reference to [SQL](../../category/sql). 6 | 7 | ## Configuration 8 | 9 | ### **select_sql** 10 | 11 | The SQL query statement to execute. 12 | 13 | type: `string` 14 | 15 | ### **input_type** 16 | 17 | The type of input source to query from. 18 | 19 | type: `enum` 20 | 21 | options: 22 | - `avro` 23 | - `arrow` 24 | - `json` 25 | - `csv` 26 | - `parquet` 27 | - `mysql` 28 | - `duckdb` 29 | - `postgres` 30 | - `sqlite` 31 | 32 | ## Input Type Configurations 33 | 34 | ### **Avro** 35 | - `table_name`: Optional table name (used in SQL queries) 36 | 37 | type: `string` 38 | - `path`: Path to Avro file 39 | 40 | type: `string` 41 | 42 | 43 | ### **Arrow** 44 | - `table_name`: Optional table name (used in SQL queries) 45 | 46 | type: `string` 47 | - `path`: Path to Arrow file 48 | 49 | type: `string` 50 | 51 | ### **Json** 52 | - `table_name`: Optional table name (used in SQL queries) 53 | 54 | type: `string` 55 | - `path`: Path to JSON file 56 | 57 | type: `string` 58 | 59 | ### **Csv** 60 | - `table_name`: Optional table name (used in SQL queries) 61 | 62 | type: `string` 63 | - `path`: Path to CSV file 64 | 65 | type: `string` 66 | 67 | ### **Parquet** 68 | - `table_name`: Optional table name (used in SQL queries) 69 | 70 | type: `string` 71 | - `path`: Path to Parquet file 72 | 73 | type: `string` 74 | 75 | ### **Mysql** 76 | - `name`: Optional connection name 77 | 78 | type: `string` 79 | - `uri`: MySQL connection URI 80 | 81 | type: `string` 82 | - `ssl`: 83 | - `ssl_mode`: SSL mode 84 | 85 | type: `string` 86 | - `root_cert`: Optional root certificate path 87 | 88 | type: `string` 89 | 90 | ### **DuckDB** 91 | - `name`: Optional connection name 92 | 93 | type: `string` 94 | - `path`: Path to DuckDB file 95 | 96 | type: `string` 97 | 98 | ### **Postgres** 99 | - `name`: Optional connection name 100 | 101 | type: `string` 102 | - `uri`: PostgreSQL connection URI 103 | 104 | type: `string` 105 | - `ssl`: 106 | - `ssl_mode`: SSL mode 107 | 108 | type: `string` 109 | - `root_cert`: Optional root certificate path 110 | 111 | type: `string` 112 | 113 | ### **Sqlite** 114 | - `name`: Optional connection name 115 | 116 | type: `string` 117 | - `path`: Path to SQLite file 118 | 119 | type: `string` 120 | 121 | ## Examples 122 | 123 | ```yaml 124 | - input: 125 | type: 
"sql" 126 | select_sql: "SELECT * FROM table" 127 | input_type: 128 | mysql: 129 | name: "my_mysql" 130 | uri: "mysql://user:password@localhost:3306/db" 131 | ssl: 132 | ssl_mode: "verify_identity" 133 | root_cert: "/path/to/cert.pem" 134 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/1-buffers/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Buffers", 3 | "link": { 4 | "title": "Buffers", 5 | "type": "generated-index", 6 | "description": "A buffer is an optional component type that immediately follows the input layer and can be used as a way to decouple the transaction model from downstream components such as processing layers and outputs. This is considered a high-level component, as most users may not benefit from buffers, but they allow you to group messages using windowing algorithms. " 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/1-buffers/memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | 3 | The Memory buffer component provides an in-memory message queue for temporary message storage and buffering. It implements a FIFO (First-In-First-Out) queue with configurable capacity and timeout settings. 4 | 5 | ## Configuration 6 | 7 | ### **capacity** 8 | 9 | The maximum number of messages that can be stored in the memory buffer. When this limit is reached, the buffer will trigger processing of the buffered messages to apply backpressure to upstream components. 10 | 11 | type: `integer` 12 | 13 | required: `true` 14 | 15 | ### **timeout** 16 | 17 | The duration to wait before processing buffered messages, even if the buffer is not full. This ensures messages don't stay in the buffer indefinitely. 18 | 19 | type: `string` 20 | 21 | required: `true` 22 | 23 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 24 | 25 | ## Internal Mechanism 26 | 27 | - Messages are stored in a thread-safe queue using `RwLock` 28 | - Messages are written to the front of the queue and read from the back (FIFO) 29 | - When the total message count reaches the configured capacity, the buffer triggers message processing 30 | - A background timer periodically checks the timeout condition to process messages 31 | - Messages are batched and concatenated during processing for better performance 32 | - Implements proper backpressure handling to prevent memory overflow 33 | 34 | ## Examples 35 | 36 | ```yaml 37 | buffer: 38 | type: "memory" 39 | capacity: 100 # Process after 100 messages 40 | timeout: "1s" # Or process after 1 second 41 | ``` 42 | 43 | This example configures a memory buffer that will process messages either when: 44 | - The total number of buffered messages reaches 100 45 | - 1 second has elapsed since the last message was received 46 | 47 | The buffer helps smooth out traffic spikes and provides backpressure when downstream components can't keep up with the incoming message rate. -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/2-processors/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Processors", 3 | "link": { 4 | "title": "Processors", 5 | "type": "generated-index", 6 | "description": "A processor is a function applied to messages passed through a pipeline. 
" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/2-processors/batch.md: -------------------------------------------------------------------------------- 1 | # Batch 2 | 3 | The Batch processor component allows you to accumulate messages into batches before they are sent to the next processor in the pipeline. 4 | 5 | ## Configuration 6 | 7 | ### **batch_size** 8 | 9 | The number of messages to accumulate before creating a batch. 10 | 11 | type: `integer` 12 | 13 | default: `1` 14 | 15 | ## Examples 16 | 17 | ```yaml 18 | - processor: 19 | type: "batch" 20 | batch_size: 1000 21 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/2-processors/json.md: -------------------------------------------------------------------------------- 1 | # JSON 2 | 3 | The JSON processor component provides two processors for converting between JSON and Arrow formats. 4 | 5 | ## JSON to Arrow 6 | 7 | The `json_to_arrow` processor converts JSON objects to Arrow format. 8 | 9 | ### Configuration 10 | 11 | #### **value_field** 12 | 13 | Specifies the JSON field name to process. 14 | 15 | type: `string` 16 | 17 | optional: `true` 18 | 19 | #### **fields_to_include** 20 | 21 | Specifies a set of field names to include in the output. If not specified, all fields will be included. 22 | 23 | type: `array[string]` 24 | 25 | optional: `true` 26 | 27 | ### Example 28 | 29 | ```yaml 30 | - processor: 31 | type: "json_to_arrow" 32 | value_field: "data" 33 | fields_to_include: 34 | - "field1" 35 | - "field2" 36 | ``` 37 | 38 | ## Arrow to JSON 39 | 40 | The `arrow_to_json` processor converts Arrow format data to JSON format. 41 | 42 | ### Configuration 43 | 44 | #### **fields_to_include** 45 | 46 | Specifies a set of field names to include in the output. If not specified, all fields will be included. 47 | 48 | type: `array[string]` 49 | 50 | optional: `true` 51 | 52 | ### Example 53 | 54 | ```yaml 55 | - processor: 56 | type: "arrow_to_json" 57 | fields_to_include: 58 | - "field1" 59 | - "field2" 60 | ``` 61 | 62 | ## Data Type Mapping 63 | 64 | The processor supports the following JSON to Arrow data type conversions: 65 | 66 | | JSON Type | Arrow Type | Notes | 67 | |-----------|------------|--------| 68 | | null | Null | | 69 | | boolean | Boolean | | 70 | | number (integer) | Int64 | For integer values | 71 | | number (unsigned) | UInt64 | For unsigned integer values | 72 | | number (float) | Float64 | For floating point values | 73 | | string | Utf8 | | 74 | | array | Utf8 | Serialized as JSON string | 75 | | object | Utf8 | Serialized as JSON string | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/2-processors/protobuf.md: -------------------------------------------------------------------------------- 1 | # Protobuf 2 | 3 | The Protobuf processor component provides functionality for converting between Protobuf and Arrow formats. 4 | 5 | ## Configuration 6 | 7 | ### **type** 8 | 9 | The type of Protobuf conversion to perform. 10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | Available options: 16 | - `arrow_to_protobuf`: Convert Arrow format to Protobuf data 17 | - `protobuf_to_arrow`: Convert Protobuf data to Arrow format 18 | 19 | ### **proto_inputs** 20 | 21 | A list of directories containing Protobuf message type descriptor files (*.proto). 
22 | 23 | type: `array[string]` 24 | 25 | required: `true` 26 | 27 | ### **proto_includes** 28 | 29 | A list of directories to search for imported Protobuf files. 30 | 31 | type: `array[string]` 32 | 33 | optional: `true` 34 | 35 | default: Same as proto_inputs 36 | 37 | ### **message_type** 38 | 39 | The Protobuf message type name (e.g. "example.MyMessage"). 40 | 41 | type: `string` 42 | 43 | required: `true` 44 | 45 | ### **value_field** 46 | 47 | Specifies the field name containing the Protobuf binary data when converting from Protobuf to Arrow. 48 | 49 | type: `string` 50 | 51 | optional: `true` 52 | 53 | ### **fields_to_include** 54 | 55 | Specifies a set of field names to include when converting from Arrow to Protobuf. If not specified, all fields will be included. 56 | 57 | type: `array[string]` 58 | 59 | optional: `true` 60 | 61 | ## Data Type Mapping 62 | 63 | The processor supports the following Protobuf to Arrow data type conversions: 64 | 65 | | Protobuf Type | Arrow Type | Notes | 66 | |--------------|------------|--------| 67 | | bool | Boolean | | 68 | | int32, sint32, sfixed32 | Int32 | | 69 | | int64, sint64, sfixed64 | Int64 | | 70 | | uint32, fixed32 | UInt32 | | 71 | | uint64, fixed64 | UInt64 | | 72 | | float | Float32 | | 73 | | double | Float64 | | 74 | | string | Utf8 | | 75 | | bytes | Binary | | 76 | | enum | Int32 | Stored as enum number | 77 | 78 | ## Examples 79 | 80 | ```yaml 81 | # Convert Arrow to Protobuf 82 | - processor: 83 | type: "arrow_to_protobuf" 84 | proto_inputs: ["./protos/"] 85 | message_type: "example.MyMessage" 86 | fields_to_include: 87 | - "field1" 88 | - "field2" 89 | 90 | # Convert Protobuf to Arrow 91 | - processor: 92 | type: "protobuf_to_arrow" 93 | proto_inputs: ["./protos/"] 94 | proto_includes: ["./includes/"] 95 | message_type: "example.MyMessage" 96 | value_field: "data" 97 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/2-processors/sql.md: -------------------------------------------------------------------------------- 1 | # SQL 2 | 3 | The SQL processor component allows you to process data using SQL queries. It uses DataFusion as the query engine to execute SQL statements on the data. 4 | 5 | Reference to [SQL](../../category/sql). 6 | 7 | ## Configuration 8 | 9 | ### **query** 10 | 11 | The SQL query statement to execute on the data. 12 | 13 | type: `string` 14 | 15 | ### **table_name** 16 | 17 | The table name to use in SQL queries. This is the name that will be used to reference the data in your SQL queries. 18 | 19 | type: `string` 20 | 21 | default: `flow` 22 | 23 | ## Examples 24 | 25 | ```yaml 26 | - processor: 27 | type: "sql" 28 | query: "SELECT id, name, age FROM users WHERE age > 18" 29 | table_name: "users" 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/3-outputs/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Outputs", 3 | "link": { 4 | "title": "Outputs", 5 | "type": "generated-index", 6 | "description": "The output is the receiving point where we want to send consumed data after applying an optional processor array. 
" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/3-outputs/drop.md: -------------------------------------------------------------------------------- 1 | # Drop 2 | 3 | The Drop output component discards all messages that it receives. 4 | 5 | ## Configuration 6 | 7 | This component has no configuration options. 8 | 9 | ## Examples 10 | 11 | ```yaml 12 | - output: 13 | type: "drop" 14 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/3-outputs/http.md: -------------------------------------------------------------------------------- 1 | # HTTP 2 | 3 | The HTTP output component sends messages to an HTTP server. 4 | 5 | ## Configuration 6 | 7 | ### **url** 8 | 9 | The URL to send requests to. 10 | 11 | type: `string` 12 | 13 | ### **method** 14 | 15 | The HTTP method to use. 16 | 17 | type: `string` 18 | 19 | default: `"POST"` 20 | 21 | Supported methods: `GET`, `POST`, `PUT`, `DELETE`, `PATCH` 22 | 23 | ### **timeout_ms** 24 | 25 | The maximum time to wait for a response in milliseconds. 26 | 27 | type: `integer` 28 | 29 | default: `5000` 30 | 31 | ### **retry_count** 32 | 33 | Number of retry attempts for failed requests. 34 | 35 | type: `integer` 36 | 37 | default: `0` 38 | 39 | ### **headers** 40 | 41 | A map of headers to add to the request. 42 | 43 | type: `object` 44 | 45 | default: `{}` 46 | 47 | ### **body_field** 48 | 49 | Specifies which field from the message to use as the request body. 50 | 51 | type: `string` 52 | 53 | default: `"value"` 54 | 55 | ### **auth** 56 | 57 | Authentication configuration. 58 | 59 | type: `object` 60 | 61 | properties: 62 | - **type**: Authentication type (`basic` or `bearer`) 63 | - **username**: Username for basic authentication 64 | - **password**: Password for basic authentication 65 | - **token**: Token for bearer authentication 66 | 67 | ## Examples 68 | 69 | ### Basic HTTP Request 70 | 71 | ```yaml 72 | - output: 73 | type: "http" 74 | url: "http://example.com/post/data" 75 | method: "POST" 76 | timeout_ms: 5000 77 | retry_count: 3 78 | headers: 79 | Content-Type: "application/json" 80 | ``` 81 | 82 | ### With Basic Authentication 83 | 84 | ```yaml 85 | - output: 86 | type: "http" 87 | url: "http://example.com/data" 88 | method: "POST" 89 | auth: 90 | type: "basic" 91 | username: "user" 92 | password: "pass" 93 | ``` 94 | 95 | ### With Bearer Token 96 | 97 | ```yaml 98 | - output: 99 | type: "http" 100 | url: "http://example.com/api/data" 101 | method: "POST" 102 | auth: 103 | type: "bearer" 104 | token: "your-token" 105 | headers: 106 | Content-Type: "application/json" 107 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/3-outputs/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | The Kafka output component writes messages to a Kafka topic. 4 | 5 | ## Configuration 6 | 7 | ### **brokers** 8 | 9 | A list of broker addresses to connect to. 10 | 11 | type: `array` of `string` 12 | 13 | ### **topic** 14 | 15 | The topic to write messages to. Supports both static values and SQL expressions. 
16 | 17 | type: `object` 18 | 19 | One of: 20 | - `type: "value"` with `value: string` - Static topic name 21 | - `type: "expr"` with `expr: string` - SQL expression to evaluate topic name 22 | 23 | ### **key** 24 | 25 | The key to set for each message (optional). Supports both static values and SQL expressions. 26 | 27 | type: `object` 28 | 29 | One of: 30 | - `type: "value"` with `value: string` - Static key value 31 | - `type: "expr"` with `expr: string` - SQL expression to evaluate key 32 | 33 | ### **client_id** 34 | 35 | The client ID to use when connecting to Kafka. 36 | 37 | type: `string` 38 | 39 | ### **compression** 40 | 41 | The compression type to use for messages. 42 | 43 | type: `string` 44 | 45 | One of: 46 | - `none` - No compression 47 | - `gzip` - Gzip compression 48 | - `snappy` - Snappy compression 49 | - `lz4` - LZ4 compression 50 | 51 | ### **acks** 52 | 53 | The number of acknowledgments the producer requires the leader to have received before considering a request complete. 54 | 55 | type: `string` 56 | 57 | One of: 58 | - `0` - No acknowledgment 59 | - `1` - Leader acknowledgment only 60 | - `all` - All replicas acknowledgment 61 | 62 | ### **value_field** 63 | 64 | The field to use as the message value. If not specified, uses the default binary value field. 65 | 66 | type: `string` 67 | 68 | ## Examples 69 | 70 | ```yaml 71 | output: 72 | type: "kafka" 73 | brokers: 74 | - "localhost:9092" 75 | topic: 76 | type: "expr" 77 | expr: "concat('1','x')" 78 | key: 79 | type: "value" 80 | value: "my-key" 81 | client_id: "my-client" 82 | compression: "gzip" 83 | acks: "all" 84 | value_field: "message" 85 | ``` 86 | 87 | ```yaml 88 | output: 89 | type: "kafka" 90 | brokers: 91 | - "localhost:9092" 92 | topic: 93 | type: "value" 94 | value: "my-topic" 95 | compression: "snappy" 96 | acks: "1" 97 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/3-outputs/mqtt.md: -------------------------------------------------------------------------------- 1 | # MQTT 2 | 3 | The MQTT output component publishes messages to an MQTT broker. 4 | 5 | ## Configuration 6 | 7 | ### **host** 8 | 9 | MQTT broker address. 10 | 11 | type: `string` 12 | 13 | ### **port** 14 | 15 | MQTT broker port. 16 | 17 | type: `integer` 18 | 19 | ### **client_id** 20 | 21 | The client ID to use when connecting to the broker. 22 | 23 | type: `string` 24 | 25 | ### **username** 26 | 27 | Username for authentication (optional). 28 | 29 | type: `string` 30 | 31 | ### **password** 32 | 33 | Password for authentication (optional). 34 | 35 | type: `string` 36 | 37 | ### **topic** 38 | 39 | The topic to publish messages to. Supports both static values and SQL expressions. 40 | 41 | type: `object` 42 | 43 | One of: 44 | - `type: "value"` with `value: string` - Static topic name 45 | - `type: "expr"` with `expr: string` - SQL expression to evaluate topic name 46 | 47 | ### **qos** 48 | 49 | The Quality of Service level to use. 50 | 51 | type: `integer` 52 | 53 | One of: 54 | - `0` - At most once delivery 55 | - `1` - At least once delivery 56 | - `2` - Exactly once delivery 57 | 58 | default: `1` 59 | 60 | ### **clean_session** 61 | 62 | Whether to use clean session. 63 | 64 | type: `boolean` 65 | 66 | default: `true` 67 | 68 | ### **keep_alive** 69 | 70 | Keep alive interval in seconds. 71 | 72 | type: `integer` 73 | 74 | default: `60` 75 | 76 | ### **retain** 77 | 78 | Whether to set the retain flag on published messages. 
79 | 80 | type: `boolean` 81 | 82 | default: `false` 83 | 84 | ### **value_field** 85 | 86 | The field to use as the message value. If not specified, uses the default binary value field. 87 | 88 | type: `string` 89 | 90 | ## Examples 91 | 92 | ```yaml 93 | output: 94 | type: "mqtt" 95 | host: "localhost" 96 | port: 1883 97 | client_id: "my-client" 98 | username: "user" 99 | password: "pass" 100 | topic: 101 | type: "value" 102 | value: "my-topic" 103 | qos: 2 104 | clean_session: true 105 | keep_alive: 60 106 | retain: true 107 | value_field: "message" 108 | ``` 109 | 110 | ```yaml 111 | output: 112 | type: "mqtt" 113 | host: "localhost" 114 | port: 1883 115 | topic: 116 | type: "expr" 117 | expr: "concat('sensor/', id)" 118 | qos: 1 119 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/3-outputs/stdout.md: -------------------------------------------------------------------------------- 1 | # Stdout 2 | 3 | The Stdout output component writes messages to the standard output stream. 4 | 5 | ## Configuration 6 | 7 | ### **append_newline** 8 | 9 | Whether to add a line break after each message (optional). 10 | 11 | type: `bool` 12 | 13 | default: `true` 14 | 15 | ## Examples 16 | 17 | ```yaml 18 | - output: 19 | type: "stdout" 20 | append_newline: true 21 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/components/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Components", 3 | "position": 1, 4 | "link": { 5 | "title": "Components", 6 | "type": "generated-index", 7 | "description": "Components " 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/deploy/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Deploy", 3 | "position": 4, 4 | "link": { 5 | "title": "Deploy", 6 | "type": "generated-index", 7 | "description": "Deploy ArkFlow to the cloud" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/sql/8-special_functions.md: -------------------------------------------------------------------------------- 1 | # Special Functions 2 | 3 | ## Expansion Functions 4 | 5 | - [unnest](#unnest) 6 | - [unnest(struct)](#unnest-struct) 7 | 8 | ### `unnest` 9 | 10 | Expands an array or map into rows. 11 | 12 | #### Arguments 13 | 14 | - **array**: Array expression to unnest. 15 | Can be a constant, column, or function, and any combination of array operators. 16 | 17 | #### Examples 18 | 19 | ```sql 20 | > select unnest(make_array(1, 2, 3, 4, 5)) as unnested; 21 | +----------+ 22 | | unnested | 23 | +----------+ 24 | | 1 | 25 | | 2 | 26 | | 3 | 27 | | 4 | 28 | | 5 | 29 | +----------+ 30 | ``` 31 | 32 | ```sql 33 | > select unnest(range(0, 10)) as unnested_range; 34 | +----------------+ 35 | | unnested_range | 36 | +----------------+ 37 | | 0 | 38 | | 1 | 39 | | 2 | 40 | | 3 | 41 | | 4 | 42 | | 5 | 43 | | 6 | 44 | | 7 | 45 | | 8 | 46 | | 9 | 47 | +----------------+ 48 | ``` 49 | 50 | ### `unnest (struct)` 51 | 52 | Expand a struct fields into individual columns. 53 | 54 | #### Arguments 55 | 56 | - **struct**: Object expression to unnest. 57 | Can be a constant, column, or function, and any combination of object operators. 
58 | 59 | #### Examples 60 | 61 | ```sql 62 | > create table foo as values ({a: 5, b: 'a string'}), ({a:6, b: 'another string'}); 63 | 64 | > create view foov as select column1 as struct_column from foo; 65 | 66 | > select * from foov; 67 | +---------------------------+ 68 | | struct_column | 69 | +---------------------------+ 70 | | {a: 5, b: a string} | 71 | | {a: 6, b: another string} | 72 | +---------------------------+ 73 | 74 | > select unnest(struct_column) from foov; 75 | +------------------------------------------+------------------------------------------+ 76 | | unnest_placeholder(foov.struct_column).a | unnest_placeholder(foov.struct_column).b | 77 | +------------------------------------------+------------------------------------------+ 78 | | 5 | a string | 79 | | 6 | another string | 80 | +------------------------------------------+------------------------------------------+ 81 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.2.x/sql/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "SQL", 3 | "position": 2, 4 | "link": { 5 | "title": "SQL", 6 | "type": "generated-index", 7 | "description": "SQL query engine documentation, including comprehensive coverage of data types, operators, query syntax, subqueries, aggregate functions, window functions, and more" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Inputs", 3 | "link": { 4 | "title": "Inputs", 5 | "type": "generated-index", 6 | "description": "Input components are responsible for consuming data from various sources such as Kafka, MQTT, HTTP, and Memory. Each input component has its own configuration options that can be customized according to your needs." 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/generate.md: -------------------------------------------------------------------------------- 1 | # Generate 2 | 3 | Generate is an input component that generates test data. 4 | 5 | ## Configuration 6 | 7 | ### **context** 8 | 9 | The context is a JSON object that will be used to generate the data. The JSON object will be serialized to bytes and sent as message content. 10 | 11 | type: `string` 12 | 13 | optional: `true` 14 | 15 | ### **count** 16 | 17 | The total number of data points to generate. If not specified, the generator will run indefinitely until manually stopped. 18 | 19 | type: `integer` 20 | 21 | optional: `true` 22 | 23 | ### **interval** 24 | 25 | The interval is the time between each data point. 26 | 27 | type: `string` 28 | 29 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 30 | 31 | optional: `false` 32 | 33 | ### **batch_size** 34 | 35 | The batch size is the number of data points to generate at each interval. If the remaining count is less than batch_size, only the remaining messages will be sent. 
36 | 37 | type: `integer` 38 | 39 | optional: `false` 40 | 41 | ## Examples 42 | 43 | ```yaml 44 | - input: 45 | type: "generate" 46 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 47 | interval: 1ms 48 | batch_size: 1000 49 | count: 10000 # Optional: generate 10000 messages in total 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/http.md: -------------------------------------------------------------------------------- 1 | # HTTP 2 | 3 | The HTTP input component receives data from HTTP endpoints. 4 | 5 | ## Configuration 6 | 7 | ### **address** 8 | 9 | Listening address for the HTTP server. 10 | 11 | type: `string` 12 | 13 | ### **path** 14 | 15 | The endpoint path to receive data. 16 | 17 | type: `string` 18 | 19 | ### **cors_enabled** 20 | 21 | Whether to enable CORS (Cross-Origin Resource Sharing). 22 | 23 | type: `boolean` 24 | 25 | default: `false` 26 | 27 | ### **auth** 28 | 29 | Authentication configuration. 30 | 31 | type: `object` 32 | 33 | properties: 34 | - **type**: Authentication type (`basic` or `bearer`) 35 | - **username**: Username for basic authentication 36 | - **password**: Password for basic authentication 37 | - **token**: Token for bearer authentication 38 | 39 | ## Examples 40 | 41 | ### Basic HTTP Server 42 | 43 | ```yaml 44 | - input: 45 | type: "http" 46 | address: "0.0.0.0:8080" 47 | path: "/data" 48 | cors_enabled: true 49 | ``` 50 | 51 | ### With Basic Authentication 52 | 53 | ```yaml 54 | - input: 55 | type: "http" 56 | address: "0.0.0.0:8080" 57 | path: "/data" 58 | auth: 59 | type: "basic" 60 | username: "user" 61 | password: "pass" 62 | ``` 63 | 64 | ### With Bearer Token Authentication 65 | 66 | ```yaml 67 | - input: 68 | type: "http" 69 | address: "0.0.0.0:8080" 70 | path: "/data" 71 | auth: 72 | type: "bearer" 73 | token: "your-token" 74 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | The Kafka input component consumes messages from a Kafka topic. It provides reliable message consumption with consumer group support and configurable offset management. 4 | 5 | ## Configuration 6 | 7 | ### **brokers** 8 | 9 | List of Kafka server addresses. 10 | 11 | - Format: `["host1:port1", "host2:port2"]` 12 | - At least one broker address must be specified 13 | - Multiple brokers can be specified for high availability 14 | 15 | type: `array` of `string` 16 | 17 | optional: `false` 18 | 19 | ### **topics** 20 | 21 | Subscribed to topics. 22 | 23 | - Format: `["topic1", "topic2"]` 24 | - Multiple topics can be subscribed 25 | - Topics must exist in the Kafka cluster 26 | - The consumer will receive messages from all specified topics 27 | 28 | type: `array` of `string` 29 | 30 | optional: `false` 31 | 32 | ### **consumer_group** 33 | 34 | Consumer group ID. 35 | 36 | - Consumers within the same consumer group will share message consumption 37 | - Different consumer groups will independently consume the same messages 38 | - It is recommended to set a unique consumer group ID for each application 39 | - Used for distributed message processing and load balancing 40 | 41 | type: `string` 42 | 43 | optional: `false` 44 | 45 | ### **client_id** 46 | 47 | Client ID (optional). 
48 | 49 | - If not specified, the system will automatically generate a random ID 50 | - It is recommended to set an explicit client ID for monitoring in production environments 51 | - Used to identify the client in Kafka logs and metrics 52 | 53 | type: `string` 54 | 55 | optional: `true` 56 | 57 | ### **start_from_latest** 58 | 59 | Start with the most recent messages. 60 | 61 | - When set to true, the consumer will start consuming from the latest messages 62 | - When set to false, the consumer will start from the earliest available messages 63 | - Useful for controlling message replay behavior on consumer startup 64 | 65 | type: `boolean` 66 | 67 | default: `false` 68 | 69 | optional: `true` 70 | 71 | ## Examples 72 | 73 | ```yaml 74 | - input: 75 | type: kafka 76 | brokers: 77 | - localhost:9092 78 | topics: 79 | - my_topic 80 | consumer_group: my_consumer_group 81 | client_id: my_client 82 | start_from_latest: false 83 | ``` 84 | 85 | ```yaml 86 | - input: 87 | type: kafka 88 | brokers: 89 | - kafka1:9092 90 | - kafka2:9092 91 | topics: 92 | - topic1 93 | - topic2 94 | consumer_group: app1_group 95 | start_from_latest: true 96 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | 3 | The Memory input component reads data from an in-memory message queue. 4 | 5 | ## Configuration 6 | 7 | ### **messages** 8 | 9 | The initial list of messages in the memory queue (optional). 10 | 11 | type: `array` of `string` 12 | 13 | ## Examples 14 | 15 | ```yaml 16 | - input: 17 | type: "memory" 18 | messages: 19 | - "Hello" 20 | - "World" 21 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/mqtt.md: -------------------------------------------------------------------------------- 1 | # MQTT 2 | 3 | The MQTT input component receives data from an MQTT broker. 4 | 5 | ## Configuration 6 | 7 | ### **host** 8 | 9 | MQTT broker address. 10 | 11 | type: `string` 12 | 13 | ### **port** 14 | 15 | MQTT broker port. 16 | 17 | type: `integer` 18 | 19 | ### **client_id** 20 | 21 | Unique identifier for the MQTT client. 22 | 23 | type: `string` 24 | 25 | ### **username** 26 | 27 | Username for authentication (optional). 28 | 29 | type: `string` 30 | 31 | ### **password** 32 | 33 | Password for authentication (optional). 34 | 35 | type: `string` 36 | 37 | ### **topics** 38 | 39 | List of topics to subscribe to. 40 | 41 | type: `array` of `string` 42 | 43 | ### **qos** 44 | 45 | Quality of Service level (0, 1, or 2). 46 | 47 | type: `integer` 48 | 49 | default: `1` 50 | 51 | ### **clean_session** 52 | 53 | Whether to start a clean session. 54 | 55 | type: `boolean` 56 | 57 | default: `true` 58 | 59 | ### **keep_alive** 60 | 61 | Keep alive interval in seconds. 
62 | 63 | type: `integer` 64 | 65 | default: `60` 66 | 67 | ## Examples 68 | 69 | ```yaml 70 | - input: 71 | type: "mqtt" 72 | host: "localhost" 73 | port: 1883 74 | client_id: "my_client" 75 | username: "user" 76 | password: "pass" 77 | topics: 78 | - "sensors/temperature" 79 | - "sensors/humidity" 80 | qos: 1 81 | clean_session: true 82 | keep_alive: 60 83 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/0-inputs/redis.md: -------------------------------------------------------------------------------- 1 | # Redis 2 | 3 | The Redis input component receives data from a Redis server, supporting both pub/sub and list modes. 4 | 5 | ## Configuration 6 | 7 | #### **url** (required) 8 | 9 | Redis server URL in the format `redis://host:port` or `rediss://host:port` for SSL/TLS connections. 10 | 11 | type: `string` 12 | 13 | #### **redis_type** (required) 14 | 15 | Redis operation mode. Must be specified with a `type` of either `"subscribe"` or `"list"`. 16 | 17 | type: `object` 18 | 19 | ##### Subscribe Mode 20 | 21 | ```yaml 22 | redis_type: 23 | type: "subscribe" 24 | subscribe: 25 | type: "channels" 26 | channels: 27 | - "my_channel" 28 | ``` 29 | 30 | ###### **subscribe** 31 | 32 | Subscription configuration with either channels or patterns. 33 | 34 | type: `object` 35 | 36 | ###### **type** (required) 37 | 38 | Subscription type, must be either `"channels"` or `"patterns"`. 39 | 40 | type: `string` 41 | 42 | ###### **channels** 43 | 44 | List of channels to subscribe to. Required when type is `"channels"`. 45 | 46 | type: `array` of `string` 47 | 48 | ###### **patterns** 49 | 50 | List of patterns to subscribe to. Required when type is `"patterns"`. 51 | 52 | type: `array` of `string` 53 | 54 | ##### List Mode 55 | 56 | ```yaml 57 | redis_type: 58 | type: "list" 59 | list: 60 | - "my_list" 61 | ``` 62 | 63 | ###### **list** (required) 64 | 65 | List of Redis lists to consume messages from. 66 | 67 | type: `array` of `string` 68 | 69 | ## Examples 70 | 71 | ### Subscribe Mode Example (Channels) 72 | 73 | ```yaml 74 | - input: 75 | type: "redis" 76 | url: "redis://localhost:6379" 77 | redis_type: 78 | type: "subscribe" 79 | subscribe: 80 | type: "channels" 81 | channels: 82 | - "news" 83 | - "events" 84 | ``` 85 | 86 | ### Subscribe Mode Example (Patterns) 87 | 88 | ```yaml 89 | - input: 90 | type: "redis" 91 | url: "redis://localhost:6379" 92 | redis_type: 93 | type: "subscribe" 94 | subscribe: 95 | type: "patterns" 96 | patterns: 97 | - "user.*" 98 | - "notification.*" 99 | ``` 100 | 101 | ### List Mode Example 102 | 103 | ```yaml 104 | - input: 105 | type: "redis" 106 | url: "redis://localhost:6379" 107 | redis_type: 108 | type: "list" 109 | list: 110 | - "tasks" 111 | - "notifications" 112 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/1-buffers/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Buffers", 3 | "link": { 4 | "title": "Buffers", 5 | "type": "generated-index", 6 | "description": "A buffer is an optional component type that immediately follows the input layer and can be used as a way to decouple the transaction model from downstream components such as processing layers and outputs. This is considered a high-level component, as most users may not benefit from buffers, but they allow you to group messages using windowing algorithms. 
" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/1-buffers/memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | 3 | The Memory buffer component provides an in-memory message queue for temporary message storage and buffering. It implements a FIFO (First-In-First-Out) queue with configurable capacity and timeout settings. 4 | 5 | ## Configuration 6 | 7 | ### **capacity** 8 | 9 | The maximum number of messages that can be stored in the memory buffer. When this limit is reached, the buffer will trigger processing of the buffered messages to apply backpressure to upstream components. 10 | 11 | type: `integer` 12 | 13 | required: `true` 14 | 15 | ### **timeout** 16 | 17 | The duration to wait before processing buffered messages, even if the buffer is not full. This ensures messages don't stay in the buffer indefinitely. 18 | 19 | type: `string` 20 | 21 | required: `true` 22 | 23 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 24 | 25 | ## Internal Mechanism 26 | 27 | - Messages are stored in a thread-safe queue using `RwLock` 28 | - Messages are written to the front of the queue and read from the back (FIFO) 29 | - When the total message count reaches the configured capacity, the buffer triggers message processing 30 | - A background timer periodically checks the timeout condition to process messages 31 | - Messages are batched and concatenated during processing for better performance 32 | - Implements proper backpressure handling to prevent memory overflow 33 | 34 | ## Examples 35 | 36 | ```yaml 37 | buffer: 38 | type: "memory" 39 | capacity: 100 # Process after 100 messages 40 | timeout: "1s" # Or process after 1 second 41 | ``` 42 | 43 | This example configures a memory buffer that will process messages either when: 44 | - The total number of buffered messages reaches 100 45 | - 1 second has elapsed since the last message was received 46 | 47 | The buffer helps smooth out traffic spikes and provides backpressure when downstream components can't keep up with the incoming message rate. -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/1-buffers/session_window.md: -------------------------------------------------------------------------------- 1 | # Session Window 2 | 3 | The Session Window buffer component provides a session-based message grouping mechanism where messages are grouped based on activity gaps. It implements a session window that closes after a configurable period of inactivity. 4 | 5 | ## Configuration 6 | 7 | ### **gap** 8 | 9 | The duration of inactivity that triggers the closing of a session window. When this period elapses without new messages, the buffer will process the messages in the current session. 
10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 16 | 17 | ## Internal Mechanism 18 | 19 | - Messages are stored in a thread-safe queue using `RwLock` 20 | - Each message arrival resets an inactivity timer 21 | - When the gap duration elapses without new messages, the session window closes and processes messages 22 | - Messages are batched and concatenated during processing for better performance 23 | - Implements proper backpressure handling to prevent memory overflow 24 | 25 | ## Examples 26 | 27 | ```yaml 28 | buffer: 29 | type: "session_window" 30 | gap: "5s" # Close session after 5 seconds of inactivity 31 | ``` 32 | 33 | This example configures a session window buffer that will: 34 | - Group messages into sessions 35 | - Close the session and process messages when no new messages arrive for 5 seconds 36 | 37 | The buffer helps group related messages that occur close together in time while separating unrelated messages that have gaps between them. -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/1-buffers/sliding_window.md: -------------------------------------------------------------------------------- 1 | # Sliding Window 2 | 3 | The Sliding Window buffer component provides a time-based windowing mechanism for processing message batches. It implements a sliding window algorithm with configurable window size, slide interval and slide size. 4 | 5 | ## Configuration 6 | 7 | ### **window_size** 8 | 9 | The number of messages that define the window size. When this number of messages is collected, the window will slide forward. 10 | 11 | type: `integer` 12 | 13 | required: `true` 14 | 15 | ### **interval** 16 | 17 | The duration between window slides, even if the window is not full. This ensures messages don't stay in the buffer indefinitely. 18 | 19 | type: `string` 20 | 21 | required: `true` 22 | 23 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 24 | 25 | ### **slide_size** 26 | 27 | The number of messages to slide the window forward by when processing completes. 28 | 29 | type: `integer` 30 | 31 | required: `true` 32 | 33 | ## Internal Mechanism 34 | 35 | - Messages are stored in a thread-safe queue using `RwLock` 36 | - A background timer periodically checks the interval condition to trigger window slides 37 | - When the total message count reaches the configured window_size, the buffer triggers message processing 38 | - After processing, the window slides forward by slide_size messages 39 | - Messages are batched and concatenated during processing for better performance 40 | - Implements proper backpressure handling to prevent memory overflow 41 | 42 | ## Examples 43 | 44 | ```yaml 45 | buffer: 46 | type: "sliding_window" 47 | window_size: 100 # Process after 100 messages 48 | interval: "1s" # Or process after 1 second 49 | slide_size: 10 # Slide forward by 10 messages 50 | ``` 51 | 52 | This example configures a sliding window buffer that will process messages either when: 53 | - The total number of buffered messages reaches 100 54 | - 1 second has elapsed since the last window slide 55 | 56 | The buffer then slides forward by 10 messages for the next window. 
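For orientation, a sliding window buffer sits between a stream's input and its pipeline, like any other buffer. The sketch below is illustrative only: it combines the buffer with the generate input and the JSON/SQL processors documented on the other component pages, and all values (the generate interval, window_size, slide_size, and the query) are placeholder choices rather than recommendations.

```yaml
streams:
  - input:
      type: "generate"
      context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }'
      interval: 100ms
      batch_size: 1

    buffer:
      type: "sliding_window"
      window_size: 100   # Trigger processing once 100 messages are buffered
      interval: "1s"     # Or slide the window after 1 second
      slide_size: 10     # Advance the window by 10 messages per slide

    pipeline:
      thread_num: 4
      processors:
        - type: "json_to_arrow"
        - type: "sql"
          query: "SELECT sensor, avg(value) AS avg_value FROM flow GROUP BY sensor"
        - type: "arrow_to_json"

    output:
      type: "stdout"
```

Because slide_size is smaller than window_size in this sketch, consecutive windows overlap, so each message can contribute to more than one aggregation.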
-------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/1-buffers/tumbling_window.md: -------------------------------------------------------------------------------- 1 | # Tumbling Window 2 | 3 | The Tumbling Window buffer component provides a fixed-size, non-overlapping windowing mechanism for processing message batches. It implements a tumbling window algorithm with configurable interval settings. 4 | 5 | ## Configuration 6 | 7 | ### **interval** 8 | 9 | The duration between window slides. This determines how often the window will process messages regardless of message count. 10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | example: `1ms`, `1s`, `1m`, `1h`, `1d` 16 | 17 | ## Internal Mechanism 18 | 19 | - Messages are stored in a thread-safe queue using `RwLock` 20 | - A background timer periodically triggers window processing based on the interval 21 | - When the timer fires, all buffered messages are processed as a batch 22 | - Messages are batched and concatenated during processing for better performance 23 | - Implements proper backpressure handling to prevent memory overflow 24 | - Uses Tokio's async runtime for efficient timer handling 25 | 26 | ## Examples 27 | 28 | ```yaml 29 | buffer: 30 | type: "tumbling_window" 31 | interval: "1s" # Process every 1 second 32 | ``` 33 | 34 | This example configures a tumbling window buffer that will process messages every 1 second, regardless of message count. -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/2-processors/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Processors", 3 | "link": { 4 | "title": "Processors", 5 | "type": "generated-index", 6 | "description": "A processor is a function applied to messages passed through a pipeline. " 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/2-processors/batch.md: -------------------------------------------------------------------------------- 1 | # Batch 2 | 3 | The Batch processor component allows you to accumulate messages into batches before they are sent to the next processor in the pipeline. 4 | 5 | ## Configuration 6 | 7 | ### **batch_size** 8 | 9 | The number of messages to accumulate before creating a batch. 10 | 11 | type: `integer` 12 | 13 | default: `1` 14 | 15 | ## Examples 16 | 17 | ```yaml 18 | - processor: 19 | type: "batch" 20 | batch_size: 1000 21 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/2-processors/json.md: -------------------------------------------------------------------------------- 1 | # JSON 2 | 3 | The JSON processor component provides two processors for converting between JSON and Arrow formats. 4 | 5 | ## JSON to Arrow 6 | 7 | The `json_to_arrow` processor converts JSON objects to Arrow format. 8 | 9 | ### Configuration 10 | 11 | #### **value_field** 12 | 13 | Specifies the JSON field name to process. 14 | 15 | type: `string` 16 | 17 | optional: `true` 18 | 19 | #### **fields_to_include** 20 | 21 | Specifies a set of field names to include in the output. If not specified, all fields will be included. 
22 | 23 | type: `array[string]` 24 | 25 | optional: `true` 26 | 27 | ### Example 28 | 29 | ```yaml 30 | - processor: 31 | type: "json_to_arrow" 32 | value_field: "data" 33 | fields_to_include: 34 | - "field1" 35 | - "field2" 36 | ``` 37 | 38 | ## Arrow to JSON 39 | 40 | The `arrow_to_json` processor converts Arrow format data to JSON format. 41 | 42 | ### Configuration 43 | 44 | #### **fields_to_include** 45 | 46 | Specifies a set of field names to include in the output. If not specified, all fields will be included. 47 | 48 | type: `array[string]` 49 | 50 | optional: `true` 51 | 52 | ### Example 53 | 54 | ```yaml 55 | - processor: 56 | type: "arrow_to_json" 57 | fields_to_include: 58 | - "field1" 59 | - "field2" 60 | ``` 61 | 62 | ## Data Type Mapping 63 | 64 | The processor supports the following JSON to Arrow data type conversions: 65 | 66 | | JSON Type | Arrow Type | Notes | 67 | |-----------|------------|--------| 68 | | null | Null | | 69 | | boolean | Boolean | | 70 | | number (integer) | Int64 | For integer values | 71 | | number (unsigned) | UInt64 | For unsigned integer values | 72 | | number (float) | Float64 | For floating point values | 73 | | string | Utf8 | | 74 | | array | Utf8 | Serialized as JSON string | 75 | | object | Utf8 | Serialized as JSON string | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/2-processors/protobuf.md: -------------------------------------------------------------------------------- 1 | # Protobuf 2 | 3 | The Protobuf processor component provides functionality for converting between Protobuf and Arrow formats. 4 | 5 | ## Configuration 6 | 7 | ### **type** 8 | 9 | The type of Protobuf conversion to perform. 10 | 11 | type: `string` 12 | 13 | required: `true` 14 | 15 | Available options: 16 | - `arrow_to_protobuf`: Convert Arrow format to Protobuf data 17 | - `protobuf_to_arrow`: Convert Protobuf data to Arrow format 18 | 19 | ### **proto_inputs** 20 | 21 | A list of directories containing Protobuf message type descriptor files (*.proto). 22 | 23 | type: `array[string]` 24 | 25 | required: `true` 26 | 27 | ### **proto_includes** 28 | 29 | A list of directories to search for imported Protobuf files. 30 | 31 | type: `array[string]` 32 | 33 | optional: `true` 34 | 35 | default: Same as proto_inputs 36 | 37 | ### **message_type** 38 | 39 | The Protobuf message type name (e.g. "example.MyMessage"). 40 | 41 | type: `string` 42 | 43 | required: `true` 44 | 45 | ### **value_field** 46 | 47 | Specifies the field name containing the Protobuf binary data when converting from Protobuf to Arrow. 48 | 49 | type: `string` 50 | 51 | optional: `true` 52 | 53 | ### **fields_to_include** 54 | 55 | Specifies a set of field names to include when converting from Arrow to Protobuf. If not specified, all fields will be included. 
56 | 57 | type: `array[string]` 58 | 59 | optional: `true` 60 | 61 | ## Data Type Mapping 62 | 63 | The processor supports the following Protobuf to Arrow data type conversions: 64 | 65 | | Protobuf Type | Arrow Type | Notes | 66 | |--------------|------------|--------| 67 | | bool | Boolean | | 68 | | int32, sint32, sfixed32 | Int32 | | 69 | | int64, sint64, sfixed64 | Int64 | | 70 | | uint32, fixed32 | UInt32 | | 71 | | uint64, fixed64 | UInt64 | | 72 | | float | Float32 | | 73 | | double | Float64 | | 74 | | string | Utf8 | | 75 | | bytes | Binary | | 76 | | enum | Int32 | Stored as enum number | 77 | 78 | ## Examples 79 | 80 | ```yaml 81 | # Convert Arrow to Protobuf 82 | - processor: 83 | type: "arrow_to_protobuf" 84 | proto_inputs: ["./protos/"] 85 | message_type: "example.MyMessage" 86 | fields_to_include: 87 | - "field1" 88 | - "field2" 89 | 90 | # Convert Protobuf to Arrow 91 | - processor: 92 | type: "protobuf_to_arrow" 93 | proto_inputs: ["./protos/"] 94 | proto_includes: ["./includes/"] 95 | message_type: "example.MyMessage" 96 | value_field: "data" 97 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/2-processors/sql.md: -------------------------------------------------------------------------------- 1 | # SQL 2 | 3 | The SQL processor component allows you to process data using SQL queries. It uses DataFusion as the query engine to execute SQL statements on the data. 4 | 5 | Reference to [SQL](../../category/sql). 6 | 7 | ## Configuration 8 | 9 | ### **query** 10 | 11 | The SQL query statement to execute on the data. 12 | 13 | type: `string` 14 | 15 | ### **table_name** 16 | 17 | The table name to use in SQL queries. This is the name that will be used to reference the data in your SQL queries. 18 | 19 | type: `string` 20 | 21 | default: `flow` 22 | 23 | ### **ballista (experimental)** 24 | 25 | Optional configuration for distributed computing using Ballista. When configured, SQL queries will be executed in a distributed manner. 26 | 27 | type: `object` 28 | 29 | required: `false` 30 | 31 | properties: 32 | - `remote_url`: Ballista server URL (e.g., "df://localhost:50050") 33 | 34 | type: `string` 35 | 36 | required: `true` 37 | 38 | 39 | ## Examples 40 | 41 | ```yaml 42 | - processor: 43 | type: "sql" 44 | query: "SELECT id, name, age FROM users WHERE age > 18" 45 | table_name: "users" 46 | ``` 47 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/2-processors/vrl.md: -------------------------------------------------------------------------------- 1 | # VRL 2 | 3 | The VRL (Vector Remap Language) processor component allows you to process and transform data using the VRL language. It supports rich data type conversion and processing operations, enabling you to flexibly modify and transform messages in the data stream. 4 | 5 | ## Configuration 6 | 7 | ### **statement** 8 | 9 | VRL statement used to perform data transformation operations. 10 | 11 | type: `string` 12 | 13 | ## Supported Data Types 14 | 15 | The VRL processor supports the conversion of the following data types: 16 | 17 | - **String** 18 | - **Integer**: Supports Int8, Int16, Int32, Int64 19 | - **Float**: Supports Float32, Float64 20 | - **Boolean** 21 | - **Binary** 22 | - **Timestamp** 23 | - **Null** 24 | 25 | ## Examples 26 | 27 | ```yaml 28 | - processor: 29 | type: "vrl" 30 | statement: ".v2, err = .value * 2; ." 
31 | ``` 32 | 33 | In this example, the VRL processor multiplies the `value` field in the input message by 2 and stores the result in a new `v2` field. 34 | 35 | ### Complete Pipeline Example 36 | 37 | ```yaml 38 | streams: 39 | - input: 40 | type: "generate" 41 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 42 | interval: 1s 43 | batch_size: 1 44 | 45 | pipeline: 46 | thread_num: 4 47 | processors: 48 | - type: "json_to_arrow" 49 | - type: "vrl" 50 | statement: ".v2, err = .value * 2; ." 51 | - type: "arrow_to_json" 52 | 53 | output: 54 | type: "stdout" 55 | ``` 56 | 57 | This example demonstrates a complete pipeline where: 58 | 1. First, it generates a JSON message containing timestamp, value, and sensor information 59 | 2. Converts the JSON to Arrow format 60 | 3. Uses the VRL processor to transform the data 61 | 4. Converts the processed data back to JSON format 62 | 5. Finally outputs to standard output -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Outputs", 3 | "link": { 4 | "title": "Outputs", 5 | "type": "generated-index", 6 | "description": "The output is the receiving point where we want to send consumed data after applying an optional processor array. " 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/drop.md: -------------------------------------------------------------------------------- 1 | # Drop 2 | 3 | The Drop output component discards all messages that it receives. 4 | 5 | ## Configuration 6 | 7 | This component has no configuration options. 8 | 9 | ## Examples 10 | 11 | ```yaml 12 | - output: 13 | type: "drop" 14 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/http.md: -------------------------------------------------------------------------------- 1 | # HTTP 2 | 3 | The HTTP output component sends messages to an HTTP server. 4 | 5 | ## Configuration 6 | 7 | ### **url** 8 | 9 | The URL to send requests to. 10 | 11 | type: `string` 12 | 13 | ### **method** 14 | 15 | The HTTP method to use. 16 | 17 | type: `string` 18 | 19 | default: `"POST"` 20 | 21 | Supported methods: `GET`, `POST`, `PUT`, `DELETE`, `PATCH` 22 | 23 | ### **timeout_ms** 24 | 25 | The maximum time to wait for a response in milliseconds. 26 | 27 | type: `integer` 28 | 29 | default: `5000` 30 | 31 | ### **retry_count** 32 | 33 | Number of retry attempts for failed requests. 34 | 35 | type: `integer` 36 | 37 | default: `0` 38 | 39 | ### **headers** 40 | 41 | A map of headers to add to the request. 42 | 43 | type: `object` 44 | 45 | default: `{}` 46 | 47 | ### **body_field** 48 | 49 | Specifies which field from the message to use as the request body. 50 | 51 | type: `string` 52 | 53 | default: `"value"` 54 | 55 | ### **auth** 56 | 57 | Authentication configuration. 
58 | 59 | type: `object` 60 | 61 | properties: 62 | - **type**: Authentication type (`basic` or `bearer`) 63 | - **username**: Username for basic authentication 64 | - **password**: Password for basic authentication 65 | - **token**: Token for bearer authentication 66 | 67 | ## Examples 68 | 69 | ### Basic HTTP Request 70 | 71 | ```yaml 72 | - output: 73 | type: "http" 74 | url: "http://example.com/post/data" 75 | method: "POST" 76 | timeout_ms: 5000 77 | retry_count: 3 78 | headers: 79 | Content-Type: "application/json" 80 | ``` 81 | 82 | ### With Basic Authentication 83 | 84 | ```yaml 85 | - output: 86 | type: "http" 87 | url: "http://example.com/data" 88 | method: "POST" 89 | auth: 90 | type: "basic" 91 | username: "user" 92 | password: "pass" 93 | ``` 94 | 95 | ### With Bearer Token 96 | 97 | ```yaml 98 | - output: 99 | type: "http" 100 | url: "http://example.com/api/data" 101 | method: "POST" 102 | auth: 103 | type: "bearer" 104 | token: "your-token" 105 | headers: 106 | Content-Type: "application/json" 107 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/kafka.md: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | The Kafka output component writes messages to a Kafka topic. 4 | 5 | ## Configuration 6 | 7 | ### **brokers** 8 | 9 | A list of broker addresses to connect to. 10 | 11 | type: `array` of `string` 12 | 13 | ### **topic** 14 | 15 | The topic to write messages to. Supports both static values and SQL expressions. 16 | 17 | type: `object` 18 | 19 | One of: 20 | - `type: "value"` with `value: string` - Static topic name 21 | - `type: "expr"` with `expr: string` - SQL expression to evaluate topic name 22 | 23 | ### **key** 24 | 25 | The key to set for each message (optional). Supports both static values and SQL expressions. 26 | 27 | type: `object` 28 | 29 | One of: 30 | - `type: "value"` with `value: string` - Static key value 31 | - `type: "expr"` with `expr: string` - SQL expression to evaluate key 32 | 33 | ### **client_id** 34 | 35 | The client ID to use when connecting to Kafka. 36 | 37 | type: `string` 38 | 39 | ### **compression** 40 | 41 | The compression type to use for messages. 42 | 43 | type: `string` 44 | 45 | One of: 46 | - `none` - No compression 47 | - `gzip` - Gzip compression 48 | - `snappy` - Snappy compression 49 | - `lz4` - LZ4 compression 50 | 51 | ### **acks** 52 | 53 | The number of acknowledgments the producer requires the leader to have received before considering a request complete. 54 | 55 | type: `string` 56 | 57 | One of: 58 | - `0` - No acknowledgment 59 | - `1` - Leader acknowledgment only 60 | - `all` - All replicas acknowledgment 61 | 62 | ### **value_field** 63 | 64 | The field to use as the message value. If not specified, uses the default binary value field. 
65 | 66 | type: `string` 67 | 68 | ## Examples 69 | 70 | ```yaml 71 | output: 72 | type: "kafka" 73 | brokers: 74 | - "localhost:9092" 75 | topic: 76 | type: "expr" 77 | expr: "concat('1','x')" 78 | key: 79 | type: "value" 80 | value: "my-key" 81 | client_id: "my-client" 82 | compression: "gzip" 83 | acks: "all" 84 | value_field: "message" 85 | ``` 86 | 87 | ```yaml 88 | output: 89 | type: "kafka" 90 | brokers: 91 | - "localhost:9092" 92 | topic: 93 | type: "value" 94 | value: "my-topic" 95 | compression: "snappy" 96 | acks: "1" 97 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/mqtt.md: -------------------------------------------------------------------------------- 1 | # MQTT 2 | 3 | The MQTT output component publishes messages to an MQTT broker. 4 | 5 | ## Configuration 6 | 7 | ### **host** 8 | 9 | MQTT broker address. 10 | 11 | type: `string` 12 | 13 | ### **port** 14 | 15 | MQTT broker port. 16 | 17 | type: `integer` 18 | 19 | ### **client_id** 20 | 21 | The client ID to use when connecting to the broker. 22 | 23 | type: `string` 24 | 25 | ### **username** 26 | 27 | Username for authentication (optional). 28 | 29 | type: `string` 30 | 31 | ### **password** 32 | 33 | Password for authentication (optional). 34 | 35 | type: `string` 36 | 37 | ### **topic** 38 | 39 | The topic to publish messages to. Supports both static values and SQL expressions. 40 | 41 | type: `object` 42 | 43 | One of: 44 | - `type: "value"` with `value: string` - Static topic name 45 | - `type: "expr"` with `expr: string` - SQL expression to evaluate topic name 46 | 47 | ### **qos** 48 | 49 | The Quality of Service level to use. 50 | 51 | type: `integer` 52 | 53 | One of: 54 | - `0` - At most once delivery 55 | - `1` - At least once delivery 56 | - `2` - Exactly once delivery 57 | 58 | default: `1` 59 | 60 | ### **clean_session** 61 | 62 | Whether to use clean session. 63 | 64 | type: `boolean` 65 | 66 | default: `true` 67 | 68 | ### **keep_alive** 69 | 70 | Keep alive interval in seconds. 71 | 72 | type: `integer` 73 | 74 | default: `60` 75 | 76 | ### **retain** 77 | 78 | Whether to set the retain flag on published messages. 79 | 80 | type: `boolean` 81 | 82 | default: `false` 83 | 84 | ### **value_field** 85 | 86 | The field to use as the message value. If not specified, uses the default binary value field. 87 | 88 | type: `string` 89 | 90 | ## Examples 91 | 92 | ```yaml 93 | output: 94 | type: "mqtt" 95 | host: "localhost" 96 | port: 1883 97 | client_id: "my-client" 98 | username: "user" 99 | password: "pass" 100 | topic: 101 | type: "value" 102 | value: "my-topic" 103 | qos: 2 104 | clean_session: true 105 | keep_alive: 60 106 | retain: true 107 | value_field: "message" 108 | ``` 109 | 110 | ```yaml 111 | output: 112 | type: "mqtt" 113 | host: "localhost" 114 | port: 1883 115 | topic: 116 | type: "expr" 117 | expr: "concat('sensor/', id)" 118 | qos: 1 119 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/nats.md: -------------------------------------------------------------------------------- 1 | # NATS 2 | 3 | The NATS output component writes messages to a NATS subject. 4 | 5 | ## Configuration 6 | 7 | ### **url** 8 | 9 | The NATS server URL to connect to. 10 | 11 | type: `string` 12 | 13 | ### **mode** 14 | 15 | The NATS operation mode. 
16 | 17 | type: `object` 18 | 19 | One of: 20 | - `type: "regular"` with: 21 | - `subject: object` - The subject to publish to, with: 22 | - `type: "value"` with `value: string` - Static subject name 23 | - `type: "expr"` with `expr: string` - SQL expression to evaluate subject 24 | - `type: "jetstream"` with: 25 | - `subject: object` - The subject to publish to, with: 26 | - `type: "value"` with `value: string` - Static subject name 27 | - `type: "expr"` with `expr: string` - SQL expression to evaluate subject 28 | 29 | ### **auth** 30 | 31 | Authentication configuration (optional). 32 | 33 | type: `object` 34 | 35 | Fields: 36 | - `username: string` - Username for authentication (optional) 37 | - `password: string` - Password for authentication (optional) 38 | - `token: string` - Authentication token (optional) 39 | 40 | ### **value_field** 41 | 42 | The field to use as the message value. If not specified, uses the default binary value field. 43 | 44 | type: `string` 45 | 46 | ## Examples 47 | 48 | ```yaml 49 | output: 50 | type: "nats" 51 | url: "nats://localhost:4222" 52 | mode: 53 | type: "regular" 54 | subject: 55 | type: "expr" 56 | expr: "concat('orders.', id)" 57 | auth: 58 | username: "user" 59 | password: "pass" 60 | value_field: "message" 61 | ``` 62 | 63 | ```yaml 64 | output: 65 | type: "nats" 66 | url: "nats://localhost:4222" 67 | mode: 68 | type: "jetstream" 69 | subject: 70 | type: "value" 71 | value: "orders.new" 72 | auth: 73 | token: "secret-token" 74 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/3-outputs/stdout.md: -------------------------------------------------------------------------------- 1 | # Stdout 2 | 3 | The Stdout output component writes messages to the standard output stream. 4 | 5 | ## Configuration 6 | 7 | ### **append_newline** 8 | 9 | Whether to add a line break after each message (optional). 
10 | 11 | type: `bool` 12 | 13 | default: `true` 14 | 15 | ## Examples 16 | 17 | ```yaml 18 | - output: 19 | type: "stdout" 20 | append_newline: true 21 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/components/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Components", 3 | "position": 1, 4 | "link": { 5 | "title": "Components", 6 | "type": "generated-index", 7 | "description": "Components " 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/deploy/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Deploy", 3 | "position": 4, 4 | "link": { 5 | "title": "Deploy", 6 | "type": "generated-index", 7 | "description": "Deploy ArkFlow to the cloud" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/sql/8-special_functions.md: -------------------------------------------------------------------------------- 1 | # Special Functions 2 | 3 | ## Expansion Functions 4 | 5 | - [unnest](#unnest) 6 | - [unnest(struct)](#unnest-struct) 7 | 8 | ### `unnest` 9 | 10 | Expands an array or map into rows. 11 | 12 | #### Arguments 13 | 14 | - **array**: Array expression to unnest. 15 | Can be a constant, column, or function, and any combination of array operators. 16 | 17 | #### Examples 18 | 19 | ```sql 20 | > select unnest(make_array(1, 2, 3, 4, 5)) as unnested; 21 | +----------+ 22 | | unnested | 23 | +----------+ 24 | | 1 | 25 | | 2 | 26 | | 3 | 27 | | 4 | 28 | | 5 | 29 | +----------+ 30 | ``` 31 | 32 | ```sql 33 | > select unnest(range(0, 10)) as unnested_range; 34 | +----------------+ 35 | | unnested_range | 36 | +----------------+ 37 | | 0 | 38 | | 1 | 39 | | 2 | 40 | | 3 | 41 | | 4 | 42 | | 5 | 43 | | 6 | 44 | | 7 | 45 | | 8 | 46 | | 9 | 47 | +----------------+ 48 | ``` 49 | 50 | ### `unnest (struct)` 51 | 52 | Expand a struct fields into individual columns. 53 | 54 | #### Arguments 55 | 56 | - **struct**: Object expression to unnest. 57 | Can be a constant, column, or function, and any combination of object operators. 
58 | 59 | #### Examples 60 | 61 | ```sql 62 | > create table foo as values ({a: 5, b: 'a string'}), ({a:6, b: 'another string'}); 63 | 64 | > create view foov as select column1 as struct_column from foo; 65 | 66 | > select * from foov; 67 | +---------------------------+ 68 | | struct_column | 69 | +---------------------------+ 70 | | {a: 5, b: a string} | 71 | | {a: 6, b: another string} | 72 | +---------------------------+ 73 | 74 | > select unnest(struct_column) from foov; 75 | +------------------------------------------+------------------------------------------+ 76 | | unnest_placeholder(foov.struct_column).a | unnest_placeholder(foov.struct_column).b | 77 | +------------------------------------------+------------------------------------------+ 78 | | 5 | a string | 79 | | 6 | another string | 80 | +------------------------------------------+------------------------------------------+ 81 | ``` -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/sql/9-udf.md: -------------------------------------------------------------------------------- 1 | # User Defined Functions (UDFs) 2 | 3 | User Defined Functions (UDFs) allow you to extend the functionality of SQL by defining custom functions in Rust and then using them within your SQL queries. 4 | 5 | This project supports three types of UDFs: 6 | 7 | 1. **Scalar UDF**: Operates on a single row and returns a single value for each row. For example, a function that converts a string to uppercase. 8 | 2. **Aggregate UDF**: Operates on a group of rows and returns a single aggregate value. For example, calculating a custom average for a set of values. 9 | 3. **Window UDF**: Operates on a window (a set of rows) related to the current row. For example, calculating a moving average within a window. 10 | 11 | ## Registering UDFs 12 | 13 | To use a custom UDF, you first need to register it with the system. Registration is done by calling the `register` function in the corresponding module: 14 | 15 | - **Scalar UDF**: Use `arkflow_plugin::processor::udf::scalar_udf::register(udf: ScalarUDF)` 16 | - **Aggregate UDF**: Use `arkflow_plugin::processor::udf::aggregate_udf::register(udf: AggregateUDF)` 17 | - **Window UDF**: Use `arkflow_plugin::processor::udf::window_udf::register(udf: WindowUDF)` 18 | 19 | These `register` functions add your UDF to a global list. 20 | 21 | ```rust 22 | use datafusion::logical_expr::{ScalarUDF, AggregateUDF, WindowUDF}; 23 | use arkflow_plugin::processor::udf::{scalar_udf, aggregate_udf, window_udf}; 24 | 25 | // Example: Registering a scalar UDF 26 | // let my_scalar_udf = ScalarUDF::new(...); 27 | // scalar_udf::register(my_scalar_udf); 28 | 29 | // Example: Registering an aggregate UDF 30 | // let my_aggregate_udf = AggregateUDF::new(...); 31 | // aggregate_udf::register(my_aggregate_udf); 32 | 33 | // Example: Registering a window UDF 34 | // let my_window_udf = WindowUDF::new(...); 35 | // window_udf::register(my_window_udf); 36 | ``` 37 | 38 | ## Initialization 39 | 40 | Registered UDFs are not immediately available in SQL queries. They are automatically added to DataFusion's `FunctionRegistry` during the processor's execution context initialization via an internal call to the `arkflow_plugin::processor::udf::init` function. This `init` function iterates through all registered scalar, aggregate, and window UDFs and registers them with the current DataFusion context. 
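For illustration only, assume a scalar UDF has been registered under the hypothetical name `to_upper` (this name is not shipped with the project). After initialization, the SQL processor can call it directly in its query, for example:

```yaml
- processor:
    type: "sql"
    # to_upper is a hypothetical user-registered scalar UDF, not a built-in function
    query: "SELECT to_upper(sensor) AS sensor_upper, value FROM flow"
```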
41 | 42 | Once initialization is complete, you can use your registered UDFs in SQL queries just like built-in functions. -------------------------------------------------------------------------------- /docs/versioned_docs/version-0.3.x/sql/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "SQL", 3 | "position": 2, 4 | "link": { 5 | "title": "SQL", 6 | "type": "generated-index", 7 | "description": "SQL query engine documentation, including comprehensive coverage of data types, operators, query syntax, subqueries, aggregate functions, window functions, and more" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /docs/versioned_sidebars/version-0.2.x-sidebars.json: -------------------------------------------------------------------------------- 1 | { 2 | "tutorialSidebar": [ 3 | { 4 | "type": "autogenerated", 5 | "dirName": "." 6 | } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /docs/versioned_sidebars/version-0.3.x-sidebars.json: -------------------------------------------------------------------------------- 1 | { 2 | "tutorialSidebar": [ 3 | { 4 | "type": "autogenerated", 5 | "dirName": "." 6 | } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /docs/versions.json: -------------------------------------------------------------------------------- 1 | [ 2 | "0.3.x", 3 | "0.2.x" 4 | ] 5 | -------------------------------------------------------------------------------- /examples/drop_output_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 1ms 8 | batch_size: 10000 9 | 10 | 11 | pipeline: 12 | thread_num: 4 13 | processors: 14 | - type: "json_to_arrow" 15 | 16 | - type: "sql" 17 | # SQL query statement; standard SQL syntax is supported 18 | query: "SELECT count(*) FROM flow WHERE value >= 10 group by sensor" 19 | - type: "arrow_to_json" 20 | output: 21 | type: "drop" 22 | -------------------------------------------------------------------------------- /examples/generate_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 1ns 8 | batch_size: 1 9 | count: 10 10 | 11 | # buffer: 12 | # type: "memory" 13 | # capacity: 5 14 | # timeout: 5s 15 | pipeline: 16 | thread_num: 10 17 | processors: 18 | # - type: "batch" 19 | # count: 3 20 | # timeout_ms: 10000 21 | # data_type: "binary" 22 | - type: "json_to_arrow" 23 | 24 | - type: "sql" 25 | query: "SELECT sum(value),avg(value) ,111 as x FROM flow group by sensor" 26 | - type: "arrow_to_json" 27 | - type: "sql" 28 | query: "SELECT *,cast( __value__ as string) as y FROM flow " 29 | 30 | 31 | output: 32 | type: "stdout" 33 | -------------------------------------------------------------------------------- /examples/http_client_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 1ms 8 | batch_size: 1000 9 | 10 | 11 | buffer: 12 | type: "memory" 13 |
capacity: 10 14 | timeout: 10s 15 | pipeline: 16 | thread_num: 4 17 | processors: 18 | # - type: "batch" 19 | # count: 3 20 | # timeout_ms: 10000 21 | # data_type: "binary" 22 | - type: "json_to_arrow" 23 | 24 | - type: "sql" 25 | # SQL query statement; standard SQL syntax is supported 26 | query: "SELECT sum(value) as value ,avg(value) ,111 as x FROM flow" 27 | - type: "arrow_to_json" 28 | 29 | 30 | output: 31 | type: "http" 32 | url: "http://localhost:8080/v1/test " 33 | method: "POST" 34 | timeout_ms: 90 35 | retry_count: 1 36 | -------------------------------------------------------------------------------- /examples/http_server_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | # file_path: "./log/example.log" 4 | format: plain 5 | #health_check: 6 | # enabled: true 7 | streams: 8 | - input: 9 | type: "http" 10 | address: '0.0.0.0:8090' 11 | path: '/' 12 | 13 | 14 | pipeline: 15 | thread_num: 4 16 | processors: 17 | # - type: "batch" 18 | # count: 3 19 | # timeout_ms: 10000 20 | # data_type: "binary" 21 | - type: "json_to_arrow" 22 | 23 | - type: "sql" 24 | # SQL query statement; standard SQL syntax is supported 25 | query: "SELECT sum(value),avg(value) ,111 as x FROM flow" 26 | # - type: "arrow_to_json" 27 | 28 | 29 | output: 30 | type: "stdout" 31 | error_output: 32 | type: "stdout" 33 | -------------------------------------------------------------------------------- /examples/input_data.csv: -------------------------------------------------------------------------------- 1 | id,data 2 | 1,a 3 | 2,b 4 | -------------------------------------------------------------------------------- /examples/join_buffer_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "multiple_inputs" 6 | inputs: 7 | - type: "generate" 8 | name: "flow_input1" 9 | context: '{ "id": 1625000000000, "value": 10, "sensor": "temp_1" }' 10 | interval: 1s 11 | batch_size: 1 12 | count: 10 13 | - type: "generate" 14 | name: "flow_input2" 15 | context: '{ "id": 1625000000000, "value": 20, "sensor": "temp_2" }' 16 | interval: 1s 17 | batch_size: 1 18 | count: 10 19 | 20 | 21 | buffer: 22 | type: "session_window" 23 | gap: 1s 24 | join: 25 | query: "SELECT * FROM flow_input1 join flow_input2 on (flow_input1.id = flow_input2.id)" 26 | # query: "SELECT * FROM flow_input1" 27 | codec: 28 | type: "json" 29 | pipeline: 30 | processors: 31 | # - type: "json_to_arrow" 32 | 33 | 34 | 35 | output: 36 | type: "stdout" 37 | 38 | -------------------------------------------------------------------------------- /examples/kafka_example.yaml: -------------------------------------------------------------------------------- 1 | 2 | logging: 3 | level: debug 4 | streams: 5 | - input: 6 | type: kafka 7 | brokers: 8 | - localhost:9092 9 | topics: 10 | - test-topic 11 | consumer_group: test-group 12 | client_id: rsflow 13 | start_from_latest: true 14 | 15 | pipeline: 16 | thread_num: 4 17 | processors: 18 | - type: json_to_arrow 19 | - type: sql 20 | query: "SELECT * FROM flow" 21 | - type: arrow_to_json 22 | 23 | output: 24 | type: kafka 25 | brokers: 26 | - localhost:9092 27 | topic: 28 | type: value 29 | value: test-topic-copy 30 | client_id: rsflow-copy 31 | -------------------------------------------------------------------------------- /examples/message.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package message; 4 | 5 | 6 | message Message{ 7 | int64
timestamp = 1; 8 | double value = 2; 9 | string sensor = 3; 10 | } -------------------------------------------------------------------------------- /examples/mqtt_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: error 3 | streams: 4 | - input: 5 | type: "mqtt" 6 | host: "localhost" 7 | port: 1883 8 | qos: 1 9 | client_id: "flow_input" 10 | topics: [ "flow_input/#" ] 11 | 12 | 13 | 14 | pipeline: 15 | thread_num: 4 16 | processors: 17 | - type: "json_to_arrow" 18 | - type: "sql" 19 | query: "SELECT * ,cast(value as string) as tx FROM flow WHERE value > 10" 20 | - type: "arrow_to_json" 21 | 22 | output: 23 | type: "stdout" 24 | # output: 25 | # type: "mqtt" 26 | # host: "localhost" 27 | # port: 1883 28 | # qos: 1 29 | # client_id: "flow_output" 30 | # topic: "flow_output" 31 | -------------------------------------------------------------------------------- /examples/nats_input_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "nats" 6 | url: "nats://localhost:4222" 7 | mode: 8 | type: jet_stream 9 | stream: "test" 10 | consumer_name: "test" 11 | durable_name: "test" 12 | 13 | 14 | 15 | 16 | pipeline: 17 | thread_num: 4 18 | processors: 19 | - type: "sql" 20 | query: "SELECT cast( __value__ as string) as s FROM flow" 21 | 22 | 23 | output: 24 | type: "stdout" 25 | -------------------------------------------------------------------------------- /examples/nats_output_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 1s 8 | batch_size: 1 9 | 10 | 11 | 12 | pipeline: 13 | thread_num: 4 14 | processors: 15 | - type: "sql" 16 | query: "SELECT cast( __value__ as string) as s,* FROM flow" 17 | 18 | 19 | output: 20 | type: "nats" 21 | url: "nats://localhost:4222" 22 | 23 | mode: 24 | type: regular 25 | subject: 26 | type: "value" 27 | value: "test" 28 | 29 | -------------------------------------------------------------------------------- /examples/protobuf_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: trace 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10.0, "sensor": "temp_1" }' 7 | interval: 1s 8 | batch_size: 1 9 | 10 | buffer: 11 | type: "memory" 12 | capacity: 100 13 | timeout: 10s 14 | 15 | 16 | 17 | pipeline: 18 | thread_num: 4 19 | processors: 20 | - type: "json_to_arrow" 21 | - type: "sql" 22 | query: "SELECT count(timestamp) as timestamp, sum(value) as value, cast(count(sensor) as string) as sensor FROM flow WHERE value >= 10 order by sensor" 23 | - type: "arrow_to_protobuf" 24 | proto_inputs: 25 | - "examples/" 26 | message_type: "message.Message" 27 | - type: "protobuf_to_arrow" 28 | proto_inputs: 29 | - "examples/" 30 | message_type: "message.Message" 31 | 32 | output: 33 | type: "stdout" 34 | -------------------------------------------------------------------------------- /examples/redis_input_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: debug 3 | streams: 4 | - input: 5 | type: "redis" 6 | mode: 7 | type: single 8 | url: redis://127.0.0.1:6379 9 | 10 | redis_type: 11 | type: subscribe 12 | subscribe: 13 
| type: channels 14 | channels: [ "test" ] 15 | 16 | # redis_type: 17 | # type: subscribe 18 | # subscribe: 19 | # type: patterns 20 | # patterns: [ "test" ] 21 | # 22 | # redis_type: 23 | # type: list 24 | # list: [ "test" ] 25 | 26 | pipeline: 27 | thread_num: 4 28 | processors: 29 | - type: "sql" 30 | query: "SELECT cast( __value__ as string) as s FROM flow" 31 | 32 | 33 | output: 34 | type: "stdout" 35 | -------------------------------------------------------------------------------- /examples/redis_output_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: debug 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10.0, "sensor": "temp_1" }' 7 | interval: 1s 8 | batch_size: 10 9 | 10 | 11 | pipeline: 12 | thread_num: 4 13 | processors: 14 | - type: "json_to_arrow" 15 | - type: "sql" 16 | query: "SELECT * FROM flow" 17 | - type: "arrow_to_json" 18 | 19 | output: 20 | type: "redis" 21 | mode: 22 | type: "single" 23 | url: "redis://127.0.0.1:6379/1" 24 | # redis_type: 25 | # type: list 26 | # key: 27 | # type: value 28 | # value: 'test' 29 | 30 | # redis_type: 31 | # type: publish 32 | # channel: 33 | # type: value 34 | # value: 'test' 35 | 36 | # redis_type: 37 | # type: hashes 38 | # key: 39 | # type: value 40 | # value: 'test_hash' 41 | # field: 42 | # type: expr 43 | # expr: 'sensor' 44 | 45 | redis_type: 46 | type: strings 47 | key: 48 | type: expr 49 | expr: concat("sensor",cast(random() as string)) 50 | -------------------------------------------------------------------------------- /examples/redis_temporary_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 5s 8 | batch_size: 2 9 | 10 | temporary: 11 | - name: redis_temporary 12 | type: "redis" 13 | mode: 14 | type: single 15 | url: redis://127.0.0.1:6379 16 | redis_type: 17 | # type: list 18 | type: string 19 | 20 | 21 | pipeline: 22 | thread_num: 10 23 | processors: 24 | - type: "json_to_arrow" 25 | 26 | - type: "sql" 27 | query: "SELECT * FROM flow right join redis_table on (flow.sensor = redis_table.x)" 28 | temporary_list: 29 | - name: redis_temporary 30 | table_name: redis_table 31 | key: 32 | type: value 33 | value: 'test' 34 | 35 | # - type: "arrow_to_json" 36 | # - type: "sql" 37 | # query: "SELECT *,cast( __value__ as string) as y FROM flow " 38 | 39 | 40 | output: 41 | type: "stdout" 42 | # error_output: 43 | # type: "stdout" -------------------------------------------------------------------------------- /examples/sql_input_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "sql" 6 | input_type: 7 | type: "json" 8 | # input_type: "csv" 9 | path: 's3://test/stream_data.json' 10 | object_store: 11 | type: s3 12 | endpoint: "http://localhost:9000" 13 | region: "us-east-1" 14 | bucket_name: "test" 15 | access_key_id: "XlmP2GPWWIyppydCmtb7" 16 | secret_access_key: "7wDf9WyuLGtz5LsWHtO5BkGdqU0HqIhRpvNuL9ui" 17 | allow_http: true 18 | # path: './examples/input_data.csv' 19 | select_sql: | 20 | select *,value + 999999 from flow; 21 | 22 | 23 | 24 | pipeline: 25 | thread_num: 4 26 | processors: 27 | - type: "sql" 28 | query: "SELECT * FROM flow" 29 | 30 | 31 | output: 32 | type: "stdout" 33 | 
-------------------------------------------------------------------------------- /examples/sql_output_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 1s 8 | batch_size: 10 9 | 10 | pipeline: 11 | thread_num: 4 12 | processors: 13 | - type: "json_to_arrow" 14 | - type: "sql" 15 | query: "SELECT * FROM flow WHERE value >= 10" 16 | 17 | output: 18 | type: "sql" 19 | output_type: 20 | type: "mysql" 21 | uri: "mysql://root:1234@localhost:3306/arkflow" 22 | table_name: "arkflow_test" 23 | 24 | error_output: 25 | type: "stdout" -------------------------------------------------------------------------------- /examples/stream_data.json: -------------------------------------------------------------------------------- 1 | { "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" } 2 | { "timestamp": 1625000001000, "value": 15, "sensor": "temp_1" } 3 | { "timestamp": 1625000002000, "value": 12, "sensor": "temp_1" } 4 | { "timestamp": 1625000003000, "value": 18, "sensor": "temp_2" } 5 | { "timestamp": 1625000004000, "value": 20, "sensor": "temp_2" } 6 | { "timestamp": 1625000005000, "value": 17, "sensor": "temp_1" } 7 | { "timestamp": 1625000006000, "value": 22, "sensor": "temp_2" } 8 | { "timestamp": 1625000007000, "value": 19, "sensor": "temp_1" } 9 | { "timestamp": 1625000008000, "value": 25, "sensor": "temp_2" } 10 | { "timestamp": 1625000009000, "value": 21, "sensor": "temp_1" } 11 | { "timestamp": 1625000010000, "value": 28, "sensor": "temp_2" } 12 | { "timestamp": 1625000011000, "value": 23, "sensor": "temp_1" } 13 | { "timestamp": 1625000012000, "value": 30, "sensor": "temp_2" } 14 | { "timestamp": 1625000013000, "value": 24, "sensor": "temp_1" } 15 | { "timestamp": 1625000014000, "value": 32, "sensor": "temp_2" } 16 | { "timestamp": 1625000015000, "value": 26, "sensor": "temp_1" } 17 | { "timestamp": 1625000016000, "value": 35, "sensor": "temp_2" } 18 | { "timestamp": 1625000017000, "value": 27, "sensor": "temp_1" } 19 | { "timestamp": 1625000018000, "value": 38, "sensor": "temp_2" } 20 | { "timestamp": 1625000019000, "value": 29, "sensor": "temp_1" } 21 | { "timestamp": 1625000020000, "value": 40, "sensor": "temp_2" } -------------------------------------------------------------------------------- /examples/vrl_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: error 3 | streams: 4 | - input: 5 | type: "generate" 6 | context: '{ "timestamp": 1625000000000, "value": 10, "sensor": "temp_1" }' 7 | interval: 1s 8 | batch_size: 1 9 | 10 | pipeline: 11 | thread_num: 4 12 | processors: 13 | - type: "json_to_arrow" 14 | - type: "vrl" 15 | statement: ".v2, err = .value * 2; ." 
16 | - type: "arrow_to_json" 17 | 18 | output: 19 | type: "stdout" 20 | -------------------------------------------------------------------------------- /examples/websocket_input_example.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | level: info 3 | streams: 4 | - input: 5 | type: "websocket" 6 | url: "ws://localhost:8800" 7 | 8 | pipeline: 9 | thread_num: 4 10 | processors: 11 | - type: "sql" 12 | query: "SELECT cast( __value__ as string) as s, * FROM flow" 13 | 14 | 15 | output: 16 | type: "stdout" 17 | -------------------------------------------------------------------------------- /logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Layer 1 5 | {/* Group and rotate all elements -90 degrees around center (75,75) */} 6 | {/* Original fountain paths, their coordinates are relative to the unrotated state */} 7 | 8 | 9 | 10 | 11 | 12 | 13 | --------------------------------------------------------------------------------