├── examples ├── text2sql │ ├── .gitignore │ ├── src │ │ ├── lib.rs │ │ ├── main.rs │ │ └── flow.rs │ ├── example_data │ │ ├── orders.csv │ │ └── customers.csv │ ├── Cargo.toml │ └── Cargo.lock ├── pocketflow-rs-rag │ ├── src │ │ ├── lib.rs │ │ ├── nodes │ │ │ ├── mod.rs │ │ │ ├── embed_query.rs │ │ │ ├── chunk_documents.rs │ │ │ ├── retrieve_document.rs │ │ │ ├── embed_documents.rs │ │ │ ├── generate_answer.rs │ │ │ ├── create_index.rs │ │ │ ├── query_rewrite.rs │ │ │ └── file_loader.rs │ │ ├── state.rs │ │ └── main.rs │ ├── Cargo.toml │ └── README.md └── basic.rs ├── .gitignore ├── static └── pocketflow_rust_title.png ├── src ├── utils │ ├── mod.rs │ ├── viz_debug.rs │ ├── embedding.rs │ ├── llm_wrapper.rs │ ├── web_search.rs │ ├── vector_db.rs │ └── text_chunking.rs ├── lib.rs ├── context.rs ├── node.rs └── flow.rs ├── .github └── workflows │ └── publish.yml ├── Cargo.toml ├── .cursor └── rules │ └── 001-how_to_use.mdc └── README.md /examples/text2sql/.gitignore: -------------------------------------------------------------------------------- 1 | ecommerce.duckdb -------------------------------------------------------------------------------- /examples/text2sql/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod flow; 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .idea/ 3 | 4 | target 5 | .env 6 | 7 | Cargo.lock 8 | .vscode/ -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod nodes; 2 | pub mod state; 3 | 4 | pub use nodes::*; 5 | pub use state::*; 6 | -------------------------------------------------------------------------------- /static/pocketflow_rust_title.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Pocket/PocketFlow-Rust/main/static/pocketflow_rust_title.png -------------------------------------------------------------------------------- /src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod embedding; 2 | pub mod llm_wrapper; 3 | pub mod text_chunking; 4 | pub mod vector_db; 5 | pub mod viz_debug; 6 | pub mod web_search; 7 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod context; 2 | pub mod flow; 3 | pub mod node; 4 | pub mod utils; 5 | 6 | pub use context::Context; 7 | pub use flow::*; 8 | pub use node::*; 9 | pub use utils::*; 10 | 11 | pub type Params = std::collections::HashMap; 12 | -------------------------------------------------------------------------------- /examples/text2sql/example_data/orders.csv: -------------------------------------------------------------------------------- 1 | order_id,customer_id,product,amount,order_date 2 | 101,1,Laptop,1299.99,2023-02-10 3 | 102,3,Phone,799.99,2023-03-15 4 | 103,2,Headphones,149.99,2023-02-22 5 | 104,5,Tablet,499.99,2023-04-18 6 | 105,1,Monitor,349.99,2023-05-05 7 | 106,4,Keyboard,89.99,2023-03-10 8 | 107,3,Mouse,59.99,2023-04-02 9 | 108,7,Printer,279.99,2023-03-28 10 | 109,9,Speakers,129.99,2023-05-15 11 | 110,10,Webcam,79.99,2023-04-20 12 | -------------------------------------------------------------------------------- /examples/text2sql/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "text2sql" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | [[bin]] 7 | name = "text2sql" 8 | 9 | [dependencies] 10 | duckdb = {version="1.2.2", features = ["bundled"]} 11 | pocketflow_rs = { path = '../..'} 12 | serde_json = "1.0.140" 13 
| async-trait = "0.1" 14 | tokio = { version = "1.0", features = ["full"] } 15 | anyhow = "1.0.98" 16 | thiserror = "1.0.69" 17 | openai_api_rust = "0.1.9" 18 | chrono = "0.4.41" 19 | tracing = "0.1.41" 20 | tracing-subscriber = "0.3.19" 21 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/mod.rs: -------------------------------------------------------------------------------- 1 | mod chunk_documents; 2 | mod create_index; 3 | mod embed_documents; 4 | mod embed_query; 5 | mod file_loader; 6 | mod generate_answer; 7 | mod query_rewrite; 8 | mod retrieve_document; 9 | 10 | pub use chunk_documents::ChunkDocumentsNode; 11 | pub use create_index::CreateIndexNode; 12 | pub use embed_documents::EmbedDocumentsNode; 13 | pub use embed_query::EmbedQueryNode; 14 | pub use file_loader::FileLoaderNode; 15 | pub use generate_answer::GenerateAnswerNode; 16 | pub use query_rewrite::QueryRewriteNode; 17 | pub use retrieve_document::RetrieveDocumentNode; 18 | -------------------------------------------------------------------------------- /examples/text2sql/example_data/customers.csv: -------------------------------------------------------------------------------- 1 | id,name,city,email,signup_date 2 | 1,John Doe,New York,john.doe@example.com,2023-01-15 3 | 2,Jane Smith,San Francisco,jane.smith@example.com,2023-02-20 4 | 3,Robert Johnson,New York,robert.j@example.com,2023-03-05 5 | 4,Emily Davis,Chicago,emily.d@example.com,2023-01-30 6 | 5,Michael Brown,Boston,michael.b@example.com,2023-04-10 7 | 6,Lisa Wang,Chicago,lisa.w@example.com,2023-05-12 8 | 7,David Lee,San Francisco,david.l@example.com,2023-02-28 9 | 8,Sarah Miller,Boston,sarah.m@example.com,2023-06-01 10 | 9,James Taylor,New York,james.t@example.com,2023-04-22 11 | 10,Amanda Garcia,Chicago,amanda.g@example.com,2023-03-18 -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "pocketflow-rs-rag" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | [dependencies] 7 | pocketflow_rs = { path = "../../", features = ["openai", "qdrant", "debug"] } 8 | anyhow = "1.0" 9 | tokio = { version = "1.0", features = ["full"] } 10 | tracing = "0.1" 11 | tracing-subscriber = "0.3" 12 | serde_json = "1.0" 13 | async-trait = "0.1" 14 | faiss = "0.12.1" 15 | clap = { version = "4.5", features = ["derive"] } 16 | pdf-extract = "0.9" 17 | reqwest = { version = "0.12.15", features = ["json"] } 18 | uuid = { version = "1.16.0", features = ["v4"] } 19 | qdrant-client = "1.14.0" 20 | termimad = "0.31.3" 21 | 22 | [dev-dependencies] 23 | tempfile = "3.8" 24 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to crates.io 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' # Trigger on version tags 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | 11 | jobs: 12 | publish: 13 | name: Publish 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Install Rust toolchain 19 | uses: dtolnay/rust-toolchain@stable 20 | with: 21 | components: rustfmt, clippy 22 | 23 | - name: Cache dependencies 24 | uses: Swatinem/rust-cache@v2 25 | 26 | - name: Run tests 27 | run: cargo test --all-features 28 | 29 | - name: Run clippy 30 | run: cargo clippy --all-features -- -D warnings 31 | 32 | - name: Run rustfmt 33 | run: cargo fmt --all -- --check 34 | 35 | - name: Publish to crates.io 36 | run: cargo publish --token ${CRATES_TOKEN} 37 | env: 38 | CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }} -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = 
"pocketflow_rs" 3 | version = "0.1.0" 4 | edition = "2024" 5 | description = "PocketFlow implemented by rust" 6 | authors = ["Yan Lu "] 7 | license = "MIT" 8 | 9 | [lib] 10 | name = "pocketflow_rs" 11 | path = "src/lib.rs" 12 | 13 | [[example]] 14 | name = "basic" 15 | path = "examples/basic.rs" 16 | 17 | [workspace] 18 | members = [ 19 | "examples/pocketflow-rs-rag", 20 | "examples/text2sql" 21 | ] 22 | 23 | [dependencies] 24 | anyhow = "1.0" 25 | async-trait = "0.1" 26 | tokio = { version = "1.0", features = ["full"] } 27 | serde = { version = "1.0", features = ["derive"] } 28 | serde_json = "1.0" 29 | thiserror = "1.0" 30 | tracing = "0.1" 31 | rand = "0.8" 32 | openai_api_rust = { version = "0.1.9", optional = true} 33 | regex = "1.11.1" 34 | qdrant-client = {version = "1.14.0", optional = true} 35 | reqwest = { version = "0.12", features = ["json"], optional = true } 36 | 37 | [features] 38 | openai = ["dep:openai_api_rust"] 39 | websearch = ["dep:reqwest"] 40 | qdrant = ["dep:qdrant-client"] 41 | debug = [] 42 | default = [ 43 | "openai", 44 | ] -------------------------------------------------------------------------------- /src/utils/viz_debug.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "debug")] 2 | use std::fmt::Debug; 3 | 4 | pub trait DebugVisualizer { 5 | fn visualize(&self, data: &T) -> String; 6 | fn visualize_flow(&self, flow_data: &[u8]) -> String; 7 | } 8 | 9 | pub struct ConsoleDebugVisualizer; 10 | 11 | impl DebugVisualizer for ConsoleDebugVisualizer { 12 | fn visualize(&self, data: &T) -> String { 13 | format!("{:?}", data) 14 | } 15 | 16 | #[allow(unused_variables)] 17 | fn visualize_flow(&self, flow_data: &[u8]) -> String { 18 | // TODO: Implement flow visualization 19 | "Flow visualization not implemented".to_string() 20 | } 21 | } 22 | 23 | pub struct GraphDebugVisualizer; 24 | 25 | impl DebugVisualizer for GraphDebugVisualizer { 26 | fn visualize(&self, data: &T) -> String { 27 
| // TODO: Implement graph visualization 28 | format!("Graph visualization of {:?}", data) 29 | } 30 | 31 | #[allow(unused_variables)] 32 | fn visualize_flow(&self, flow_data: &[u8]) -> String { 33 | // TODO: Implement flow graph visualization 34 | "Flow graph visualization not implemented".to_string() 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /examples/text2sql/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use anyhow::Result; 4 | use duckdb::Connection; 5 | use pocketflow_rs::{Context, build_flow}; 6 | use text2sql::flow::{ExecuteSQLNode, OpenAISQLGenerationNode, SchemaRetrievalNode}; 7 | 8 | #[tokio::main] 9 | async fn main() -> Result<()> { 10 | tracing_subscriber::fmt::init(); 11 | 12 | let db_path = "ecommerce.duckdb"; 13 | let conn = Connection::open(db_path)?; 14 | 15 | conn.execute(&format!( 16 | "CREATE TABLE IF NOT EXISTS customers AS SELECT * FROM read_csv_auto('{}', AUTO_DETECT=TRUE)", 17 | "example_data/customers.csv" 18 | ), [])?; 19 | 20 | conn.execute(&format!( 21 | "CREATE TABLE IF NOT EXISTS orders AS SELECT * FROM read_csv_auto('{}', AUTO_DETECT=TRUE)", 22 | "example_data/orders.csv" 23 | ), [])?; 24 | 25 | println!("please input your query using natural language?"); 26 | let mut user_query = String::new(); 27 | std::io::stdin().read_line(&mut user_query)?; 28 | user_query = user_query.trim().to_string(); 29 | 30 | let schema_retrieval = SchemaRetrievalNode::new(db_path.to_string()); 31 | let openai_sql_gen = 32 | OpenAISQLGenerationNode::new(env::var("DASH_SCOPE_API_KEY").unwrap(), user_query); 33 | let execute_sql = ExecuteSQLNode::new(db_path.to_string()); 34 | 35 | let flow = build_flow! 
( 36 | start: ("start", schema_retrieval), 37 | nodes: [ 38 | ("generate_sql", openai_sql_gen), 39 | ("execute_sql", execute_sql), 40 | ], 41 | edges: [ 42 | ("start", "generate_sql", text2sql::flow::SqlExecutorState::Default), 43 | ("generate_sql", "execute_sql", text2sql::flow::SqlExecutorState::Default) 44 | ] 45 | ); 46 | let context = Context::new(); 47 | 48 | let result = flow.run(context).await?; 49 | println!("result: {:?}", result); 50 | 51 | Ok(()) 52 | } 53 | -------------------------------------------------------------------------------- /.cursor/rules/001-how_to_use.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: 3 | globs: 4 | alwaysApply: false 5 | --- 6 | ## How to create a workflow 7 | 8 | ### Build Node 9 | 10 | 1. Define State(Optional) 11 | Each node can define different states based on the possible execution of subsequent nodes. If the node logic is simple, it can be directly implemented using BaseState 12 | 13 | A example for define state: the SQL executing node can have states such as: execution successful/execution failed SQL error/execution failed connection error. If the execution is successful, the node can proceed to the next stage. If the execution is incorrect, it will jump to generate SQL and regenerate the node. 14 | 15 | ```rust 16 | pub enum SqlExecutorState{ 17 | SqlSyntaxError, 18 | SqlClientError 19 | Default, // Success 20 | } 21 | ``` 22 | 23 | 2. implement trait function. 24 | + prepare(optional): Sets up necessary preconditions, preprocess the data in context. 25 | + execute: Performs the main logic and produces a result. 26 | + post_process(optional): 27 | + Evaluates the execute result, updates the Context. 28 | + Return the corresponding state based on the result, allowing the Flow runtime library to determine which node to call next by evaluating the edge conditions. 
29 | + If the logic of the node is simple enough and does not require post-processing, it can be omitted and the default can be used. 30 | 31 | ## Build Flow: 32 | 33 | You can use rust macro `build_flow` and `build_batch_flow` to create a workflow for LLM. 34 | 35 | such as: 36 | 37 | ```rust 38 | let flow = build_flow!( 39 | start: ("start", node1), // define begin node, node1 is object for Node and 'start' is alias. 40 | nodes: [("next", node2)], // define other nodes as start. 41 | edges: [ 42 | ("start", "next", MyState::Default) // start -> next, when start post_process returned state is MyState::Default 43 | ] 44 | ); 45 | ``` 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/embed_query.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::utils::embedding::{ 5 | EmbeddingGenerator, EmbeddingOptions, OpenAIEmbeddingGenerator, 6 | }; 7 | use pocketflow_rs::{Context, Node, ProcessResult}; 8 | use serde_json::{Value, json}; 9 | use std::sync::Arc; 10 | 11 | pub struct EmbedQueryNode { 12 | generator: Arc, 13 | } 14 | 15 | impl EmbedQueryNode { 16 | pub fn new(api_key: String, endpoint: String, model: String, dimension: Option) -> Self { 17 | Self { 18 | generator: Arc::new(OpenAIEmbeddingGenerator::new( 19 | &api_key, 20 | &endpoint, 21 | EmbeddingOptions { 22 | model, 23 | dimensions: dimension, 24 | }, 25 | )), 26 | } 27 | } 28 | } 29 | 30 | #[async_trait] 31 | impl Node for EmbedQueryNode { 32 | type State = RagState; 33 | 34 | #[allow(unused_variables)] 35 | async fn execute(&self, context: &Context) -> Result { 36 | let query = context 37 | .get("rewritten_query") 38 | .and_then(|v| v.as_str()) 39 | .unwrap_or("") 40 | .to_string(); 41 | let embedding = self.generator.generate_embedding(&query).await?; 42 | if 
embedding.is_empty() { 43 | return Err(anyhow::anyhow!("No embedding generated for query")); 44 | } 45 | Ok(Value::Array( 46 | embedding.into_iter().map(|x| json!(x)).collect(), 47 | )) 48 | } 49 | 50 | async fn post_process( 51 | &self, 52 | context: &mut Context, 53 | result: &Result, 54 | ) -> Result> { 55 | match result { 56 | Ok(value) => { 57 | context.set("query_embedding", value.clone()); 58 | Ok(ProcessResult::new( 59 | RagState::Default, 60 | "query_embedded".to_string(), 61 | )) 62 | } 63 | Err(e) => Ok(ProcessResult::new( 64 | RagState::QueryEmbeddingError, 65 | format!("query_embedding_error: {}", e), 66 | )), 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/state.rs: -------------------------------------------------------------------------------- 1 | use pocketflow_rs::ProcessState; 2 | 3 | #[derive(Debug, Clone, PartialEq, Eq)] 4 | pub enum RagState { 5 | // Offline states 6 | FileLoadedError, 7 | DocumentsLoaded, 8 | DocumentsChunked, 9 | ChunksEmbedded, 10 | IndexCreated, 11 | // Offline error states 12 | DocumentLoadError, 13 | ChunkingError, 14 | EmbeddingError, 15 | IndexCreationError, 16 | // Online states 17 | QueryEmbedded, 18 | DocumentsRetrieved, 19 | AnswerGenerated, 20 | // Online error states 21 | QueryEmbeddingError, 22 | RetrievalError, 23 | GenerationError, 24 | Default, 25 | QueryRewriteError, 26 | } 27 | 28 | impl ProcessState for RagState { 29 | fn is_default(&self) -> bool { 30 | matches!(self, RagState::Default) 31 | } 32 | 33 | fn to_condition(&self) -> String { 34 | match self { 35 | // Offline states 36 | RagState::FileLoadedError => "file_loaded_error".to_string(), 37 | RagState::DocumentsLoaded => "documents_loaded".to_string(), 38 | RagState::DocumentsChunked => "documents_chunked".to_string(), 39 | RagState::ChunksEmbedded => "chunks_embedded".to_string(), 40 | RagState::IndexCreated => "index_created".to_string(), 41 | // Offline 
error states 42 | RagState::DocumentLoadError => "document_load_error".to_string(), 43 | RagState::ChunkingError => "chunking_error".to_string(), 44 | RagState::EmbeddingError => "embedding_error".to_string(), 45 | RagState::IndexCreationError => "index_creation_error".to_string(), 46 | // Online states 47 | RagState::QueryEmbedded => "query_embedded".to_string(), 48 | RagState::DocumentsRetrieved => "documents_retrieved".to_string(), 49 | RagState::AnswerGenerated => "answer_generated".to_string(), 50 | // Online error states 51 | RagState::QueryEmbeddingError => "query_embedding_error".to_string(), 52 | RagState::RetrievalError => "retrieval_error".to_string(), 53 | RagState::GenerationError => "generation_error".to_string(), 54 | RagState::Default => "default".to_string(), 55 | RagState::QueryRewriteError => "query_rewrite_error".to_string(), 56 | } 57 | } 58 | } 59 | 60 | impl Default for RagState { 61 | fn default() -> Self { 62 | RagState::Default 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/chunk_documents.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::utils::text_chunking::{ChunkingOptions, ChunkingStrategy, TextChunker}; 5 | use pocketflow_rs::{Context, Node, ProcessResult}; 6 | use serde_json::{Value, json}; 7 | use tracing::info; 8 | 9 | pub struct ChunkDocumentsNode { 10 | chunker: TextChunker, 11 | options: ChunkingOptions, 12 | } 13 | 14 | impl ChunkDocumentsNode { 15 | pub fn new(chunk_size: usize, overlap: usize, strategy: ChunkingStrategy) -> Self { 16 | Self { 17 | chunker: TextChunker::new(), 18 | options: ChunkingOptions { 19 | chunk_size, 20 | overlap, 21 | strategy, 22 | }, 23 | } 24 | } 25 | } 26 | 27 | #[async_trait] 28 | impl Node for ChunkDocumentsNode { 29 | type State = RagState; 30 | 31 | async fn 
execute(&self, context: &Context) -> Result { 32 | let documents = context 33 | .get("documents") 34 | .and_then(|v| v.as_array()) 35 | .ok_or_else(|| anyhow::anyhow!("No documents found in context"))?; 36 | 37 | let mut chunks_meta = Vec::new(); 38 | for doc_map in documents { 39 | let content = doc_map 40 | .get("content") 41 | .and_then(|v| v.as_str()) 42 | .ok_or_else(|| anyhow::anyhow!("No content found in document"))?; 43 | let chunks = self.chunker.chunk_text(content, &self.options); 44 | info!( 45 | "Process: {:?}, Chunks lens: {:?}", 46 | doc_map.get("metadata").unwrap(), 47 | chunks.len() 48 | ); 49 | chunks_meta.push(json!({ 50 | "chunks": chunks, 51 | "metadata": doc_map.get("metadata").unwrap_or(&Value::Null), 52 | })); 53 | } 54 | 55 | Ok(Value::Array(chunks_meta)) 56 | } 57 | 58 | async fn post_process( 59 | &self, 60 | context: &mut Context, 61 | result: &Result, 62 | ) -> Result> { 63 | match result { 64 | Ok(value) => { 65 | context.set("documents_chunked", value.clone()); 66 | Ok(ProcessResult::new( 67 | RagState::Default, 68 | "documents_chunked".to_string(), 69 | )) 70 | } 71 | Err(e) => Ok(ProcessResult::new( 72 | RagState::ChunkingError, 73 | format!("chunking_error: {}", e), 74 | )), 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/retrieve_document.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::utils::vector_db::{QdrantDB, VectorDB}; 5 | use pocketflow_rs::vector_db::{DistanceMetric, VectorDBOptions}; 6 | use pocketflow_rs::{Context, Node, ProcessResult}; 7 | use serde_json::Value; 8 | use std::sync::Arc; 9 | use tracing::{error, info}; 10 | 11 | pub struct RetrieveDocumentNode { 12 | db: Arc, 13 | k: usize, 14 | } 15 | 16 | impl RetrieveDocumentNode { 17 | pub async fn new( 18 | db_url: String, 19 
| api_key: Option, 20 | collection: String, 21 | dimension: usize, 22 | distance_metric: DistanceMetric, 23 | k: usize, 24 | ) -> Result { 25 | let db = QdrantDB::new( 26 | db_url, 27 | api_key, 28 | VectorDBOptions { 29 | collection_name: collection, 30 | dimension, 31 | distance_metric, 32 | }, 33 | ) 34 | .await?; 35 | Ok(Self { 36 | db: Arc::new(db), 37 | k, 38 | }) 39 | } 40 | } 41 | 42 | #[async_trait] 43 | impl Node for RetrieveDocumentNode { 44 | type State = RagState; 45 | 46 | async fn execute(&self, context: &Context) -> Result { 47 | let query_embedding = context 48 | .get("query_embedding") 49 | .and_then(|v| v.as_array()) 50 | .map(|arr| { 51 | arr.iter() 52 | .filter_map(|v| v.as_f64().map(|x| x as f32)) 53 | .collect::>() 54 | }) 55 | .ok_or_else(|| anyhow::anyhow!("No query embedding found in context"))?; 56 | 57 | let records = self.db.search(query_embedding, self.k).await?; 58 | if records.is_empty() { 59 | error!("No documents retrieved"); 60 | return Err(anyhow::anyhow!("No documents retrieved")); 61 | } 62 | 63 | info!("Retrieved documents line: {:?}", records.len()); 64 | 65 | let result_array: Vec = records 66 | .into_iter() 67 | .map(|record| record.to_value()) 68 | .collect(); 69 | 70 | Ok(Value::Array(result_array)) 71 | } 72 | 73 | async fn post_process( 74 | &self, 75 | context: &mut Context, 76 | result: &Result, 77 | ) -> Result> { 78 | match result { 79 | Ok(value) => { 80 | context.set("retrieved_documents", value.clone()); 81 | Ok(ProcessResult::new( 82 | RagState::Default, 83 | "documents_retrieved".to_string(), 84 | )) 85 | } 86 | Err(e) => Ok(ProcessResult::new( 87 | RagState::RetrievalError, 88 | format!("retrieval_error: {}", e), 89 | )), 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/context.rs: -------------------------------------------------------------------------------- 1 | use serde_json::Value; 2 | use std::collections::HashMap; 3 | use std::fmt; 4 | 5 | 
#[derive(Debug, Clone, Default)] 6 | pub struct Context { 7 | data: HashMap, 8 | metadata: HashMap, 9 | } 10 | 11 | impl Context { 12 | pub fn new() -> Self { 13 | Self { 14 | data: HashMap::new(), 15 | metadata: HashMap::new(), 16 | } 17 | } 18 | 19 | pub fn from_data(data: HashMap) -> Self { 20 | Self { 21 | data, 22 | metadata: HashMap::new(), 23 | } 24 | } 25 | 26 | pub fn get(&self, key: &str) -> Option<&Value> { 27 | self.data.get(key) 28 | } 29 | 30 | pub fn get_metadata(&self, key: &str) -> Option<&Value> { 31 | self.metadata.get(key) 32 | } 33 | 34 | pub fn set(&mut self, key: &str, value: Value) { 35 | self.data.insert(key.to_string(), value); 36 | } 37 | 38 | pub fn set_metadata(&mut self, key: &str, value: Value) { 39 | self.metadata.insert(key.to_string(), value); 40 | } 41 | 42 | pub fn remove(&mut self, key: &str) -> Option { 43 | self.data.remove(key) 44 | } 45 | 46 | pub fn remove_metadata(&mut self, key: &str) -> Option { 47 | self.metadata.remove(key) 48 | } 49 | 50 | pub fn get_all_data(&self) -> &HashMap { 51 | &self.data 52 | } 53 | 54 | pub fn get_all_metadata(&self) -> &HashMap { 55 | &self.metadata 56 | } 57 | 58 | pub fn merge(&mut self, other: &Context) { 59 | for (key, value) in &other.data { 60 | self.data.insert(key.clone(), value.clone()); 61 | } 62 | for (key, value) in &other.metadata { 63 | self.metadata.insert(key.clone(), value.clone()); 64 | } 65 | } 66 | 67 | pub fn clear(&mut self) { 68 | self.data.clear(); 69 | self.metadata.clear(); 70 | } 71 | 72 | pub fn contains_key(&self, key: &str) -> bool { 73 | self.data.contains_key(key) 74 | } 75 | 76 | pub fn contains_metadata_key(&self, key: &str) -> bool { 77 | self.metadata.contains_key(key) 78 | } 79 | } 80 | 81 | impl fmt::Display for Context { 82 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 83 | writeln!(f, "Context {{")?; 84 | 85 | // Display data 86 | writeln!(f, " data: {{")?; 87 | for (key, value) in &self.data { 88 | writeln!(f, " \"{}\": {},", key, 
value)?; 89 | } 90 | writeln!(f, " }},")?; 91 | 92 | // Display metadata 93 | writeln!(f, " metadata: {{")?; 94 | for (key, value) in &self.metadata { 95 | writeln!(f, " \"{}\": {},", key, value)?; 96 | } 97 | writeln!(f, " }}")?; 98 | 99 | write!(f, "}}") 100 | } 101 | } 102 | 103 | impl From> for Context { 104 | fn from(data: HashMap) -> Self { 105 | Self::from_data(data) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/utils/embedding.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "openai")] 2 | 3 | use async_trait::async_trait; 4 | use openai_api_rust::embeddings::*; 5 | use openai_api_rust::*; 6 | use tracing::info; 7 | 8 | #[derive(Debug, Clone)] 9 | pub struct EmbeddingOptions { 10 | pub model: String, 11 | pub dimensions: Option, 12 | } 13 | 14 | impl Default for EmbeddingOptions { 15 | fn default() -> Self { 16 | Self { 17 | model: "text-embedding-ada-002".to_string(), 18 | dimensions: None, 19 | } 20 | } 21 | } 22 | 23 | #[async_trait] 24 | pub trait EmbeddingGenerator { 25 | async fn generate_embedding(&self, text: &str) -> anyhow::Result>; 26 | async fn generate_embeddings(&self, texts: &[String]) -> anyhow::Result>>; 27 | } 28 | 29 | #[allow(dead_code)] 30 | pub struct OpenAIEmbeddingGenerator { 31 | api_key: String, 32 | options: EmbeddingOptions, 33 | client: OpenAI, 34 | } 35 | 36 | impl OpenAIEmbeddingGenerator { 37 | pub fn new(api_key: &str, endpoint: &str, options: EmbeddingOptions) -> Self { 38 | let auth = Auth::new(api_key); 39 | let client = OpenAI::new(auth, endpoint); 40 | Self { 41 | api_key: api_key.to_string(), 42 | options, 43 | client, 44 | } 45 | } 46 | } 47 | 48 | #[async_trait] 49 | impl EmbeddingGenerator for OpenAIEmbeddingGenerator { 50 | async fn generate_embedding(&self, text: &str) -> anyhow::Result> { 51 | let embeds = self.generate_embeddings(&[text.to_string()]).await?; 52 | let result: Vec = 
embeds[0].to_vec(); 53 | Ok(result) 54 | } 55 | 56 | async fn generate_embeddings(&self, texts: &[String]) -> anyhow::Result>> { 57 | // chunked by 10 58 | let chunks = texts.chunks(10).collect::>(); 59 | let mut results = Vec::new(); 60 | for chunk in chunks { 61 | let embedding = EmbeddingsBody { 62 | model: self.options.model.clone(), 63 | input: chunk.to_vec(), 64 | user: None, 65 | }; 66 | 67 | info!("Sending request to OpenAI Embedding API"); 68 | let response = self.client.embeddings_create(&embedding).unwrap(); 69 | let data = response.data.unwrap(); 70 | let result: Vec> = data 71 | .into_iter() 72 | .map(|x: EmbeddingData| x.embedding.unwrap()) 73 | .collect(); 74 | results.extend(result); 75 | } 76 | Ok(results) 77 | } 78 | } 79 | 80 | #[cfg(test)] 81 | mod tests { 82 | use super::*; 83 | use std::env; 84 | 85 | #[tokio::test] 86 | #[ignore = "E2E case, requires API keys"] 87 | async fn test_e2e_embedding_generator() { 88 | let generator = OpenAIEmbeddingGenerator::new( 89 | &env::var("DASH_SCOPE_API_KEY").unwrap(), 90 | "https://dashscope.aliyuncs.com/compatible-mode/v1/", 91 | EmbeddingOptions { 92 | model: "text-embedding-v3".to_string(), 93 | dimensions: Some(64), 94 | }, 95 | ); 96 | let text = "Hello, world!"; 97 | let embedding = generator.generate_embedding(text).await.unwrap(); 98 | println!("{:?}", embedding); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/embed_documents.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::embedding::EmbeddingGenerator; 5 | use pocketflow_rs::utils::embedding::{EmbeddingOptions, OpenAIEmbeddingGenerator}; 6 | use pocketflow_rs::{Context, Node, ProcessResult}; 7 | use serde_json::{Value, json}; 8 | use std::sync::Arc; 9 | use tracing::{debug, info}; 10 | 11 | pub struct 
EmbedDocumentsNode { 12 | generator: Arc, 13 | } 14 | 15 | impl EmbedDocumentsNode { 16 | pub fn new(api_key: String, endpoint: String, model: String, dimension: Option) -> Self { 17 | Self { 18 | generator: Arc::new(OpenAIEmbeddingGenerator::new( 19 | &api_key, 20 | &endpoint, 21 | EmbeddingOptions { 22 | model, 23 | dimensions: dimension, 24 | }, 25 | )), 26 | } 27 | } 28 | } 29 | 30 | #[async_trait] 31 | impl Node for EmbedDocumentsNode { 32 | type State = RagState; 33 | 34 | async fn execute(&self, context: &Context) -> Result { 35 | let documents_chunked = context 36 | .get("documents_chunked") 37 | .and_then(|v| v.as_array()) 38 | .ok_or_else(|| anyhow::anyhow!("No chunks found in context"))?; 39 | info!("Documents chunked: {:?}", documents_chunked.len()); 40 | 41 | let mut embed_result = Vec::new(); 42 | for chunk in documents_chunked { 43 | let chunks = chunk 44 | .get("chunks") 45 | .and_then(|v| v.as_array()) 46 | .ok_or_else(|| anyhow::anyhow!("No chunks found in document"))?; 47 | let chunk_text: Vec = chunks 48 | .iter() 49 | .filter_map(|v| v.as_str().map(|s| s.to_string())) 50 | .collect(); 51 | debug!("Chunk text: {:?}", chunk_text); 52 | info!("Chunk text len: {:?}", chunk_text.len()); 53 | let embeddings = self.generator.generate_embeddings(&chunk_text).await?; 54 | info!("Embeddings len: {:?}", embeddings.len()); 55 | if embeddings.is_empty() { 56 | return Err(anyhow::anyhow!("Embeddings array is empty")); 57 | } 58 | info!("First Embeddings: {:?}", embeddings[0]); 59 | 60 | embed_result.push(json!( 61 | { 62 | "chunks": chunk_text, 63 | "embeddings": embeddings, 64 | "metadata": chunk.get("metadata").unwrap_or(&Value::Null), 65 | } 66 | )); 67 | } 68 | 69 | Ok(Value::Array(embed_result)) 70 | } 71 | 72 | async fn post_process( 73 | &self, 74 | context: &mut Context, 75 | result: &Result, 76 | ) -> Result> { 77 | match result { 78 | Ok(value) => { 79 | context.set("chunk_embeddings", value.clone()); 80 | Ok(ProcessResult::new( 81 | 
RagState::Default, 82 | "chunks_embedded".to_string(), 83 | )) 84 | } 85 | Err(e) => Ok(ProcessResult::new( 86 | RagState::EmbeddingError, 87 | format!("embedding_error: {}", e), 88 | )), 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/generate_answer.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::utils::llm_wrapper::{LLMWrapper, OpenAIClient}; 5 | use pocketflow_rs::vector_db::VectorRecord; 6 | use pocketflow_rs::{Context, Node, ProcessResult}; 7 | use serde_json::Value; 8 | use std::sync::Arc; 9 | 10 | pub struct GenerateAnswerNode { 11 | client: Arc, 12 | query: String, 13 | } 14 | 15 | impl GenerateAnswerNode { 16 | pub fn new(api_key: String, model: String, endpoint: String, query: String) -> Self { 17 | Self { 18 | client: Arc::new(OpenAIClient::new(api_key, model, endpoint)), 19 | query, 20 | } 21 | } 22 | } 23 | 24 | #[async_trait] 25 | impl Node for GenerateAnswerNode { 26 | type State = RagState; 27 | 28 | async fn execute(&self, context: &Context) -> Result { 29 | let retrieved_docs = context 30 | .get("retrieved_documents") 31 | .and_then(|v| v.as_array()) 32 | .ok_or_else(|| anyhow::anyhow!("No retrieved documents found in context"))?; 33 | 34 | let retrieved_docs_array: Vec = retrieved_docs 35 | .iter() 36 | .map(VectorRecord::parse_by_value) 37 | .collect(); 38 | 39 | let retrieved_text_with_meta = retrieved_docs_array 40 | .iter() 41 | .map(|v| { 42 | format!( 43 | "{}: {}", 44 | v.metadata 45 | .get("file_metadata") 46 | .unwrap() 47 | .get("url") 48 | .unwrap() 49 | .as_str() 50 | .unwrap(), 51 | v.metadata.get("text").unwrap() 52 | ) 53 | }) 54 | .collect::>() 55 | .join("\n\n"); 56 | 57 | if retrieved_text_with_meta.is_empty() { 58 | return Ok(Value::String("I don't know.".to_string())); 59 | } 60 | 61 
| let prompt = format!(" 62 | You are a helpful assistant. Based on the following context, please answer the question. If the answer cannot be found in the context, say 'I don't know'.\n\n 63 | Output format using markdown and add reference links to the source documents. \n\n 64 | You can use the following context to answer the question: \n{}\n\n 65 | Question: {}\n\n 66 | Answer:", 67 | retrieved_text_with_meta, 68 | self.query 69 | ); 70 | 71 | let response = self.client.generate(&prompt).await?; 72 | if response.content.is_empty() { 73 | return Err(anyhow::anyhow!("Empty response from LLM")); 74 | } 75 | 76 | Ok(Value::String(response.content.trim().to_string())) 77 | } 78 | 79 | async fn post_process( 80 | &self, 81 | context: &mut Context, 82 | result: &Result, 83 | ) -> Result> { 84 | match result { 85 | Ok(value) => { 86 | context.set("result", value.clone()); 87 | Ok(ProcessResult::new( 88 | RagState::Default, 89 | "answer_generated".to_string(), 90 | )) 91 | } 92 | Err(e) => Ok(ProcessResult::new( 93 | RagState::GenerationError, 94 | format!("generation_error: {}", e), 95 | )), 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/utils/llm_wrapper.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "openai")] 2 | 3 | use std::{collections::HashMap, hash::RandomState}; 4 | 5 | use async_trait::async_trait; 6 | use openai_api_rust::chat::*; 7 | use openai_api_rust::*; 8 | use serde::{Deserialize, Serialize}; 9 | use tracing::info; 10 | 11 | #[derive(Debug, Clone, Serialize, Deserialize)] 12 | pub struct LLMResponse { 13 | pub content: String, 14 | pub usage: Option, 15 | } 16 | 17 | #[derive(Debug, Clone, Serialize, Deserialize)] 18 | pub struct LLMUsage { 19 | pub prompt_tokens: Option, 20 | pub completion_tokens: Option, 21 | pub total_tokens: Option, 22 | } 23 | 24 | #[async_trait] 25 | pub trait LLMWrapper { 26 | async fn generate(&self, 
prompt: &str) -> anyhow::Result; 27 | async fn generate_with_options( 28 | &self, 29 | prompt: &str, 30 | options: LLMOptions, 31 | ) -> anyhow::Result; 32 | } 33 | 34 | #[derive(Debug, Clone, Default)] 35 | pub struct LLMOptions { 36 | pub temperature: Option, 37 | pub max_tokens: Option, 38 | pub top_p: Option, 39 | pub frequency_penalty: Option, 40 | pub presence_penalty: Option, 41 | pub stop: Option>, 42 | pub logit_bias: Option>, 43 | } 44 | 45 | #[allow(dead_code)] 46 | pub struct OpenAIClient { 47 | api_key: String, 48 | model: String, 49 | endpoint: String, 50 | client: OpenAI, 51 | } 52 | 53 | impl OpenAIClient { 54 | pub fn new(api_key: String, model: String, endpoint: String) -> Self { 55 | let auth = Auth::new(&api_key); 56 | let client = OpenAI::new(auth, &endpoint); 57 | Self { 58 | api_key, 59 | model, 60 | endpoint, 61 | client, 62 | } 63 | } 64 | } 65 | 66 | #[async_trait] 67 | impl LLMWrapper for OpenAIClient { 68 | async fn generate(&self, prompt: &str) -> anyhow::Result { 69 | self.generate_with_options(prompt, LLMOptions::default()) 70 | .await 71 | } 72 | 73 | async fn generate_with_options( 74 | &self, 75 | prompt: &str, 76 | options: LLMOptions, 77 | ) -> anyhow::Result { 78 | let chat = ChatBody { 79 | model: self.model.clone(), 80 | temperature: options.temperature, 81 | max_tokens: options.max_tokens, 82 | presence_penalty: options.presence_penalty, 83 | frequency_penalty: options.frequency_penalty, 84 | logit_bias: options.logit_bias, 85 | top_p: options.top_p, 86 | stream: Some(false), 87 | stop: options.stop, 88 | user: None, 89 | n: Some(1), 90 | messages: vec![Message { 91 | role: Role::User, 92 | content: prompt.to_string(), 93 | }], 94 | }; 95 | 96 | info!("Sending request to OpenAI API"); 97 | let response = self.client.chat_completion_create(&chat).unwrap(); 98 | let choice = response.choices; 99 | let content = &choice[0].message.as_ref().unwrap().content; 100 | let u = response.usage; 101 | let usage = LLMUsage { 102 | 
prompt_tokens: u.prompt_tokens, 103 | completion_tokens: u.completion_tokens, 104 | total_tokens: u.total_tokens, 105 | }; 106 | 107 | Ok(LLMResponse { 108 | content: content.clone(), 109 | usage: Some(usage), 110 | }) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/utils/web_search.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "websearch")] 2 | 3 | use async_trait::async_trait; 4 | use reqwest::Client; 5 | use serde::{Deserialize, Serialize}; 6 | use tracing::info; 7 | 8 | #[derive(Debug, Clone, Serialize, Deserialize)] 9 | pub struct SearchResult { 10 | pub title: String, 11 | pub url: String, 12 | pub snippet: String, 13 | } 14 | 15 | #[async_trait] 16 | pub trait WebSearcher { 17 | async fn search(&self, query: &str) -> anyhow::Result>; 18 | async fn search_with_options( 19 | &self, 20 | query: &str, 21 | options: SearchOptions, 22 | ) -> anyhow::Result>; 23 | } 24 | 25 | #[derive(Debug, Clone, Default)] 26 | pub struct SearchOptions { 27 | pub max_results: Option, 28 | pub language: Option, 29 | pub region: Option, 30 | } 31 | 32 | pub struct GoogleSearcher { 33 | api_key: String, 34 | search_engine_id: String, 35 | client: Client, 36 | } 37 | 38 | impl GoogleSearcher { 39 | pub fn new(api_key: String, search_engine_id: String) -> Self { 40 | Self { 41 | api_key, 42 | search_engine_id, 43 | client: Client::new(), 44 | } 45 | } 46 | } 47 | 48 | #[async_trait] 49 | impl WebSearcher for GoogleSearcher { 50 | async fn search(&self, query: &str) -> anyhow::Result> { 51 | self.search_with_options(query, SearchOptions::default()) 52 | .await 53 | } 54 | 55 | async fn search_with_options( 56 | &self, 57 | query: &str, 58 | options: SearchOptions, 59 | ) -> anyhow::Result> { 60 | let mut url = format!( 61 | "https://www.googleapis.com/customsearch/v1?key={}&cx={}&q={}", 62 | self.api_key, self.search_engine_id, query 63 | ); 64 | 65 | if let 
Some(lang) = options.language { 66 | url.push_str(&format!("&lr=lang_{}", lang)); 67 | } 68 | if let Some(region) = options.region { 69 | url.push_str(&format!("&cr=country{}", region)); 70 | } 71 | if let Some(max_results) = options.max_results { 72 | url.push_str(&format!("&num={}", max_results)); 73 | } 74 | 75 | info!("Sending request to Google Search API"); 76 | let response = self.client.get(&url).send().await?; 77 | let search_response: serde_json::Value = response.json().await?; 78 | let default_val: Vec = vec![]; 79 | let items = search_response["items"].as_array().unwrap_or(&default_val); 80 | let results = items 81 | .iter() 82 | .map(|item| SearchResult { 83 | title: item["title"].as_str().unwrap_or("").to_string(), 84 | url: item["link"].as_str().unwrap_or("").to_string(), 85 | snippet: item["snippet"].as_str().unwrap_or("").to_string(), 86 | }) 87 | .collect(); 88 | 89 | Ok(results) 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | mod tests { 95 | use super::*; 96 | use std::env; 97 | 98 | #[tokio::test] 99 | #[ignore = "E2E case, requires API keys"] 100 | async fn test_e2e_google_searcher() { 101 | let searcher = GoogleSearcher::new( 102 | env::var("GOOGLE_API_KEY").unwrap(), 103 | env::var("GOOGLE_SEARCH_ENGINE_ID").unwrap(), 104 | ); 105 | let results = searcher 106 | .search("Beijing's temperature today") 107 | .await 108 | .unwrap(); 109 | println!("{:?}", results); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/create_index.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::utils::vector_db::{ 5 | DistanceMetric, QdrantDB, VectorDB, VectorDBOptions, VectorRecord, 6 | }; 7 | use pocketflow_rs::{Context, Node, ProcessResult}; 8 | use serde_json::Value; 9 | use std::sync::Arc; 10 | 11 | pub struct CreateIndexNode { 12 
| db: Arc, 13 | } 14 | 15 | impl CreateIndexNode { 16 | pub async fn new( 17 | db_url: String, 18 | api_key: Option, 19 | collection: String, 20 | dimension: usize, 21 | distance_metric: DistanceMetric, 22 | ) -> Result { 23 | let options = VectorDBOptions { 24 | collection_name: collection, 25 | dimension, 26 | distance_metric, 27 | }; 28 | let db = QdrantDB::new(db_url, api_key, options).await?; 29 | Ok(Self { db: Arc::new(db) }) 30 | } 31 | } 32 | 33 | #[async_trait] 34 | impl Node for CreateIndexNode { 35 | type State = RagState; 36 | 37 | async fn execute(&self, context: &Context) -> Result { 38 | let chunks_embeddings = context 39 | .get("chunk_embeddings") 40 | .and_then(|v| v.as_array()) 41 | .ok_or_else(|| anyhow::anyhow!("No embeddings found in context"))?; 42 | 43 | let mut records = Vec::new(); 44 | for chunk_embedding in chunks_embeddings { 45 | let chunks = chunk_embedding 46 | .get("chunks") 47 | .and_then(|v| v.as_array()) 48 | .ok_or_else(|| anyhow::anyhow!("No chunks found in document"))?; 49 | let embeddings = chunk_embedding 50 | .get("embeddings") 51 | .and_then(|v| v.as_array()) 52 | .ok_or_else(|| anyhow::anyhow!("No embeddings found in document"))?; 53 | let metadata = chunk_embedding.get("metadata").unwrap_or(&Value::Null); 54 | 55 | let chunks_size = chunks.len(); 56 | for i in 0..chunks_size { 57 | let chunk = chunks[i].to_string(); 58 | let default_embedding = Vec::new(); 59 | let embedding = embeddings[i].as_array().unwrap_or(&default_embedding); 60 | let embedding_vec: Vec = embedding 61 | .iter() 62 | .filter_map(|v| v.as_f64().map(|x| x as f32)) 63 | .collect(); 64 | records.push(VectorRecord { 65 | id: uuid::Uuid::new_v4().to_string(), 66 | vector: embedding_vec, 67 | metadata: serde_json::Map::from_iter(vec![ 68 | ("text".to_string(), serde_json::Value::String(chunk)), 69 | ("file_metadata".to_string(), metadata.clone()), 70 | ]), 71 | }); 72 | } 73 | } 74 | 75 | if records.is_empty() { 76 | return Err(anyhow::anyhow!("No valid 
records to insert")); 77 | } 78 | 79 | self.db 80 | .insert(records) 81 | .await 82 | .map_err(|e| anyhow::anyhow!("Failed to insert records: {}", e))?; 83 | Ok(Value::Null) 84 | } 85 | 86 | #[allow(unused_variables)] 87 | async fn post_process( 88 | &self, 89 | context: &mut Context, 90 | result: &Result, 91 | ) -> Result> { 92 | match result { 93 | Ok(_) => Ok(ProcessResult::new( 94 | RagState::Default, 95 | "index_created".to_string(), 96 | )), 97 | Err(e) => Ok(ProcessResult::new( 98 | RagState::IndexCreationError, 99 | format!("index_creation_error: {}", e), 100 | )), 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | use crate::{Params, context::Context}; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use std::collections::HashMap; 5 | use std::sync::Arc; 6 | 7 | pub trait ProcessState: Send + Sync { 8 | fn is_default(&self) -> bool; 9 | fn to_condition(&self) -> String; 10 | } 11 | 12 | #[derive(Debug, Clone, PartialEq, Default)] 13 | pub enum BaseState { 14 | Success, 15 | Failure, 16 | #[default] 17 | Default, 18 | } 19 | 20 | impl ProcessState for BaseState { 21 | fn is_default(&self) -> bool { 22 | matches!(self, BaseState::Default) 23 | } 24 | 25 | fn to_condition(&self) -> String { 26 | match self { 27 | BaseState::Success => "success".to_string(), 28 | BaseState::Failure => "failure".to_string(), 29 | BaseState::Default => "default".to_string(), 30 | } 31 | } 32 | } 33 | 34 | #[derive(Debug, Clone, PartialEq)] 35 | pub struct ProcessResult { 36 | pub state: S, 37 | pub message: String, 38 | } 39 | 40 | impl ProcessResult { 41 | pub fn new(state: S, message: String) -> Self { 42 | Self { state, message } 43 | } 44 | } 45 | 46 | impl Default for ProcessResult { 47 | fn default() -> Self { 48 | Self { 49 | state: S::default(), 50 | message: "default".to_string(), 51 | } 52 | } 53 | } 
54 | 55 | #[async_trait] 56 | pub trait Node: Send + Sync { 57 | type State: ProcessState + Default; 58 | 59 | #[allow(unused_variables)] 60 | async fn prepare(&self, context: &mut Context) -> Result<()> { 61 | Ok(()) 62 | } 63 | 64 | async fn execute(&self, context: &Context) -> Result; 65 | 66 | #[allow(unused_variables)] 67 | async fn post_process( 68 | &self, 69 | context: &mut Context, 70 | result: &Result, 71 | ) -> Result> { 72 | match result { 73 | Ok(value) => { 74 | context.set("result", value.clone()); 75 | Ok(ProcessResult::default()) 76 | } 77 | Err(e) => { 78 | context.set("error", serde_json::Value::String(e.to_string())); 79 | Ok(ProcessResult::new(Self::State::default(), e.to_string())) 80 | } 81 | } 82 | } 83 | } 84 | 85 | pub trait BaseNodeTrait: Node {} 86 | 87 | #[allow(dead_code)] 88 | pub struct BaseNode { 89 | params: Params, 90 | next_nodes: HashMap>, 91 | } 92 | 93 | impl BaseNode { 94 | pub fn new(params: Params) -> Self { 95 | Self { 96 | params, 97 | next_nodes: HashMap::new(), 98 | } 99 | } 100 | 101 | pub fn add_next(&mut self, action: String, node: Arc) { 102 | self.next_nodes.insert(action, node); 103 | } 104 | } 105 | 106 | #[async_trait] 107 | impl Node for BaseNode { 108 | type State = BaseState; 109 | 110 | #[allow(unused_variables)] 111 | async fn execute(&self, context: &Context) -> Result { 112 | Ok(serde_json::Value::Null) 113 | } 114 | } 115 | 116 | impl BaseNodeTrait for BaseNode {} 117 | 118 | #[allow(dead_code)] 119 | pub struct BatchNode { 120 | base: BaseNode, 121 | batch_size: usize, 122 | } 123 | 124 | impl BatchNode { 125 | pub fn new(params: Params, batch_size: usize) -> Self { 126 | Self { 127 | base: BaseNode::new(params), 128 | batch_size, 129 | } 130 | } 131 | } 132 | 133 | #[async_trait] 134 | impl Node for BatchNode { 135 | type State = BaseState; 136 | 137 | #[allow(unused_variables)] 138 | async fn execute(&self, context: &Context) -> Result { 139 | Ok(serde_json::Value::Null) 140 | } 141 | } 142 | 143 | 
impl BaseNodeTrait for BatchNode {} 144 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/nodes/query_rewrite.rs: -------------------------------------------------------------------------------- 1 | use crate::state::RagState; 2 | use anyhow::Result; 3 | use async_trait::async_trait; 4 | use pocketflow_rs::utils::llm_wrapper::{LLMWrapper, OpenAIClient}; 5 | use pocketflow_rs::{Context, Node, ProcessResult}; 6 | use serde_json::Value; 7 | use std::sync::Arc; 8 | use tracing::info; 9 | 10 | pub struct QueryRewriteNode { 11 | client: Arc, 12 | } 13 | 14 | impl QueryRewriteNode { 15 | pub fn new(api_key: String, model: String, endpoint: String) -> Self { 16 | Self { 17 | client: Arc::new(OpenAIClient::new(api_key, model, endpoint)), 18 | } 19 | } 20 | } 21 | 22 | #[async_trait] 23 | impl Node for QueryRewriteNode { 24 | type State = RagState; 25 | 26 | async fn execute(&self, context: &Context) -> Result { 27 | let user_query = context.get("user_query").unwrap(); 28 | let prompt = format!(" 29 | **Role:** You are an AI Query Enhancer for a Retrieval-Augmented Generation (RAG) system. 30 | 31 | **Goal:** Your task is to take a raw user query and rewrite it into an optimized query string suitable for vector database search. This involves identifying the user's core intent and transforming the query into a concise, keyword-focused format that maximizes the chances of retrieving relevant documents. 32 | 33 | **Input:** You will receive a single \"Original User Query\". 34 | 35 | **Instructions:** 36 | 37 | 1. **Analyze Intent:** Carefully examine the \"Original User Query\" to understand the user's underlying information need or question. What are they *really* trying to find out? 38 | 2. **Identify Keywords:** Extract the most critical entities, concepts, and keywords from the query. 39 | 3. 
**Remove Filler:** Discard conversational filler, politeness phrases (e.g., \"please\", \"can you tell me\"), and vague phrasing (\"thing\", \"stuff\", \"how about\"). 40 | 4. **Rewrite for Clarity & Conciseness:** Construct a new query string that clearly represents the intent using the identified keywords. Make it specific and direct. 41 | 5. **Consider Expansion (Optional but Recommended):** If the original query is very sparse or could benefit from clarification, cautiously add 1-2 highly relevant synonyms or closely related terms that specify the intent further (e.g., adding \"nutrition\" if the query is just \"apples\"). Avoid overly broad expansion. 42 | 6. **Format for Embedding:** The final rewritten query should be a simple string, optimized for being turned into a vector embedding for semantic search. 43 | 44 | **Output:** Respond with ONLY the rewritten query string. Do not include any explanations or introductory text. 45 | 46 | **Example 1:** 47 | Original User Query: \"Hey, could you tell me about the financial performance of Tesla last year?\" 48 | Rewritten Query: `Tesla financial performance 2024 earnings report revenue analysis` 49 | 50 | **Example 2:** 51 | Original User Query: \"What's the deal with that new AI that makes pictures?\" 52 | Rewritten Query: `AI image generation model technology explanation diffusion transformer` 53 | 54 | **Example 3:** 55 | Original User Query: \"I need help understanding how to mitigate risks in my supply chain in Europe.\" 56 | Rewritten Query: `supply chain risk mitigation strategies Europe logistics management` 57 | 58 | **Now, process the following input:** 59 | 60 | Original User Query: \"{}\" 61 | Rewritten Query:",user_query); 62 | let response = self.client.generate(&prompt).await?; 63 | info!("Query rewritten: {:?}", response.content); 64 | Ok(Value::String(response.content.replace("`", ""))) 65 | } 66 | 67 | #[allow(unused_variables)] 68 | async fn post_process( 69 | &self, 70 | context: &mut Context, 
71 | result: &Result, 72 | ) -> Result> { 73 | return match result { 74 | Ok(value) => { 75 | context.set("rewritten_query", value.clone()); 76 | Ok(ProcessResult::new( 77 | RagState::Default, 78 | "query_rewritten".to_string(), 79 | )) 80 | } 81 | Err(e) => { 82 | info!("Error rewriting query: {:?}", e); 83 | Ok(ProcessResult::new( 84 | RagState::QueryRewriteError, 85 | "query_rewrite_error".to_string(), 86 | )) 87 | } 88 | }; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /examples/basic.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use pocketflow_rs::{Context, Node, ProcessResult, ProcessState, build_flow}; 3 | use rand::Rng; 4 | use serde_json::Value; 5 | 6 | #[derive(Debug, Clone, PartialEq, Default)] 7 | enum NumberState { 8 | Small, 9 | Medium, 10 | Large, 11 | #[default] 12 | Default, 13 | } 14 | 15 | impl ProcessState for NumberState { 16 | fn is_default(&self) -> bool { 17 | matches!(self, NumberState::Default) 18 | } 19 | 20 | fn to_condition(&self) -> String { 21 | match self { 22 | NumberState::Small => "small".to_string(), 23 | NumberState::Medium => "medium".to_string(), 24 | NumberState::Large => "large".to_string(), 25 | NumberState::Default => "default".to_string(), 26 | } 27 | } 28 | } 29 | 30 | // A simple node that prints a message 31 | struct PrintNode { 32 | message: String, 33 | } 34 | 35 | impl PrintNode { 36 | fn new(message: &str) -> Self { 37 | Self { 38 | message: message.to_string(), 39 | } 40 | } 41 | } 42 | 43 | #[async_trait::async_trait] 44 | impl Node for PrintNode { 45 | type State = NumberState; 46 | 47 | async fn execute(&self, context: &Context) -> Result { 48 | println!("PrintNode: {}, Context: {}", self.message, context); 49 | Ok(Value::String(self.message.clone())) 50 | } 51 | } 52 | 53 | // A node that generates a random number 54 | struct RandomNumberNode { 55 | max: i64, 56 | } 57 | 58 | impl 
RandomNumberNode { 59 | fn new(max: i64) -> Self { 60 | Self { max } 61 | } 62 | } 63 | 64 | #[async_trait::async_trait] 65 | impl Node for RandomNumberNode { 66 | type State = NumberState; 67 | 68 | async fn execute(&self, context: &Context) -> Result { 69 | let num = rand::thread_rng().gen_range(0..self.max); 70 | println!( 71 | "RandomNumberNode: Generated number {}, Context: {}", 72 | num, context 73 | ); 74 | Ok(Value::Number(num.into())) 75 | } 76 | 77 | async fn post_process( 78 | &self, 79 | context: &mut Context, 80 | result: &Result, 81 | ) -> Result> { 82 | let num = result.as_ref().unwrap().as_i64().unwrap_or(0); 83 | context.set("number", Value::Number(num.into())); 84 | // Return different states based on the number 85 | let state = if num < self.max / 3 { 86 | NumberState::Small 87 | } else if num < 2 * self.max / 3 { 88 | NumberState::Medium 89 | } else { 90 | NumberState::Large 91 | }; 92 | let condition = state.to_condition(); 93 | Ok(ProcessResult::new(state, condition)) 94 | } 95 | } 96 | 97 | // A node that processes small numbers 98 | struct SmallNumberNode; 99 | 100 | #[async_trait::async_trait] 101 | impl Node for SmallNumberNode { 102 | type State = NumberState; 103 | 104 | async fn execute(&self, context: &Context) -> Result { 105 | let num = context.get("number").and_then(|v| v.as_i64()).unwrap_or(0); 106 | println!("SmallNumberNode: Processing small number {}", num); 107 | Ok(Value::String(format!("Small number processed: {}", num))) 108 | } 109 | } 110 | 111 | // A node that processes medium numbers 112 | struct MediumNumberNode; 113 | 114 | #[async_trait::async_trait] 115 | impl Node for MediumNumberNode { 116 | type State = NumberState; 117 | 118 | async fn execute(&self, context: &Context) -> Result { 119 | let num = context.get("number").and_then(|v| v.as_i64()).unwrap_or(0); 120 | println!("MediumNumberNode: Processing medium number {}", num); 121 | Ok(Value::String(format!("Medium number processed: {}", num))) 122 | } 123 | } 124 
| 125 | // A node that processes large numbers 126 | struct LargeNumberNode; 127 | 128 | #[async_trait::async_trait] 129 | impl Node for LargeNumberNode { 130 | type State = NumberState; 131 | 132 | async fn execute(&self, context: &Context) -> Result { 133 | let num = context.get("number").and_then(|v| v.as_i64()).unwrap_or(0); 134 | println!("LargeNumberNode: Processing large number {}", num); 135 | Ok(Value::String(format!("Large number processed: {}", num))) 136 | } 137 | } 138 | 139 | #[tokio::main] 140 | async fn main() -> std::result::Result<(), Box> { 141 | // Create nodes 142 | let begin_node = PrintNode::new("Begin Node"); 143 | let random_node = RandomNumberNode::new(100); 144 | let small_node = SmallNumberNode; 145 | let medium_node = MediumNumberNode; 146 | let large_node = LargeNumberNode; 147 | 148 | // Create flow using macro 149 | let flow = build_flow!( 150 | start: ("start", begin_node), 151 | nodes: [ 152 | ("rand", random_node), 153 | ("small", small_node), 154 | ("medium", medium_node), 155 | ("large", large_node) 156 | ], 157 | edges: [ 158 | ("start", "rand", NumberState::Default), 159 | ("rand", "small", NumberState::Small), 160 | ("rand", "medium", NumberState::Medium), 161 | ("rand", "large", NumberState::Large) 162 | ] 163 | ); 164 | 165 | // Create context 166 | let context = Context::new(); 167 | 168 | // Run the flow 169 | println!("Starting flow execution..."); 170 | flow.run(context).await?; 171 | println!("Flow execution completed!"); 172 | 173 | Ok(()) 174 | } 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | Pocket Flow – 100-line minimalist LLM framework 3 |
4 | 5 | A Rust implementation of [PocketFlow](https://github.com/The-Pocket/PocketFlow), a minimalist flow-based programming framework. 6 | 7 | 📋 [Get started quickly with our template →](#template) 8 | 9 | ## Features 10 | 11 | - Type-safe state transitions using enums 12 | - Macro-based flow construction 13 | - Async node execution and post-processing 14 | - Batch flow support 15 | - Custom state management 16 | - Extensible node system 17 | 18 | ## Quick Start 19 | 20 | ### 0. Setup 21 | 22 | ```bash 23 | cargo add pocketflow_rs 24 | ``` 25 | 26 | ### 1. Define Custom States 27 | 28 | ```rust 29 | use pocketflow_rs::ProcessState; 30 | 31 | #[derive(Debug, Clone, PartialEq)] 32 | pub enum MyState { 33 | Success, 34 | Failure, 35 | Default, 36 | } 37 | 38 | impl ProcessState for MyState { 39 | fn is_default(&self) -> bool { 40 | matches!(self, MyState::Default) 41 | } 42 | fn to_condition(&self) -> String { 43 | match self { 44 | MyState::Success => "success".to_string(), 45 | MyState::Failure => "failure".to_string(), 46 | MyState::Default => "default".to_string(), 47 | } 48 | } 49 | } 50 | 51 | impl Default for MyState { 52 | fn default() -> Self { 53 | MyState::Default 54 | } 55 | } 56 | ``` 57 | 58 | ### 2. Implement Nodes 59 | 60 | ```rust 61 | use pocketflow_rs::{Node, ProcessResult, Context}; 62 | use anyhow::Result; 63 | use async_trait::async_trait; 64 | 65 | struct MyNode; 66 | 67 | #[async_trait] 68 | impl Node for MyNode { 69 | type State = MyState; 70 | 71 | async fn execute(&self, context: &Context) -> Result { 72 | // Your node logic here 73 | Ok(serde_json::json!({"data": 42})) 74 | } 75 | 76 | async fn post_process( 77 | &self, 78 | context: &mut Context, 79 | result: &Result, 80 | ) -> Result> { 81 | // Your post-processing logic here 82 | Ok(ProcessResult::new(MyState::Success, "success".to_string())) 83 | } 84 | } 85 | ``` 86 | 87 | ### 3. 
Build Flows 88 | 89 | ```rust 90 | use pocketflow_rs::{build_flow, Context}; 91 | 92 | let node1 = MyNode; 93 | let node2 = MyNode; 94 | 95 | let flow = build_flow!( 96 | start: ("start", node1), 97 | nodes: [("next", node2)], 98 | edges: [ 99 | ("start", "next", MyState::Success) 100 | ] 101 | ); 102 | 103 | let context = Context::new(); 104 | let result = flow.run(context).await?; 105 | ``` 106 | 107 | ### 4. Batch Processing 108 | 109 | ```rust 110 | use pocketflow_rs::build_batch_flow; 111 | 112 | let batch_flow = build_batch_flow!( 113 | start: ("start", node1), 114 | nodes: [("next", node2)], 115 | edges: [ 116 | ("start", "next", MyState::Success) 117 | ], 118 | batch_size: 10 119 | ); 120 | 121 | let contexts = vec![Context::new(); 10]; 122 | batch_flow.run_batch(contexts).await?; 123 | ``` 124 | 125 | ## Advanced Usage 126 | 127 | ### Custom State Management 128 | 129 | Define your own states to control flow transitions: 130 | 131 | ```rust 132 | #[derive(Debug, Clone, PartialEq)] 133 | pub enum WorkflowState { 134 | Initialized, 135 | Processing, 136 | Completed, 137 | Error, 138 | Default, 139 | } 140 | 141 | impl ProcessState for WorkflowState { 142 | fn is_default(&self) -> bool { 143 | matches!(self, WorkflowState::Default) 144 | } 145 | fn to_condition(&self) -> String { 146 | match self { 147 | WorkflowState::Initialized => "initialized".to_string(), 148 | WorkflowState::Processing => "processing".to_string(), 149 | WorkflowState::Completed => "completed".to_string(), 150 | WorkflowState::Error => "error".to_string(), 151 | WorkflowState::Default => "default".to_string(), 152 | } 153 | } 154 | } 155 | ``` 156 | 157 | ### Complex Flow Construction 158 | 159 | Build complex workflows with multiple nodes and state transitions: 160 | 161 | ```rust 162 | let flow = build_flow!( 163 | start: ("start", node1), 164 | nodes: [ 165 | ("process", node2), 166 | ("validate", node3), 167 | ("complete", node4) 168 | ], 169 | edges: [ 170 | ("start", "process", 
WorkflowState::Initialized), 171 | ("process", "validate", WorkflowState::Processing), 172 | ("validate", "process", WorkflowState::Error), 173 | ("validate", "complete", WorkflowState::Completed) 174 | ] 175 | ); 176 | ``` 177 | 178 | ## Available Features 179 | 180 | The following Cargo features are available; each one enables a [utility function](https://the-pocket.github.io/PocketFlow/utility_function/): 181 | 182 | - `openai` (default): Enable OpenAI API integration for LLM capabilities 183 | - `websearch`: Enable web search functionality using Google Custom Search API 184 | - `qdrant`: Enable vector database integration using Qdrant 185 | - `debug`: Enable additional debug logging and information 186 | 187 | To use specific features, add them to your `Cargo.toml`: 188 | 189 | ```toml 190 | [dependencies] 191 | pocketflow_rs = { version = "0.1.0", features = ["openai", "websearch"] } 192 | ``` 193 | 194 | Or enable them from the command line: 195 | 196 | ```bash 197 | cargo add pocketflow_rs --features "openai websearch" 198 | ``` 199 | 200 | ## Examples 201 | 202 | Check out the `examples/` directory for more detailed examples: 203 | 204 | - basic.rs: Basic flow with custom states 205 | - text2sql: Text-to-SQL workflow example 206 | - [pocketflow-rs-rag](./examples/pocketflow-rs-rag/README.md): Retrieval-Augmented Generation (RAG) workflow example 207 | 208 | ## Template 209 | 210 | Fork the [PocketFlow-Template-Rust](https://github.com/The-Pocket/PocketFlow-Template-Rust) repository and use it as a template for your own project. 211 | 212 | ## License 213 | 214 | MIT 215 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/README.md: -------------------------------------------------------------------------------- 1 | # PocketFlow RAG Example 2 | 3 | ## Overview 4 | 5 | This example demonstrates how to use PocketFlow to build a Retrieval-Augmented Generation (RAG) pipeline. 
The implementation consists of two main components: an offline pipeline for document processing and indexing, and an online pipeline for question answering. 6 | 7 | ### Offline Pipeline 8 | 9 | The offline pipeline processes and indexes documents for later retrieval. It consists of the following nodes: 10 | 11 | - `FileLoaderNode`: Loads documents from local files or URLs, supporting various formats including PDF, text, and web pages. 12 | - `ChunkDocumentsNode`: Splits documents into smaller chunks using configurable chunk size and overlap, with support for different chunking strategies. 13 | - `EmbedDocumentsNode`: Converts document chunks into vector embeddings using OpenAI's embedding models. 14 | - `CreateIndexNode`: Stores the embedded chunks in a Qdrant vector database with configurable distance metrics. 15 | 16 | ### Online Pipeline 17 | 18 | The online pipeline handles real-time question answering using the indexed documents. It includes: 19 | 20 | - `QueryRewriteNode`: Enhances the user's query using LLM to improve retrieval quality. 21 | - `EmbedQueryNode`: Converts the rewritten query into a vector embedding. 22 | - `RetrieveDocumentNode`: Retrieves the most relevant document chunks from the vector database. 23 | - `GenerateAnswerNode`: Generates a comprehensive answer based on the retrieved context and the original query. 
24 | 25 | The pipeline supports various configuration options including: 26 | 27 | - Customizable embedding models and dimensions 28 | - Configurable chunk sizes and overlap 29 | - Adjustable number of retrieved documents 30 | - Different chat modes for answer generation 31 | - Flexible vector database settings 32 | 33 | ## Workflow Diagram 34 | 35 | ```mermaid 36 | graph TB 37 | subgraph Offline["Offline Pipeline"] 38 | direction LR 39 | FL[FileLoaderNode] --> CD[ChunkDocumentsNode] 40 | CD --> ED[EmbedDocumentsNode] 41 | ED --> CI[CreateIndexNode] 42 | end 43 | 44 | subgraph Online["Online Pipeline"] 45 | direction LR 46 | QR[QueryRewriteNode] --> EQ[EmbedQueryNode] 47 | EQ --> RD[RetrieveDocumentNode] 48 | RD --> GA[GenerateAnswerNode] 49 | end 50 | 51 | style Offline fill:#f9f,stroke:#333,stroke-width:2px 52 | style Online fill:#bbf,stroke:#333,stroke-width:2px 53 | ``` 54 | 55 | ## Example Usage 56 | 57 | ### run offline pipeline 58 | 59 | ```bash 60 | cargo run -- offline --db-url --collection --api-key --qdrant-api-key --endpoint --chunk-size --overlap --model --dimension https://www.usenix.org/system/files/fast23-li-qiang_more.pdf https://www.usenix.org/system/files/fast23-li-qiang.pdf 61 | ``` 62 | 63 | ### run online pipeline 64 | 65 | ```bash 66 | cargo run -- online --db-url --collection --api-key --qdrant-api-key --endpoint --embedding-model --chat-mode --dimension --k "Introduce Alibaba Cloud's Pangu distributed file system" 67 | ``` 68 | 69 | ### Output 70 | 71 | ```markdown 72 | Alibaba Cloud's Pangu is a large-scale, distributed storage system that has been in development and deployment since 2009. It serves as a unified storage platform for Alibaba Group and Alibaba Cloud, providing scalable, high-performance, and reliable storage services to support core businesses such as Taobao, Tmall, AntFin, and Alimama. 
A variety of cloud services, including Elastic Block Storage (EBS), Object Storage Service (OSS), Network-Attached Storage (NAS), PolarDB, and MaxCompute, are built on top of Pangu. Over more than a decade, Pangu has grown into a global storage system managing exabytes of data and trillions of files. 73 | 74 | ### Evolution of Pangu 75 | 76 | Pangu's evolution can be divided into two main phases: 77 | 78 | 1. **Pangu 1.0 (2009-2015)**: This version was designed on an infrastructure composed of servers with commodity CPUs and hard disk drives (HDDs), which have millisecond-level I/O latency, and Gbps-level datacenter networks. Pangu 1.0 featured a distributed kernel-space file system based on Linux Ext4 and kernel-space TCP, gradually adding support for multiple file types (e.g., TempFile, LogFile, and random access files) as required by different storage services. During this period, the primary focus was on providing large volumes of storage space rather than high performance. 79 | 80 | 2. **Pangu 2.0 (Since 2015)**: In response to the emergence of new hardware technologies, particularly solid-state drives (SSDs) and remote direct memory access (RDMA), Pangu 2.0 was developed to provide high-performance storage services with a 100µs-level I/O latency. Key innovations include: 81 | - **Embracing SSD and RDMA**: To leverage the low latency of SSDs and RDMA, Pangu 2.0 introduced a series of new designs in its file system and developed a user-space storage operating system. 82 | - **High Throughput and IOPS**: Pangu 2.0 aims to achieve high throughput and IOPS, with an effective throughput on storage servers approaching their capacity. 83 | - **Unified High-Performance Support**: The system provides unified high-performance support to all services running on top of it, such as online search, data streaming analytics, EBS, OSS, and databases. 
84 | 85 | ### Design Goals of Pangu 2.0 86 | 87 | - **Low Latency**: Pangu 2.0 targets an average 100µs-level I/O latency in a computation-storage disaggregated architecture, even under dynamic environments like network traffic jitters and server failures. 88 | - **High Throughput**: The system aims to reach an effective throughput on storage servers that approaches their capacity. 89 | - **Unified High-Performance Support**: Pangu 2.0 provides unified high-performance support to all services, ensuring that all applications benefit from the advancements in hardware and software. 90 | 91 | ### Related Work 92 | 93 | Pangu is part of a broader ecosystem of distributed storage systems, both open-source (e.g., HDFS and Ceph) and proprietary (e.g., GFS, Tectonic, and AWS). Alibaba has shared its experiences in various aspects of Pangu, including the large-scale deployment of RDMA, key-value engines for scale-out cloud storage, co-design of network and storage software stacks for EBS, and key designs of the namespace metadata service. 94 | 95 | For more detailed information, you can refer to the following sources: 96 | 97 | - [FAST '23 Paper: "Fisc: A Lightweight Client for Large-Scale Distributed File Systems"](https://www.usenix.org/system/files/fast23-li-qiang.pdf) 98 | 99 | These documents provide in-depth insights into the design, implementation, and operational experiences of Pangu. 
100 | ``` 101 | -------------------------------------------------------------------------------- /examples/pocketflow-rs-rag/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use clap::{Parser, Subcommand}; 3 | use pocketflow_rs::utils::{text_chunking::ChunkingStrategy, vector_db::DistanceMetric}; 4 | use pocketflow_rs::{Context as FlowContext, build_flow}; 5 | use pocketflow_rs_rag::{ 6 | QueryRewriteNode, 7 | nodes::{ 8 | ChunkDocumentsNode, CreateIndexNode, EmbedDocumentsNode, EmbedQueryNode, FileLoaderNode, 9 | GenerateAnswerNode, RetrieveDocumentNode, 10 | }, 11 | state::RagState, 12 | }; 13 | use serde_json::json; 14 | use tracing::Level; 15 | use tracing_subscriber::FmtSubscriber; 16 | 17 | #[derive(Parser)] 18 | #[command(author, version, about, long_about = None)] 19 | struct Cli { 20 | #[command(subcommand)] 21 | command: Commands, 22 | } 23 | 24 | #[derive(Subcommand)] 25 | enum Commands { 26 | /// Process documents offline 27 | Offline { 28 | /// Qdrant database URL 29 | #[arg(long, default_value = "http://localhost:6333")] 30 | db_url: String, 31 | 32 | /// Collection name in Qdrant 33 | #[arg(long, default_value = "documents")] 34 | collection: String, 35 | 36 | /// OpenAI API key 37 | #[arg(long)] 38 | api_key: String, 39 | 40 | /// Qdrant API key 41 | #[arg(long)] 42 | qdrant_api_key: Option, 43 | 44 | /// OpenAI API endpoint 45 | #[arg(short, long, default_value = "https://api.openai.com/v1")] 46 | endpoint: String, 47 | 48 | /// Chunk size for document splitting 49 | #[arg(long, default_value = "1000")] 50 | chunk_size: usize, 51 | 52 | /// Overlap between chunks 53 | #[arg(long, default_value = "200")] 54 | overlap: usize, 55 | 56 | /// OpenAI model to use 57 | #[arg(long, default_value = "text-embedding-ada-002")] 58 | model: String, 59 | 60 | #[arg(long, default_value = "1024")] 61 | dimension: usize, 62 | 63 | /// Paths to document files 64 | #[arg(required = true)] 65 | 
files: Vec, 66 | }, 67 | /// Online processing: answer questions based on indexed documents 68 | Online { 69 | /// Qdrant database URL 70 | #[arg(long, default_value = "http://localhost:6333")] 71 | db_url: String, 72 | 73 | /// Collection name in Qdrant 74 | #[arg(long, default_value = "documents")] 75 | collection: String, 76 | 77 | /// OpenAI API key 78 | #[arg(long)] 79 | api_key: String, 80 | 81 | /// OpenAI API endpoint 82 | #[arg(long, default_value = "https://api.openai.com/v1")] 83 | endpoint: String, 84 | 85 | /// Number of documents to retrieve 86 | #[arg(short, long, default_value = "3")] 87 | k: usize, 88 | 89 | /// chat mode 90 | #[arg(long, default_value = "chat")] 91 | chat_mode: String, 92 | 93 | /// embedding dimension 94 | #[arg(long, default_value = "1024")] 95 | dimension: usize, 96 | 97 | /// Qdrant API key 98 | #[arg(long)] 99 | qdrant_api_key: Option, 100 | 101 | /// Embedding model 102 | #[arg(long, default_value = "text-embedding-ada-002")] 103 | embedding_model: String, 104 | 105 | /// Question to answer 106 | #[arg(required = true)] 107 | query: String, 108 | }, 109 | } 110 | 111 | #[tokio::main] 112 | async fn main() -> Result<()> { 113 | let cli = Cli::parse(); 114 | FmtSubscriber::builder().with_max_level(Level::INFO).init(); 115 | 116 | match cli.command { 117 | Commands::Offline { 118 | files, 119 | db_url, 120 | collection, 121 | api_key, 122 | qdrant_api_key, 123 | endpoint, 124 | chunk_size, 125 | overlap, 126 | model, 127 | dimension, 128 | } => { 129 | let file_loader = FileLoaderNode::new(files); 130 | let chunk_documents = 131 | ChunkDocumentsNode::new(chunk_size, overlap, ChunkingStrategy::Sentence); 132 | let embed_documents = EmbedDocumentsNode::new( 133 | api_key.clone(), 134 | endpoint.clone(), 135 | model.clone(), 136 | Some(dimension), 137 | ); 138 | let create_index = CreateIndexNode::new( 139 | db_url, 140 | qdrant_api_key, 141 | collection, 142 | dimension, 143 | DistanceMetric::Cosine, 144 | ) 145 | .await?; 146 | 
147 | let flow = build_flow!( 148 | start: ("file_loader", file_loader), 149 | nodes: [ 150 | ("chunk_documents", chunk_documents), 151 | ("embed_documents", embed_documents), 152 | ("create_index", create_index) 153 | ], 154 | edges: [ 155 | ("file_loader", "chunk_documents", RagState::Default), 156 | ("chunk_documents", "embed_documents", RagState::Default), 157 | ("embed_documents", "create_index", RagState::Default) 158 | ] 159 | ); 160 | 161 | flow.run(FlowContext::new()).await?; 162 | } 163 | Commands::Online { 164 | query, 165 | db_url, 166 | collection, 167 | api_key, 168 | endpoint, 169 | k, 170 | chat_mode, 171 | dimension, 172 | qdrant_api_key, 173 | embedding_model, 174 | } => { 175 | let mut context = FlowContext::new(); 176 | context.set("user_query", json!(query.clone())); 177 | 178 | let query_rewrite_node = 179 | QueryRewriteNode::new(api_key.clone(), chat_mode.clone(), endpoint.clone()); 180 | 181 | let embed_query_node = EmbedQueryNode::new( 182 | api_key.clone(), 183 | endpoint.clone(), 184 | embedding_model.clone(), 185 | Some(dimension), 186 | ); 187 | 188 | let retrieve_node = RetrieveDocumentNode::new( 189 | db_url, 190 | qdrant_api_key, 191 | collection, 192 | dimension, 193 | DistanceMetric::Cosine, 194 | k, 195 | ) 196 | .await?; 197 | 198 | let generate_node = GenerateAnswerNode::new(api_key, chat_mode, endpoint, query); 199 | 200 | // Build and execute online flow 201 | let flow = build_flow!( 202 | start: ("query_rewrite", query_rewrite_node), 203 | nodes: [ 204 | ("embed_query", embed_query_node), 205 | ("retrieve", retrieve_node), 206 | ("generate", generate_node) 207 | ], 208 | edges: [ 209 | ("query_rewrite", "embed_query", RagState::Default), 210 | ("embed_query", "retrieve", RagState::Default), 211 | ("retrieve", "generate", RagState::Default) 212 | ] 213 | ); 214 | 215 | let result = flow.run(context).await?; 216 | 217 | termimad::print_text(result.as_str().unwrap()); 218 | } 219 | } 220 | 221 | Ok(()) 222 | } 223 | 
--------------------------------------------------------------------------------
/src/utils/vector_db.rs:
--------------------------------------------------------------------------------
#![cfg(feature = "qdrant")]

use async_trait::async_trait;
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{
    CreateCollectionBuilder, DeletePointsBuilder, Distance, PointStruct, ScoredPoint,
    SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder,
};
use qdrant_client::qdrant::{Value as QdrantValue, value::Kind as QdrantKind};

use serde_json::{Map as SerdeMap, Number as SerdeNumber, Value as SerdeValue, json};

use tracing::info;

/// Settings used when opening (and, if needed, creating) a collection.
#[derive(Debug, Clone)]
pub struct VectorDBOptions {
    pub collection_name: String,
    pub dimension: usize,
    pub distance_metric: DistanceMetric,
}

/// Similarity measure requested for a collection.
#[derive(Debug, Clone)]
pub enum DistanceMetric {
    Cosine,
    Euclidean,
    DotProduct,
}

/// One stored point: identifier, embedding and JSON metadata.
#[derive(Debug, Clone)]
pub struct VectorRecord {
    pub id: String,
    pub vector: Vec<f32>,
    pub metadata: serde_json::Map<String, serde_json::Value>,
}

impl VectorRecord {
    /// Rebuilds a record from the JSON shape produced by [`VectorRecord::to_value`].
    ///
    /// # Panics
    /// Panics when `value` lacks a string `id`, a numeric array `vector`, or an
    /// object `metadata`.
    pub fn parse_by_value(value: &serde_json::Value) -> Self {
        let id = value
            .get("id")
            .and_then(SerdeValue::as_str)
            .expect("vector record must contain a string `id`")
            .to_string();
        let vector = value
            .get("vector")
            .and_then(SerdeValue::as_array)
            .expect("vector record must contain an array `vector`")
            .iter()
            .map(|v| v.as_f64().expect("`vector` entries must be numbers") as f32)
            .collect();
        let metadata = value
            .get("metadata")
            .and_then(SerdeValue::as_object)
            .expect("vector record must contain an object `metadata`")
            .clone();
        Self {
            id,
            vector,
            metadata,
        }
    }

    /// Serializes the record as `{"id", "vector", "metadata"}`.
    pub fn to_value(&self) -> serde_json::Value {
        json!({
            "id": self.id,
            "vector": self.vector,
            "metadata": self.metadata
        })
    }
}

/// Recursively converts a Qdrant payload value into the equivalent `serde_json` value.
fn qdrant_value_to_serde_json(q_val: QdrantValue) -> SerdeValue {
    match q_val.kind {
        Some(QdrantKind::NullValue(_)) => SerdeValue::Null,
        Some(QdrantKind::BoolValue(b)) => SerdeValue::Bool(b),
        Some(QdrantKind::DoubleValue(d)) => {
            // NaN / infinity cannot be represented as a JSON number; map them to null.
            SerdeNumber::from_f64(d).map_or(SerdeValue::Null, SerdeValue::Number)
        }
        Some(QdrantKind::IntegerValue(i)) => SerdeValue::Number(i.into()),
        Some(QdrantKind::StringValue(s)) => SerdeValue::String(s),
        Some(QdrantKind::ListValue(list_value)) => {
            let serde_list: Vec<SerdeValue> = list_value
                .values
                .into_iter()
                .map(qdrant_value_to_serde_json)
                .collect();
            SerdeValue::Array(serde_list)
        }
        Some(QdrantKind::StructValue(struct_value)) => {
            let mut serde_map = SerdeMap::new();
            for (key, val) in struct_value.fields {
                serde_map.insert(key, qdrant_value_to_serde_json(val));
            }
            SerdeValue::Object(serde_map)
        }
        None => SerdeValue::Null, // Treat absence of kind as Null
    }
}

impl VectorRecord {
    /// Converts a search hit into a record.
    ///
    /// Returns `None` when the point has no id, or when it does not carry a
    /// single vector.
    pub fn from_scored_point(point: ScoredPoint) -> Option<Self> {
        // 1. Extract the id (numeric ids are rendered as decimal strings).
        let id_str = match point.id {
            Some(point_id) => match point_id.point_id_options {
                Some(qdrant_client::qdrant::point_id::PointIdOptions::Num(n)) => n.to_string(),
                Some(qdrant_client::qdrant::point_id::PointIdOptions::Uuid(s)) => s,
                None => return None,
            },
            None => return None,
        };
        // 2. Extract the vector.
        let vector_data = match point.vectors {
            Some(vector) => match vector.vectors_options {
                Some(qdrant_client::qdrant::vectors_output::VectorsOptions::Vector(v)) => v.data,
                _ => return None,
            },
            None => return None,
        };
        // 3. Convert Payload
        let metadata_map: SerdeMap<String, SerdeValue> = point
            .payload
            .into_iter()
            .map(|(key, q_val)| (key, qdrant_value_to_serde_json(q_val)))
            .collect();

        Some(VectorRecord {
            id: id_str,
            vector: vector_data,
            metadata: metadata_map,
        })
    }
}

/// Minimal vector-store interface used by the flow nodes.
#[async_trait]
pub trait VectorDB {
    async fn insert(&self, records: Vec<VectorRecord>) -> anyhow::Result<()>;
    async fn search(&self, query: Vec<f32>, k: usize) -> anyhow::Result<Vec<VectorRecord>>;
    // NOTE(review): the element type was lost in extraction; `String` is
    // reconstructed from `VectorRecord::id` — confirm against the repository.
    async fn delete(&self, ids: Vec<String>) -> anyhow::Result<()>;
}

/// [`VectorDB`] implementation backed by a Qdrant collection.
pub struct QdrantDB {
    client: Qdrant,
    options: VectorDBOptions,
}

impl QdrantDB {
    /// Connects to Qdrant at `db_url` and creates the configured collection when
    /// it is not listed yet.
    ///
    /// # Errors
    /// Fails when the client cannot be built, or when listing or creating
    /// collections fails.
    pub async fn new(
        db_url: String,
        api_key: Option<String>,
        options: VectorDBOptions,
    ) -> anyhow::Result<Self> {
        let client = match api_key {
            Some(api_key) => Qdrant::from_url(db_url.as_str()).api_key(api_key).build()?,
            None => Qdrant::from_url(db_url.as_str()).build()?,
        };

        // Create collection if it doesn't exist
        let collections = client.list_collections().await?;
        if !collections
            .collections
            .iter()
            .any(|c| c.name == options.collection_name)
        {
            let distance = match options.distance_metric {
                DistanceMetric::Cosine => Distance::Cosine,
                DistanceMetric::Euclidean => Distance::Euclid,
                DistanceMetric::DotProduct => Distance::Dot,
            };
            let request = CreateCollectionBuilder::new(options.collection_name.clone())
                .vectors_config(VectorParamsBuilder::new(options.dimension as u64, distance));
            client.create_collection(request).await?;
        }

        Ok(Self { client, options })
    }
}

#[async_trait]
impl VectorDB for QdrantDB {
    async fn insert(&self, records: Vec<VectorRecord>) -> anyhow::Result<()> {
        let points: Vec<PointStruct> = records
            .into_iter()
            .map(|record| PointStruct::new(record.id, record.vector, record.metadata))
            .collect();
        let points_request = UpsertPointsBuilder::new(&self.options.collection_name, points);

        info!("Inserting points into Qdrant");
        self.client.upsert_points(points_request).await?;
        Ok(())
    }

    async fn search(&self, query: Vec<f32>, k: usize) -> anyhow::Result<Vec<VectorRecord>> {
        info!(
            "Searching points in Qdrant, collection: {}",
            self.options.collection_name
        );
        let response = self
            .client
            .search_points(
                SearchPointsBuilder::new(&self.options.collection_name, query, k as u64)
                    .with_payload(true)
                    .with_vectors(true),
            )
            .await?;
        // Hits that cannot be converted (no id / no single vector) are dropped.
        let results = response
            .result
            .into_iter()
            .filter_map(VectorRecord::from_scored_point)
            .collect::<Vec<_>>();
        info!("Retrieved results len: {:?}", results.len());

        Ok(results)
    }

    async fn delete(&self, ids: Vec<String>) -> anyhow::Result<()> {
        info!("Deleting points from Qdrant");
        self.client
            .delete_points(DeletePointsBuilder::new(&self.options.collection_name).points(ids))
            .await?;
        Ok(())
    }
}

--------------------------------------------------------------------------------
/examples/pocketflow-rs-rag/src/nodes/file_loader.rs:
--------------------------------------------------------------------------------
use crate::state::RagState;
use anyhow::{Context, Result};
use async_trait::async_trait;
use pdf_extract::extract_text;
use pocketflow_rs::{Context as FlowContext, Node, ProcessResult};
use reqwest::Client;
use serde_json::{Value, json};
use std::fs;
use std::path::Path;
use std::sync::Arc;
use std::time::SystemTime;
use tracing::info;

/// A loaded document together with its provenance metadata.
#[derive(Debug)]
struct Document {
    content: String,
    metadata: Value,
}

impl Document {
    /// Wraps `content` and records its source, detected type, load time
    /// (seconds since the Unix epoch) and byte length.
    fn new(content: String, url: &str, file_type: &str) -> Self {
        let metadata = json!({
            "url": url,
            "file_type": file_type,
            "timestamp": SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
            "content_length": content.len(),
        });
        Self { content, metadata }
    }
}

/// Flow node that loads every configured URL or local path into the context.
pub struct FileLoaderNode {
    urls: Vec<String>,
    client: Arc<Client>,
}

impl FileLoaderNode {
    /// Creates a loader for the given URLs and/or local file paths.
    pub fn new(urls: Vec<String>) -> Self {
        Self {
            urls,
            client: Arc::new(Client::new()),
        }
    }

    /// Maps a local path's extension (case-insensitively) to `"pdf"` or `"text"`.
    ///
    /// # Errors
    /// Fails when the path has no UTF-8 extension or the extension is unsupported.
    fn detect_file_type(path: &Path) -> Result<&'static str> {
        let extension = path
            .extension()
            .and_then(|ext| ext.to_str())
            .ok_or_else(|| anyhow::anyhow!("Could not determine file extension"))?;

        match extension.to_lowercase().as_str() {
            "pdf" => Ok("pdf"),
            "txt" => Ok("text"),
            _ => Err(anyhow::anyhow!("Unsupported file type: {}", extension)),
        }
    }

    /// Loads one source: `http(s)://` URLs are fetched, anything else is read
    /// as a local file.
    ///
    /// # Errors
    /// Fails on network errors, non-success HTTP statuses, unsupported local
    /// file types, and read or PDF-extraction failures.
    async fn load_from_url(&self, url: &str) -> Result<Document> {
        info!("Loading content from URL: {}", url);
        if url.starts_with("http://") || url.starts_with("https://") {
            // Reject 4xx/5xx responses so an error page is never indexed as a document.
            let response = self.client.get(url).send().await?.error_for_status()?;

            // Compare only the media type: servers commonly append parameters
            // (`application/pdf; charset=binary`) and header values are
            // case-insensitive.
            let media_type = response
                .headers()
                .get(reqwest::header::CONTENT_TYPE)
                .and_then(|header| header.to_str().ok())
                .and_then(|value| value.split(';').next())
                .map(|value| value.trim().to_ascii_lowercase());

            let (content, file_type) = match media_type.as_deref() {
                Some("application/pdf") => {
                    let bytes = response.bytes().await?;
                    (pdf_extract::extract_text_from_mem(&bytes)?, "pdf")
                }
                // Plain text, HTML and unknown types are all kept as raw text.
                _ => (response.text().await?, "web"),
            };

            Ok(Document::new(content, url, file_type))
        } else {
            info!("Loading content from local file: {}", url);
            let path = Path::new(url);
            let file_type = Self::detect_file_type(path)?;
            let content = match file_type {
                "pdf" => extract_text(path)
                    .with_context(|| format!("Failed to extract text from PDF: {:?}", path))?,
                "text" => fs::read_to_string(path)
                    .with_context(|| format!("Failed to read text file: {:?}", path))?,
                // `detect_file_type` only ever returns "pdf" or "text".
                _ => unreachable!(),
            };
            Ok(Document::new(content, url, file_type))
        }
    }
}

#[async_trait]
impl Node for FileLoaderNode {
    type State = RagState;

    /// Loads every configured source and returns a JSON array of
    /// `{"content", "metadata"}` objects; the first failure aborts the run.
    // `context` is required by the trait signature but not read here.
    #[allow(unused_variables)]
    async fn execute(&self, context: &FlowContext) -> Result<Value> {
        let mut documents = Vec::new();

        for url in &self.urls {
            let doc = self
                .load_from_url(url)
                .await
                .with_context(|| format!("Failed to load content from URL: {}", url))?;
            info!("Document loaded: {:?}", doc.metadata);
            documents.push(json!({
                "content": doc.content,
                "metadata": doc.metadata
            }));
        }

        if documents.is_empty() {
            return Err(anyhow::anyhow!("No documents loaded from any URL"));
        }

        Ok(Value::Array(documents))
    }

    /// Stores the loaded documents under the `documents` context key, or maps a
    /// load failure to `RagState::FileLoadedError`.
    async fn post_process(
        &self,
        context: &mut FlowContext,
        result: &Result<Value>,
    ) -> Result<ProcessResult<RagState>> {
        match result {
            Ok(value) => {
                context.set("documents", value.clone());
                Ok(ProcessResult::new(
                    RagState::Default,
                    "documents_loaded".to_string(),
                ))
            }
            Err(e) => Ok(ProcessResult::new(
                RagState::FileLoadedError,
                format!("loading_error: {}", e),
            )),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    #[tokio::test]
    async fn test_load_text_file() {
        // Create a temporary directory
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.txt");

        // Create a test text file
        let mut file = File::create(&file_path).unwrap();
        writeln!(file, "Hello, World!").unwrap();

        // Test loading the text file
        let loader = FileLoaderNode::new(vec![file_path.to_str().unwrap().to_string()]);
        let result = loader.execute(&FlowContext::new()).await.unwrap();

        // Verify the result
        let documents = result.as_array().unwrap();
        assert_eq!(documents.len(), 1);

        let doc = &documents[0];
        assert_eq!(doc["content"].as_str().unwrap(), "Hello, World!\n");
        assert_eq!(doc["metadata"]["file_type"].as_str().unwrap(), "text");
    }

    #[tokio::test]
    async fn test_load_multiple_files() {
        let dir = tempdir().unwrap();

        let text_path = dir.path().join("test.txt");
        let mut text_file = File::create(&text_path).unwrap();
        writeln!(text_file, "Text content").unwrap();

        let urls = vec![
            text_path.to_str().unwrap().to_string(),
            "https://pdfobject.com/pdf/sample.pdf".to_string(),
        ];

        let loader = FileLoaderNode::new(urls);
        let result = loader.execute(&FlowContext::new()).await;

        // The remote fetch may fail (e.g. no network); only check the shape on success.
        if let Ok(result) = result {
            let documents = result.as_array().unwrap();
            assert!(!documents.is_empty());

            for doc in documents {
                assert!(doc["content"].is_string());
                assert!(doc["metadata"]["url"].is_string());
                assert!(doc["metadata"]["file_type"].is_string());
                assert!(doc["metadata"]["timestamp"].is_number());
                assert!(doc["metadata"]["content_length"].is_number());
            }
        }
    }

    #[tokio::test]
    async fn test_invalid_file_type() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.xyz");

        let mut file =
File::create(&file_path).unwrap();
        writeln!(file, "Some content").unwrap();

        let loader = FileLoaderNode::new(vec![file_path.to_str().unwrap().to_string()]);
        let result = loader.execute(&FlowContext::new()).await;

        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(
            error
                .to_string()
                .contains("Failed to load content from URL")
        );
    }
}

--------------------------------------------------------------------------------
/src/utils/text_chunking.rs:
--------------------------------------------------------------------------------
use regex::Regex;
use tracing::info;

/// Parameters controlling how [`TextChunker`] splits text.
#[derive(Debug, Clone)]
pub struct ChunkingOptions {
    /// Target chunk length in bytes.
    pub chunk_size: usize,
    /// Overlap between consecutive chunks. Interpreted in bytes by the
    /// fixed-size strategy; the sentence and paragraph strategies only check
    /// whether it is non-zero.
    pub overlap: usize,
    pub strategy: ChunkingStrategy,
}

/// How text is divided into chunks.
#[derive(Debug, Clone)]
pub enum ChunkingStrategy {
    FixedSize,
    Sentence,
    Paragraph,
}

impl Default for ChunkingOptions {
    fn default() -> Self {
        Self {
            chunk_size: 1000,
            overlap: 100,
            strategy: ChunkingStrategy::FixedSize,
        }
    }
}

/// Splits text into chunks using pre-compiled sentence and paragraph delimiters.
pub struct TextChunker {
    sentence_regex: Regex,
    paragraph_regex: Regex,
}

impl Default for TextChunker {
    fn default() -> Self {
        Self::new()
    }
}

/// Largest char boundary of `text` that is `<= index` (clamped to `text.len()`).
fn snap_down_to_char_boundary(text: &str, index: usize) -> usize {
    let mut index = index.min(text.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !text.is_char_boundary(index) {
        index -= 1;
    }
    index
}

/// Smallest char boundary of `text` that is `>= index` (clamped to `text.len()`).
fn snap_up_to_char_boundary(text: &str, index: usize) -> usize {
    let mut index = index.min(text.len());
    // `text.len()` is always a boundary, so this loop terminates.
    while !text.is_char_boundary(index) {
        index += 1;
    }
    index
}

impl TextChunker {
    /// Builds a chunker with the default sentence and paragraph delimiters.
    pub fn new() -> Self {
        Self {
            // Both patterns are constant and valid, so compilation cannot fail.
            sentence_regex: Regex::new(r"[.!?]+[\s]+").unwrap(),
            paragraph_regex: Regex::new(r"\n\s*\n").unwrap(),
        }
    }

    /// Splits `text` according to `options.strategy` and returns the non-empty chunks.
    pub fn chunk_text(&self, text: &str, options: &ChunkingOptions) -> Vec<String> {
        info!("Chunking text with strategy: {:?}", options.strategy);
        match options.strategy {
            ChunkingStrategy::FixedSize => self.chunk_by_size(text, options),
            ChunkingStrategy::Sentence => self.chunk_by_sentence(text, options),
            ChunkingStrategy::Paragraph => self.chunk_by_paragraph(text, options),
        }
    }

    /// Cuts `text` into windows of at most `chunk_size` bytes, preferring to
    /// break at whitespace, and restarts each window `overlap` bytes before the
    /// previous cut.
    ///
    /// All offsets are snapped to UTF-8 char boundaries, so multi-byte text never
    /// causes a slicing panic. A chunk can exceed `chunk_size` only when a single
    /// character is wider than `chunk_size`. A `chunk_size` of 0 yields no chunks.
    fn chunk_by_size(&self, text: &str, options: &ChunkingOptions) -> Vec<String> {
        let mut chunks = Vec::new();
        // A zero-sized window can never advance; bail out instead of looping forever.
        if options.chunk_size == 0 {
            return chunks;
        }

        let mut start = 0;
        let text_size = text.len();

        while start < text_size {
            let raw_end = start.saturating_add(options.chunk_size).min(text_size);
            let mut end = snap_down_to_char_boundary(text, raw_end);
            if end <= start {
                // The character at `start` is wider than the window: take it whole.
                end = snap_up_to_char_boundary(text, start + 1);
            }

            // Try to find a good breaking point (whitespace)
            let mut actual_end = end;
            if actual_end < text_size {
                while actual_end > start && !text[actual_end..].starts_with(char::is_whitespace) {
                    // `start` is a char boundary, so this never steps below it.
                    actual_end = snap_down_to_char_boundary(text, actual_end - 1);
                }
                // If we couldn't find a good breaking point, force a break at the chunk size
                if actual_end == start {
                    actual_end = end;
                }
            }

            let chunk = text[start..actual_end].trim().to_string();
            if !chunk.is_empty() {
                chunks.push(chunk);
            }

            // Step back by the overlap, but always advance to prevent an infinite loop
            let new_start =
                snap_down_to_char_boundary(text, actual_end.saturating_sub(options.overlap));
            start = if new_start <= start {
                actual_end
            } else {
                new_start
            };
        }

        chunks
    }

    /// Packs whole sentences into chunks shorter than `chunk_size` bytes.
    ///
    /// Sentences are obtained by splitting on the sentence delimiter, so the
    /// terminating punctuation and whitespace are not part of the output. When
    /// `overlap` is non-zero, every chunk after the first is prefixed with the
    /// last sentence of the preceding chunk.
    fn chunk_by_sentence(&self, text: &str, options: &ChunkingOptions) -> Vec<String> {
        let mut chunks = Vec::new();
        let mut current_chunk = String::new();

        for sentence in self.sentence_regex.split(text) {
            let sentence = sentence.trim();
            if sentence.is_empty() {
                continue;
            }

            if current_chunk.len() + sentence.len() < options.chunk_size {
                if !current_chunk.is_empty() {
                    current_chunk.push(' ');
                }
                current_chunk.push_str(sentence);
            } else {
                if !current_chunk.is_empty() {
                    chunks.push(current_chunk);
                }
                current_chunk = sentence.to_string();
            }
        }

        if !current_chunk.is_empty() {
            chunks.push(current_chunk);
        }

        // Add overlap between chunks
        if options.overlap > 0 && chunks.len() > 1 {
            let mut overlapped_chunks = Vec::with_capacity(chunks.len());
            overlapped_chunks.push(chunks[0].clone());

            for i in 1..chunks.len() {
                let prev_chunk = &chunks[i - 1];
                let current_chunk = &chunks[i];

                // Find the last sentence in the previous chunk
                let last_sentences: Vec<&str> = self
                    .sentence_regex
                    .split(prev_chunk)
                    .filter(|s| !s.trim().is_empty())
                    .collect();

                if let Some(last_sentence) = last_sentences.last() {
                    let mut new_chunk = last_sentence.trim().to_string();
                    new_chunk.push(' ');
                    new_chunk.push_str(current_chunk);
                    overlapped_chunks.push(new_chunk);
                } else {
                    overlapped_chunks.push(current_chunk.clone());
                }
            }

            chunks = overlapped_chunks;
        }

        chunks
    }

    /// Packs whole paragraphs (separated by blank lines) into chunks of at most
    /// `chunk_size` bytes, joined by `"\n\n"`.
    ///
    /// When `overlap` is non-zero, every chunk after the first is prefixed with
    /// the last paragraph of the preceding chunk.
    fn chunk_by_paragraph(&self, text: &str, options: &ChunkingOptions) -> Vec<String> {
        let mut chunks = Vec::new();
        let mut current_chunk = String::new();

        for paragraph in self.paragraph_regex.split(text) {
            let paragraph = paragraph.trim();
            if paragraph.is_empty() {
                continue;
            }

            // `+ 2` accounts for the "\n\n" separator.
            if current_chunk.len() + paragraph.len() + 2 <= options.chunk_size {
                if !current_chunk.is_empty() {
                    current_chunk.push_str("\n\n");
                }
                current_chunk.push_str(paragraph);
            } else {
                if !current_chunk.is_empty() {
                    chunks.push(current_chunk);
                }
                current_chunk = paragraph.to_string();
            }
        }

        if !current_chunk.is_empty() {
            chunks.push(current_chunk);
        }

        // Add overlap between chunks
        if options.overlap > 0 && chunks.len() > 1 {
            let mut overlapped_chunks = Vec::with_capacity(chunks.len());
            overlapped_chunks.push(chunks[0].clone());

            for i in 1..chunks.len() {
                let prev_chunk = &chunks[i - 1];
                let current_chunk = &chunks[i];

                // Find the last paragraph in the previous chunk
                let last_paragraphs: Vec<&str> = self
                    .paragraph_regex
                    .split(prev_chunk)
                    .filter(|p| !p.trim().is_empty())
                    .collect();

                if let Some(last_paragraph) = last_paragraphs.last() {
                    let mut new_chunk = last_paragraph.trim().to_string();
                    new_chunk.push_str("\n\n");
                    new_chunk.push_str(current_chunk);
                    overlapped_chunks.push(new_chunk);
                } else {
                    overlapped_chunks.push(current_chunk.clone());
                }
            }

            chunks = overlapped_chunks;
        }

        chunks
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_fixed_size_chunking() {
        let chunker = TextChunker::new();
        let text = "This is a test. This is another test. This is a third test.";
        let options = ChunkingOptions {
            chunk_size: 20,
            overlap: 5,
            strategy: ChunkingStrategy::FixedSize,
        };

        let chunks = chunker.chunk_text(text, &options);
        assert_eq!(chunks.len(), 5);
        for chunk in chunks {
            assert!(chunk.len() <= 20);
        }
    }

    #[test]
    fn test_fixed_size_chunking_multibyte_text() {
        let chunker = TextChunker::new();
        let text = "héllo wörld ünïcödé 日本語のテキスト";
        let options = ChunkingOptions {
            chunk_size: 4,
            overlap: 1,
            strategy: ChunkingStrategy::FixedSize,
        };

        // Must not panic on offsets that fall inside a multi-byte character.
        let chunks = chunker.chunk_text(text, &options);
        assert!(!chunks.is_empty());
        for chunk in &chunks {
            assert!(text.contains(chunk.as_str()));
        }
    }

    #[test]
    fn test_fixed_size_chunking_zero_chunk_size() {
        let chunker = TextChunker::new();
        let options = ChunkingOptions {
            chunk_size: 0,
            overlap: 0,
            strategy: ChunkingStrategy::FixedSize,
        };

        // Must terminate, and yields nothing.
        assert!(chunker.chunk_text("some text", &options).is_empty());
    }

    #[test]
    fn test_sentence_chunking() {
        let chunker = TextChunker::new();
        let text = "This is a test. This is another test. This is a third test.";
        let options = ChunkingOptions {
            chunk_size: 30,
            overlap: 10,
            strategy: ChunkingStrategy::Sentence,
        };

        let chunks = chunker.chunk_text(text, &options);
        assert_eq!(chunks.len(), 3);
        assert!(chunks[0].contains("This is a test"));
        assert!(chunks[1].contains("This is another test"));
        assert!(chunks[2].contains("This is a third test"));
    }

    #[test]
    fn test_paragraph_chunking() {
        let chunker = TextChunker::new();
        let text = "This is a test.\n\nThis is another test.\n\nThis is a third test.";
        let options = ChunkingOptions {
            chunk_size: 30,
            overlap: 10,
            strategy: ChunkingStrategy::Paragraph,
        };

        let chunks = chunker.chunk_text(text, &options);
        assert_eq!(chunks.len(), 3);
        assert!(chunks[0].contains("This is a test"));
        assert!(chunks[1].contains("This is another test"));
        assert!(chunks[2].contains("This is a third test"));
    }
}

--------------------------------------------------------------------------------
/src/flow.rs:
--------------------------------------------------------------------------------
use crate::{
    context::Context,
    node::{Node, ProcessState},
};
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::info;

/// A directed graph of named nodes, executed from `start_node` by following the
/// edge whose condition matches each node's post-process state.
pub struct Flow<S: ProcessState> {
    nodes: HashMap<String, Arc<dyn Node<State = S>>>,
    edges: HashMap<String, Vec<(String, String)>>, // (to_node, condition)
    start_node: String,
}

impl<S: ProcessState> Flow<S> {
    /// Creates a flow containing only its start node.
    pub fn new(start_node_name: &str, start_node: Arc<dyn Node<State = S>>) -> Self {
        let mut nodes = HashMap::new();
        nodes.insert(start_node_name.to_string(), start_node);

        Self {
            nodes,
            edges: HashMap::new(),
            start_node: start_node_name.to_string(),
        }
    }

    /// Registers `node` under `name`, replacing any node already using that name.
    pub fn add_node(&mut self, name: &str, node: Arc<dyn Node<State = S>>) {
        self.nodes.insert(name.to_string(), node);
    }

    pub fn 
add_edge(&mut self, from: &str, to: &str, condition: S) { 34 | self.edges 35 | .entry(from.to_string()) 36 | .or_default() 37 | .push((to.to_string(), condition.to_condition())); 38 | } 39 | 40 | pub async fn run(&self, mut context: Context) -> Result { 41 | let mut current_node = self.start_node.clone(); 42 | 43 | while let Some(node) = self.nodes.get(¤t_node) { 44 | // Prepare 45 | info!("Preparing node: {}", current_node); 46 | node.prepare(&mut context).await?; 47 | 48 | // Execute 49 | info!("Executing node: {}", current_node); 50 | let result = node.execute(&context).await; 51 | 52 | // Post process 53 | info!("Post processing node: {}", current_node); 54 | let process_result = node.post_process(&mut context, &result).await?; 55 | 56 | // Find next node based on the state returned by post_process 57 | if let Some(edges) = self.edges.get(¤t_node) { 58 | // Get the condition from the node state 59 | let condition = process_result.state.to_condition(); 60 | 61 | // Try to find an edge matching the condition 62 | let next_node_info = edges 63 | .iter() 64 | .find(|(_, edge_condition)| edge_condition == &condition); 65 | 66 | if let Some((next, _)) = next_node_info { 67 | current_node = next.clone(); 68 | } else { 69 | // If no matching edge found, try the default condition 70 | let default_edge = edges 71 | .iter() 72 | .find(|(_, edge_condition)| edge_condition == "default"); 73 | 74 | if let Some((next, _)) = default_edge { 75 | current_node = next.clone(); 76 | } else { 77 | info!( 78 | "No edge found for node '{}' with condition '{}'. Stopping flow.", 79 | current_node, condition 80 | ); 81 | break; 82 | } 83 | } 84 | } else { 85 | info!( 86 | "Node '{}' has no outgoing edges. 
Stopping flow.", 87 | current_node 88 | ); 89 | break; 90 | } 91 | } 92 | 93 | Ok(context.get("result").unwrap_or(&Value::Null).clone()) 94 | } 95 | } 96 | 97 | #[allow(dead_code)] 98 | pub struct BatchFlow { 99 | flow: Flow, 100 | batch_size: usize, 101 | } 102 | 103 | impl BatchFlow { 104 | pub fn new( 105 | start_node_name: &str, 106 | start_node: Arc>, 107 | batch_size: usize, 108 | ) -> Self { 109 | Self { 110 | flow: Flow::new(start_node_name, start_node), 111 | batch_size, 112 | } 113 | } 114 | 115 | pub async fn run_batch(&self, contexts: Vec) -> Result<()> { 116 | info!( 117 | "Starting batch flow execution with {} items", 118 | contexts.len() 119 | ); 120 | 121 | for context in contexts { 122 | self.flow.run(context).await?; 123 | } 124 | 125 | info!("Batch flow execution completed"); 126 | Ok(()) 127 | } 128 | } 129 | 130 | #[macro_export] 131 | macro_rules! build_flow { 132 | (start: ($name: expr, $node:expr)) => {{ 133 | $crate::flow::Flow::new($name, std::sync::Arc::new($node)) 134 | }}; 135 | 136 | ( 137 | start: ($start_name:expr, $start_node:expr), 138 | nodes: [$(($name:expr, $node:expr)),* $(,)?] 139 | ) => {{ 140 | let mut g = $crate::flow::Flow::new($start_name, std::sync::Arc::new($start_node)); 141 | $( 142 | g.add_node($name, std::sync::Arc::new($node)); 143 | )* 144 | g 145 | }}; 146 | 147 | // Complete version with proper-edge handling 148 | ( 149 | start: ($start_name:expr, $start_node:expr), 150 | nodes: [$(($name:expr, $node:expr)),* $(,)?], 151 | edges: [ 152 | $($edge:tt),* $(,)? 
153 | ] 154 | ) => {{ 155 | let mut g = $crate::flow::Flow::new($start_name, std::sync::Arc::new($start_node)); 156 | // Add all nodes first 157 | $( 158 | g.add_node($name, std::sync::Arc::new($node)); 159 | )* 160 | // Handle edges appropriately 161 | $( 162 | build_flow!(@edge g, $edge); 163 | )* 164 | g 165 | }}; 166 | 167 | 168 | (@edge $g:expr, ($from:expr, $to:expr, $condition:expr)) => { 169 | $g.add_edge($from, $to, $condition); 170 | }; 171 | } 172 | 173 | #[macro_export] 174 | macro_rules! build_batch_flow { 175 | (start: ($name: expr, $node:expr), batch_size: $batch_size:expr) => {{ 176 | BatchFlow::new($name, std::sync::Arc::new($node), $batch_size) 177 | }}; 178 | 179 | ( 180 | start: ($start_name:expr, $start_node:expr), 181 | nodes: [$(($name:expr, $node:expr)),* $(,)?], 182 | batch_size: $batch_size:expr 183 | ) => {{ 184 | let mut g = BatchFlow::new($start_name, std::sync::Arc::new($start_node), $batch_size); 185 | $( 186 | g.flow.add_node($name, std::sync::Arc::new($node)); 187 | )* 188 | g 189 | }}; 190 | 191 | // Complete version with proper-edge handling 192 | ( 193 | start: ($start_name:expr, $start_node:expr), 194 | nodes: [$(($name:expr, $node:expr)),* $(,)?], 195 | edges: [ 196 | $($edge:tt),* $(,)? 
197 | ], 198 | batch_size: $batch_size:expr 199 | ) => {{ 200 | let mut g = BatchFlow::new($start_name, std::sync::Arc::new($start_node), $batch_size); 201 | // Add all nodes first 202 | $( 203 | g.flow.add_node($name, std::sync::Arc::new($node)); 204 | )* 205 | // Handle edges appropriately 206 | $( 207 | build_flow!(@edge g.flow, $edge); 208 | )* 209 | g 210 | }}; 211 | } 212 | 213 | #[cfg(test)] 214 | mod tests { 215 | use super::*; 216 | use crate::node::{Node, ProcessResult, ProcessState}; 217 | use async_trait::async_trait; 218 | use serde_json::json; 219 | 220 | #[derive(Debug, Clone, PartialEq)] 221 | #[allow(dead_code)] 222 | #[derive(Default)] 223 | enum CustomState { 224 | Success, 225 | Failure, 226 | #[default] 227 | Default, 228 | } 229 | 230 | impl ProcessState for CustomState { 231 | fn is_default(&self) -> bool { 232 | matches!(self, CustomState::Default) 233 | } 234 | 235 | fn to_condition(&self) -> String { 236 | match self { 237 | CustomState::Success => "success".to_string(), 238 | CustomState::Failure => "failure".to_string(), 239 | CustomState::Default => "default".to_string(), 240 | } 241 | } 242 | } 243 | 244 | struct TestNode { 245 | result: Value, 246 | state: CustomState, 247 | } 248 | 249 | impl TestNode { 250 | fn new(result: Value, state: CustomState) -> Self { 251 | Self { result, state } 252 | } 253 | } 254 | 255 | #[async_trait] 256 | impl Node for TestNode { 257 | type State = CustomState; 258 | 259 | async fn execute(&self, _context: &Context) -> Result { 260 | Ok(self.result.clone()) 261 | } 262 | 263 | async fn post_process( 264 | &self, 265 | context: &mut Context, 266 | result: &Result, 267 | ) -> Result> { 268 | match result { 269 | Ok(value) => { 270 | context.set("result", value.clone()); 271 | Ok(ProcessResult::new(self.state.clone(), "test".to_string())) 272 | } 273 | Err(e) => { 274 | context.set("error", json!(e.to_string())); 275 | Ok(ProcessResult::new(CustomState::Default, e.to_string())) 276 | } 277 | } 278 | } 279 
| } 280 | 281 | #[tokio::test] 282 | async fn test_flow_with_custom_state() { 283 | let node1 = Arc::new(TestNode::new( 284 | json!({"data": "test1"}), 285 | CustomState::Success, 286 | )); 287 | let node2 = Arc::new(TestNode::new( 288 | json!({"data": "test2"}), 289 | CustomState::Default, 290 | )); 291 | let end_node = Arc::new(TestNode::new( 292 | json!({"final_result": "finished"}), 293 | CustomState::Default, 294 | )); 295 | 296 | let mut flow = Flow::::new("start", node1); 297 | flow.add_node("next", node2); 298 | flow.add_node("end", end_node); 299 | 300 | flow.add_edge("start", "next", CustomState::Success); 301 | flow.add_edge("next", "end", CustomState::Default); 302 | 303 | let context = Context::new(); 304 | let result = flow.run(context).await.unwrap(); 305 | 306 | assert_eq!(result, json!({"final_result": "finished"})); 307 | } 308 | 309 | #[tokio::test] 310 | async fn test_batch_flow() { 311 | let node1 = TestNode::new(json!({"data": "test1"}), CustomState::Success); 312 | let node2 = TestNode::new(json!({"data": "test2"}), CustomState::Default); 313 | 314 | let mut batch_flow = BatchFlow::::new("start", Arc::new(node1), 10); 315 | batch_flow.flow.add_node("next", Arc::new(node2)); 316 | batch_flow 317 | .flow 318 | .add_edge("start", "next", CustomState::Success); 319 | batch_flow 320 | .flow 321 | .add_edge("next", "end", CustomState::Default); 322 | 323 | let contexts = vec![Context::new(), Context::new()]; 324 | batch_flow.run_batch(contexts).await.unwrap(); 325 | } 326 | 327 | #[tokio::test] 328 | async fn test_build_flow_macro() { 329 | // Test basic flow with start node only 330 | let node1 = TestNode::new(json!({"data": "test1"}), CustomState::Success); 331 | let flow1 = build_flow!( 332 | start: ("start", node1) 333 | ); 334 | let context = Context::new(); 335 | let result = flow1.run(context).await.unwrap(); 336 | assert_eq!(result, json!({"data": "test1"})); 337 | 338 | // Test flow with multiple nodes 339 | let node1 = 
TestNode::new(json!({"data": "test1"}), CustomState::Success); 340 | let node2 = TestNode::new(json!({"data": "test2"}), CustomState::Default); 341 | let end_node = TestNode::new(json!({"final_result": "finished"}), CustomState::Default); 342 | let flow2 = build_flow!( 343 | start: ("start", node1), 344 | nodes: [("next", node2), ("end", end_node)], 345 | edges: [ 346 | ("start", "next", CustomState::Success), 347 | ("next", "end", CustomState::Default) 348 | ] 349 | ); 350 | let context = Context::new(); 351 | let result = flow2.run(context).await.unwrap(); 352 | assert_eq!(result, json!({"final_result": "finished"})); 353 | 354 | // Test flow with default edges 355 | let node1 = TestNode::new(json!({"data": "test1"}), CustomState::Success); 356 | let node2 = TestNode::new(json!({"data": "test2"}), CustomState::Default); 357 | let flow3 = build_flow!( 358 | start: ("start", node1), 359 | nodes: [("next", node2)], 360 | edges: [ 361 | ("start", "next", CustomState::Default) 362 | ] 363 | ); 364 | let context = Context::new(); 365 | let result = flow3.run(context).await.unwrap(); 366 | assert_eq!(result, json!({"data": "test2"})); 367 | } 368 | } 369 | -------------------------------------------------------------------------------- /examples/text2sql/src/flow.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context as AnyhowContext, Result}; 2 | use async_trait::async_trait; 3 | use chrono::NaiveDate; 4 | use duckdb::types::ValueRef; 5 | use duckdb::{Connection, Result as DuckResult}; 6 | use openai_api_rust::chat::*; 7 | use openai_api_rust::*; 8 | use pocketflow_rs::{Context, Node, ProcessResult, ProcessState}; 9 | use serde_json::{Value, json}; 10 | use tracing::{error, info}; 11 | 12 | #[derive(Debug, Clone, PartialEq)] 13 | pub enum SqlExecutorState { 14 | SchemaRetrieved, 15 | SqlGenerated, 16 | SqlExecuted, 17 | Default, 18 | } 19 | 20 | impl ProcessState for SqlExecutorState { 21 | fn is_default(&self) -> 
bool { 22 | matches!(self, SqlExecutorState::Default) 23 | } 24 | 25 | fn to_condition(&self) -> String { 26 | match self { 27 | SqlExecutorState::SchemaRetrieved => "schema_retrieved".to_string(), 28 | SqlExecutorState::SqlGenerated => "sql_generated".to_string(), 29 | SqlExecutorState::SqlExecuted => "sql_executed".to_string(), 30 | SqlExecutorState::Default => "default".to_string(), 31 | } 32 | } 33 | } 34 | 35 | impl Default for SqlExecutorState { 36 | fn default() -> Self { 37 | SqlExecutorState::Default 38 | } 39 | } 40 | 41 | #[derive(Debug, thiserror::Error)] 42 | pub enum WorkflowError { 43 | #[error("NodeExecution: {0}")] 44 | NodeExecution(String), 45 | } 46 | 47 | pub struct SchemaRetrievalNode { 48 | db_path: String, 49 | } 50 | 51 | impl SchemaRetrievalNode { 52 | pub fn new(db_path: String) -> Self { 53 | Self { db_path } 54 | } 55 | } 56 | 57 | #[async_trait] 58 | impl Node for SchemaRetrievalNode { 59 | type State = SqlExecutorState; 60 | 61 | #[allow(unused_variables)] 62 | async fn execute(&self, context: &Context) -> Result { 63 | info!("Exec SchemaRetrievalNode"); 64 | let conn = Connection::open(&self.db_path)?; 65 | 66 | let query = "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"; 67 | let mut stmt = conn.prepare(query)?; 68 | let tables = stmt.query_map([], |row| Ok(row.get(0)?)); 69 | 70 | let tables = tables.context("获取表名失败")?; 71 | 72 | let mut schema = serde_json::Map::new(); 73 | for table in tables { 74 | let table_name = table?; 75 | let query = format!( 76 | "SELECT column_name, data_type, is_nullable, column_default 77 | FROM information_schema.columns 78 | WHERE table_name='{}' AND table_schema='main'", 79 | table_name 80 | ); 81 | 82 | let mut stmt = conn.prepare(&query)?; 83 | let columns = stmt 84 | .query_map([], |row| { 85 | Ok(json!({ 86 | "name": row.get::<_, String>(0)?, 87 | "type": row.get::<_, String>(1)?, 88 | "nullable": row.get::<_, String>(2)? 
== "YES", 89 | "default_value": row.get::<_, Option>(3)?, 90 | })) 91 | })? 92 | .collect::>>() 93 | .context("Get Column Info Failed")?; 94 | 95 | schema.insert(table_name, Value::Array(columns)); 96 | } 97 | info!("Get Result Final"); 98 | 99 | Ok(Value::Object(schema)) 100 | } 101 | 102 | async fn post_process( 103 | &self, 104 | context: &mut Context, 105 | result: &Result, 106 | ) -> Result> { 107 | context.set("result", result.as_ref().unwrap().clone()); 108 | Ok(ProcessResult::new( 109 | SqlExecutorState::SchemaRetrieved, 110 | "schema_retrieved".to_string(), 111 | )) 112 | } 113 | } 114 | 115 | pub struct OpenAISQLGenerationNode { 116 | api_key: String, 117 | user_query: String, 118 | } 119 | 120 | impl OpenAISQLGenerationNode { 121 | pub fn new(api_key: String, user_query: String) -> Self { 122 | Self { 123 | api_key, 124 | user_query, 125 | } 126 | } 127 | } 128 | 129 | pub fn print_table(headers: &[String], data: &[Vec]) { 130 | if headers.is_empty() { 131 | println!("Query returned no columns."); 132 | return; 133 | } 134 | 135 | // Calculate column widths based on headers and data 136 | let mut widths: Vec = headers.iter().map(|h| h.len()).collect(); 137 | for row in data { 138 | for (i, cell) in row.iter().enumerate() { 139 | if i < widths.len() { 140 | widths[i] = widths[i].max(cell.len()); 141 | } 142 | } 143 | } 144 | 145 | // Print Header 146 | let header_line = headers 147 | .iter() 148 | .zip(&widths) 149 | .map(|(h, w)| format!("{:>() 151 | .join(" | "); 152 | println!("\n{}", header_line); 153 | 154 | // Print Separator 155 | let separator_line = widths 156 | .iter() 157 | .map(|w| "-".repeat(*w)) 158 | .collect::>() 159 | .join("-+-"); 160 | println!("{}", separator_line); 161 | 162 | // Print Data Rows 163 | if data.is_empty() { 164 | println!("(No rows returned)"); 165 | } else { 166 | for row in data { 167 | let row_line = row 168 | .iter() 169 | .zip(&widths) 170 | .map(|(cell, w)| format!("{:>() 172 | .join(" | "); 173 | println!("{}", 
row_line); 174 | } 175 | } 176 | } 177 | 178 | #[async_trait] 179 | impl Node for OpenAISQLGenerationNode { 180 | type State = SqlExecutorState; 181 | 182 | async fn execute(&self, context: &Context) -> Result { 183 | let schema = context.get("result").ok_or_else(|| { 184 | WorkflowError::NodeExecution("Failed to get database schema".to_string()) 185 | })?; 186 | 187 | let system_prompt = "You are a SQL expert. Based on the provided database schema and user query, generate the correct SQL query. Only return the SQL query, do not include any explanation or other text. The condition content uses English, you can choose to query some fields first, then make a general query."; 188 | 189 | let schema_json = 190 | serde_json::to_string_pretty(schema).context("Failed to serialize database schema")?; 191 | 192 | let user_prompt = format!( 193 | "database schema:\n{}\n\nuser query:\n{}\n\nPlease generate a SQL query to answer this question.", 194 | schema_json, self.user_query 195 | ); 196 | 197 | let auth = Auth::new(self.api_key.as_str()); 198 | let openai = OpenAI::new(auth, "https://dashscope.aliyuncs.com/compatible-mode/v1/"); 199 | let body = ChatBody { 200 | model: "qwen-plus".to_string(), 201 | max_tokens: Some(1024), 202 | temperature: Some(0.8_f32), 203 | top_p: Some(0_f32), 204 | n: Some(1), 205 | stream: Some(false), 206 | stop: None, 207 | presence_penalty: None, 208 | frequency_penalty: None, 209 | logit_bias: None, 210 | user: None, 211 | messages: vec![ 212 | Message { 213 | role: Role::System, 214 | content: system_prompt.to_string(), 215 | }, 216 | Message { 217 | role: Role::User, 218 | content: user_prompt, 219 | }, 220 | ], 221 | }; 222 | let rs = openai.chat_completion_create(&body); 223 | if rs.is_err() { 224 | error!("OpenAI Error {}", rs.as_ref().err().unwrap().to_string()); 225 | } 226 | let choice = rs.unwrap().choices; 227 | let message = &choice[0].message.as_ref().unwrap(); 228 | 229 | let sql = message.content.clone(); 230 | 231 | 
println!("生成的SQL查询: {}", sql); 232 | 233 | Ok(Value::String(sql)) 234 | } 235 | 236 | async fn post_process( 237 | &self, 238 | context: &mut Context, 239 | result: &Result, 240 | ) -> Result> { 241 | context.set("result", result.as_ref().unwrap().clone()); 242 | Ok(ProcessResult::new( 243 | SqlExecutorState::SqlGenerated, 244 | "sql_generated".to_string(), 245 | )) 246 | } 247 | } 248 | 249 | pub struct ExecuteSQLNode { 250 | db_path: String, 251 | } 252 | 253 | impl ExecuteSQLNode { 254 | pub fn new(db_path: String) -> Self { 255 | Self { db_path } 256 | } 257 | } 258 | 259 | #[async_trait] 260 | impl Node for ExecuteSQLNode { 261 | type State = SqlExecutorState; 262 | 263 | async fn execute(&self, context: &Context) -> Result { 264 | let conn = Connection::open(&self.db_path)?; 265 | 266 | let sql = context 267 | .get("result") 268 | .and_then(|v| v.as_str()) 269 | .ok_or_else(|| { 270 | WorkflowError::NodeExecution("SQL query not found in context".to_string()) 271 | })?; 272 | 273 | info!("ExecuteSQLNode: Get Sql: {}", sql); 274 | 275 | let mut stmt = conn.prepare(sql)?; 276 | let mut rows = stmt.query([])?; 277 | 278 | let mut headers = Vec::new(); 279 | let mut data_rows = Vec::new(); 280 | 281 | if let Some(first_row) = rows.next()? 
{ 282 | // Get column names from the first row 283 | headers = first_row.as_ref().column_names(); 284 | let column_count = headers.len(); 285 | 286 | // Process first row 287 | let mut row_values = Vec::with_capacity(column_count); 288 | for i in 0..column_count { 289 | let value_ref = first_row.get_ref(i)?; 290 | let string_value = match value_ref { 291 | ValueRef::Null => "NULL".to_string(), 292 | ValueRef::Boolean(b) => b.to_string(), 293 | ValueRef::TinyInt(i) => i.to_string(), 294 | ValueRef::SmallInt(i) => i.to_string(), 295 | ValueRef::Int(i) => i.to_string(), 296 | ValueRef::BigInt(i) => i.to_string(), 297 | ValueRef::Float(f) => f.to_string(), 298 | ValueRef::Double(d) => d.to_string(), 299 | ValueRef::Text(bytes) => String::from_utf8_lossy(bytes).to_string(), 300 | ValueRef::Blob(_) => "[BLOB]".to_string(), 301 | ValueRef::Date32(d) => { 302 | let date = NaiveDate::from_num_days_from_ce_opt(d as i32 + 719163).unwrap(); 303 | date.format("%Y-%m-%d").to_string() 304 | } 305 | _ => format!("Unsupported: {:?}", value_ref), 306 | }; 307 | row_values.push(string_value); 308 | } 309 | data_rows.push(row_values); 310 | 311 | // Process remaining rows 312 | while let Some(row) = rows.next()? 
{ 313 | let mut row_values = Vec::with_capacity(column_count); 314 | for i in 0..column_count { 315 | let value_ref = row.get_ref(i)?; 316 | let string_value = match value_ref { 317 | ValueRef::Null => "NULL".to_string(), 318 | ValueRef::Boolean(b) => b.to_string(), 319 | ValueRef::TinyInt(i) => i.to_string(), 320 | ValueRef::SmallInt(i) => i.to_string(), 321 | ValueRef::Int(i) => i.to_string(), 322 | ValueRef::BigInt(i) => i.to_string(), 323 | ValueRef::Float(f) => f.to_string(), 324 | ValueRef::Double(d) => d.to_string(), 325 | ValueRef::Text(bytes) => String::from_utf8_lossy(bytes).to_string(), 326 | ValueRef::Blob(_) => "[BLOB]".to_string(), 327 | ValueRef::Date32(d) => { 328 | let date = 329 | NaiveDate::from_num_days_from_ce_opt(d as i32 + 719163).unwrap(); 330 | date.format("%Y-%m-%d").to_string() 331 | } 332 | _ => format!("Unsupported: {:?}", value_ref), 333 | }; 334 | row_values.push(string_value); 335 | } 336 | data_rows.push(row_values); 337 | } 338 | } 339 | 340 | print_table(&headers, &data_rows); 341 | 342 | Ok(json!({ 343 | "columns": headers, 344 | "data": data_rows 345 | })) 346 | } 347 | 348 | async fn post_process( 349 | &self, 350 | context: &mut Context, 351 | result: &Result, 352 | ) -> Result> { 353 | context.set("result", result.as_ref().unwrap().clone()); 354 | Ok(ProcessResult::new( 355 | SqlExecutorState::SqlExecuted, 356 | "sql_executed".to_string(), 357 | )) 358 | } 359 | } 360 | -------------------------------------------------------------------------------- /examples/text2sql/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "adler2" 7 | version = "2.0.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 10 | 11 | [[package]] 12 | name = "ahash" 13 | version = "0.7.8" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" 16 | dependencies = [ 17 | "getrandom", 18 | "once_cell", 19 | "version_check", 20 | ] 21 | 22 | [[package]] 23 | name = "ahash" 24 | version = "0.8.11" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" 27 | dependencies = [ 28 | "cfg-if", 29 | "const-random", 30 | "getrandom", 31 | "once_cell", 32 | "version_check", 33 | "zerocopy 0.7.35", 34 | ] 35 | 36 | [[package]] 37 | name = "aho-corasick" 38 | version = "1.1.3" 39 | source = "registry+https://github.com/rust-lang/crates.io-index" 40 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 41 | dependencies = [ 42 | "memchr", 43 | ] 44 | 45 | [[package]] 46 | name = "android-tzdata" 47 | version = "0.1.1" 48 | source = "registry+https://github.com/rust-lang/crates.io-index" 49 | checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" 50 | 51 | [[package]] 52 | name = "android_system_properties" 53 | version = "0.1.5" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" 56 | dependencies = [ 57 | "libc", 58 | ] 59 | 60 | [[package]] 61 | name = "arrayvec" 62 | version = "0.7.6" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 65 | 66 | [[package]] 67 | name = "arrow" 68 | version = "54.3.1" 69 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "b5ec52ba94edeed950e4a41f75d35376df196e8cb04437f7280a5aa49f20f796" 71 | dependencies = [ 72 | "arrow-arith", 73 | "arrow-array", 74 | "arrow-buffer", 75 | "arrow-cast", 76 | "arrow-data", 77 | "arrow-ord", 78 | "arrow-row", 79 | "arrow-schema", 80 | "arrow-select", 81 | "arrow-string", 82 | ] 83 | 84 | [[package]] 85 | name = "arrow-arith" 86 | version = "54.3.1" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "8fc766fdacaf804cb10c7c70580254fcdb5d55cdfda2bc57b02baf5223a3af9e" 89 | dependencies = [ 90 | "arrow-array", 91 | "arrow-buffer", 92 | "arrow-data", 93 | "arrow-schema", 94 | "chrono", 95 | "num", 96 | ] 97 | 98 | [[package]] 99 | name = "arrow-array" 100 | version = "54.3.1" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "a12fcdb3f1d03f69d3ec26ac67645a8fe3f878d77b5ebb0b15d64a116c212985" 103 | dependencies = [ 104 | "ahash 0.8.11", 105 | "arrow-buffer", 106 | "arrow-data", 107 | "arrow-schema", 108 | "chrono", 109 | "half", 110 | "hashbrown 0.15.3", 111 | "num", 112 | ] 113 | 114 | [[package]] 115 | name = "arrow-buffer" 116 | version = "54.3.1" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c" 119 | dependencies = [ 120 | "bytes", 121 | "half", 122 | "num", 123 | ] 124 | 125 | [[package]] 126 | name = "arrow-cast" 127 | version = "54.3.1" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "ede6175fbc039dfc946a61c1b6d42fd682fcecf5ab5d148fbe7667705798cac9" 130 | dependencies = [ 131 | "arrow-array", 132 | "arrow-buffer", 133 | "arrow-data", 134 | "arrow-schema", 135 | "arrow-select", 136 | "atoi", 137 | "base64", 138 | "chrono", 139 | "comfy-table", 140 | "half", 141 | "lexical-core", 142 | "num", 143 | "ryu", 144 | ] 145 | 146 | [[package]] 147 | name = "arrow-data" 148 | 
version = "54.3.1" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429" 151 | dependencies = [ 152 | "arrow-buffer", 153 | "arrow-schema", 154 | "half", 155 | "num", 156 | ] 157 | 158 | [[package]] 159 | name = "arrow-ord" 160 | version = "54.3.1" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | checksum = "f0a3334a743bd2a1479dbc635540617a3923b4b2f6870f37357339e6b5363c21" 163 | dependencies = [ 164 | "arrow-array", 165 | "arrow-buffer", 166 | "arrow-data", 167 | "arrow-schema", 168 | "arrow-select", 169 | ] 170 | 171 | [[package]] 172 | name = "arrow-row" 173 | version = "54.3.1" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "8d1d7a7291d2c5107e92140f75257a99343956871f3d3ab33a7b41532f79cb68" 176 | dependencies = [ 177 | "arrow-array", 178 | "arrow-buffer", 179 | "arrow-data", 180 | "arrow-schema", 181 | "half", 182 | ] 183 | 184 | [[package]] 185 | name = "arrow-schema" 186 | version = "54.3.1" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9" 189 | dependencies = [ 190 | "bitflags", 191 | ] 192 | 193 | [[package]] 194 | name = "arrow-select" 195 | version = "54.3.1" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "69efcd706420e52cd44f5c4358d279801993846d1c2a8e52111853d61d55a619" 198 | dependencies = [ 199 | "ahash 0.8.11", 200 | "arrow-array", 201 | "arrow-buffer", 202 | "arrow-data", 203 | "arrow-schema", 204 | "num", 205 | ] 206 | 207 | [[package]] 208 | name = "arrow-string" 209 | version = "54.3.1" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "a21546b337ab304a32cfc0770f671db7411787586b45b78b4593ae78e64e2b03" 212 | dependencies = [ 213 | "arrow-array", 214 | "arrow-buffer", 215 | "arrow-data", 216 | 
"arrow-schema", 217 | "arrow-select", 218 | "memchr", 219 | "num", 220 | "regex", 221 | "regex-syntax", 222 | ] 223 | 224 | [[package]] 225 | name = "atoi" 226 | version = "2.0.0" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" 229 | dependencies = [ 230 | "num-traits", 231 | ] 232 | 233 | [[package]] 234 | name = "autocfg" 235 | version = "1.4.0" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 238 | 239 | [[package]] 240 | name = "base64" 241 | version = "0.22.1" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" 244 | 245 | [[package]] 246 | name = "bitflags" 247 | version = "2.9.0" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 250 | 251 | [[package]] 252 | name = "bitvec" 253 | version = "1.0.1" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" 256 | dependencies = [ 257 | "funty", 258 | "radium", 259 | "tap", 260 | "wyz", 261 | ] 262 | 263 | [[package]] 264 | name = "borsh" 265 | version = "1.5.7" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" 268 | dependencies = [ 269 | "borsh-derive", 270 | "cfg_aliases", 271 | ] 272 | 273 | [[package]] 274 | name = "borsh-derive" 275 | version = "1.5.7" 276 | source = "registry+https://github.com/rust-lang/crates.io-index" 277 | checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" 278 | dependencies = [ 279 | "once_cell", 280 | "proc-macro-crate", 281 | "proc-macro2", 282 
| "quote", 283 | "syn 2.0.101", 284 | ] 285 | 286 | [[package]] 287 | name = "bumpalo" 288 | version = "3.17.0" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" 291 | 292 | [[package]] 293 | name = "bytecheck" 294 | version = "0.6.12" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" 297 | dependencies = [ 298 | "bytecheck_derive", 299 | "ptr_meta", 300 | "simdutf8", 301 | ] 302 | 303 | [[package]] 304 | name = "bytecheck_derive" 305 | version = "0.6.12" 306 | source = "registry+https://github.com/rust-lang/crates.io-index" 307 | checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" 308 | dependencies = [ 309 | "proc-macro2", 310 | "quote", 311 | "syn 1.0.109", 312 | ] 313 | 314 | [[package]] 315 | name = "bytes" 316 | version = "1.10.1" 317 | source = "registry+https://github.com/rust-lang/crates.io-index" 318 | checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 319 | 320 | [[package]] 321 | name = "cast" 322 | version = "0.3.0" 323 | source = "registry+https://github.com/rust-lang/crates.io-index" 324 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 325 | 326 | [[package]] 327 | name = "cc" 328 | version = "1.2.20" 329 | source = "registry+https://github.com/rust-lang/crates.io-index" 330 | checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" 331 | dependencies = [ 332 | "shlex", 333 | ] 334 | 335 | [[package]] 336 | name = "cfg-if" 337 | version = "1.0.0" 338 | source = "registry+https://github.com/rust-lang/crates.io-index" 339 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 340 | 341 | [[package]] 342 | name = "cfg_aliases" 343 | version = "0.2.1" 344 | source = "registry+https://github.com/rust-lang/crates.io-index" 
345 | checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 346 | 347 | [[package]] 348 | name = "chrono" 349 | version = "0.4.41" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" 352 | dependencies = [ 353 | "android-tzdata", 354 | "iana-time-zone", 355 | "num-traits", 356 | "windows-link", 357 | ] 358 | 359 | [[package]] 360 | name = "comfy-table" 361 | version = "7.1.4" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" 364 | dependencies = [ 365 | "unicode-segmentation", 366 | "unicode-width", 367 | ] 368 | 369 | [[package]] 370 | name = "const-random" 371 | version = "0.1.18" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" 374 | dependencies = [ 375 | "const-random-macro", 376 | ] 377 | 378 | [[package]] 379 | name = "const-random-macro" 380 | version = "0.1.16" 381 | source = "registry+https://github.com/rust-lang/crates.io-index" 382 | checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" 383 | dependencies = [ 384 | "getrandom", 385 | "once_cell", 386 | "tiny-keccak", 387 | ] 388 | 389 | [[package]] 390 | name = "core-foundation-sys" 391 | version = "0.8.7" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" 394 | 395 | [[package]] 396 | name = "crc32fast" 397 | version = "1.4.2" 398 | source = "registry+https://github.com/rust-lang/crates.io-index" 399 | checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" 400 | dependencies = [ 401 | "cfg-if", 402 | ] 403 | 404 | [[package]] 405 | name = "crunchy" 406 | version = "0.2.3" 407 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 408 | checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" 409 | 410 | [[package]] 411 | name = "duckdb" 412 | version = "1.2.2" 413 | source = "registry+https://github.com/rust-lang/crates.io-index" 414 | checksum = "49ac283b6621e3becf8014d1efa655522794075834c72f744573debef9c9f6c8" 415 | dependencies = [ 416 | "arrow", 417 | "cast", 418 | "fallible-iterator", 419 | "fallible-streaming-iterator", 420 | "hashlink", 421 | "libduckdb-sys", 422 | "memchr", 423 | "num-integer", 424 | "rust_decimal", 425 | "smallvec", 426 | "strum", 427 | ] 428 | 429 | [[package]] 430 | name = "equivalent" 431 | version = "1.0.2" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 434 | 435 | [[package]] 436 | name = "errno" 437 | version = "0.3.11" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" 440 | dependencies = [ 441 | "libc", 442 | "windows-sys", 443 | ] 444 | 445 | [[package]] 446 | name = "fallible-iterator" 447 | version = "0.3.0" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" 450 | 451 | [[package]] 452 | name = "fallible-streaming-iterator" 453 | version = "0.1.9" 454 | source = "registry+https://github.com/rust-lang/crates.io-index" 455 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 456 | 457 | [[package]] 458 | name = "filetime" 459 | version = "0.2.25" 460 | source = "registry+https://github.com/rust-lang/crates.io-index" 461 | checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" 462 | dependencies = [ 463 | "cfg-if", 464 | "libc", 465 | "libredox", 466 | "windows-sys", 467 | ] 468 | 469 | [[package]] 470 | name = "flate2" 
471 | version = "1.1.1" 472 | source = "registry+https://github.com/rust-lang/crates.io-index" 473 | checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" 474 | dependencies = [ 475 | "crc32fast", 476 | "miniz_oxide", 477 | ] 478 | 479 | [[package]] 480 | name = "funty" 481 | version = "2.0.0" 482 | source = "registry+https://github.com/rust-lang/crates.io-index" 483 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" 484 | 485 | [[package]] 486 | name = "getrandom" 487 | version = "0.2.16" 488 | source = "registry+https://github.com/rust-lang/crates.io-index" 489 | checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" 490 | dependencies = [ 491 | "cfg-if", 492 | "libc", 493 | "wasi", 494 | ] 495 | 496 | [[package]] 497 | name = "half" 498 | version = "2.6.0" 499 | source = "registry+https://github.com/rust-lang/crates.io-index" 500 | checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" 501 | dependencies = [ 502 | "cfg-if", 503 | "crunchy", 504 | "num-traits", 505 | ] 506 | 507 | [[package]] 508 | name = "hashbrown" 509 | version = "0.12.3" 510 | source = "registry+https://github.com/rust-lang/crates.io-index" 511 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 512 | dependencies = [ 513 | "ahash 0.7.8", 514 | ] 515 | 516 | [[package]] 517 | name = "hashbrown" 518 | version = "0.14.5" 519 | source = "registry+https://github.com/rust-lang/crates.io-index" 520 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 521 | dependencies = [ 522 | "ahash 0.8.11", 523 | ] 524 | 525 | [[package]] 526 | name = "hashbrown" 527 | version = "0.15.3" 528 | source = "registry+https://github.com/rust-lang/crates.io-index" 529 | checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" 530 | 531 | [[package]] 532 | name = "hashlink" 533 | version = "0.9.1" 534 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 535 | checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" 536 | dependencies = [ 537 | "hashbrown 0.14.5", 538 | ] 539 | 540 | [[package]] 541 | name = "heck" 542 | version = "0.4.1" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 545 | 546 | [[package]] 547 | name = "iana-time-zone" 548 | version = "0.1.63" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" 551 | dependencies = [ 552 | "android_system_properties", 553 | "core-foundation-sys", 554 | "iana-time-zone-haiku", 555 | "js-sys", 556 | "log", 557 | "wasm-bindgen", 558 | "windows-core", 559 | ] 560 | 561 | [[package]] 562 | name = "iana-time-zone-haiku" 563 | version = "0.1.2" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" 566 | dependencies = [ 567 | "cc", 568 | ] 569 | 570 | [[package]] 571 | name = "indexmap" 572 | version = "2.9.0" 573 | source = "registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" 575 | dependencies = [ 576 | "equivalent", 577 | "hashbrown 0.15.3", 578 | ] 579 | 580 | [[package]] 581 | name = "itoa" 582 | version = "1.0.15" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 585 | 586 | [[package]] 587 | name = "js-sys" 588 | version = "0.3.77" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 591 | dependencies = [ 592 | "once_cell", 593 | "wasm-bindgen", 594 | ] 595 | 596 | [[package]] 597 | name = 
"lexical-core" 598 | version = "1.0.5" 599 | source = "registry+https://github.com/rust-lang/crates.io-index" 600 | checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" 601 | dependencies = [ 602 | "lexical-parse-float", 603 | "lexical-parse-integer", 604 | "lexical-util", 605 | "lexical-write-float", 606 | "lexical-write-integer", 607 | ] 608 | 609 | [[package]] 610 | name = "lexical-parse-float" 611 | version = "1.0.5" 612 | source = "registry+https://github.com/rust-lang/crates.io-index" 613 | checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" 614 | dependencies = [ 615 | "lexical-parse-integer", 616 | "lexical-util", 617 | "static_assertions", 618 | ] 619 | 620 | [[package]] 621 | name = "lexical-parse-integer" 622 | version = "1.0.5" 623 | source = "registry+https://github.com/rust-lang/crates.io-index" 624 | checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" 625 | dependencies = [ 626 | "lexical-util", 627 | "static_assertions", 628 | ] 629 | 630 | [[package]] 631 | name = "lexical-util" 632 | version = "1.0.6" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" 635 | dependencies = [ 636 | "static_assertions", 637 | ] 638 | 639 | [[package]] 640 | name = "lexical-write-float" 641 | version = "1.0.5" 642 | source = "registry+https://github.com/rust-lang/crates.io-index" 643 | checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" 644 | dependencies = [ 645 | "lexical-util", 646 | "lexical-write-integer", 647 | "static_assertions", 648 | ] 649 | 650 | [[package]] 651 | name = "lexical-write-integer" 652 | version = "1.0.5" 653 | source = "registry+https://github.com/rust-lang/crates.io-index" 654 | checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" 655 | dependencies = [ 656 | "lexical-util", 657 | "static_assertions", 658 | ] 659 | 
660 | [[package]] 661 | name = "libc" 662 | version = "0.2.172" 663 | source = "registry+https://github.com/rust-lang/crates.io-index" 664 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 665 | 666 | [[package]] 667 | name = "libduckdb-sys" 668 | version = "1.2.2" 669 | source = "registry+https://github.com/rust-lang/crates.io-index" 670 | checksum = "12cac9d03484c43fefac8b2066a253c9b0b3b0cd02cbe02a9ea2312f7e382618" 671 | dependencies = [ 672 | "autocfg", 673 | "flate2", 674 | "pkg-config", 675 | "serde", 676 | "serde_json", 677 | "tar", 678 | "vcpkg", 679 | ] 680 | 681 | [[package]] 682 | name = "libm" 683 | version = "0.2.13" 684 | source = "registry+https://github.com/rust-lang/crates.io-index" 685 | checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" 686 | 687 | [[package]] 688 | name = "libredox" 689 | version = "0.1.3" 690 | source = "registry+https://github.com/rust-lang/crates.io-index" 691 | checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" 692 | dependencies = [ 693 | "bitflags", 694 | "libc", 695 | "redox_syscall", 696 | ] 697 | 698 | [[package]] 699 | name = "linux-raw-sys" 700 | version = "0.9.4" 701 | source = "registry+https://github.com/rust-lang/crates.io-index" 702 | checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" 703 | 704 | [[package]] 705 | name = "log" 706 | version = "0.4.27" 707 | source = "registry+https://github.com/rust-lang/crates.io-index" 708 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 709 | 710 | [[package]] 711 | name = "memchr" 712 | version = "2.7.4" 713 | source = "registry+https://github.com/rust-lang/crates.io-index" 714 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 715 | 716 | [[package]] 717 | name = "miniz_oxide" 718 | version = "0.8.8" 719 | source = "registry+https://github.com/rust-lang/crates.io-index" 720 | checksum = 
"3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" 721 | dependencies = [ 722 | "adler2", 723 | ] 724 | 725 | [[package]] 726 | name = "num" 727 | version = "0.4.3" 728 | source = "registry+https://github.com/rust-lang/crates.io-index" 729 | checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" 730 | dependencies = [ 731 | "num-bigint", 732 | "num-complex", 733 | "num-integer", 734 | "num-iter", 735 | "num-rational", 736 | "num-traits", 737 | ] 738 | 739 | [[package]] 740 | name = "num-bigint" 741 | version = "0.4.6" 742 | source = "registry+https://github.com/rust-lang/crates.io-index" 743 | checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" 744 | dependencies = [ 745 | "num-integer", 746 | "num-traits", 747 | ] 748 | 749 | [[package]] 750 | name = "num-complex" 751 | version = "0.4.6" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" 754 | dependencies = [ 755 | "num-traits", 756 | ] 757 | 758 | [[package]] 759 | name = "num-integer" 760 | version = "0.1.46" 761 | source = "registry+https://github.com/rust-lang/crates.io-index" 762 | checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" 763 | dependencies = [ 764 | "num-traits", 765 | ] 766 | 767 | [[package]] 768 | name = "num-iter" 769 | version = "0.1.45" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" 772 | dependencies = [ 773 | "autocfg", 774 | "num-integer", 775 | "num-traits", 776 | ] 777 | 778 | [[package]] 779 | name = "num-rational" 780 | version = "0.4.2" 781 | source = "registry+https://github.com/rust-lang/crates.io-index" 782 | checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" 783 | dependencies = [ 784 | "num-bigint", 785 | "num-integer", 786 | "num-traits", 787 | ] 788 | 
789 | [[package]] 790 | name = "num-traits" 791 | version = "0.2.19" 792 | source = "registry+https://github.com/rust-lang/crates.io-index" 793 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 794 | dependencies = [ 795 | "autocfg", 796 | "libm", 797 | ] 798 | 799 | [[package]] 800 | name = "once_cell" 801 | version = "1.21.3" 802 | source = "registry+https://github.com/rust-lang/crates.io-index" 803 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 804 | 805 | [[package]] 806 | name = "pkg-config" 807 | version = "0.3.32" 808 | source = "registry+https://github.com/rust-lang/crates.io-index" 809 | checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 810 | 811 | [[package]] 812 | name = "ppv-lite86" 813 | version = "0.2.21" 814 | source = "registry+https://github.com/rust-lang/crates.io-index" 815 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 816 | dependencies = [ 817 | "zerocopy 0.8.25", 818 | ] 819 | 820 | [[package]] 821 | name = "proc-macro-crate" 822 | version = "3.3.0" 823 | source = "registry+https://github.com/rust-lang/crates.io-index" 824 | checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" 825 | dependencies = [ 826 | "toml_edit", 827 | ] 828 | 829 | [[package]] 830 | name = "proc-macro2" 831 | version = "1.0.95" 832 | source = "registry+https://github.com/rust-lang/crates.io-index" 833 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 834 | dependencies = [ 835 | "unicode-ident", 836 | ] 837 | 838 | [[package]] 839 | name = "ptr_meta" 840 | version = "0.1.4" 841 | source = "registry+https://github.com/rust-lang/crates.io-index" 842 | checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" 843 | dependencies = [ 844 | "ptr_meta_derive", 845 | ] 846 | 847 | [[package]] 848 | name = "ptr_meta_derive" 849 | version = "0.1.4" 850 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" 852 | dependencies = [ 853 | "proc-macro2", 854 | "quote", 855 | "syn 1.0.109", 856 | ] 857 | 858 | [[package]] 859 | name = "quote" 860 | version = "1.0.40" 861 | source = "registry+https://github.com/rust-lang/crates.io-index" 862 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 863 | dependencies = [ 864 | "proc-macro2", 865 | ] 866 | 867 | [[package]] 868 | name = "radium" 869 | version = "0.7.0" 870 | source = "registry+https://github.com/rust-lang/crates.io-index" 871 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" 872 | 873 | [[package]] 874 | name = "rand" 875 | version = "0.8.5" 876 | source = "registry+https://github.com/rust-lang/crates.io-index" 877 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 878 | dependencies = [ 879 | "libc", 880 | "rand_chacha", 881 | "rand_core", 882 | ] 883 | 884 | [[package]] 885 | name = "rand_chacha" 886 | version = "0.3.1" 887 | source = "registry+https://github.com/rust-lang/crates.io-index" 888 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 889 | dependencies = [ 890 | "ppv-lite86", 891 | "rand_core", 892 | ] 893 | 894 | [[package]] 895 | name = "rand_core" 896 | version = "0.6.4" 897 | source = "registry+https://github.com/rust-lang/crates.io-index" 898 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 899 | dependencies = [ 900 | "getrandom", 901 | ] 902 | 903 | [[package]] 904 | name = "redox_syscall" 905 | version = "0.5.11" 906 | source = "registry+https://github.com/rust-lang/crates.io-index" 907 | checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" 908 | dependencies = [ 909 | "bitflags", 910 | ] 911 | 912 | [[package]] 913 | name = "regex" 914 | version = "1.11.1" 915 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 916 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 917 | dependencies = [ 918 | "aho-corasick", 919 | "memchr", 920 | "regex-automata", 921 | "regex-syntax", 922 | ] 923 | 924 | [[package]] 925 | name = "regex-automata" 926 | version = "0.4.9" 927 | source = "registry+https://github.com/rust-lang/crates.io-index" 928 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 929 | dependencies = [ 930 | "aho-corasick", 931 | "memchr", 932 | "regex-syntax", 933 | ] 934 | 935 | [[package]] 936 | name = "regex-syntax" 937 | version = "0.8.5" 938 | source = "registry+https://github.com/rust-lang/crates.io-index" 939 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 940 | 941 | [[package]] 942 | name = "rend" 943 | version = "0.4.2" 944 | source = "registry+https://github.com/rust-lang/crates.io-index" 945 | checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" 946 | dependencies = [ 947 | "bytecheck", 948 | ] 949 | 950 | [[package]] 951 | name = "rkyv" 952 | version = "0.7.45" 953 | source = "registry+https://github.com/rust-lang/crates.io-index" 954 | checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" 955 | dependencies = [ 956 | "bitvec", 957 | "bytecheck", 958 | "bytes", 959 | "hashbrown 0.12.3", 960 | "ptr_meta", 961 | "rend", 962 | "rkyv_derive", 963 | "seahash", 964 | "tinyvec", 965 | "uuid", 966 | ] 967 | 968 | [[package]] 969 | name = "rkyv_derive" 970 | version = "0.7.45" 971 | source = "registry+https://github.com/rust-lang/crates.io-index" 972 | checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" 973 | dependencies = [ 974 | "proc-macro2", 975 | "quote", 976 | "syn 1.0.109", 977 | ] 978 | 979 | [[package]] 980 | name = "rust_decimal" 981 | version = "1.37.1" 982 | source = "registry+https://github.com/rust-lang/crates.io-index" 983 | checksum = 
"faa7de2ba56ac291bd90c6b9bece784a52ae1411f9506544b3eae36dd2356d50" 984 | dependencies = [ 985 | "arrayvec", 986 | "borsh", 987 | "bytes", 988 | "num-traits", 989 | "rand", 990 | "rkyv", 991 | "serde", 992 | "serde_json", 993 | ] 994 | 995 | [[package]] 996 | name = "rustix" 997 | version = "1.0.7" 998 | source = "registry+https://github.com/rust-lang/crates.io-index" 999 | checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" 1000 | dependencies = [ 1001 | "bitflags", 1002 | "errno", 1003 | "libc", 1004 | "linux-raw-sys", 1005 | "windows-sys", 1006 | ] 1007 | 1008 | [[package]] 1009 | name = "rustversion" 1010 | version = "1.0.20" 1011 | source = "registry+https://github.com/rust-lang/crates.io-index" 1012 | checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" 1013 | 1014 | [[package]] 1015 | name = "ryu" 1016 | version = "1.0.20" 1017 | source = "registry+https://github.com/rust-lang/crates.io-index" 1018 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 1019 | 1020 | [[package]] 1021 | name = "seahash" 1022 | version = "4.1.0" 1023 | source = "registry+https://github.com/rust-lang/crates.io-index" 1024 | checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" 1025 | 1026 | [[package]] 1027 | name = "serde" 1028 | version = "1.0.219" 1029 | source = "registry+https://github.com/rust-lang/crates.io-index" 1030 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 1031 | dependencies = [ 1032 | "serde_derive", 1033 | ] 1034 | 1035 | [[package]] 1036 | name = "serde_derive" 1037 | version = "1.0.219" 1038 | source = "registry+https://github.com/rust-lang/crates.io-index" 1039 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 1040 | dependencies = [ 1041 | "proc-macro2", 1042 | "quote", 1043 | "syn 2.0.101", 1044 | ] 1045 | 1046 | [[package]] 1047 | name = "serde_json" 1048 | version = "1.0.140" 1049 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1050 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" 1051 | dependencies = [ 1052 | "itoa", 1053 | "memchr", 1054 | "ryu", 1055 | "serde", 1056 | ] 1057 | 1058 | [[package]] 1059 | name = "shlex" 1060 | version = "1.3.0" 1061 | source = "registry+https://github.com/rust-lang/crates.io-index" 1062 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1063 | 1064 | [[package]] 1065 | name = "simdutf8" 1066 | version = "0.1.5" 1067 | source = "registry+https://github.com/rust-lang/crates.io-index" 1068 | checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" 1069 | 1070 | [[package]] 1071 | name = "smallvec" 1072 | version = "1.15.0" 1073 | source = "registry+https://github.com/rust-lang/crates.io-index" 1074 | checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" 1075 | 1076 | [[package]] 1077 | name = "static_assertions" 1078 | version = "1.1.0" 1079 | source = "registry+https://github.com/rust-lang/crates.io-index" 1080 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 1081 | 1082 | [[package]] 1083 | name = "strum" 1084 | version = "0.25.0" 1085 | source = "registry+https://github.com/rust-lang/crates.io-index" 1086 | checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" 1087 | dependencies = [ 1088 | "strum_macros", 1089 | ] 1090 | 1091 | [[package]] 1092 | name = "strum_macros" 1093 | version = "0.25.3" 1094 | source = "registry+https://github.com/rust-lang/crates.io-index" 1095 | checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" 1096 | dependencies = [ 1097 | "heck", 1098 | "proc-macro2", 1099 | "quote", 1100 | "rustversion", 1101 | "syn 2.0.101", 1102 | ] 1103 | 1104 | [[package]] 1105 | name = "syn" 1106 | version = "1.0.109" 1107 | source = "registry+https://github.com/rust-lang/crates.io-index" 1108 | checksum = 
"72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1109 | dependencies = [ 1110 | "proc-macro2", 1111 | "quote", 1112 | "unicode-ident", 1113 | ] 1114 | 1115 | [[package]] 1116 | name = "syn" 1117 | version = "2.0.101" 1118 | source = "registry+https://github.com/rust-lang/crates.io-index" 1119 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 1120 | dependencies = [ 1121 | "proc-macro2", 1122 | "quote", 1123 | "unicode-ident", 1124 | ] 1125 | 1126 | [[package]] 1127 | name = "tap" 1128 | version = "1.0.1" 1129 | source = "registry+https://github.com/rust-lang/crates.io-index" 1130 | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" 1131 | 1132 | [[package]] 1133 | name = "tar" 1134 | version = "0.4.44" 1135 | source = "registry+https://github.com/rust-lang/crates.io-index" 1136 | checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" 1137 | dependencies = [ 1138 | "filetime", 1139 | "libc", 1140 | "xattr", 1141 | ] 1142 | 1143 | [[package]] 1144 | name = "text2sql" 1145 | version = "0.1.0" 1146 | dependencies = [ 1147 | "duckdb", 1148 | ] 1149 | 1150 | [[package]] 1151 | name = "tiny-keccak" 1152 | version = "2.0.2" 1153 | source = "registry+https://github.com/rust-lang/crates.io-index" 1154 | checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" 1155 | dependencies = [ 1156 | "crunchy", 1157 | ] 1158 | 1159 | [[package]] 1160 | name = "tinyvec" 1161 | version = "1.9.0" 1162 | source = "registry+https://github.com/rust-lang/crates.io-index" 1163 | checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" 1164 | dependencies = [ 1165 | "tinyvec_macros", 1166 | ] 1167 | 1168 | [[package]] 1169 | name = "tinyvec_macros" 1170 | version = "0.1.1" 1171 | source = "registry+https://github.com/rust-lang/crates.io-index" 1172 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 1173 | 1174 | [[package]] 1175 
| name = "toml_datetime" 1176 | version = "0.6.9" 1177 | source = "registry+https://github.com/rust-lang/crates.io-index" 1178 | checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" 1179 | 1180 | [[package]] 1181 | name = "toml_edit" 1182 | version = "0.22.26" 1183 | source = "registry+https://github.com/rust-lang/crates.io-index" 1184 | checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" 1185 | dependencies = [ 1186 | "indexmap", 1187 | "toml_datetime", 1188 | "winnow", 1189 | ] 1190 | 1191 | [[package]] 1192 | name = "unicode-ident" 1193 | version = "1.0.18" 1194 | source = "registry+https://github.com/rust-lang/crates.io-index" 1195 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 1196 | 1197 | [[package]] 1198 | name = "unicode-segmentation" 1199 | version = "1.12.0" 1200 | source = "registry+https://github.com/rust-lang/crates.io-index" 1201 | checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" 1202 | 1203 | [[package]] 1204 | name = "unicode-width" 1205 | version = "0.2.0" 1206 | source = "registry+https://github.com/rust-lang/crates.io-index" 1207 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 1208 | 1209 | [[package]] 1210 | name = "uuid" 1211 | version = "1.16.0" 1212 | source = "registry+https://github.com/rust-lang/crates.io-index" 1213 | checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" 1214 | 1215 | [[package]] 1216 | name = "vcpkg" 1217 | version = "0.2.15" 1218 | source = "registry+https://github.com/rust-lang/crates.io-index" 1219 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1220 | 1221 | [[package]] 1222 | name = "version_check" 1223 | version = "0.9.5" 1224 | source = "registry+https://github.com/rust-lang/crates.io-index" 1225 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 1226 | 1227 | [[package]] 1228 | name = 
"wasi" 1229 | version = "0.11.0+wasi-snapshot-preview1" 1230 | source = "registry+https://github.com/rust-lang/crates.io-index" 1231 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1232 | 1233 | [[package]] 1234 | name = "wasm-bindgen" 1235 | version = "0.2.100" 1236 | source = "registry+https://github.com/rust-lang/crates.io-index" 1237 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 1238 | dependencies = [ 1239 | "cfg-if", 1240 | "once_cell", 1241 | "rustversion", 1242 | "wasm-bindgen-macro", 1243 | ] 1244 | 1245 | [[package]] 1246 | name = "wasm-bindgen-backend" 1247 | version = "0.2.100" 1248 | source = "registry+https://github.com/rust-lang/crates.io-index" 1249 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 1250 | dependencies = [ 1251 | "bumpalo", 1252 | "log", 1253 | "proc-macro2", 1254 | "quote", 1255 | "syn 2.0.101", 1256 | "wasm-bindgen-shared", 1257 | ] 1258 | 1259 | [[package]] 1260 | name = "wasm-bindgen-macro" 1261 | version = "0.2.100" 1262 | source = "registry+https://github.com/rust-lang/crates.io-index" 1263 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 1264 | dependencies = [ 1265 | "quote", 1266 | "wasm-bindgen-macro-support", 1267 | ] 1268 | 1269 | [[package]] 1270 | name = "wasm-bindgen-macro-support" 1271 | version = "0.2.100" 1272 | source = "registry+https://github.com/rust-lang/crates.io-index" 1273 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 1274 | dependencies = [ 1275 | "proc-macro2", 1276 | "quote", 1277 | "syn 2.0.101", 1278 | "wasm-bindgen-backend", 1279 | "wasm-bindgen-shared", 1280 | ] 1281 | 1282 | [[package]] 1283 | name = "wasm-bindgen-shared" 1284 | version = "0.2.100" 1285 | source = "registry+https://github.com/rust-lang/crates.io-index" 1286 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 1287 | dependencies = [ 1288 | 
"unicode-ident", 1289 | ] 1290 | 1291 | [[package]] 1292 | name = "windows-core" 1293 | version = "0.61.0" 1294 | source = "registry+https://github.com/rust-lang/crates.io-index" 1295 | checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" 1296 | dependencies = [ 1297 | "windows-implement", 1298 | "windows-interface", 1299 | "windows-link", 1300 | "windows-result", 1301 | "windows-strings", 1302 | ] 1303 | 1304 | [[package]] 1305 | name = "windows-implement" 1306 | version = "0.60.0" 1307 | source = "registry+https://github.com/rust-lang/crates.io-index" 1308 | checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" 1309 | dependencies = [ 1310 | "proc-macro2", 1311 | "quote", 1312 | "syn 2.0.101", 1313 | ] 1314 | 1315 | [[package]] 1316 | name = "windows-interface" 1317 | version = "0.59.1" 1318 | source = "registry+https://github.com/rust-lang/crates.io-index" 1319 | checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" 1320 | dependencies = [ 1321 | "proc-macro2", 1322 | "quote", 1323 | "syn 2.0.101", 1324 | ] 1325 | 1326 | [[package]] 1327 | name = "windows-link" 1328 | version = "0.1.1" 1329 | source = "registry+https://github.com/rust-lang/crates.io-index" 1330 | checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" 1331 | 1332 | [[package]] 1333 | name = "windows-result" 1334 | version = "0.3.2" 1335 | source = "registry+https://github.com/rust-lang/crates.io-index" 1336 | checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" 1337 | dependencies = [ 1338 | "windows-link", 1339 | ] 1340 | 1341 | [[package]] 1342 | name = "windows-strings" 1343 | version = "0.4.0" 1344 | source = "registry+https://github.com/rust-lang/crates.io-index" 1345 | checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" 1346 | dependencies = [ 1347 | "windows-link", 1348 | ] 1349 | 1350 | [[package]] 1351 | name = "windows-sys" 1352 | version = 
"0.59.0" 1353 | source = "registry+https://github.com/rust-lang/crates.io-index" 1354 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1355 | dependencies = [ 1356 | "windows-targets", 1357 | ] 1358 | 1359 | [[package]] 1360 | name = "windows-targets" 1361 | version = "0.52.6" 1362 | source = "registry+https://github.com/rust-lang/crates.io-index" 1363 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1364 | dependencies = [ 1365 | "windows_aarch64_gnullvm", 1366 | "windows_aarch64_msvc", 1367 | "windows_i686_gnu", 1368 | "windows_i686_gnullvm", 1369 | "windows_i686_msvc", 1370 | "windows_x86_64_gnu", 1371 | "windows_x86_64_gnullvm", 1372 | "windows_x86_64_msvc", 1373 | ] 1374 | 1375 | [[package]] 1376 | name = "windows_aarch64_gnullvm" 1377 | version = "0.52.6" 1378 | source = "registry+https://github.com/rust-lang/crates.io-index" 1379 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1380 | 1381 | [[package]] 1382 | name = "windows_aarch64_msvc" 1383 | version = "0.52.6" 1384 | source = "registry+https://github.com/rust-lang/crates.io-index" 1385 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1386 | 1387 | [[package]] 1388 | name = "windows_i686_gnu" 1389 | version = "0.52.6" 1390 | source = "registry+https://github.com/rust-lang/crates.io-index" 1391 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1392 | 1393 | [[package]] 1394 | name = "windows_i686_gnullvm" 1395 | version = "0.52.6" 1396 | source = "registry+https://github.com/rust-lang/crates.io-index" 1397 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1398 | 1399 | [[package]] 1400 | name = "windows_i686_msvc" 1401 | version = "0.52.6" 1402 | source = "registry+https://github.com/rust-lang/crates.io-index" 1403 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1404 | 1405 | [[package]] 1406 
| name = "windows_x86_64_gnu" 1407 | version = "0.52.6" 1408 | source = "registry+https://github.com/rust-lang/crates.io-index" 1409 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1410 | 1411 | [[package]] 1412 | name = "windows_x86_64_gnullvm" 1413 | version = "0.52.6" 1414 | source = "registry+https://github.com/rust-lang/crates.io-index" 1415 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1416 | 1417 | [[package]] 1418 | name = "windows_x86_64_msvc" 1419 | version = "0.52.6" 1420 | source = "registry+https://github.com/rust-lang/crates.io-index" 1421 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1422 | 1423 | [[package]] 1424 | name = "winnow" 1425 | version = "0.7.7" 1426 | source = "registry+https://github.com/rust-lang/crates.io-index" 1427 | checksum = "6cb8234a863ea0e8cd7284fcdd4f145233eb00fee02bbdd9861aec44e6477bc5" 1428 | dependencies = [ 1429 | "memchr", 1430 | ] 1431 | 1432 | [[package]] 1433 | name = "wyz" 1434 | version = "0.5.1" 1435 | source = "registry+https://github.com/rust-lang/crates.io-index" 1436 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" 1437 | dependencies = [ 1438 | "tap", 1439 | ] 1440 | 1441 | [[package]] 1442 | name = "xattr" 1443 | version = "1.5.0" 1444 | source = "registry+https://github.com/rust-lang/crates.io-index" 1445 | checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" 1446 | dependencies = [ 1447 | "libc", 1448 | "rustix", 1449 | ] 1450 | 1451 | [[package]] 1452 | name = "zerocopy" 1453 | version = "0.7.35" 1454 | source = "registry+https://github.com/rust-lang/crates.io-index" 1455 | checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" 1456 | dependencies = [ 1457 | "zerocopy-derive 0.7.35", 1458 | ] 1459 | 1460 | [[package]] 1461 | name = "zerocopy" 1462 | version = "0.8.25" 1463 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1464 | checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" 1465 | dependencies = [ 1466 | "zerocopy-derive 0.8.25", 1467 | ] 1468 | 1469 | [[package]] 1470 | name = "zerocopy-derive" 1471 | version = "0.7.35" 1472 | source = "registry+https://github.com/rust-lang/crates.io-index" 1473 | checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" 1474 | dependencies = [ 1475 | "proc-macro2", 1476 | "quote", 1477 | "syn 2.0.101", 1478 | ] 1479 | 1480 | [[package]] 1481 | name = "zerocopy-derive" 1482 | version = "0.8.25" 1483 | source = "registry+https://github.com/rust-lang/crates.io-index" 1484 | checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" 1485 | dependencies = [ 1486 | "proc-macro2", 1487 | "quote", 1488 | "syn 2.0.101", 1489 | ] 1490 | --------------------------------------------------------------------------------