├── examples
│   ├── requests.jsonl
│   └── config.json
├── .gitignore
├── LICENSE
├── benches
│   └── throughput.rs
├── Cargo.toml
├── src
│   ├── lib.rs
│   ├── error.rs
│   ├── main.rs
│   ├── request.rs
│   ├── tracker.rs
│   ├── client.rs
│   ├── endpoint.rs
│   ├── config.rs
│   └── processor.rs
└── README.md

/examples/requests.jsonl:
--------------------------------------------------------------------------------
1 | {"input": "What is the capital of France?"}
2 | {"input": "Explain quantum computing in simple terms."}
3 | {"input": "Write a haiku about Rust programming."}
4 | {"input": "What are the benefits of functional programming?"}
5 | {"input": "Describe the difference between TCP and UDP."}
6 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated files
2 | /target/
3 | Cargo.lock
4 | 
5 | # IDE
6 | .idea/
7 | .vscode/
8 | *.swp
9 | *.swo
10 | *~
11 | 
12 | # OS
13 | .DS_Store
14 | Thumbs.db
15 | 
16 | # Environment
17 | .env
18 | .env.local
19 | .envrc
20 | 
21 | # Test files
22 | *.jsonl
23 | !examples/*.jsonl
24 | 
25 | # Logs
26 | *.log
27 | logs/
28 | 
29 | # Coverage
30 | *.profraw
31 | *.profdata
32 | /coverage/
33 | tarpaulin-report.html
34 | 
35 | # Benchmarks output
36 | /criterion/
37 | 
38 | # Release artifacts
39 | /dist/
40 | *.tar.gz
41 | *.zip
42 | 
--------------------------------------------------------------------------------
/examples/config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "endpoints": [
3 |     {
4 |       "url": "https://api.openai.com/v1/chat/completions",
5 |       "weight": 2,
6 |       "api_key": "${OPENAI_API_KEY}",
7 |       "model": "gpt-4o-mini",
8 |       "max_concurrent": 100
9 |     },
10 |     {
11 |       "url": "https://api.anthropic.com/v1/messages",
12 |       "weight": 1,
13 |       "api_key": "${ANTHROPIC_API_KEY}",
14 |       "model": "claude-3-haiku-20240307",
15 |       "max_concurrent": 50
16 |     }
17 |   ],
18 |   "request": {
19 |     "timeout": "30s",
20 |     "rate_limit": 1000,
21 |     "workers": 50
22 |   },
23 |   "retry": {
24 |     "max_attempts": 3,
25 |     "initial_backoff": "100ms",
26 |     "max_backoff": "10s",
27 |     "multiplier": 2.0
28 |   }
29 | }
30 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 Yiğit Konur
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /benches/throughput.rs: -------------------------------------------------------------------------------- 1 | //! Throughput benchmarks for Blaze API. 2 | 3 | use criterion::{criterion_group, criterion_main, Criterion, Throughput}; 4 | 5 | fn benchmark_request_parsing(c: &mut Criterion) { 6 | let sample_json = r#"{"input": "What is the capital of France?"}"#; 7 | 8 | let mut group = c.benchmark_group("parsing"); 9 | group.throughput(Throughput::Elements(1)); 10 | 11 | group.bench_function("parse_request", |b| { 12 | b.iter(|| { 13 | let _: blaze_api::ApiRequest = serde_json::from_str(sample_json).unwrap(); 14 | }); 15 | }); 16 | 17 | group.finish(); 18 | } 19 | 20 | fn benchmark_load_balancer(c: &mut Criterion) { 21 | use blaze_api::{EndpointConfig, LoadBalancer}; 22 | 23 | let configs = vec![ 24 | EndpointConfig { 25 | url: "http://a.test".to_string(), 26 | weight: 1, 27 | api_key: None, 28 | model: None, 29 | max_concurrent: 100, 30 | }, 31 | EndpointConfig { 32 | url: "http://b.test".to_string(), 33 | weight: 2, 34 | api_key: None, 35 | model: None, 36 | max_concurrent: 100, 37 | }, 38 | EndpointConfig { 39 | url: "http://c.test".to_string(), 40 | weight: 3, 41 | api_key: None, 42 | model: None, 43 | max_concurrent: 100, 44 | }, 45 | ]; 46 | 47 | let lb = LoadBalancer::new(configs).unwrap(); 48 | 49 | let mut group = c.benchmark_group("load_balancer"); 50 | group.throughput(Throughput::Elements(1)); 51 | 52 | group.bench_function("select_endpoint", |b| { 53 | b.iter(|| { 54 | let _ = lb.select(); 55 | }); 56 | }); 57 | 58 | group.finish(); 59 | } 60 | 61 | criterion_group!(benches, benchmark_request_parsing, benchmark_load_balancer); 62 | criterion_main!(benches); 63 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "blaze-api" 3 | version = "1.0.0" 4 | edition = "2021" 5 | authors = ["Yiğit Konur "] 6 | description = "High-performance async API client with load balancing — 10K+ req/sec for LLM batch processing" 7 | repository = "https://github.com/yigitkonur/blaze-api" 8 | license = "MIT" 9 | keywords = ["api", "llm", "load-balancing", "async", "batch-processing"] 10 | categories = ["command-line-utilities", "asynchronous", "web-programming"] 11 | readme = "README.md" 12 | rust-version = "1.75" 13 | 14 | [[bin]] 15 | name = "blaze" 16 | path = "src/main.rs" 17 | 18 | [lib] 19 | name = "blaze_api" 20 | path = "src/lib.rs" 21 | 22 | [dependencies] 23 | # Async runtime 24 | tokio = { version = "1.43", features = ["full", "tracing"] } 25 | tokio-stream = "0.1" 26 | 27 | # HTTP client 28 | reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "gzip", "brotli", "stream"] } 29 | 30 | # Serialization 31 | serde = { version = "1.0", features = ["derive"] } 32 | serde_json = "1.0" 33 | 34 | # CLI 35 | clap = { version = "4.5", features = ["derive", "env", "wrap_help"] } 36 | 37 | # Logging & Tracing 38 | tracing = "0.1" 39 | tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } 40 | 41 | # Error handling 42 | thiserror = "2.0" 43 | anyhow = "1.0" 44 | 45 | # Utilities 46 | rand = "0.9" 47 | chrono = { version = "0.4", features = ["serde"] } 48 | futures = "0.3" 49 | parking_lot = "0.12" 50 | governor = "0.8" 51 | nonzero_ext = "0.3" 52 | 53 | # Progress & UI 54 | indicatif = { version = "0.17", features = 
["tokio"] } 55 | console = "0.15" 56 | 57 | [dev-dependencies] 58 | tokio-test = "0.4" 59 | wiremock = "0.6" 60 | tempfile = "3.15" 61 | criterion = { version = "0.5", features = ["async_tokio"] } 62 | 63 | [profile.release] 64 | lto = "thin" 65 | codegen-units = 1 66 | panic = "abort" 67 | strip = true 68 | 69 | [profile.release-fast] 70 | inherits = "release" 71 | lto = false 72 | codegen-units = 16 73 | 74 | [[bench]] 75 | name = "throughput" 76 | harness = false 77 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Blaze API 2 | //! 3 | //! High-performance async API client with load balancing for batch LLM processing. 4 | //! 5 | //! Blaze API is designed to handle massive throughput (10,000+ requests per second) 6 | //! with intelligent load balancing, automatic retries, and comprehensive error handling. 7 | //! 8 | //! ## Features 9 | //! 10 | //! - **Weighted Load Balancing**: Distribute requests across multiple endpoints based on weights 11 | //! - **Automatic Retries**: Exponential backoff with jitter for failed requests 12 | //! - **Rate Limiting**: Control throughput to respect API limits 13 | //! - **Connection Pooling**: Efficient HTTP/2 connection management 14 | //! - **Progress Tracking**: Real-time statistics and progress visualization 15 | //! 16 | //! ## Quick Start 17 | //! 18 | //! ```rust,no_run 19 | //! use blaze_api::{Config, Processor, EndpointConfig}; 20 | //! 21 | //! #[tokio::main] 22 | //! async fn main() -> anyhow::Result<()> { 23 | //! let config = Config { 24 | //! endpoints: vec![EndpointConfig { 25 | //! url: "https://api.example.com/v1/completions".to_string(), 26 | //! weight: 1, 27 | //! api_key: Some("your-api-key".to_string()), 28 | //! model: Some("gpt-4".to_string()), 29 | //! max_concurrent: 100, 30 | //! }], 31 | //! ..Default::default() 32 | //! }; 33 | //! 34 | //! let processor = Processor::new(config)?; 35 | //! let result = processor.process_file( 36 | //! "requests.jsonl".into(), 37 | //! Some("results.jsonl".into()), 38 | //! "errors.jsonl".into(), 39 | //! true, 40 | //! ).await?; 41 | //! 42 | //! result.print_summary(); 43 | //! Ok(()) 44 | //! } 45 | //! ``` 46 | //! 47 | //! ## Configuration 48 | //! 49 | //! Blaze supports configuration via: 50 | //! - Command-line arguments 51 | //! - Environment variables (prefixed with `BLAZE_`) 52 | //! - JSON configuration files 53 | //! 54 | //! See [`Config`] for all available options. 55 | 56 | #![warn(missing_docs)] 57 | #![warn(clippy::all)] 58 | #![warn(clippy::pedantic)] 59 | #![allow(clippy::module_name_repetitions)] 60 | 61 | pub mod client; 62 | pub mod config; 63 | pub mod endpoint; 64 | pub mod error; 65 | pub mod processor; 66 | pub mod request; 67 | pub mod tracker; 68 | 69 | // Re-exports for convenience 70 | pub use config::{Args, Config, EndpointConfig, RequestConfig, RetryConfig}; 71 | pub use endpoint::{Endpoint, LoadBalancer}; 72 | pub use error::{BlazeError, Result}; 73 | pub use processor::{ProcessingResult, Processor}; 74 | pub use request::{ApiRequest, ApiResponse, ErrorResponse, RequestResult}; 75 | pub use tracker::{StatsSnapshot, StatsTracker}; 76 | 77 | /// Library version. 78 | pub const VERSION: &str = env!("CARGO_PKG_VERSION"); 79 | 80 | /// Default configuration for quick setup. 
81 | impl Default for Config {
82 |     fn default() -> Self {
83 |         Self {
84 |             endpoints: vec![],
85 |             request: RequestConfig::default(),
86 |             retry: RetryConfig::default(),
87 |         }
88 |     }
89 | }
90 | 
--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
1 | //! Custom error types for Blaze API.
2 | //!
3 | //! This module defines all error types used throughout the application,
4 | //! following Rust best practices with `thiserror` for library errors.
5 | 
6 | use std::path::PathBuf;
7 | use thiserror::Error;
8 | 
9 | /// Errors that can occur during API processing.
10 | #[derive(Error, Debug)]
11 | #[allow(missing_docs)]
12 | pub enum BlazeError {
13 |     /// Failed to read the input file.
14 |     #[error("failed to read input file '{path}': {source}")]
15 |     InputFileRead {
16 |         /// Path to the file that could not be read.
17 |         path: PathBuf,
18 |         /// The underlying I/O error.
19 |         #[source]
20 |         source: std::io::Error,
21 |     },
22 | 
23 |     /// Failed to write to the output file.
24 |     #[error("failed to write to output file '{path}': {source}")]
25 |     OutputFileWrite {
26 |         /// Path to the file that could not be written.
27 |         path: PathBuf,
28 |         /// The underlying I/O error.
29 |         #[source]
30 |         source: std::io::Error,
31 |     },
32 | 
33 |     /// Failed to parse JSON from the input file.
34 |     #[error("failed to parse JSON at line {line}: {source}")]
35 |     JsonParse {
36 |         /// Line number where the error occurred.
37 |         line: usize,
38 |         /// The underlying JSON parsing error.
39 |         #[source]
40 |         source: serde_json::Error,
41 |     },
42 | 
43 |     /// Failed to serialize JSON for output.
44 |     #[error("failed to serialize JSON: {0}")]
45 |     JsonSerialize(#[from] serde_json::Error),
46 | 
47 |     /// HTTP request failed.
48 |     #[error("HTTP request failed: {0}")]
49 |     HttpRequest(#[from] reqwest::Error),
50 | 
51 |     /// No endpoints configured.
52 |     #[error("no endpoints configured - at least one endpoint is required")]
53 |     NoEndpoints,
54 | 
55 |     /// All endpoints are unhealthy.
56 |     #[error("all endpoints are currently unhealthy")]
57 |     AllEndpointsUnhealthy,
58 | 
59 |     /// Invalid configuration.
60 |     #[error("invalid configuration: {0}")]
61 |     InvalidConfig(String),
62 | 
63 |     /// Request timed out.
64 |     #[error("request timed out after {attempts} attempts")]
65 |     Timeout {
66 |         /// Number of attempts made before timeout.
67 |         attempts: u32,
68 |     },
69 | 
70 |     /// Rate limit exceeded.
71 |     #[error("rate limit exceeded for endpoint '{endpoint}'")]
72 |     RateLimitExceeded {
73 |         /// The endpoint that exceeded its rate limit.
74 |         endpoint: String,
75 |     },
76 | 
77 |     /// Invalid response from API.
78 |     #[error("invalid API response: {message}")]
79 |     InvalidResponse {
80 |         /// Description of what was invalid.
81 |         message: String,
82 |     },
83 | 
84 |     /// Endpoint returned an error status.
85 |     #[error("endpoint returned error status {status}: {body}")]
86 |     EndpointError {
87 |         /// HTTP status code returned.
88 |         status: u16,
89 |         /// Response body content.
90 |         body: String,
91 |     },
92 | }
93 | 
94 | /// Result type alias for Blaze operations.
95 | pub type Result<T> = std::result::Result<T, BlazeError>;
96 | 
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | //! Blaze API CLI - High-performance batch API client.
2 | //!
3 | //! Run `blaze --help` for usage information.
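//!
//! The process exits non-zero when the configuration is invalid, the input
//! file is missing, or every request fails; partial failures still exit zero
//! so batch pipelines can inspect the error file instead.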
4 | 5 | use anyhow::Result; 6 | use blaze_api::{Args, Config, Processor}; 7 | use console::style; 8 | use tracing::{error, info, Level}; 9 | use tracing_subscriber::{fmt, prelude::*, EnvFilter}; 10 | 11 | #[tokio::main] 12 | async fn main() -> Result<()> { 13 | // Parse CLI arguments 14 | let args = Args::parse_args(); 15 | 16 | // Setup logging 17 | setup_logging(&args); 18 | 19 | // Print banner 20 | if !args.json_logs { 21 | print_banner(); 22 | } 23 | 24 | // Load configuration 25 | let config = match Config::from_args(&args) { 26 | Ok(c) => c, 27 | Err(e) => { 28 | error!("Configuration error: {}", e); 29 | eprintln!("{} {}", style("Error:").red().bold(), e); 30 | std::process::exit(1); 31 | } 32 | }; 33 | 34 | // Validate input file exists 35 | if !args.input.exists() { 36 | error!("Input file not found: {:?}", args.input); 37 | eprintln!( 38 | "{} Input file not found: {}", 39 | style("Error:").red().bold(), 40 | args.input.display() 41 | ); 42 | std::process::exit(1); 43 | } 44 | 45 | // Dry run mode 46 | if args.dry_run { 47 | println!("\n{}", style("DRY RUN MODE").yellow().bold()); 48 | println!("Configuration validated successfully.\n"); 49 | print_config_summary(&args, &config); 50 | return Ok(()); 51 | } 52 | 53 | // Print configuration summary 54 | if args.verbose && !args.json_logs { 55 | print_config_summary(&args, &config); 56 | } 57 | 58 | // Create processor and run 59 | let processor = Processor::new(config)?; 60 | 61 | info!( 62 | input = %args.input.display(), 63 | output = ?args.output, 64 | "Starting processing" 65 | ); 66 | 67 | let result = processor 68 | .process_file( 69 | args.input.clone(), 70 | args.output.clone(), 71 | args.errors.clone(), 72 | !args.no_progress && !args.json_logs, 73 | ) 74 | .await?; 75 | 76 | // Print results 77 | if !args.json_logs { 78 | result.print_summary(); 79 | 80 | if let Some(output) = &args.output { 81 | println!( 82 | "\n{} Results saved to: {}", 83 | style("✓").green().bold(), 84 | output.display() 85 | ); 86 | } 87 | 88 | if result.failure_count > 0 { 89 | println!( 90 | "{} Errors saved to: {}", 91 | style("⚠").yellow().bold(), 92 | args.errors.display() 93 | ); 94 | } 95 | } else { 96 | // JSON output for programmatic consumption 97 | let json_result = serde_json::json!({ 98 | "status": "complete", 99 | "total_processed": result.total_processed, 100 | "success_count": result.success_count, 101 | "failure_count": result.failure_count, 102 | "success_rate": result.success_rate(), 103 | "elapsed_seconds": result.elapsed.as_secs_f64(), 104 | "avg_latency_ms": result.avg_latency_ms, 105 | "throughput_rps": result.overall_rps, 106 | }); 107 | println!("{}", serde_json::to_string(&json_result)?); 108 | } 109 | 110 | // Exit with error code if there were failures 111 | if result.failure_count > 0 && result.success_count == 0 { 112 | std::process::exit(1); 113 | } 114 | 115 | Ok(()) 116 | } 117 | 118 | fn setup_logging(args: &Args) { 119 | let level = if args.verbose { Level::DEBUG } else { Level::INFO }; 120 | 121 | let filter = EnvFilter::try_from_default_env() 122 | .unwrap_or_else(|_| EnvFilter::new(format!("blaze_api={},blaze={}", level, level))); 123 | 124 | if args.json_logs { 125 | tracing_subscriber::registry() 126 | .with(filter) 127 | .with(fmt::layer().json()) 128 | .init(); 129 | } else { 130 | tracing_subscriber::registry() 131 | .with(filter) 132 | .with( 133 | fmt::layer() 134 | .with_target(false) 135 | .with_thread_ids(false) 136 | .compact(), 137 | ) 138 | .init(); 139 | } 140 | } 141 | 142 | fn print_banner() { 
143 |     let banner = r#"
144 |     ____  __                    ___    ____  ____
145 |    / __ )/ /___ _____  ___     /   |  / __ \/  _/
146 |   / __  / / __ `/_  / / _ \   / /| | / /_/ // /
147 |  / /_/ / / /_/ / / /_/  __/  / ___ |/ ____// /
148 | /_____/_/\__,_/ /___/\___/  /_/  |_/_/   /___/
149 | 
150 | "#;
151 | 
152 |     println!("{}", style(banner).cyan().bold());
153 |     println!(
154 |         "  {}",
155 |         style("High-Performance Batch API Client").white().dim()
156 |     );
157 |     println!(
158 |         "  {}",
159 |         style(format!("v{}", blaze_api::VERSION)).white().dim()
160 |     );
161 |     println!();
162 | }
163 | 
164 | fn print_config_summary(args: &Args, config: &Config) {
165 |     println!("{}", style("Configuration:").bold());
166 |     println!("  Input:      {}", args.input.display());
167 |     if let Some(output) = &args.output {
168 |         println!("  Output:     {}", output.display());
169 |     }
170 |     println!("  Errors:     {}", args.errors.display());
171 |     println!("  Rate Limit: {} req/sec", config.request.rate_limit);
172 |     println!("  Workers:    {}", config.request.workers);
173 |     println!("  Timeout:    {:?}", config.request.timeout);
174 |     println!("  Retries:    {}", config.retry.max_attempts);
175 |     println!("  Endpoints:  {}", config.endpoints.len());
176 |     for (i, ep) in config.endpoints.iter().enumerate() {
177 |         println!(
178 |             "    {}. {} (weight: {}, max: {})",
179 |             i + 1,
180 |             ep.url,
181 |             ep.weight,
182 |             ep.max_concurrent
183 |         );
184 |     }
185 |     println!();
186 | }
187 | 
--------------------------------------------------------------------------------
/src/request.rs:
--------------------------------------------------------------------------------
1 | //! Request and response types for API processing.
2 | //!
3 | //! This module defines the data structures for API requests and responses,
4 | //! supporting flexible input formats and structured output.
5 | 
6 | use serde::{Deserialize, Serialize};
7 | use serde_json::Value;
8 | use std::collections::HashMap;
9 | 
10 | /// An API request read from the input file.
11 | #[derive(Debug, Clone, Serialize, Deserialize)]
12 | pub struct ApiRequest {
13 |     /// The main input content (for LLM requests).
14 |     #[serde(default)]
15 |     pub input: Option<String>,
16 | 
17 |     /// Custom request body (overrides default formatting).
18 |     #[serde(default)]
19 |     pub body: Option<Value>,
20 | 
21 |     /// Custom headers for this specific request.
22 |     #[serde(default)]
23 |     pub headers: Option<HashMap<String, String>>,
24 | 
25 |     /// Request-specific metadata (passed through to response).
26 |     #[serde(default, flatten)]
27 |     pub metadata: HashMap<String, Value>,
28 | 
29 |     /// Line number in the input file (set during parsing).
30 |     #[serde(skip)]
31 |     pub line_number: usize,
32 | }
33 | 
34 | impl ApiRequest {
35 |     /// Create a simple request with just input text.
36 |     pub fn simple(input: impl Into<String>) -> Self {
37 |         Self {
38 |             input: Some(input.into()),
39 |             body: None,
40 |             headers: None,
41 |             metadata: HashMap::new(),
42 |             line_number: 0,
43 |         }
44 |     }
45 | 
46 |     /// Create a request with a custom body.
47 |     pub fn with_body(body: Value) -> Self {
48 |         Self {
49 |             input: None,
50 |             body: Some(body),
51 |             headers: None,
52 |             metadata: HashMap::new(),
53 |             line_number: 0,
54 |         }
55 |     }
56 | 
57 |     /// Build the request body for an LLM endpoint.
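    ///
    /// If a custom `body` was supplied it is returned untouched; otherwise the
    /// `input` text is wrapped in a single-message chat payload, and `model`
    /// is added only when the endpoint has one configured:
    ///
    /// ```rust
    /// use blaze_api::ApiRequest;
    ///
    /// let req = ApiRequest::simple("Hello");
    /// let body = req.build_llm_body(Some("gpt-4o-mini"));
    /// assert_eq!(body["model"], "gpt-4o-mini");
    /// assert_eq!(body["messages"][0]["content"], "Hello");
    /// ```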
58 |     pub fn build_llm_body(&self, model: Option<&str>) -> Value {
59 |         if let Some(body) = &self.body {
60 |             // Use custom body if provided
61 |             return body.clone();
62 |         }
63 | 
64 |         // Build standard LLM request body
65 |         let input = self.input.as_deref().unwrap_or("");
66 |         let mut body = serde_json::json!({
67 |             "messages": [{
68 |                 "role": "user",
69 |                 "content": input
70 |             }]
71 |         });
72 | 
73 |         if let Some(model) = model {
74 |             body["model"] = Value::String(model.to_string());
75 |         }
76 | 
77 |         body
78 |     }
79 | 
80 |     /// Get a display string for logging.
81 |     pub fn display_input(&self) -> String {
82 |         if let Some(input) = &self.input {
83 |             if input.chars().count() > 50 {
84 |                 format!("{}...", input.chars().take(50).collect::<String>())
85 |             } else {
86 |                 input.clone()
87 |             }
88 |         } else if self.body.is_some() {
89 |             "[custom body]".to_string()
90 |         } else {
91 |             "[empty]".to_string()
92 |         }
93 |     }
94 | }
95 | 
96 | /// A successful API response.
97 | #[derive(Debug, Clone, Serialize, Deserialize)]
98 | pub struct ApiResponse {
99 |     /// The original input (for correlation).
100 |     #[serde(skip_serializing_if = "Option::is_none")]
101 |     pub input: Option<String>,
102 | 
103 |     /// The response body from the API.
104 |     pub response: Value,
105 | 
106 |     /// Response metadata.
107 |     #[serde(skip_serializing_if = "Option::is_none")]
108 |     pub metadata: Option<ResponseMetadata>,
109 | }
110 | 
111 | /// Metadata about the response.
112 | #[derive(Debug, Clone, Serialize, Deserialize)]
113 | pub struct ResponseMetadata {
114 |     /// Which endpoint handled the request.
115 |     pub endpoint: String,
116 | 
117 |     /// Response latency in milliseconds.
118 |     pub latency_ms: u64,
119 | 
120 |     /// Number of retry attempts.
121 |     pub attempts: u32,
122 | }
123 | 
124 | impl ApiResponse {
125 |     /// Create a new API response.
126 |     pub fn new(input: Option<String>, response: Value) -> Self {
127 |         Self {
128 |             input,
129 |             response,
130 |             metadata: None,
131 |         }
132 |     }
133 | 
134 |     /// Add metadata to the response.
135 |     pub fn with_metadata(mut self, metadata: ResponseMetadata) -> Self {
136 |         self.metadata = Some(metadata);
137 |         self
138 |     }
139 | }
140 | 
141 | /// An error response for failed requests.
142 | #[derive(Debug, Clone, Serialize, Deserialize)]
143 | pub struct ErrorResponse {
144 |     /// The original input that failed.
145 |     #[serde(skip_serializing_if = "Option::is_none")]
146 |     pub input: Option<String>,
147 | 
148 |     /// The original request body.
149 |     #[serde(skip_serializing_if = "Option::is_none")]
150 |     pub body: Option<Value>,
151 | 
152 |     /// Error message.
153 |     pub error: String,
154 | 
155 |     /// HTTP status code (if available).
156 |     #[serde(skip_serializing_if = "Option::is_none")]
157 |     pub status_code: Option<u16>,
158 | 
159 |     /// Line number in the input file.
160 |     #[serde(skip_serializing_if = "is_zero")]
161 |     pub line_number: usize,
162 | 
163 |     /// Number of attempts made.
164 |     pub attempts: u32,
165 | }
166 | 
167 | fn is_zero(n: &usize) -> bool {
168 |     *n == 0
169 | }
170 | 
171 | impl ErrorResponse {
172 |     /// Create a new error response.
173 |     pub fn new(request: &ApiRequest, error: impl Into<String>, attempts: u32) -> Self {
174 |         Self {
175 |             input: request.input.clone(),
176 |             body: request.body.clone(),
177 |             error: error.into(),
178 |             status_code: None,
179 |             line_number: request.line_number,
180 |             attempts,
181 |         }
182 |     }
183 | 
184 |     /// Set the HTTP status code.
185 |     pub fn with_status(mut self, status: u16) -> Self {
186 |         self.status_code = Some(status);
187 |         self
188 |     }
189 | }
190 | 
191 | /// Result of processing a single request.
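///
/// Carries either the successful [`ApiResponse`] or the final
/// [`ErrorResponse`] once retries are exhausted, so callers typically branch
/// once per request:
///
/// ```rust,ignore
/// match result {
///     RequestResult::Success(response) => { /* append to the results file */ }
///     RequestResult::Failure(error) => { /* append to the errors file */ }
/// }
/// ```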
192 | #[derive(Debug)]
193 | pub enum RequestResult {
194 |     /// Request succeeded.
195 |     Success(ApiResponse),
196 |     /// Request failed after all retries.
197 |     Failure(ErrorResponse),
198 | }
199 | 
200 | impl RequestResult {
201 |     /// Check if this is a success.
202 |     pub fn is_success(&self) -> bool {
203 |         matches!(self, Self::Success(_))
204 |     }
205 | }
206 | 
207 | #[cfg(test)]
208 | mod tests {
209 |     use super::*;
210 | 
211 |     #[test]
212 |     fn test_simple_request() {
213 |         let req = ApiRequest::simple("Hello, world!");
214 |         assert_eq!(req.input, Some("Hello, world!".to_string()));
215 |         assert!(req.body.is_none());
216 |     }
217 | 
218 |     #[test]
219 |     fn test_build_llm_body() {
220 |         let req = ApiRequest::simple("Test input");
221 |         let body = req.build_llm_body(Some("gpt-4"));
222 | 
223 |         assert_eq!(body["model"], "gpt-4");
224 |         assert_eq!(body["messages"][0]["content"], "Test input");
225 |     }
226 | 
227 |     #[test]
228 |     fn test_custom_body() {
229 |         let custom = serde_json::json!({"custom": "data"});
230 |         let req = ApiRequest::with_body(custom.clone());
231 |         let body = req.build_llm_body(Some("gpt-4"));
232 | 
233 |         assert_eq!(body, custom);
234 |     }
235 | 
236 |     #[test]
237 |     fn test_error_response() {
238 |         let req = ApiRequest::simple("Test");
239 |         let err = ErrorResponse::new(&req, "Connection refused", 3);
240 | 
241 |         assert_eq!(err.error, "Connection refused");
242 |         assert_eq!(err.attempts, 3);
243 |     }
244 | }
245 | 
--------------------------------------------------------------------------------
/src/tracker.rs:
--------------------------------------------------------------------------------
1 | //! Statistics tracking for request processing.
2 | //!
3 | //! This module provides real-time tracking of request statistics
4 | //! including success/failure counts, latency, and throughput.
5 | 
6 | use parking_lot::Mutex;
7 | use std::collections::VecDeque;
8 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
9 | use std::time::{Duration, Instant};
10 | 
11 | /// Statistics tracker for request processing.
12 | #[derive(Debug)]
13 | pub struct StatsTracker {
14 |     /// Start time of processing.
15 |     start_time: Instant,
16 |     /// Total requests processed.
17 |     total_processed: AtomicU64,
18 |     /// Successful requests.
19 |     success_count: AtomicU64,
20 |     /// Failed requests.
21 |     failure_count: AtomicU64,
22 |     /// Total latency in microseconds.
23 |     total_latency_us: AtomicU64,
24 |     /// Requests in the last second (for RPS calculation).
25 |     recent_requests: Mutex<VecDeque<Instant>>,
26 |     /// Total input lines.
27 |     total_lines: AtomicUsize,
28 | }
29 | 
30 | impl StatsTracker {
31 |     /// Create a new statistics tracker.
32 |     pub fn new() -> Self {
33 |         Self {
34 |             start_time: Instant::now(),
35 |             total_processed: AtomicU64::new(0),
36 |             success_count: AtomicU64::new(0),
37 |             failure_count: AtomicU64::new(0),
38 |             total_latency_us: AtomicU64::new(0),
39 |             recent_requests: Mutex::new(VecDeque::new()),
40 |             total_lines: AtomicUsize::new(0),
41 |         }
42 |     }
43 | 
44 |     /// Set the total number of input lines.
45 |     pub fn set_total_lines(&self, total: usize) {
46 |         self.total_lines.store(total, Ordering::Relaxed);
47 |     }
48 | 
49 |     /// Record a successful request.
50 |     pub fn record_success(&self, latency: Duration) {
51 |         self.total_processed.fetch_add(1, Ordering::Relaxed);
52 |         self.success_count.fetch_add(1, Ordering::Relaxed);
53 |         self.total_latency_us
54 |             .fetch_add(latency.as_micros() as u64, Ordering::Relaxed);
55 |         self.record_recent();
56 |     }
57 | 
58 |     /// Record a failed request.
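    ///
    /// Failures count toward `total_processed` (and the rolling RPS window)
    /// but contribute no latency sample, so `avg_latency_ms` reflects
    /// successful requests only.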
59 | pub fn record_failure(&self) { 60 | self.total_processed.fetch_add(1, Ordering::Relaxed); 61 | self.failure_count.fetch_add(1, Ordering::Relaxed); 62 | self.record_recent(); 63 | } 64 | 65 | /// Record a request for RPS calculation. 66 | fn record_recent(&self) { 67 | let now = Instant::now(); 68 | let mut recent = self.recent_requests.lock(); 69 | recent.push_back(now); 70 | 71 | // Remove entries older than 1 second 72 | let cutoff = now - Duration::from_secs(1); 73 | while let Some(front) = recent.front() { 74 | if *front < cutoff { 75 | recent.pop_front(); 76 | } else { 77 | break; 78 | } 79 | } 80 | } 81 | 82 | /// Get the current requests per second. 83 | pub fn requests_per_second(&self) -> f64 { 84 | let now = Instant::now(); 85 | let mut recent = self.recent_requests.lock(); 86 | 87 | // Remove old entries 88 | let cutoff = now - Duration::from_secs(1); 89 | while let Some(front) = recent.front() { 90 | if *front < cutoff { 91 | recent.pop_front(); 92 | } else { 93 | break; 94 | } 95 | } 96 | 97 | recent.len() as f64 98 | } 99 | 100 | /// Get the current statistics snapshot. 101 | pub fn snapshot(&self) -> StatsSnapshot { 102 | let elapsed = self.start_time.elapsed(); 103 | let total = self.total_processed.load(Ordering::Relaxed); 104 | let success = self.success_count.load(Ordering::Relaxed); 105 | let failure = self.failure_count.load(Ordering::Relaxed); 106 | let total_latency = self.total_latency_us.load(Ordering::Relaxed); 107 | let total_lines = self.total_lines.load(Ordering::Relaxed); 108 | 109 | let avg_latency_ms = if success > 0 { 110 | (total_latency as f64 / success as f64) / 1000.0 111 | } else { 112 | 0.0 113 | }; 114 | 115 | let overall_rps = if elapsed.as_secs_f64() > 0.0 { 116 | total as f64 / elapsed.as_secs_f64() 117 | } else { 118 | 0.0 119 | }; 120 | 121 | let progress = if total_lines > 0 { 122 | (total as f64 / total_lines as f64) * 100.0 123 | } else { 124 | 0.0 125 | }; 126 | 127 | StatsSnapshot { 128 | elapsed, 129 | total_processed: total, 130 | success_count: success, 131 | failure_count: failure, 132 | avg_latency_ms, 133 | current_rps: self.requests_per_second(), 134 | overall_rps, 135 | total_lines, 136 | progress, 137 | } 138 | } 139 | } 140 | 141 | impl Default for StatsTracker { 142 | fn default() -> Self { 143 | Self::new() 144 | } 145 | } 146 | 147 | /// A snapshot of current statistics. 148 | #[derive(Debug, Clone)] 149 | pub struct StatsSnapshot { 150 | /// Elapsed time since start. 151 | pub elapsed: Duration, 152 | /// Total requests processed. 153 | pub total_processed: u64, 154 | /// Successful requests. 155 | pub success_count: u64, 156 | /// Failed requests. 157 | pub failure_count: u64, 158 | /// Average latency in milliseconds. 159 | pub avg_latency_ms: f64, 160 | /// Current requests per second. 161 | pub current_rps: f64, 162 | /// Overall requests per second. 163 | pub overall_rps: f64, 164 | /// Total input lines. 165 | pub total_lines: usize, 166 | /// Progress percentage. 167 | pub progress: f64, 168 | } 169 | 170 | impl StatsSnapshot { 171 | /// Get the success rate as a percentage. 172 | pub fn success_rate(&self) -> f64 { 173 | if self.total_processed > 0 { 174 | (self.success_count as f64 / self.total_processed as f64) * 100.0 175 | } else { 176 | 100.0 177 | } 178 | } 179 | 180 | /// Get the estimated time remaining. 
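    ///
    /// The estimate divides the remaining line count by the overall (not the
    /// instantaneous) throughput: with 250 of 1000 lines done after 25
    /// seconds, `overall_rps` is 10 and the ETA is roughly 75 seconds.
    ///
    /// ```rust,ignore
    /// if let Some(eta) = snapshot.eta() {
    ///     println!("ETA: {:.0}s", eta.as_secs_f64());
    /// }
    /// ```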
181 |     pub fn eta(&self) -> Option<Duration> {
182 |         if self.overall_rps > 0.0 && self.total_lines > 0 {
183 |             let remaining = self.total_lines.saturating_sub(self.total_processed as usize);
184 |             let seconds = remaining as f64 / self.overall_rps;
185 |             Some(Duration::from_secs_f64(seconds))
186 |         } else {
187 |             None
188 |         }
189 |     }
190 | 
191 |     /// Format as a human-readable summary.
192 |     pub fn summary(&self) -> String {
193 |         format!(
194 |             "Processed: {}/{} ({:.1}%) | Success: {} | Failed: {} | Avg Latency: {:.1}ms | RPS: {:.0}",
195 |             self.total_processed,
196 |             self.total_lines,
197 |             self.progress,
198 |             self.success_count,
199 |             self.failure_count,
200 |             self.avg_latency_ms,
201 |             self.current_rps
202 |         )
203 |     }
204 | }
205 | 
206 | #[cfg(test)]
207 | mod tests {
208 |     use super::*;
209 | 
210 |     #[test]
211 |     fn test_stats_tracking() {
212 |         let tracker = StatsTracker::new();
213 |         tracker.set_total_lines(100);
214 | 
215 |         tracker.record_success(Duration::from_millis(50));
216 |         tracker.record_success(Duration::from_millis(100));
217 |         tracker.record_failure();
218 | 
219 |         let snapshot = tracker.snapshot();
220 |         assert_eq!(snapshot.total_processed, 3);
221 |         assert_eq!(snapshot.success_count, 2);
222 |         assert_eq!(snapshot.failure_count, 1);
223 |         assert_eq!(snapshot.avg_latency_ms, 75.0);
224 |     }
225 | 
226 |     #[test]
227 |     fn test_success_rate() {
228 |         let tracker = StatsTracker::new();
229 | 
230 |         for _ in 0..8 {
231 |             tracker.record_success(Duration::from_millis(10));
232 |         }
233 |         for _ in 0..2 {
234 |             tracker.record_failure();
235 |         }
236 | 
237 |         let snapshot = tracker.snapshot();
238 |         assert_eq!(snapshot.success_rate(), 80.0);
239 |     }
240 | }
241 | 
--------------------------------------------------------------------------------
/src/client.rs:
--------------------------------------------------------------------------------
1 | //! HTTP client with retry logic and connection pooling.
2 | //!
3 | //! This module provides a high-performance HTTP client optimized for
4 | //! high-throughput API requests with automatic retries.
5 | 
6 | use crate::config::Config;
7 | use crate::endpoint::Endpoint;
8 | use crate::error::{BlazeError, Result};
9 | use crate::request::{ApiRequest, ApiResponse, ErrorResponse, RequestResult, ResponseMetadata};
10 | use reqwest::{header, Client};
11 | use std::sync::Arc;
12 | use std::time::{Duration, Instant};
13 | use tokio::time::sleep;
14 | use tracing::{debug, trace, warn};
15 | 
16 | /// HTTP client wrapper with retry logic.
17 | #[derive(Debug, Clone)]
18 | pub struct ApiClient {
19 |     client: Client,
20 |     config: Arc<Config>,
21 | }
22 | 
23 | impl ApiClient {
24 |     /// Create a new API client.
25 |     pub fn new(config: Arc<Config>) -> Result<Self> {
26 |         let mut headers = header::HeaderMap::new();
27 |         headers.insert(
28 |             header::CONTENT_TYPE,
29 |             header::HeaderValue::from_static("application/json"),
30 |         );
31 |         headers.insert(
32 |             header::ACCEPT,
33 |             header::HeaderValue::from_static("application/json"),
34 |         );
35 | 
36 |         let client = Client::builder()
37 |             .timeout(config.request.timeout)
38 |             .pool_max_idle_per_host(config.request.workers)
39 |             .pool_idle_timeout(Duration::from_secs(90))
40 |             .tcp_keepalive(Duration::from_secs(60))
41 |             .tcp_nodelay(true)
42 |             .default_headers(headers)
43 |             .gzip(true)
44 |             .brotli(true)
45 |             .build()
46 |             .map_err(BlazeError::HttpRequest)?;
47 | 
48 |         Ok(Self {
49 |             client,
50 |             config,
51 |         })
52 |     }
53 | 
54 |     /// Send a request to an endpoint with retries.
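    ///
    /// Retries up to `retry.max_attempts` times with exponential backoff and
    /// jitter; 400, 401, 403, and 404 responses are treated as non-retryable
    /// and fail immediately. The endpoint's in-flight slot (acquired by the
    /// caller) is released here on both the success and failure paths.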
55 |     pub async fn send_with_retry(
56 |         &self,
57 |         request: &ApiRequest,
58 |         endpoint: Arc<Endpoint>,
59 |     ) -> RequestResult {
60 |         let mut attempts = 0;
61 |         let mut last_error: Option<String> = None;
62 |         let mut last_status: Option<u16> = None;
63 | 
64 |         let body = request.build_llm_body(endpoint.model());
65 |         let start = Instant::now();
66 | 
67 |         while attempts < self.config.retry.max_attempts {
68 |             attempts += 1;
69 | 
70 |             match self.send_once(&body, &endpoint).await {
71 |                 Ok(response) => {
72 |                     let latency = start.elapsed();
73 |                     endpoint.record_success(latency);
74 |                     endpoint.release();
75 | 
76 |                     let api_response = ApiResponse::new(request.input.clone(), response)
77 |                         .with_metadata(ResponseMetadata {
78 |                             endpoint: endpoint.url().to_string(),
79 |                             latency_ms: latency.as_millis() as u64,
80 |                             attempts,
81 |                         });
82 | 
83 |                     return RequestResult::Success(api_response);
84 |                 }
85 |                 Err((error, status)) => {
86 |                     last_error = Some(error.clone());
87 |                     last_status = status;
88 | 
89 |                     // Don't retry on certain status codes
90 |                     if let Some(code) = status {
91 |                         if code == 400 || code == 401 || code == 403 || code == 404 {
92 |                             warn!(
93 |                                 endpoint = endpoint.url(),
94 |                                 status = code,
95 |                                 "Non-retryable error"
96 |                             );
97 |                             break;
98 |                         }
99 |                     }
100 | 
101 |                     if attempts < self.config.retry.max_attempts {
102 |                         let backoff = self.calculate_backoff(attempts);
103 |                         debug!(
104 |                             attempt = attempts,
105 |                             max_attempts = self.config.retry.max_attempts,
106 |                             backoff_ms = backoff.as_millis(),
107 |                             error = %error,
108 |                             "Request failed, retrying"
109 |                         );
110 |                         sleep(backoff).await;
111 |                     }
112 |                 }
113 |             }
114 |         }
115 | 
116 |         endpoint.record_failure();
117 |         endpoint.release();
118 | 
119 |         let error_response =
120 |             ErrorResponse::new(request, last_error.unwrap_or_else(|| "Unknown error".to_string()), attempts);
121 | 
122 |         let error_response = if let Some(status) = last_status {
123 |             error_response.with_status(status)
124 |         } else {
125 |             error_response
126 |         };
127 | 
128 |         RequestResult::Failure(error_response)
129 |     }
130 | 
131 |     /// Send a single request without retries.
132 |     async fn send_once(
133 |         &self,
134 |         body: &serde_json::Value,
135 |         endpoint: &Endpoint,
136 |     ) -> std::result::Result<serde_json::Value, (String, Option<u16>)> {
137 |         let mut request = self.client.post(endpoint.url()).json(body);
138 | 
139 |         // Add authorization header if API key is configured
140 |         if let Some(api_key) = endpoint.api_key() {
141 |             request = request.header(header::AUTHORIZATION, format!("Bearer {}", api_key));
142 |         }
143 | 
144 |         trace!(endpoint = endpoint.url(), "Sending request");
145 | 
146 |         let response = request.send().await.map_err(|e| {
147 |             let error = format!("Request failed: {}", e);
148 |             (error, e.status().map(|s| s.as_u16()))
149 |         })?;
150 | 
151 |         let status = response.status();
152 | 
153 |         if status.is_success() {
154 |             let body: serde_json::Value = response.json().await.map_err(|e| {
155 |                 (format!("Failed to parse response: {}", e), Some(status.as_u16()))
156 |             })?;
157 |             Ok(body)
158 |         } else {
159 |             let error_body = response.text().await.unwrap_or_default();
160 |             let truncated = if error_body.chars().count() > 500 {
161 |                 format!("{}...", error_body.chars().take(500).collect::<String>())
162 |             } else {
163 |                 error_body
164 |             };
165 |             Err((
166 |                 format!("HTTP {}: {}", status.as_u16(), truncated),
167 |                 Some(status.as_u16()),
168 |             ))
169 |         }
170 |     }
171 | 
172 |     /// Calculate backoff duration for a given attempt.
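    ///
    /// The delay is `min(initial_backoff * multiplier^(attempt - 1), max_backoff)`
    /// with a uniform ±25% jitter applied before the cap, so with the defaults
    /// (100ms initial, 2.0 multiplier, 10s cap) the nominal sequence is
    /// 100ms, 200ms, 400ms, and so on.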
173 |     fn calculate_backoff(&self, attempt: u32) -> Duration {
174 |         let base = self.config.retry.initial_backoff.as_millis() as f64;
175 |         let multiplier = self.config.retry.multiplier.powi(attempt as i32 - 1);
176 |         let backoff_ms = base * multiplier;
177 | 
178 |         // Add jitter (±25%)
179 |         let jitter = 1.0 + (rand::random::<f64>() - 0.5) * 0.5;
180 |         let final_ms = (backoff_ms * jitter) as u64;
181 | 
182 |         Duration::from_millis(final_ms.min(self.config.retry.max_backoff.as_millis() as u64))
183 |     }
184 | }
185 | 
186 | #[cfg(test)]
187 | mod tests {
188 |     use super::*;
189 |     use crate::config::{EndpointConfig, RequestConfig, RetryConfig};
190 | 
191 |     fn test_config() -> Config {
192 |         Config {
193 |             endpoints: vec![EndpointConfig {
194 |                 url: "http://localhost:8080".to_string(),
195 |                 weight: 1,
196 |                 api_key: None,
197 |                 model: None,
198 |                 max_concurrent: 100,
199 |             }],
200 |             request: RequestConfig::default(),
201 |             retry: RetryConfig::default(),
202 |         }
203 |     }
204 | 
205 |     #[test]
206 |     fn test_backoff_calculation() {
207 |         let config = Arc::new(test_config());
208 |         let client = ApiClient::new(config).unwrap();
209 | 
210 |         let b1 = client.calculate_backoff(1);
211 |         let b2 = client.calculate_backoff(2);
212 |         let b3 = client.calculate_backoff(3);
213 | 
214 |         // Backoff should generally increase (allowing for jitter)
215 |         assert!(b1 < Duration::from_secs(1));
216 |         assert!(b2 < Duration::from_secs(2));
217 |         assert!(b3 < Duration::from_secs(5));
218 |     }
219 | }
220 | 
--------------------------------------------------------------------------------
/src/endpoint.rs:
--------------------------------------------------------------------------------
1 | //! Endpoint management with weighted load balancing.
2 | //!
3 | //! This module provides a load balancer that distributes requests
4 | //! across multiple endpoints based on configurable weights.
5 | 
6 | use crate::config::EndpointConfig;
7 | use crate::error::{BlazeError, Result};
8 | use parking_lot::RwLock;
9 | use rand::prelude::*;
10 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
11 | use std::sync::Arc;
12 | use std::time::{Duration, Instant};
13 | 
14 | /// A single API endpoint with health tracking.
15 | #[derive(Debug)]
16 | pub struct Endpoint {
17 |     /// Endpoint configuration.
18 |     pub config: EndpointConfig,
19 |     /// Current number of in-flight requests.
20 |     pub in_flight: AtomicUsize,
21 |     /// Total successful requests.
22 |     pub success_count: AtomicU64,
23 |     /// Total failed requests.
24 |     pub failure_count: AtomicU64,
25 |     /// Total latency in microseconds.
26 |     pub total_latency_us: AtomicU64,
27 |     /// Whether the endpoint is healthy.
28 |     healthy: RwLock<bool>,
29 |     /// Last health check time.
30 |     last_health_check: RwLock<Option<Instant>>,
31 |     /// Consecutive failures.
32 |     consecutive_failures: AtomicUsize,
33 | }
34 | 
35 | impl Endpoint {
36 |     /// Create a new endpoint from configuration.
37 |     pub fn new(config: EndpointConfig) -> Self {
38 |         Self {
39 |             config,
40 |             in_flight: AtomicUsize::new(0),
41 |             success_count: AtomicU64::new(0),
42 |             failure_count: AtomicU64::new(0),
43 |             total_latency_us: AtomicU64::new(0),
44 |             healthy: RwLock::new(true),
45 |             last_health_check: RwLock::new(None),
46 |             consecutive_failures: AtomicUsize::new(0),
47 |         }
48 |     }
49 | 
50 |     /// Get the endpoint URL.
51 |     pub fn url(&self) -> &str {
52 |         &self.config.url
53 |     }
54 | 
55 |     /// Get the API key if configured.
56 |     pub fn api_key(&self) -> Option<&str> {
57 |         self.config.api_key.as_deref()
58 |     }
59 | 
60 |     /// Get the model if configured.
61 |     pub fn model(&self) -> Option<&str> {
62 |         self.config.model.as_deref()
63 |     }
64 | 
65 |     /// Check if the endpoint is healthy.
66 |     pub fn is_healthy(&self) -> bool {
67 |         *self.healthy.read()
68 |     }
69 | 
70 |     /// Mark the endpoint as healthy.
71 |     pub fn mark_healthy(&self) {
72 |         *self.healthy.write() = true;
73 |         self.consecutive_failures.store(0, Ordering::Relaxed);
74 |     }
75 | 
76 |     /// Mark the endpoint as unhealthy.
77 |     pub fn mark_unhealthy(&self) {
78 |         *self.healthy.write() = false;
79 |         *self.last_health_check.write() = Some(Instant::now());
80 |     }
81 | 
82 |     /// Check if the endpoint should be retried (after cooldown).
83 |     pub fn should_retry(&self, cooldown: Duration) -> bool {
84 |         if self.is_healthy() {
85 |             return true;
86 |         }
87 | 
88 |         let last_check = self.last_health_check.read();
89 |         match *last_check {
90 |             Some(instant) => instant.elapsed() >= cooldown,
91 |             None => true,
92 |         }
93 |     }
94 | 
95 |     /// Record a successful request.
96 |     pub fn record_success(&self, latency: Duration) {
97 |         self.success_count.fetch_add(1, Ordering::Relaxed);
98 |         self.total_latency_us
99 |             .fetch_add(latency.as_micros() as u64, Ordering::Relaxed);
100 |         self.consecutive_failures.store(0, Ordering::Relaxed);
101 |         self.mark_healthy();
102 |     }
103 | 
104 |     /// Record a failed request.
105 |     pub fn record_failure(&self) {
106 |         self.failure_count.fetch_add(1, Ordering::Relaxed);
107 |         let failures = self.consecutive_failures.fetch_add(1, Ordering::Relaxed) + 1;
108 | 
109 |         // Mark unhealthy after 3 consecutive failures
110 |         if failures >= 3 {
111 |             self.mark_unhealthy();
112 |         }
113 |     }
114 | 
115 |     /// Check if we can send more requests to this endpoint.
116 |     pub fn can_accept(&self) -> bool {
117 |         self.in_flight.load(Ordering::Relaxed) < self.config.max_concurrent as usize
118 |     }
119 | 
120 |     /// Acquire a slot for sending a request.
121 |     pub fn acquire(&self) -> bool {
122 |         let current = self.in_flight.load(Ordering::Relaxed);
123 |         if current >= self.config.max_concurrent as usize {
124 |             return false;
125 |         }
126 |         self.in_flight.fetch_add(1, Ordering::Relaxed);
127 |         true
128 |     }
129 | 
130 |     /// Release a slot after completing a request.
131 |     pub fn release(&self) {
132 |         self.in_flight.fetch_sub(1, Ordering::Relaxed);
133 |     }
134 | 
135 |     /// Get average latency in milliseconds.
136 |     pub fn avg_latency_ms(&self) -> f64 {
137 |         let total = self.total_latency_us.load(Ordering::Relaxed);
138 |         let count = self.success_count.load(Ordering::Relaxed);
139 |         if count == 0 {
140 |             0.0
141 |         } else {
142 |             (total as f64 / count as f64) / 1000.0
143 |         }
144 |     }
145 | }
146 | 
147 | /// Weighted load balancer for distributing requests across endpoints.
148 | #[derive(Debug)]
149 | pub struct LoadBalancer {
150 |     endpoints: Vec<Arc<Endpoint>>,
151 |     #[allow(dead_code)]
152 |     total_weight: u32,
153 | }
154 | 
155 | impl LoadBalancer {
156 |     /// Create a new load balancer from endpoint configurations.
157 |     pub fn new(configs: Vec<EndpointConfig>) -> Result<Self> {
158 |         if configs.is_empty() {
159 |             return Err(BlazeError::NoEndpoints);
160 |         }
161 | 
162 |         let endpoints: Vec<Arc<Endpoint>> = configs
163 |             .into_iter()
164 |             .map(|c| Arc::new(Endpoint::new(c)))
165 |             .collect();
166 | 
167 |         let total_weight = endpoints.iter().map(|e| e.config.weight).sum();
168 | 
169 |         Ok(Self {
170 |             endpoints,
171 |             total_weight,
172 |         })
173 |     }
174 | 
175 |     /// Select an endpoint using weighted random selection.
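    ///
    /// Healthy endpoints with spare capacity are preferred; if none exist,
    /// endpoints whose 30-second unhealthy cooldown has elapsed are tried.
    /// Within each group the pick is weight-proportional, so a `weight: 2`
    /// endpoint receives roughly twice the traffic of a `weight: 1` peer.
    ///
    /// ```rust
    /// use blaze_api::{EndpointConfig, LoadBalancer};
    ///
    /// let lb = LoadBalancer::new(vec![EndpointConfig {
    ///     url: "http://a.test".to_string(),
    ///     weight: 2,
    ///     api_key: None,
    ///     model: None,
    ///     max_concurrent: 100,
    /// }])?;
    /// let endpoint = lb.select()?;
    /// assert_eq!(endpoint.url(), "http://a.test");
    /// # Ok::<(), blaze_api::BlazeError>(())
    /// ```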
176 |     pub fn select(&self) -> Result<Arc<Endpoint>> {
177 |         self.select_with_cooldown(Duration::from_secs(30))
178 |     }
179 | 
180 |     /// Select an endpoint with a custom cooldown for unhealthy endpoints.
181 |     pub fn select_with_cooldown(&self, cooldown: Duration) -> Result<Arc<Endpoint>> {
182 |         // First, try to find a healthy endpoint with capacity
183 |         let available: Vec<_> = self
184 |             .endpoints
185 |             .iter()
186 |             .filter(|e| e.is_healthy() && e.can_accept())
187 |             .collect();
188 | 
189 |         if !available.is_empty() {
190 |             return Ok(self.weighted_select(&available));
191 |         }
192 | 
193 |         // If no healthy endpoints, try endpoints past their cooldown
194 |         let recovering: Vec<_> = self
195 |             .endpoints
196 |             .iter()
197 |             .filter(|e| e.should_retry(cooldown) && e.can_accept())
198 |             .collect();
199 | 
200 |         if !recovering.is_empty() {
201 |             return Ok(self.weighted_select(&recovering));
202 |         }
203 | 
204 |         Err(BlazeError::AllEndpointsUnhealthy)
205 |     }
206 | 
207 |     /// Perform weighted random selection.
208 |     fn weighted_select(&self, endpoints: &[&Arc<Endpoint>]) -> Arc<Endpoint> {
209 |         let total: u32 = endpoints.iter().map(|e| e.config.weight).sum();
210 |         let mut rng = rand::rng();
211 |         let mut pick = rng.random_range(0..total);
212 | 
213 |         for endpoint in endpoints {
214 |             if pick < endpoint.config.weight {
215 |                 return Arc::clone(endpoint);
216 |             }
217 |             pick -= endpoint.config.weight;
218 |         }
219 | 
220 |         // Fallback to first endpoint (shouldn't happen)
221 |         Arc::clone(endpoints[0])
222 |     }
223 | 
224 |     /// Get all endpoints.
225 |     pub fn endpoints(&self) -> &[Arc<Endpoint>] {
226 |         &self.endpoints
227 |     }
228 | 
229 |     /// Get the number of healthy endpoints.
230 |     pub fn healthy_count(&self) -> usize {
231 |         self.endpoints.iter().filter(|e| e.is_healthy()).count()
232 |     }
233 | 
234 |     /// Get the total number of in-flight requests.
235 | pub fn total_in_flight(&self) -> usize { 236 | self.endpoints 237 | .iter() 238 | .map(|e| e.in_flight.load(Ordering::Relaxed)) 239 | .sum() 240 | } 241 | } 242 | 243 | #[cfg(test)] 244 | mod tests { 245 | use super::*; 246 | 247 | fn test_endpoint() -> EndpointConfig { 248 | EndpointConfig { 249 | url: "http://localhost:8080".to_string(), 250 | weight: 1, 251 | api_key: None, 252 | model: None, 253 | max_concurrent: 100, 254 | } 255 | } 256 | 257 | #[test] 258 | fn test_endpoint_health() { 259 | let endpoint = Endpoint::new(test_endpoint()); 260 | assert!(endpoint.is_healthy()); 261 | 262 | endpoint.mark_unhealthy(); 263 | assert!(!endpoint.is_healthy()); 264 | 265 | endpoint.mark_healthy(); 266 | assert!(endpoint.is_healthy()); 267 | } 268 | 269 | #[test] 270 | fn test_endpoint_stats() { 271 | let endpoint = Endpoint::new(test_endpoint()); 272 | 273 | endpoint.record_success(Duration::from_millis(100)); 274 | endpoint.record_success(Duration::from_millis(200)); 275 | 276 | assert_eq!(endpoint.success_count.load(Ordering::Relaxed), 2); 277 | assert_eq!(endpoint.avg_latency_ms(), 150.0); 278 | } 279 | 280 | #[test] 281 | fn test_load_balancer() { 282 | let configs = vec![ 283 | EndpointConfig { 284 | url: "http://a.test".to_string(), 285 | weight: 1, 286 | api_key: None, 287 | model: None, 288 | max_concurrent: 100, 289 | }, 290 | EndpointConfig { 291 | url: "http://b.test".to_string(), 292 | weight: 2, 293 | api_key: None, 294 | model: None, 295 | max_concurrent: 100, 296 | }, 297 | ]; 298 | 299 | let lb = LoadBalancer::new(configs).unwrap(); 300 | assert_eq!(lb.endpoints().len(), 2); 301 | assert_eq!(lb.healthy_count(), 2); 302 | } 303 | } 304 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | //! Configuration management for Blaze API. 2 | //! 3 | //! Supports configuration via CLI arguments, environment variables, 4 | //! and configuration files with sensible defaults. 5 | 6 | use crate::error::{BlazeError, Result}; 7 | use clap::Parser; 8 | use serde::{Deserialize, Serialize}; 9 | use std::num::NonZeroU32; 10 | use std::path::PathBuf; 11 | use std::time::Duration; 12 | 13 | /// CLI arguments for the Blaze API client. 
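///
/// All flags except `--dry-run` can also be supplied through the environment
/// variable named on their field (the `BLAZE_*` family), with explicit CLI
/// values taking precedence, so the same invocation works interactively and
/// in CI.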
14 | #[derive(Parser, Debug, Clone)]
15 | #[command(
16 |     name = "blaze",
17 |     author = "Yiğit Konur",
18 |     version,
19 |     about = "🔥 High-performance async API client with load balancing",
20 |     long_about = "Blaze API is a blazing-fast API client designed for batch LLM processing.\n\n\
21 |                   It supports weighted load balancing, automatic retries with exponential backoff,\n\
22 |                   and can handle 10,000+ requests per second on modest hardware.",
23 |     after_help = "EXAMPLES:\n  \
24 |                   blaze --input requests.jsonl --output results.jsonl\n  \
25 |                   blaze -i data.jsonl -o out.jsonl --rate 5000 --workers 100\n  \
26 |                   blaze --config endpoints.json --input batch.jsonl"
27 | )]
28 | pub struct Args {
29 |     /// Path to the JSONL file containing requests
30 |     #[arg(short, long, env = "BLAZE_INPUT")]
31 |     pub input: PathBuf,
32 | 
33 |     /// Path to save successful responses (optional)
34 |     #[arg(short, long, env = "BLAZE_OUTPUT")]
35 |     pub output: Option<PathBuf>,
36 | 
37 |     /// Path to save error responses
38 |     #[arg(short, long, default_value = "errors.jsonl", env = "BLAZE_ERRORS")]
39 |     pub errors: PathBuf,
40 | 
41 |     /// Maximum requests per second
42 |     #[arg(short, long, default_value = "1000", env = "BLAZE_RATE")]
43 |     pub rate: u32,
44 | 
45 |     /// Maximum retry attempts per request
46 |     #[arg(short = 'a', long, default_value = "3", env = "BLAZE_MAX_ATTEMPTS")]
47 |     pub max_attempts: u32,
48 | 
49 |     /// Number of concurrent workers
50 |     #[arg(short, long, default_value = "50", env = "BLAZE_WORKERS")]
51 |     pub workers: usize,
52 | 
53 |     /// Request timeout in seconds
54 |     #[arg(short, long, default_value = "30", env = "BLAZE_TIMEOUT")]
55 |     pub timeout: u64,
56 | 
57 |     /// Path to endpoint configuration file (JSON)
58 |     #[arg(short, long, env = "BLAZE_CONFIG")]
59 |     pub config: Option<PathBuf>,
60 | 
61 |     /// Enable verbose logging
62 |     #[arg(short, long, env = "BLAZE_VERBOSE")]
63 |     pub verbose: bool,
64 | 
65 |     /// Output logs as JSON
66 |     #[arg(long, env = "BLAZE_JSON_LOGS")]
67 |     pub json_logs: bool,
68 | 
69 |     /// Disable progress bar
70 |     #[arg(long, env = "BLAZE_NO_PROGRESS")]
71 |     pub no_progress: bool,
72 | 
73 |     /// Dry run - validate config without sending requests
74 |     #[arg(long)]
75 |     pub dry_run: bool,
76 | }
77 | 
78 | impl Args {
79 |     /// Parse CLI arguments.
80 |     pub fn parse_args() -> Self {
81 |         Self::parse()
82 |     }
83 | }
84 | 
85 | /// Configuration for a single API endpoint.
86 | #[derive(Debug, Clone, Serialize, Deserialize)]
87 | pub struct EndpointConfig {
88 |     /// The endpoint URL.
89 |     pub url: String,
90 | 
91 |     /// Weight for load balancing (higher = more traffic).
92 |     #[serde(default = "default_weight")]
93 |     pub weight: u32,
94 | 
95 |     /// API key for authentication.
96 |     #[serde(default)]
97 |     pub api_key: Option<String>,
98 | 
99 |     /// Model identifier (for LLM endpoints).
100 |     #[serde(default)]
101 |     pub model: Option<String>,
102 | 
103 |     /// Maximum concurrent requests to this endpoint.
104 |     #[serde(default = "default_max_concurrent")]
105 |     pub max_concurrent: u32,
106 | }
107 | 
108 | fn default_weight() -> u32 {
109 |     1
110 | }
111 | 
112 | fn default_max_concurrent() -> u32 {
113 |     100
114 | }
115 | 
116 | /// Full application configuration.
117 | #[derive(Debug, Clone, Serialize, Deserialize)]
118 | pub struct Config {
119 |     /// API endpoints for load balancing.
120 |     pub endpoints: Vec<EndpointConfig>,
121 | 
122 |     /// Request settings.
123 |     #[serde(default)]
124 |     pub request: RequestConfig,
125 | 
126 |     /// Retry settings.
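    /// Durations in the JSON config (`request.timeout` and the backoffs here)
    /// accept `"100ms"`, `"10s"`, or a bare-seconds string such as `"30"`.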
127 |     #[serde(default)]
128 |     pub retry: RetryConfig,
129 | }
130 | 
131 | /// Request-specific configuration.
132 | #[derive(Debug, Clone, Serialize, Deserialize)]
133 | pub struct RequestConfig {
134 |     /// Request timeout.
135 |     #[serde(with = "humantime_serde", default = "default_timeout")]
136 |     pub timeout: Duration,
137 | 
138 |     /// Maximum requests per second.
139 |     #[serde(default = "default_rate")]
140 |     pub rate_limit: u32,
141 | 
142 |     /// Number of concurrent workers.
143 |     #[serde(default = "default_workers")]
144 |     pub workers: usize,
145 | }
146 | 
147 | impl Default for RequestConfig {
148 |     fn default() -> Self {
149 |         Self {
150 |             timeout: default_timeout(),
151 |             rate_limit: default_rate(),
152 |             workers: default_workers(),
153 |         }
154 |     }
155 | }
156 | 
157 | fn default_timeout() -> Duration {
158 |     Duration::from_secs(30)
159 | }
160 | 
161 | fn default_rate() -> u32 {
162 |     1000
163 | }
164 | 
165 | fn default_workers() -> usize {
166 |     50
167 | }
168 | 
169 | /// Retry configuration.
170 | #[derive(Debug, Clone, Serialize, Deserialize)]
171 | pub struct RetryConfig {
172 |     /// Maximum number of retry attempts.
173 |     #[serde(default = "default_max_attempts")]
174 |     pub max_attempts: u32,
175 | 
176 |     /// Initial backoff duration.
177 |     #[serde(with = "humantime_serde", default = "default_initial_backoff")]
178 |     pub initial_backoff: Duration,
179 | 
180 |     /// Maximum backoff duration.
181 |     #[serde(with = "humantime_serde", default = "default_max_backoff")]
182 |     pub max_backoff: Duration,
183 | 
184 |     /// Backoff multiplier.
185 |     #[serde(default = "default_multiplier")]
186 |     pub multiplier: f64,
187 | }
188 | 
189 | impl Default for RetryConfig {
190 |     fn default() -> Self {
191 |         Self {
192 |             max_attempts: default_max_attempts(),
193 |             initial_backoff: default_initial_backoff(),
194 |             max_backoff: default_max_backoff(),
195 |             multiplier: default_multiplier(),
196 |         }
197 |     }
198 | }
199 | 
200 | fn default_max_attempts() -> u32 {
201 |     3
202 | }
203 | 
204 | fn default_initial_backoff() -> Duration {
205 |     Duration::from_millis(100)
206 | }
207 | 
208 | fn default_max_backoff() -> Duration {
209 |     Duration::from_secs(10)
210 | }
211 | 
212 | fn default_multiplier() -> f64 {
213 |     2.0
214 | }
215 | 
216 | impl Config {
217 |     /// Load configuration from a file.
218 |     pub fn from_file(path: &PathBuf) -> Result<Self> {
219 |         let content = std::fs::read_to_string(path).map_err(|e| BlazeError::InputFileRead {
220 |             path: path.clone(),
221 |             source: e,
222 |         })?;
223 | 
224 |         serde_json::from_str(&content).map_err(|e| BlazeError::JsonParse { line: 0, source: e })
225 |     }
226 | 
227 |     /// Create configuration from CLI arguments.
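    ///
    /// When `--config` is given, the file supplies the endpoints and the CLI
    /// values for rate, workers, timeout, and max attempts override it.
    /// Without a file, a single endpoint is built from `BLAZE_ENDPOINT_URL`
    /// (defaulting to `http://localhost:8080/v1/completions`) plus optional
    /// `BLAZE_API_KEY` and `BLAZE_MODEL`. The result is validated either way.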
228 |     pub fn from_args(args: &Args) -> Result<Self> {
229 |         let config = if let Some(config_path) = &args.config {
230 |             let mut config = Self::from_file(config_path)?;
231 |             // Override with CLI args
232 |             config.request.rate_limit = args.rate;
233 |             config.request.workers = args.workers;
234 |             config.request.timeout = Duration::from_secs(args.timeout);
235 |             config.retry.max_attempts = args.max_attempts;
236 |             config
237 |         } else {
238 |             // Use default endpoint from environment or error
239 |             let endpoint = EndpointConfig {
240 |                 url: std::env::var("BLAZE_ENDPOINT_URL")
241 |                     .unwrap_or_else(|_| "http://localhost:8080/v1/completions".to_string()),
242 |                 weight: 1,
243 |                 api_key: std::env::var("BLAZE_API_KEY").ok(),
244 |                 model: std::env::var("BLAZE_MODEL").ok(),
245 |                 max_concurrent: 100,
246 |             };
247 | 
248 |             Self {
249 |                 endpoints: vec![endpoint],
250 |                 request: RequestConfig {
251 |                     timeout: Duration::from_secs(args.timeout),
252 |                     rate_limit: args.rate,
253 |                     workers: args.workers,
254 |                 },
255 |                 retry: RetryConfig {
256 |                     max_attempts: args.max_attempts,
257 |                     ..Default::default()
258 |                 },
259 |             }
260 |         };
261 | 
262 |         config.validate()?;
263 |         Ok(config)
264 |     }
265 | 
266 |     /// Validate the configuration.
267 |     pub fn validate(&self) -> Result<()> {
268 |         if self.endpoints.is_empty() {
269 |             return Err(BlazeError::NoEndpoints);
270 |         }
271 | 
272 |         for endpoint in &self.endpoints {
273 |             if endpoint.url.is_empty() {
274 |                 return Err(BlazeError::InvalidConfig(
275 |                     "endpoint URL cannot be empty".to_string(),
276 |                 ));
277 |             }
278 |             if endpoint.weight == 0 {
279 |                 return Err(BlazeError::InvalidConfig(
280 |                     "endpoint weight must be greater than 0".to_string(),
281 |                 ));
282 |             }
283 |         }
284 | 
285 |         if self.request.workers == 0 {
286 |             return Err(BlazeError::InvalidConfig(
287 |                 "workers must be greater than 0".to_string(),
288 |             ));
289 |         }
290 | 
291 |         Ok(())
292 |     }
293 | 
294 |     /// Get the rate limit as a NonZeroU32.
295 |     pub fn rate_limit_nonzero(&self) -> NonZeroU32 {
296 |         NonZeroU32::new(self.request.rate_limit).unwrap_or(NonZeroU32::MIN)
297 |     }
298 | }
299 | 
300 | /// Custom serde module for humantime Duration parsing.
301 | mod humantime_serde {
302 |     use serde::{Deserialize, Deserializer, Serializer};
303 |     use std::time::Duration;
304 | 
305 |     pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
306 |     where
307 |         S: Serializer,
308 |     {
309 |         serializer.serialize_str(&format!("{}s", duration.as_secs()))
310 |     }
311 | 
312 |     pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
313 |     where
314 |         D: Deserializer<'de>,
315 |     {
316 |         let s = String::deserialize(deserializer)?;
317 |         // Check "ms" before 's': a value like "100ms" also ends in 's'.
318 |         if let Some(ms) = s.strip_suffix("ms") {
319 |             ms.parse::<u64>()
320 |                 .map(Duration::from_millis)
321 |                 .map_err(serde::de::Error::custom)
322 |         } else if let Some(secs) = s.strip_suffix('s') {
323 |             secs.parse::<u64>()
324 |                 .map(Duration::from_secs)
325 |                 .map_err(serde::de::Error::custom)
326 |         } else {
327 |             s.parse::<u64>()
328 |                 .map(Duration::from_secs)
329 |                 .map_err(serde::de::Error::custom)
330 |         }
331 |     }
332 | }
333 | 
--------------------------------------------------------------------------------
/src/processor.rs:
--------------------------------------------------------------------------------
1 | //! Main processing orchestration for batch API requests.
2 | //!
3 | //! This module coordinates reading requests, distributing them across
4 | //! endpoints, and writing results with rate limiting and concurrency control.
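//!
//! The pipeline reads every JSONL line up front (so progress totals are
//! known), then drives the requests through a `governor` rate limiter and
//! `buffer_unordered` with `workers` concurrent futures, appending each
//! result to the output or error file as it completes.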

use crate::client::ApiClient;
use crate::config::Config;
use crate::endpoint::LoadBalancer;
use crate::error::{BlazeError, Result};
use crate::request::{ApiRequest, RequestResult};
use crate::tracker::StatsTracker;
use futures::stream::{self, StreamExt};
use governor::{Quota, RateLimiter};
use indicatif::{ProgressBar, ProgressStyle};
use std::num::NonZeroU32;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::fs::File;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader, BufWriter};
// Async-aware mutex: the writer locks below are held across `.await` points,
// which a blocking mutex must never be.
use tokio::sync::Mutex;
use tracing::{info, warn};

/// Processor for batch API requests.
pub struct Processor {
    config: Arc<Config>,
    client: ApiClient,
    load_balancer: Arc<LoadBalancer>,
    stats: Arc<StatsTracker>,
}

impl Processor {
    /// Create a new processor.
    pub fn new(config: Config) -> Result<Self> {
        let config = Arc::new(config);
        let client = ApiClient::new(Arc::clone(&config))?;
        let load_balancer = Arc::new(LoadBalancer::new(config.endpoints.clone())?);
        let stats = Arc::new(StatsTracker::new());

        Ok(Self {
            config,
            client,
            load_balancer,
            stats,
        })
    }

    /// Process requests from a file.
    pub async fn process_file(
        &self,
        input_path: PathBuf,
        output_path: Option<PathBuf>,
        error_path: PathBuf,
        show_progress: bool,
    ) -> Result<ProcessingResult> {
        // Read all requests first to get total count
        let requests = self.read_requests(&input_path).await?;
        let total = requests.len();

        info!(total_requests = total, "Loaded requests from file");
        self.stats.set_total_lines(total);

        // Setup output files
        let output_writer = if let Some(path) = &output_path {
            let file = File::create(path).await.map_err(|e| BlazeError::OutputFileWrite {
                path: path.clone(),
                source: e,
            })?;
            Some(Arc::new(Mutex::new(BufWriter::new(file))))
        } else {
            None
        };

        let error_file = File::create(&error_path).await.map_err(|e| BlazeError::OutputFileWrite {
            path: error_path.clone(),
            source: e,
        })?;
        let error_writer = Arc::new(Mutex::new(BufWriter::new(error_file)));

        // Setup progress bar
        let progress = if show_progress {
            let pb = ProgressBar::new(total as u64);
            pb.set_style(
                ProgressStyle::default_bar()
                    .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({percent}%) | {msg}")
                    .unwrap()
                    .progress_chars("█▓▒░"),
            );
            pb.enable_steady_tick(Duration::from_millis(100));
            Some(pb)
        } else {
            None
        };

        // Setup rate limiter
        let rate_limiter = RateLimiter::direct(Quota::per_second(
            NonZeroU32::new(self.config.request.rate_limit).unwrap_or(NonZeroU32::MIN),
        ));

        // Process requests concurrently
        let workers = self.config.request.workers;
        let results = stream::iter(requests)
            .map(|request| {
                let client = self.client.clone();
                let lb = Arc::clone(&self.load_balancer);
                let stats = Arc::clone(&self.stats);
                let rate_limiter = &rate_limiter;
                let output = output_writer.clone();
                let errors = Arc::clone(&error_writer);
                let progress = progress.clone();

                async move {
                    // Wait for rate limiter
                    rate_limiter.until_ready().await;

                    // Select an endpoint
                    let endpoint = match lb.select() {
                        Ok(ep) => ep,
                        Err(e) => {
                            warn!("Failed to select endpoint: {}", e);
                            return Err(e);
                        }
                    };

                    // Acquire a slot (best effort: back off briefly if the
                    // endpoint is at capacity, then proceed regardless).
                    if !endpoint.acquire() {
                        tokio::time::sleep(Duration::from_millis(10)).await;
                        if !endpoint.acquire() {
                            warn!("Endpoint at capacity, waiting...");
                            tokio::time::sleep(Duration::from_millis(100)).await;
                            endpoint.acquire();
                        }
                    }

                    // Send request
                    let result = client.send_with_retry(&request, endpoint).await;

                    // Record stats and write output
                    match &result {
                        RequestResult::Success(response) => {
                            let latency = response
                                .metadata
                                .as_ref()
                                .map(|m| Duration::from_millis(m.latency_ms))
                                .unwrap_or_default();
                            stats.record_success(latency);

                            if let Some(writer) = &output {
                                let line = serde_json::to_string(&response).unwrap_or_default();
                                // The async lock lets us await the write without
                                // blocking other workers on this thread.
                                let mut w = writer.lock().await;
                                let _ = w.write_all(line.as_bytes()).await;
                                let _ = w.write_all(b"\n").await;
                            }
                        }
                        RequestResult::Failure(error) => {
                            stats.record_failure();
                            let line = serde_json::to_string(&error).unwrap_or_default();
                            let mut w = errors.lock().await;
                            let _ = w.write_all(line.as_bytes()).await;
                            let _ = w.write_all(b"\n").await;
                        }
                    }

                    // Update progress bar
                    if let Some(pb) = &progress {
                        let snapshot = stats.snapshot();
                        pb.set_message(format!(
                            "RPS: {:.0} | Success: {} | Failed: {} | Latency: {:.0}ms",
                            snapshot.current_rps,
                            snapshot.success_count,
                            snapshot.failure_count,
                            snapshot.avg_latency_ms
                        ));
                        pb.inc(1);
                    }

                    Ok(result)
                }
            })
            .buffer_unordered(workers)
            .collect::<Vec<_>>()
            .await;

        // Flush writers
        if let Some(writer) = &output_writer {
            let mut w = writer.lock().await;
            w.flush().await.ok();
        }
        {
            let mut w = error_writer.lock().await;
            w.flush().await.ok();
        }

        // Finish progress bar
        if let Some(pb) = &progress {
            pb.finish_with_message("Complete!");
        }

        // Build result
        let snapshot = self.stats.snapshot();
        let success_count = results
            .iter()
            .filter(|r| r.as_ref().map(|r| r.is_success()).unwrap_or(false))
            .count();
        let failure_count = results.len() - success_count;

        Ok(ProcessingResult {
            total_processed: results.len(),
            success_count,
            failure_count,
            elapsed: snapshot.elapsed,
            avg_latency_ms: snapshot.avg_latency_ms,
            overall_rps: snapshot.overall_rps,
        })
    }

    /// Read requests from a JSONL file.
    async fn read_requests(&self, path: &PathBuf) -> Result<Vec<ApiRequest>> {
        let file = File::open(path).await.map_err(|e| BlazeError::InputFileRead {
            path: path.clone(),
            source: e,
        })?;

        let reader = BufReader::new(file);
        let mut lines = reader.lines();
        let mut requests = Vec::new();
        let mut line_number = 0;

        while let Some(line) = lines.next_line().await.map_err(|e| BlazeError::InputFileRead {
            path: path.clone(),
            source: e,
        })?
        {
            line_number += 1;

            // Skip empty lines
            let trimmed = line.trim();
            if trimmed.is_empty() {
                continue;
            }

            let mut request: ApiRequest =
                serde_json::from_str(trimmed).map_err(|e| BlazeError::JsonParse {
                    line: line_number,
                    source: e,
                })?;

            request.line_number = line_number;
            requests.push(request);
        }

        Ok(requests)
    }

    /// Get the current stats snapshot.
    pub fn stats(&self) -> crate::tracker::StatsSnapshot {
        self.stats.snapshot()
    }

    /// Get the load balancer.
    pub fn load_balancer(&self) -> &LoadBalancer {
        &self.load_balancer
    }
}

/// Result of processing a batch of requests.
#[derive(Debug)]
pub struct ProcessingResult {
    /// Total requests processed.
    pub total_processed: usize,
    /// Successful requests.
    pub success_count: usize,
    /// Failed requests.
    pub failure_count: usize,
    /// Total elapsed time.
    pub elapsed: Duration,
    /// Average latency in milliseconds.
    pub avg_latency_ms: f64,
    /// Overall requests per second.
    pub overall_rps: f64,
}

impl ProcessingResult {
    /// Get the success rate as a percentage.
    pub fn success_rate(&self) -> f64 {
        if self.total_processed > 0 {
            (self.success_count as f64 / self.total_processed as f64) * 100.0
        } else {
            100.0
        }
    }

    /// Print a summary of the results.
    pub fn print_summary(&self) {
        println!("\n{}", "═".repeat(60));
        println!("  PROCESSING COMPLETE");
        println!("{}", "═".repeat(60));
        println!("  Total Processed:  {}", self.total_processed);
        println!(
            "  Successful:       {} ({:.1}%)",
            self.success_count,
            self.success_rate()
        );
        println!("  Failed:           {}", self.failure_count);
        println!("  Elapsed Time:     {:.2}s", self.elapsed.as_secs_f64());
        println!("  Avg Latency:      {:.1}ms", self.avg_latency_ms);
        println!("  Throughput:       {:.0} req/sec", self.overall_rps);
        println!("{}", "═".repeat(60));
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 🔥 Blaze API 🔥

**Stop waiting for API responses. Start blazing through them.**

The ultimate batch API client for your LLM workloads. It load-balances across endpoints, retries intelligently, and processes 10,000+ requests per second on a laptop.

<!-- badges: crates.io · rust · MIT license · platform · zero config · 10k rps -->

### 🧭 Quick Navigation

[**⚡ Get Started**](#-get-started-in-60-seconds) •
[**✨ Key Features**](#-feature-breakdown-the-secret-sauce) •
[**🎮 Usage & Examples**](#-usage-fire-and-forget) •
[**⚙️ Configuration**](#%EF%B8%8F-configuration) •
[**🆚 Why Blaze**](#-why-blaze-slaps-other-methods)

---

**Blaze API** is the batch processor your LLM workloads deserve. Stop writing brittle Python scripts that crash at 100 req/sec. This tool acts like a fleet of pro API consumers, intelligently distributing requests across endpoints, handling failures gracefully, and maxing out your API capacity without breaking a sweat.
| ⚡ **Blazing Fast** | 🎯 **Smart Load Balancing** | 🔄 **Auto Retry** | 📊 **Real-time Stats** |
| :---: | :---: | :---: | :---: |
| 10K+ req/sec on 8 cores | Weighted distribution across endpoints | Exponential backoff with jitter | Progress, RPS, latency tracking |
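The load-balancing card is easy to picture concretely: each endpoint receives traffic in proportion to its `weight`. A minimal, dependency-free sketch of weight-proportional selection (illustrative only — the crate's actual `LoadBalancer` also tracks per-endpoint health and concurrency):

```rust
/// Pick an index with frequency proportional to its weight.
fn pick_weighted(weights: &[u32], tick: u32) -> usize {
    let total: u32 = weights.iter().sum();
    let mut roll = tick % total; // deterministic stand-in for a random roll
    for (i, &w) in weights.iter().enumerate() {
        if roll < w {
            return i;
        }
        roll -= w;
    }
    unreachable!("total weight is positive")
}

fn main() {
    // Two endpoints weighted 2:1, as in the config examples below:
    // out of every 3 picks, endpoint 0 gets 2 and endpoint 1 gets 1.
    let picks: Vec<usize> = (0..6).map(|t| pick_weighted(&[2, 1], t)).collect();
    println!("{picks:?}"); // [0, 0, 1, 0, 0, 1]
}
```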
How it slaps:

- **You:** `blaze -i requests.jsonl -o results.jsonl`
- **Blaze:** Load balances, retries failures, tracks progress, writes results.
- **You:** Go grab a coffee while 100K requests complete. ☕
- **Result:** Perfectly formatted JSONL with every response. Zero babysitting.

---

## 💥 Why Blaze Slaps Other Methods

Manually scripting API requests is a vibe-killer. Blaze makes other methods look ancient.

| ❌ The Old Way (Pain) | ✅ The Blaze Way (Glory) |
| :--- | :--- |
| 1. Write Python script with asyncio.<br>2. Hit GIL limits at 500 req/sec.<br>3. Script crashes, lose progress.<br>4. Add retry logic, still flaky.<br>5. Manually restart, pray it works. | 1. `blaze -i data.jsonl -o out.jsonl`<br>2. Watch the progress bar fly.<br>3. Failures auto-retry with backoff.<br>4. Results stream to disk instantly.<br>5. Go grab a coffee. ☕ |

We're not just sending requests. We're building a **high-throughput, fault-tolerant pipeline** with weighted load balancing, connection pooling, and intelligent retry logic that actually respects your API provider's limits.
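That connection-pooling point is where much of the speed comes from: reusing warm connections skips a TCP + TLS handshake per request. A rough sketch of a pooled `reqwest` client in that spirit (illustrative settings, not Blaze's exact client setup):

```rust
use std::time::Duration;

fn build_client() -> reqwest::Result<reqwest::Client> {
    reqwest::Client::builder()
        // Keep warm connections around so requests skip the handshake.
        .pool_max_idle_per_host(100)
        .pool_idle_timeout(Duration::from_secs(90))
        .timeout(Duration::from_secs(30))
        .build()
}
```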
---

## 🚀 Get Started in 60 Seconds

| Platform | Method | Command |
|:--------:|:------:|:--------|
| 🦀 **All** | Cargo | `cargo install blaze-api` |
| 🍎 **macOS** | Homebrew | `brew install yigitkonur/tap/blaze` |
| 🐧 **Linux** | Binary | See [releases](https://github.com/yigitkonur/blaze-api/releases) |
| 🪟 **Windows** | Binary | See [releases](https://github.com/yigitkonur/blaze-api/releases) |
### 🦀 From Source (Recommended for Development)

```bash
# Clone and build
git clone https://github.com/yigitkonur/blaze-api.git
cd blaze-api
cargo build --release

# Binary is at ./target/release/blaze
```

### 📦 From crates.io

```bash
cargo install blaze-api
```

> **✨ Zero Config:** After installation, `blaze` is ready to go. Just point it at your JSONL file!

---

## 🎮 Usage: Fire and Forget

The workflow is dead simple.

### Basic Usage

```bash
# Process requests and save results
blaze --input requests.jsonl --output results.jsonl

# Short flags work too
blaze -i requests.jsonl -o results.jsonl

# High-throughput mode (10K req/sec)
blaze -i data.jsonl -o out.jsonl --rate 10000 --workers 200
```

### With Custom Endpoints

```bash
# Use a config file for multiple endpoints
blaze -i requests.jsonl -o results.jsonl --config endpoints.json

# Or set via environment
export BLAZE_ENDPOINT_URL="https://api.openai.com/v1/completions"
export BLAZE_API_KEY="sk-..."
export BLAZE_MODEL="gpt-4"
blaze -i requests.jsonl -o results.jsonl
```

### Input Format

Your `requests.jsonl` file should have one JSON object per line:

```jsonl
{"input": "What is the capital of France?"}
{"input": "Explain quantum computing in simple terms."}
{"input": "Write a haiku about Rust programming."}
```

Or with custom request bodies:

```jsonl
{"body": {"messages": [{"role": "user", "content": "Hello!"}], "model": "gpt-4"}}
{"body": {"messages": [{"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hi!"}]}}
```

### Output Format

Results are written as JSONL:

```jsonl
{"input": "What is the capital of France?", "response": {"choices": [...]}, "metadata": {"endpoint": "...", "latency_ms": 234, "attempts": 1}}
{"input": "Explain quantum computing...", "response": {"choices": [...]}, "metadata": {"endpoint": "...", "latency_ms": 189, "attempts": 1}}
```

Errors go to `errors.jsonl`:

```jsonl
{"input": "...", "error": "HTTP 429: Rate limit exceeded", "status_code": 429, "attempts": 3}
```
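Because each error record keeps the original `input`, failed requests can be replayed: extract the inputs back into request form and run Blaze again on the result. A small helper sketch using `serde_json` (assumes `input`-style requests; adapt for `body`-style records):

```rust
use std::fs;
use std::io::Write;

fn main() -> std::io::Result<()> {
    let mut out = fs::File::create("retry.jsonl")?;
    for line in fs::read_to_string("errors.jsonl")?.lines() {
        let record: serde_json::Value = serde_json::from_str(line).expect("valid JSONL");
        // Re-emit just the original input as a fresh request line.
        writeln!(out, "{}", serde_json::json!({ "input": record["input"] }))?;
    }
    Ok(())
}
```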
---

## ✨ Feature Breakdown: The Secret Sauce

| Feature | What It Does | Why You Care |
| :---: | :--- | :--- |
| **⚡ Async Everything**<br>`Tokio runtime` | Non-blocking I/O with work-stealing scheduler | Saturates your CPU cores efficiently |
| **🎯 Weighted Load Balancing**<br>`Smart distribution` | Route traffic based on endpoint capacity | Max out multiple API keys simultaneously |
| **🔄 Exponential Backoff**<br>`With jitter` | Intelligent retry with randomized delays | Respects rate limits, avoids thundering herd |
| **📊 Real-time Progress**<br>`Live stats` | RPS, success rate, latency, ETA | Know exactly what's happening |
| **🔌 Connection Pooling**<br>`HTTP/2 keep-alive` | Reuses connections across requests | Eliminates TCP handshake overhead |
| **💾 Streaming Output**<br>`Immediate writes` | Results written as they complete | Never lose progress on crashes |
| **🏥 Health Tracking**<br>`Per-endpoint` | Automatic failover on errors | Unhealthy endpoints get cooled off |
| **🔧 Flexible Config**<br>`CLI + ENV + JSON` | Configure via args, env vars, or files | Fits any workflow |

---

## ⚙️ Configuration

### CLI Flags

```
USAGE:
    blaze [OPTIONS] --input <INPUT>

OPTIONS:
    -i, --input <INPUT>                Path to JSONL input file [env: BLAZE_INPUT]
    -o, --output <OUTPUT>              Path for successful responses [env: BLAZE_OUTPUT]
    -e, --errors <ERRORS>              Path for error responses [default: errors.jsonl]
    -r, --rate <RATE>                  Max requests per second [default: 1000]
    -w, --workers <WORKERS>            Concurrent workers [default: 50]
    -t, --timeout <TIMEOUT>            Request timeout [default: 30]
    -a, --max-attempts <MAX_ATTEMPTS>  Max retry attempts [default: 3]
    -c, --config <CONFIG>              Endpoint config file (JSON)
    -v, --verbose                      Enable debug logging
        --json-logs                    Output logs as JSON
        --no-progress                  Disable progress bar
        --dry-run                      Validate config without processing
    -h, --help                         Print help
    -V, --version                      Print version
```

### Environment Variables

All options can be set via environment variables with the `BLAZE_` prefix:

```bash
export BLAZE_INPUT="requests.jsonl"
export BLAZE_OUTPUT="results.jsonl"
export BLAZE_RATE="5000"
export BLAZE_WORKERS="100"
export BLAZE_ENDPOINT_URL="https://api.example.com/v1/completions"
export BLAZE_API_KEY="your-api-key"
export BLAZE_MODEL="gpt-4"
```

### Configuration File

For multiple endpoints, create `endpoints.json`:

```json
{
  "endpoints": [
    {
      "url": "https://api.openai.com/v1/completions",
      "weight": 2,
      "api_key": "sk-key-1",
      "model": "gpt-4",
      "max_concurrent": 100
    },
    {
      "url": "https://api.openai.com/v1/completions",
      "weight": 1,
      "api_key": "sk-key-2",
      "model": "gpt-4",
      "max_concurrent": 50
    }
  ],
  "request": {
    "timeout": "30s",
    "rate_limit": 5000,
    "workers": 100
  },
  "retry": {
    "max_attempts": 3,
    "initial_backoff": "100ms",
    "max_backoff": "10s",
    "multiplier": 2.0
  }
}
```

Then run:

```bash
blaze -i requests.jsonl -o results.jsonl --config endpoints.json
```

---

## 📈 Performance Tips

### Maximize Throughput

```bash
# For maximum speed (adjust based on your API limits)
blaze -i data.jsonl -o out.jsonl \
  --rate 10000 \
  --workers 200 \
  --timeout 60
```

### Balance Load Across Keys

```json
{
  "endpoints": [
    {"url": "...", "api_key": "key-1", "weight": 3, "max_concurrent": 150},
    {"url": "...", "api_key": "key-2", "weight": 2, "max_concurrent": 100},
    {"url": "...", "api_key": "key-3", "weight": 1, "max_concurrent": 50}
  ]
}
```

### Handle Rate Limits Gracefully

```json
{
  "retry": {
    "max_attempts": 5,
    "initial_backoff": "500ms",
    "max_backoff": "30s",
    "multiplier": 2.0
  }
}
```
---

## 🛠️ For Developers & Tinkerers

### Building from Source

```bash
git clone https://github.com/yigitkonur/blaze-api.git
cd blaze-api

# Debug build
cargo build

# Release build (optimized)
cargo build --release

# Run tests
cargo test

# Run benchmarks
cargo bench
```

### Using as a Library

```rust
use blaze_api::{Config, EndpointConfig, Processor};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let config = Config {
        endpoints: vec![EndpointConfig {
            url: "https://api.example.com/v1/completions".to_string(),
            weight: 1,
            api_key: Some("your-key".to_string()),
            model: Some("gpt-4".to_string()),
            max_concurrent: 100,
        }],
        ..Default::default()
    };

    let processor = Processor::new(config)?;
    let result = processor.process_file(
        "requests.jsonl".into(),
        Some("results.jsonl".into()),
        "errors.jsonl".into(),
        true,
    ).await?;

    result.print_summary();
    Ok(())
}
```
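The returned `ProcessingResult` exposes the same numbers `print_summary` prints, so callers can act on them programmatically — for example, failing a batch job when the success rate dips below a threshold (illustrative cutoff):

```rust
// Continuing from `result` above:
if result.success_rate() < 99.0 {
    eprintln!("Success rate too low: {:.1}%", result.success_rate());
    std::process::exit(1);
}
```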
### Project Structure

```
src/
├── lib.rs          # Library entry point
├── main.rs         # CLI binary
├── config.rs       # Configuration management
├── client.rs       # HTTP client with retry logic
├── endpoint.rs     # Load balancer implementation
├── processor.rs    # Main processing orchestration
├── request.rs      # Request/response types
├── tracker.rs      # Statistics tracking
└── error.rs        # Error types
```

---

## 🔥 Common Issues & Quick Fixes

| Problem | Solution |
| :--- | :--- |
| **"Too many open files"** | Increase ulimit: `ulimit -n 65535` |
| **Connection timeouts** | Increase `--timeout` or reduce `--workers` |
| **Rate limit errors (429)** | Lower `--rate` or add more API keys |
| **Memory usage high** | Reduce `--workers` for large requests |
| **Progress bar not showing** | Don't pipe output, or use `--no-progress --json-logs` |

**Build Issues:**

| Problem | Solution |
| :--- | :--- |
| **OpenSSL errors** | Install OpenSSL dev: `apt install libssl-dev` or use `--features rustls` |
| **Rust version error** | Update Rust: `rustup update stable` (requires 1.75+) |
---

## 🤝 Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

```bash
# Fork the repo, then:
git clone https://github.com/YOUR_USERNAME/blaze-api.git
cd blaze-api
cargo test
# Make your changes
cargo fmt
cargo clippy
cargo test
# Submit PR
```

---

## 📄 License

MIT © [Yiğit Konur](https://github.com/yigitkonur)

---
**Built with 🔥 because waiting for API responses is a soul-crushing waste of time.**

[⬆ Back to Top](#-blaze-api-)
--------------------------------------------------------------------------------