├── examples
│   ├── requests.jsonl
│   └── config.json
├── .gitignore
├── LICENSE
├── benches
│   └── throughput.rs
├── Cargo.toml
├── src
│   ├── lib.rs
│   ├── error.rs
│   ├── main.rs
│   ├── request.rs
│   ├── tracker.rs
│   ├── client.rs
│   ├── endpoint.rs
│   ├── config.rs
│   └── processor.rs
└── README.md

/examples/requests.jsonl:
--------------------------------------------------------------------------------
1 | {"input": "What is the capital of France?"}
2 | {"input": "Explain quantum computing in simple terms."}
3 | {"input": "Write a haiku about Rust programming."}
4 | {"input": "What are the benefits of functional programming?"}
5 | {"input": "Describe the difference between TCP and UDP."}
6 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated files
2 | /target/
3 | Cargo.lock
4 | 
5 | # IDE
6 | .idea/
7 | .vscode/
8 | *.swp
9 | *.swo
10 | *~
11 | 
12 | # OS
13 | .DS_Store
14 | Thumbs.db
15 | 
16 | # Environment
17 | .env
18 | .env.local
19 | .envrc
20 | 
21 | # Test files
22 | *.jsonl
23 | !examples/*.jsonl
24 | 
25 | # Logs
26 | *.log
27 | logs/
28 | 
29 | # Coverage
30 | *.profraw
31 | *.profdata
32 | /coverage/
33 | tarpaulin-report.html
34 | 
35 | # Benchmarks output
36 | /criterion/
37 | 
38 | # Release artifacts
39 | /dist/
40 | *.tar.gz
41 | *.zip
42 | 
--------------------------------------------------------------------------------
/examples/config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "endpoints": [
3 |     {
4 |       "url": "https://api.openai.com/v1/chat/completions",
5 |       "weight": 2,
6 |       "api_key": "${OPENAI_API_KEY}",
7 |       "model": "gpt-4o-mini",
8 |       "max_concurrent": 100
9 |     },
10 |     {
11 |       "url": "https://api.anthropic.com/v1/messages",
12 |       "weight": 1,
13 |       "api_key": "${ANTHROPIC_API_KEY}",
14 |       "model": "claude-3-haiku-20240307",
15 |       "max_concurrent": 50
16 |     }
17 |   ],
18 |   "request": {
19 |     "timeout": "30s",
20 |     "rate_limit": 1000,
21 |     "workers": 50
22 |   },
23 |   "retry": {
24 |     "max_attempts": 3,
25 |     "initial_backoff": "100ms",
26 |     "max_backoff": "10s",
27 |     "multiplier": 2.0
28 |   }
29 | }
30 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 Yiğit Konur
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /benches/throughput.rs: -------------------------------------------------------------------------------- 1 | //! Throughput benchmarks for Blaze API. 2 | 3 | use criterion::{criterion_group, criterion_main, Criterion, Throughput}; 4 | 5 | fn benchmark_request_parsing(c: &mut Criterion) { 6 | let sample_json = r#"{"input": "What is the capital of France?"}"#; 7 | 8 | let mut group = c.benchmark_group("parsing"); 9 | group.throughput(Throughput::Elements(1)); 10 | 11 | group.bench_function("parse_request", |b| { 12 | b.iter(|| { 13 | let _: blaze_api::ApiRequest = serde_json::from_str(sample_json).unwrap(); 14 | }); 15 | }); 16 | 17 | group.finish(); 18 | } 19 | 20 | fn benchmark_load_balancer(c: &mut Criterion) { 21 | use blaze_api::{EndpointConfig, LoadBalancer}; 22 | 23 | let configs = vec![ 24 | EndpointConfig { 25 | url: "http://a.test".to_string(), 26 | weight: 1, 27 | api_key: None, 28 | model: None, 29 | max_concurrent: 100, 30 | }, 31 | EndpointConfig { 32 | url: "http://b.test".to_string(), 33 | weight: 2, 34 | api_key: None, 35 | model: None, 36 | max_concurrent: 100, 37 | }, 38 | EndpointConfig { 39 | url: "http://c.test".to_string(), 40 | weight: 3, 41 | api_key: None, 42 | model: None, 43 | max_concurrent: 100, 44 | }, 45 | ]; 46 | 47 | let lb = LoadBalancer::new(configs).unwrap(); 48 | 49 | let mut group = c.benchmark_group("load_balancer"); 50 | group.throughput(Throughput::Elements(1)); 51 | 52 | group.bench_function("select_endpoint", |b| { 53 | b.iter(|| { 54 | let _ = lb.select(); 55 | }); 56 | }); 57 | 58 | group.finish(); 59 | } 60 | 61 | criterion_group!(benches, benchmark_request_parsing, benchmark_load_balancer); 62 | criterion_main!(benches); 63 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "blaze-api" 3 | version = "1.0.0" 4 | edition = "2021" 5 | authors = ["Yiğit Konur "] 6 | description = "High-performance async API client with load balancing — 10K+ req/sec for LLM batch processing" 7 | repository = "https://github.com/yigitkonur/blaze-api" 8 | license = "MIT" 9 | keywords = ["api", "llm", "load-balancing", "async", "batch-processing"] 10 | categories = ["command-line-utilities", "asynchronous", "web-programming"] 11 | readme = "README.md" 12 | rust-version = "1.75" 13 | 14 | [[bin]] 15 | name = "blaze" 16 | path = "src/main.rs" 17 | 18 | [lib] 19 | name = "blaze_api" 20 | path = "src/lib.rs" 21 | 22 | [dependencies] 23 | # Async runtime 24 | tokio = { version = "1.43", features = ["full", "tracing"] } 25 | tokio-stream = "0.1" 26 | 27 | # HTTP client 28 | reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "gzip", "brotli", "stream"] } 29 | 30 | # Serialization 31 | serde = { version = "1.0", features = ["derive"] } 32 | serde_json = "1.0" 33 | 34 | # CLI 35 | clap = { version = "4.5", features = ["derive", "env", "wrap_help"] } 36 | 37 | # Logging & Tracing 38 | tracing = "0.1" 39 | tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } 40 | 41 | # Error handling 42 | thiserror = "2.0" 43 | anyhow = "1.0" 44 | 45 | # Utilities 46 | rand = "0.9" 47 | chrono = { version = "0.4", features = ["serde"] } 48 | futures = "0.3" 49 | parking_lot = "0.12" 50 | governor = "0.8" 51 | nonzero_ext = "0.3" 52 | 53 | # Progress & UI 54 | indicatif = { version = "0.17", features = 
["tokio"] } 55 | console = "0.15" 56 | 57 | [dev-dependencies] 58 | tokio-test = "0.4" 59 | wiremock = "0.6" 60 | tempfile = "3.15" 61 | criterion = { version = "0.5", features = ["async_tokio"] } 62 | 63 | [profile.release] 64 | lto = "thin" 65 | codegen-units = 1 66 | panic = "abort" 67 | strip = true 68 | 69 | [profile.release-fast] 70 | inherits = "release" 71 | lto = false 72 | codegen-units = 16 73 | 74 | [[bench]] 75 | name = "throughput" 76 | harness = false 77 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Blaze API 2 | //! 3 | //! High-performance async API client with load balancing for batch LLM processing. 4 | //! 5 | //! Blaze API is designed to handle massive throughput (10,000+ requests per second) 6 | //! with intelligent load balancing, automatic retries, and comprehensive error handling. 7 | //! 8 | //! ## Features 9 | //! 10 | //! - **Weighted Load Balancing**: Distribute requests across multiple endpoints based on weights 11 | //! - **Automatic Retries**: Exponential backoff with jitter for failed requests 12 | //! - **Rate Limiting**: Control throughput to respect API limits 13 | //! - **Connection Pooling**: Efficient HTTP/2 connection management 14 | //! - **Progress Tracking**: Real-time statistics and progress visualization 15 | //! 16 | //! ## Quick Start 17 | //! 18 | //! ```rust,no_run 19 | //! use blaze_api::{Config, Processor, EndpointConfig}; 20 | //! 21 | //! #[tokio::main] 22 | //! async fn main() -> anyhow::Result<()> { 23 | //! let config = Config { 24 | //! endpoints: vec![EndpointConfig { 25 | //! url: "https://api.example.com/v1/completions".to_string(), 26 | //! weight: 1, 27 | //! api_key: Some("your-api-key".to_string()), 28 | //! model: Some("gpt-4".to_string()), 29 | //! max_concurrent: 100, 30 | //! }], 31 | //! ..Default::default() 32 | //! }; 33 | //! 34 | //! let processor = Processor::new(config)?; 35 | //! let result = processor.process_file( 36 | //! "requests.jsonl".into(), 37 | //! Some("results.jsonl".into()), 38 | //! "errors.jsonl".into(), 39 | //! true, 40 | //! ).await?; 41 | //! 42 | //! result.print_summary(); 43 | //! Ok(()) 44 | //! } 45 | //! ``` 46 | //! 47 | //! ## Configuration 48 | //! 49 | //! Blaze supports configuration via: 50 | //! - Command-line arguments 51 | //! - Environment variables (prefixed with `BLAZE_`) 52 | //! - JSON configuration files 53 | //! 54 | //! See [`Config`] for all available options. 55 | 56 | #![warn(missing_docs)] 57 | #![warn(clippy::all)] 58 | #![warn(clippy::pedantic)] 59 | #![allow(clippy::module_name_repetitions)] 60 | 61 | pub mod client; 62 | pub mod config; 63 | pub mod endpoint; 64 | pub mod error; 65 | pub mod processor; 66 | pub mod request; 67 | pub mod tracker; 68 | 69 | // Re-exports for convenience 70 | pub use config::{Args, Config, EndpointConfig, RequestConfig, RetryConfig}; 71 | pub use endpoint::{Endpoint, LoadBalancer}; 72 | pub use error::{BlazeError, Result}; 73 | pub use processor::{ProcessingResult, Processor}; 74 | pub use request::{ApiRequest, ApiResponse, ErrorResponse, RequestResult}; 75 | pub use tracker::{StatsSnapshot, StatsTracker}; 76 | 77 | /// Library version. 78 | pub const VERSION: &str = env!("CARGO_PKG_VERSION"); 79 | 80 | /// Default configuration for quick setup. 
81 | impl Default for Config {
82 |     fn default() -> Self {
83 |         Self {
84 |             endpoints: vec![],
85 |             request: RequestConfig::default(),
86 |             retry: RetryConfig::default(),
87 |         }
88 |     }
89 | }
90 | 
--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
1 | //! Custom error types for Blaze API.
2 | //!
3 | //! This module defines all error types used throughout the application,
4 | //! following Rust best practices with `thiserror` for library errors.
5 | 
6 | use std::path::PathBuf;
7 | use thiserror::Error;
8 | 
9 | /// Errors that can occur during API processing.
10 | #[derive(Error, Debug)]
11 | #[allow(missing_docs)]
12 | pub enum BlazeError {
13 |     /// Failed to read the input file.
14 |     #[error("failed to read input file '{path}': {source}")]
15 |     InputFileRead {
16 |         /// Path to the file that could not be read.
17 |         path: PathBuf,
18 |         /// The underlying I/O error.
19 |         #[source]
20 |         source: std::io::Error,
21 |     },
22 | 
23 |     /// Failed to write to the output file.
24 |     #[error("failed to write to output file '{path}': {source}")]
25 |     OutputFileWrite {
26 |         /// Path to the file that could not be written.
27 |         path: PathBuf,
28 |         /// The underlying I/O error.
29 |         #[source]
30 |         source: std::io::Error,
31 |     },
32 | 
33 |     /// Failed to parse JSON from the input file.
34 |     #[error("failed to parse JSON at line {line}: {source}")]
35 |     JsonParse {
36 |         /// Line number where the error occurred.
37 |         line: usize,
38 |         /// The underlying JSON parsing error.
39 |         #[source]
40 |         source: serde_json::Error,
41 |     },
42 | 
43 |     /// Failed to serialize JSON for output.
44 |     #[error("failed to serialize JSON: {0}")]
45 |     JsonSerialize(#[from] serde_json::Error),
46 | 
47 |     /// HTTP request failed.
48 |     #[error("HTTP request failed: {0}")]
49 |     HttpRequest(#[from] reqwest::Error),
50 | 
51 |     /// No endpoints configured.
52 |     #[error("no endpoints configured - at least one endpoint is required")]
53 |     NoEndpoints,
54 | 
55 |     /// All endpoints are unhealthy.
56 |     #[error("all endpoints are currently unhealthy")]
57 |     AllEndpointsUnhealthy,
58 | 
59 |     /// Invalid configuration.
60 |     #[error("invalid configuration: {0}")]
61 |     InvalidConfig(String),
62 | 
63 |     /// Request timed out.
64 |     #[error("request timed out after {attempts} attempts")]
65 |     Timeout {
66 |         /// Number of attempts made before timeout.
67 |         attempts: u32,
68 |     },
69 | 
70 |     /// Rate limit exceeded.
71 |     #[error("rate limit exceeded for endpoint '{endpoint}'")]
72 |     RateLimitExceeded {
73 |         /// The endpoint that exceeded its rate limit.
74 |         endpoint: String,
75 |     },
76 | 
77 |     /// Invalid response from API.
78 |     #[error("invalid API response: {message}")]
79 |     InvalidResponse {
80 |         /// Description of what was invalid.
81 |         message: String,
82 |     },
83 | 
84 |     /// Endpoint returned an error status.
85 |     #[error("endpoint returned error status {status}: {body}")]
86 |     EndpointError {
87 |         /// HTTP status code returned.
88 |         status: u16,
89 |         /// Response body content.
90 |         body: String,
91 |     },
92 | }
93 | 
94 | /// Result type alias for Blaze operations.
95 | pub type Result<T> = std::result::Result<T, BlazeError>;
96 | 
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | //! Blaze API CLI - High-performance batch API client.
2 | //!
3 | //! Run `blaze --help` for usage information.
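//!
//! The process exits non-zero when the configuration is invalid, the input
//! file is missing, or every request fails; partial failures still exit zero
//! so batch pipelines can inspect the error file instead.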
4 | 5 | use anyhow::Result; 6 | use blaze_api::{Args, Config, Processor}; 7 | use console::style; 8 | use tracing::{error, info, Level}; 9 | use tracing_subscriber::{fmt, prelude::*, EnvFilter}; 10 | 11 | #[tokio::main] 12 | async fn main() -> Result<()> { 13 | // Parse CLI arguments 14 | let args = Args::parse_args(); 15 | 16 | // Setup logging 17 | setup_logging(&args); 18 | 19 | // Print banner 20 | if !args.json_logs { 21 | print_banner(); 22 | } 23 | 24 | // Load configuration 25 | let config = match Config::from_args(&args) { 26 | Ok(c) => c, 27 | Err(e) => { 28 | error!("Configuration error: {}", e); 29 | eprintln!("{} {}", style("Error:").red().bold(), e); 30 | std::process::exit(1); 31 | } 32 | }; 33 | 34 | // Validate input file exists 35 | if !args.input.exists() { 36 | error!("Input file not found: {:?}", args.input); 37 | eprintln!( 38 | "{} Input file not found: {}", 39 | style("Error:").red().bold(), 40 | args.input.display() 41 | ); 42 | std::process::exit(1); 43 | } 44 | 45 | // Dry run mode 46 | if args.dry_run { 47 | println!("\n{}", style("DRY RUN MODE").yellow().bold()); 48 | println!("Configuration validated successfully.\n"); 49 | print_config_summary(&args, &config); 50 | return Ok(()); 51 | } 52 | 53 | // Print configuration summary 54 | if args.verbose && !args.json_logs { 55 | print_config_summary(&args, &config); 56 | } 57 | 58 | // Create processor and run 59 | let processor = Processor::new(config)?; 60 | 61 | info!( 62 | input = %args.input.display(), 63 | output = ?args.output, 64 | "Starting processing" 65 | ); 66 | 67 | let result = processor 68 | .process_file( 69 | args.input.clone(), 70 | args.output.clone(), 71 | args.errors.clone(), 72 | !args.no_progress && !args.json_logs, 73 | ) 74 | .await?; 75 | 76 | // Print results 77 | if !args.json_logs { 78 | result.print_summary(); 79 | 80 | if let Some(output) = &args.output { 81 | println!( 82 | "\n{} Results saved to: {}", 83 | style("✓").green().bold(), 84 | output.display() 85 | ); 86 | } 87 | 88 | if result.failure_count > 0 { 89 | println!( 90 | "{} Errors saved to: {}", 91 | style("⚠").yellow().bold(), 92 | args.errors.display() 93 | ); 94 | } 95 | } else { 96 | // JSON output for programmatic consumption 97 | let json_result = serde_json::json!({ 98 | "status": "complete", 99 | "total_processed": result.total_processed, 100 | "success_count": result.success_count, 101 | "failure_count": result.failure_count, 102 | "success_rate": result.success_rate(), 103 | "elapsed_seconds": result.elapsed.as_secs_f64(), 104 | "avg_latency_ms": result.avg_latency_ms, 105 | "throughput_rps": result.overall_rps, 106 | }); 107 | println!("{}", serde_json::to_string(&json_result)?); 108 | } 109 | 110 | // Exit with error code if there were failures 111 | if result.failure_count > 0 && result.success_count == 0 { 112 | std::process::exit(1); 113 | } 114 | 115 | Ok(()) 116 | } 117 | 118 | fn setup_logging(args: &Args) { 119 | let level = if args.verbose { Level::DEBUG } else { Level::INFO }; 120 | 121 | let filter = EnvFilter::try_from_default_env() 122 | .unwrap_or_else(|_| EnvFilter::new(format!("blaze_api={},blaze={}", level, level))); 123 | 124 | if args.json_logs { 125 | tracing_subscriber::registry() 126 | .with(filter) 127 | .with(fmt::layer().json()) 128 | .init(); 129 | } else { 130 | tracing_subscriber::registry() 131 | .with(filter) 132 | .with( 133 | fmt::layer() 134 | .with_target(false) 135 | .with_thread_ids(false) 136 | .compact(), 137 | ) 138 | .init(); 139 | } 140 | } 141 | 142 | fn print_banner() { 
143 |     let banner = r#"
144 |     ____  __                    ___    ____  ____
145 |    / __ )/ /___ _____  ___     /   |  / __ \/  _/
146 |   / __  / / __ `/_  / / _ \   / /| | / /_/ // /
147 |  / /_/ / / /_/ / / /_/  __/  / ___ |/ ____// /
148 | /_____/_/\__,_/ /___/\___/  /_/  |_/_/   /___/
149 | 
150 | "#;
151 | 
152 |     println!("{}", style(banner).cyan().bold());
153 |     println!(
154 |         "  {}",
155 |         style("High-Performance Batch API Client").white().dim()
156 |     );
157 |     println!(
158 |         "  {}",
159 |         style(format!("v{}", blaze_api::VERSION)).white().dim()
160 |     );
161 |     println!();
162 | }
163 | 
164 | fn print_config_summary(args: &Args, config: &Config) {
165 |     println!("{}", style("Configuration:").bold());
166 |     println!("  Input:      {}", args.input.display());
167 |     if let Some(output) = &args.output {
168 |         println!("  Output:     {}", output.display());
169 |     }
170 |     println!("  Errors:     {}", args.errors.display());
171 |     println!("  Rate Limit: {} req/sec", config.request.rate_limit);
172 |     println!("  Workers:    {}", config.request.workers);
173 |     println!("  Timeout:    {:?}", config.request.timeout);
174 |     println!("  Retries:    {}", config.retry.max_attempts);
175 |     println!("  Endpoints:  {}", config.endpoints.len());
176 |     for (i, ep) in config.endpoints.iter().enumerate() {
177 |         println!(
178 |             "    {}. {} (weight: {}, max: {})",
179 |             i + 1,
180 |             ep.url,
181 |             ep.weight,
182 |             ep.max_concurrent
183 |         );
184 |     }
185 |     println!();
186 | }
187 | 
--------------------------------------------------------------------------------
/src/request.rs:
--------------------------------------------------------------------------------
1 | //! Request and response types for API processing.
2 | //!
3 | //! This module defines the data structures for API requests and responses,
4 | //! supporting flexible input formats and structured output.
5 | 
6 | use serde::{Deserialize, Serialize};
7 | use serde_json::Value;
8 | use std::collections::HashMap;
9 | 
10 | /// An API request read from the input file.
11 | #[derive(Debug, Clone, Serialize, Deserialize)]
12 | pub struct ApiRequest {
13 |     /// The main input content (for LLM requests).
14 |     #[serde(default)]
15 |     pub input: Option<String>,
16 | 
17 |     /// Custom request body (overrides default formatting).
18 |     #[serde(default)]
19 |     pub body: Option<Value>,
20 | 
21 |     /// Custom headers for this specific request.
22 |     #[serde(default)]
23 |     pub headers: Option<HashMap<String, String>>,
24 | 
25 |     /// Request-specific metadata (passed through to response).
26 |     #[serde(default, flatten)]
27 |     pub metadata: HashMap<String, Value>,
28 | 
29 |     /// Line number in the input file (set during parsing).
30 |     #[serde(skip)]
31 |     pub line_number: usize,
32 | }
33 | 
34 | impl ApiRequest {
35 |     /// Create a simple request with just input text.
36 |     pub fn simple(input: impl Into<String>) -> Self {
37 |         Self {
38 |             input: Some(input.into()),
39 |             body: None,
40 |             headers: None,
41 |             metadata: HashMap::new(),
42 |             line_number: 0,
43 |         }
44 |     }
45 | 
46 |     /// Create a request with a custom body.
47 |     pub fn with_body(body: Value) -> Self {
48 |         Self {
49 |             input: None,
50 |             body: Some(body),
51 |             headers: None,
52 |             metadata: HashMap::new(),
53 |             line_number: 0,
54 |         }
55 |     }
56 | 
57 |     /// Build the request body for an LLM endpoint.
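    ///
    /// If a custom `body` was supplied it is returned untouched; otherwise the
    /// `input` text is wrapped in a single-message chat payload, and `model`
    /// is added only when the endpoint has one configured:
    ///
    /// ```rust
    /// use blaze_api::ApiRequest;
    ///
    /// let req = ApiRequest::simple("Hello");
    /// let body = req.build_llm_body(Some("gpt-4o-mini"));
    /// assert_eq!(body["model"], "gpt-4o-mini");
    /// assert_eq!(body["messages"][0]["content"], "Hello");
    /// ```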
58 |     pub fn build_llm_body(&self, model: Option<&str>) -> Value {
59 |         if let Some(body) = &self.body {
60 |             // Use custom body if provided
61 |             return body.clone();
62 |         }
63 | 
64 |         // Build standard LLM request body
65 |         let input = self.input.as_deref().unwrap_or("");
66 |         let mut body = serde_json::json!({
67 |             "messages": [{
68 |                 "role": "user",
69 |                 "content": input
70 |             }]
71 |         });
72 | 
73 |         if let Some(model) = model {
74 |             body["model"] = Value::String(model.to_string());
75 |         }
76 | 
77 |         body
78 |     }
79 | 
80 |     /// Get a display string for logging.
81 |     pub fn display_input(&self) -> String {
82 |         if let Some(input) = &self.input {
83 |             if input.chars().count() > 50 {
84 |                 format!("{}...", input.chars().take(50).collect::<String>())
85 |             } else {
86 |                 input.clone()
87 |             }
88 |         } else if self.body.is_some() {
89 |             "[custom body]".to_string()
90 |         } else {
91 |             "[empty]".to_string()
92 |         }
93 |     }
94 | }
95 | 
96 | /// A successful API response.
97 | #[derive(Debug, Clone, Serialize, Deserialize)]
98 | pub struct ApiResponse {
99 |     /// The original input (for correlation).
100 |     #[serde(skip_serializing_if = "Option::is_none")]
101 |     pub input: Option<String>,
102 | 
103 |     /// The response body from the API.
104 |     pub response: Value,
105 | 
106 |     /// Response metadata.
107 |     #[serde(skip_serializing_if = "Option::is_none")]
108 |     pub metadata: Option<ResponseMetadata>,
109 | }
110 | 
111 | /// Metadata about the response.
112 | #[derive(Debug, Clone, Serialize, Deserialize)]
113 | pub struct ResponseMetadata {
114 |     /// Which endpoint handled the request.
115 |     pub endpoint: String,
116 | 
117 |     /// Response latency in milliseconds.
118 |     pub latency_ms: u64,
119 | 
120 |     /// Number of retry attempts.
121 |     pub attempts: u32,
122 | }
123 | 
124 | impl ApiResponse {
125 |     /// Create a new API response.
126 |     pub fn new(input: Option<String>, response: Value) -> Self {
127 |         Self {
128 |             input,
129 |             response,
130 |             metadata: None,
131 |         }
132 |     }
133 | 
134 |     /// Add metadata to the response.
135 |     pub fn with_metadata(mut self, metadata: ResponseMetadata) -> Self {
136 |         self.metadata = Some(metadata);
137 |         self
138 |     }
139 | }
140 | 
141 | /// An error response for failed requests.
142 | #[derive(Debug, Clone, Serialize, Deserialize)]
143 | pub struct ErrorResponse {
144 |     /// The original input that failed.
145 |     #[serde(skip_serializing_if = "Option::is_none")]
146 |     pub input: Option<String>,
147 | 
148 |     /// The original request body.
149 |     #[serde(skip_serializing_if = "Option::is_none")]
150 |     pub body: Option<Value>,
151 | 
152 |     /// Error message.
153 |     pub error: String,
154 | 
155 |     /// HTTP status code (if available).
156 |     #[serde(skip_serializing_if = "Option::is_none")]
157 |     pub status_code: Option<u16>,
158 | 
159 |     /// Line number in the input file.
160 |     #[serde(skip_serializing_if = "is_zero")]
161 |     pub line_number: usize,
162 | 
163 |     /// Number of attempts made.
164 |     pub attempts: u32,
165 | }
166 | 
167 | fn is_zero(n: &usize) -> bool {
168 |     *n == 0
169 | }
170 | 
171 | impl ErrorResponse {
172 |     /// Create a new error response.
173 |     pub fn new(request: &ApiRequest, error: impl Into<String>, attempts: u32) -> Self {
174 |         Self {
175 |             input: request.input.clone(),
176 |             body: request.body.clone(),
177 |             error: error.into(),
178 |             status_code: None,
179 |             line_number: request.line_number,
180 |             attempts,
181 |         }
182 |     }
183 | 
184 |     /// Set the HTTP status code.
185 |     pub fn with_status(mut self, status: u16) -> Self {
186 |         self.status_code = Some(status);
187 |         self
188 |     }
189 | }
190 | 
191 | /// Result of processing a single request.
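///
/// Carries either the successful [`ApiResponse`] or the final
/// [`ErrorResponse`] once retries are exhausted, so callers typically branch
/// once per request:
///
/// ```rust,ignore
/// match result {
///     RequestResult::Success(response) => { /* append to the results file */ }
///     RequestResult::Failure(error) => { /* append to the errors file */ }
/// }
/// ```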
192 | #[derive(Debug)]
193 | pub enum RequestResult {
194 |     /// Request succeeded.
195 |     Success(ApiResponse),
196 |     /// Request failed after all retries.
197 |     Failure(ErrorResponse),
198 | }
199 | 
200 | impl RequestResult {
201 |     /// Check if this is a success.
202 |     pub fn is_success(&self) -> bool {
203 |         matches!(self, Self::Success(_))
204 |     }
205 | }
206 | 
207 | #[cfg(test)]
208 | mod tests {
209 |     use super::*;
210 | 
211 |     #[test]
212 |     fn test_simple_request() {
213 |         let req = ApiRequest::simple("Hello, world!");
214 |         assert_eq!(req.input, Some("Hello, world!".to_string()));
215 |         assert!(req.body.is_none());
216 |     }
217 | 
218 |     #[test]
219 |     fn test_build_llm_body() {
220 |         let req = ApiRequest::simple("Test input");
221 |         let body = req.build_llm_body(Some("gpt-4"));
222 | 
223 |         assert_eq!(body["model"], "gpt-4");
224 |         assert_eq!(body["messages"][0]["content"], "Test input");
225 |     }
226 | 
227 |     #[test]
228 |     fn test_custom_body() {
229 |         let custom = serde_json::json!({"custom": "data"});
230 |         let req = ApiRequest::with_body(custom.clone());
231 |         let body = req.build_llm_body(Some("gpt-4"));
232 | 
233 |         assert_eq!(body, custom);
234 |     }
235 | 
236 |     #[test]
237 |     fn test_error_response() {
238 |         let req = ApiRequest::simple("Test");
239 |         let err = ErrorResponse::new(&req, "Connection refused", 3);
240 | 
241 |         assert_eq!(err.error, "Connection refused");
242 |         assert_eq!(err.attempts, 3);
243 |     }
244 | }
245 | 
--------------------------------------------------------------------------------
/src/tracker.rs:
--------------------------------------------------------------------------------
1 | //! Statistics tracking for request processing.
2 | //!
3 | //! This module provides real-time tracking of request statistics
4 | //! including success/failure counts, latency, and throughput.
5 | 
6 | use parking_lot::Mutex;
7 | use std::collections::VecDeque;
8 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
9 | use std::time::{Duration, Instant};
10 | 
11 | /// Statistics tracker for request processing.
12 | #[derive(Debug)]
13 | pub struct StatsTracker {
14 |     /// Start time of processing.
15 |     start_time: Instant,
16 |     /// Total requests processed.
17 |     total_processed: AtomicU64,
18 |     /// Successful requests.
19 |     success_count: AtomicU64,
20 |     /// Failed requests.
21 |     failure_count: AtomicU64,
22 |     /// Total latency in microseconds.
23 |     total_latency_us: AtomicU64,
24 |     /// Requests in the last second (for RPS calculation).
25 |     recent_requests: Mutex<VecDeque<Instant>>,
26 |     /// Total input lines.
27 |     total_lines: AtomicUsize,
28 | }
29 | 
30 | impl StatsTracker {
31 |     /// Create a new statistics tracker.
32 |     pub fn new() -> Self {
33 |         Self {
34 |             start_time: Instant::now(),
35 |             total_processed: AtomicU64::new(0),
36 |             success_count: AtomicU64::new(0),
37 |             failure_count: AtomicU64::new(0),
38 |             total_latency_us: AtomicU64::new(0),
39 |             recent_requests: Mutex::new(VecDeque::new()),
40 |             total_lines: AtomicUsize::new(0),
41 |         }
42 |     }
43 | 
44 |     /// Set the total number of input lines.
45 |     pub fn set_total_lines(&self, total: usize) {
46 |         self.total_lines.store(total, Ordering::Relaxed);
47 |     }
48 | 
49 |     /// Record a successful request.
50 |     pub fn record_success(&self, latency: Duration) {
51 |         self.total_processed.fetch_add(1, Ordering::Relaxed);
52 |         self.success_count.fetch_add(1, Ordering::Relaxed);
53 |         self.total_latency_us
54 |             .fetch_add(latency.as_micros() as u64, Ordering::Relaxed);
55 |         self.record_recent();
56 |     }
57 | 
58 |     /// Record a failed request.
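    ///
    /// Failures count toward `total_processed` (and the rolling RPS window)
    /// but contribute no latency sample, so `avg_latency_ms` reflects
    /// successful requests only.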
59 | pub fn record_failure(&self) { 60 | self.total_processed.fetch_add(1, Ordering::Relaxed); 61 | self.failure_count.fetch_add(1, Ordering::Relaxed); 62 | self.record_recent(); 63 | } 64 | 65 | /// Record a request for RPS calculation. 66 | fn record_recent(&self) { 67 | let now = Instant::now(); 68 | let mut recent = self.recent_requests.lock(); 69 | recent.push_back(now); 70 | 71 | // Remove entries older than 1 second 72 | let cutoff = now - Duration::from_secs(1); 73 | while let Some(front) = recent.front() { 74 | if *front < cutoff { 75 | recent.pop_front(); 76 | } else { 77 | break; 78 | } 79 | } 80 | } 81 | 82 | /// Get the current requests per second. 83 | pub fn requests_per_second(&self) -> f64 { 84 | let now = Instant::now(); 85 | let mut recent = self.recent_requests.lock(); 86 | 87 | // Remove old entries 88 | let cutoff = now - Duration::from_secs(1); 89 | while let Some(front) = recent.front() { 90 | if *front < cutoff { 91 | recent.pop_front(); 92 | } else { 93 | break; 94 | } 95 | } 96 | 97 | recent.len() as f64 98 | } 99 | 100 | /// Get the current statistics snapshot. 101 | pub fn snapshot(&self) -> StatsSnapshot { 102 | let elapsed = self.start_time.elapsed(); 103 | let total = self.total_processed.load(Ordering::Relaxed); 104 | let success = self.success_count.load(Ordering::Relaxed); 105 | let failure = self.failure_count.load(Ordering::Relaxed); 106 | let total_latency = self.total_latency_us.load(Ordering::Relaxed); 107 | let total_lines = self.total_lines.load(Ordering::Relaxed); 108 | 109 | let avg_latency_ms = if success > 0 { 110 | (total_latency as f64 / success as f64) / 1000.0 111 | } else { 112 | 0.0 113 | }; 114 | 115 | let overall_rps = if elapsed.as_secs_f64() > 0.0 { 116 | total as f64 / elapsed.as_secs_f64() 117 | } else { 118 | 0.0 119 | }; 120 | 121 | let progress = if total_lines > 0 { 122 | (total as f64 / total_lines as f64) * 100.0 123 | } else { 124 | 0.0 125 | }; 126 | 127 | StatsSnapshot { 128 | elapsed, 129 | total_processed: total, 130 | success_count: success, 131 | failure_count: failure, 132 | avg_latency_ms, 133 | current_rps: self.requests_per_second(), 134 | overall_rps, 135 | total_lines, 136 | progress, 137 | } 138 | } 139 | } 140 | 141 | impl Default for StatsTracker { 142 | fn default() -> Self { 143 | Self::new() 144 | } 145 | } 146 | 147 | /// A snapshot of current statistics. 148 | #[derive(Debug, Clone)] 149 | pub struct StatsSnapshot { 150 | /// Elapsed time since start. 151 | pub elapsed: Duration, 152 | /// Total requests processed. 153 | pub total_processed: u64, 154 | /// Successful requests. 155 | pub success_count: u64, 156 | /// Failed requests. 157 | pub failure_count: u64, 158 | /// Average latency in milliseconds. 159 | pub avg_latency_ms: f64, 160 | /// Current requests per second. 161 | pub current_rps: f64, 162 | /// Overall requests per second. 163 | pub overall_rps: f64, 164 | /// Total input lines. 165 | pub total_lines: usize, 166 | /// Progress percentage. 167 | pub progress: f64, 168 | } 169 | 170 | impl StatsSnapshot { 171 | /// Get the success rate as a percentage. 172 | pub fn success_rate(&self) -> f64 { 173 | if self.total_processed > 0 { 174 | (self.success_count as f64 / self.total_processed as f64) * 100.0 175 | } else { 176 | 100.0 177 | } 178 | } 179 | 180 | /// Get the estimated time remaining. 
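    ///
    /// The estimate divides the remaining line count by the overall (not the
    /// instantaneous) throughput: with 250 of 1000 lines done after 25
    /// seconds, `overall_rps` is 10 and the ETA is roughly 75 seconds.
    ///
    /// ```rust,ignore
    /// if let Some(eta) = snapshot.eta() {
    ///     println!("ETA: {:.0}s", eta.as_secs_f64());
    /// }
    /// ```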
181 |     pub fn eta(&self) -> Option<Duration> {
182 |         if self.overall_rps > 0.0 && self.total_lines > 0 {
183 |             let remaining = self.total_lines.saturating_sub(self.total_processed as usize);
184 |             let seconds = remaining as f64 / self.overall_rps;
185 |             Some(Duration::from_secs_f64(seconds))
186 |         } else {
187 |             None
188 |         }
189 |     }
190 | 
191 |     /// Format as a human-readable summary.
192 |     pub fn summary(&self) -> String {
193 |         format!(
194 |             "Processed: {}/{} ({:.1}%) | Success: {} | Failed: {} | Avg Latency: {:.1}ms | RPS: {:.0}",
195 |             self.total_processed,
196 |             self.total_lines,
197 |             self.progress,
198 |             self.success_count,
199 |             self.failure_count,
200 |             self.avg_latency_ms,
201 |             self.current_rps
202 |         )
203 |     }
204 | }
205 | 
206 | #[cfg(test)]
207 | mod tests {
208 |     use super::*;
209 | 
210 |     #[test]
211 |     fn test_stats_tracking() {
212 |         let tracker = StatsTracker::new();
213 |         tracker.set_total_lines(100);
214 | 
215 |         tracker.record_success(Duration::from_millis(50));
216 |         tracker.record_success(Duration::from_millis(100));
217 |         tracker.record_failure();
218 | 
219 |         let snapshot = tracker.snapshot();
220 |         assert_eq!(snapshot.total_processed, 3);
221 |         assert_eq!(snapshot.success_count, 2);
222 |         assert_eq!(snapshot.failure_count, 1);
223 |         assert_eq!(snapshot.avg_latency_ms, 75.0);
224 |     }
225 | 
226 |     #[test]
227 |     fn test_success_rate() {
228 |         let tracker = StatsTracker::new();
229 | 
230 |         for _ in 0..8 {
231 |             tracker.record_success(Duration::from_millis(10));
232 |         }
233 |         for _ in 0..2 {
234 |             tracker.record_failure();
235 |         }
236 | 
237 |         let snapshot = tracker.snapshot();
238 |         assert_eq!(snapshot.success_rate(), 80.0);
239 |     }
240 | }
241 | 
--------------------------------------------------------------------------------
/src/client.rs:
--------------------------------------------------------------------------------
1 | //! HTTP client with retry logic and connection pooling.
2 | //!
3 | //! This module provides a high-performance HTTP client optimized for
4 | //! high-throughput API requests with automatic retries.
5 | 
6 | use crate::config::Config;
7 | use crate::endpoint::Endpoint;
8 | use crate::error::{BlazeError, Result};
9 | use crate::request::{ApiRequest, ApiResponse, ErrorResponse, RequestResult, ResponseMetadata};
10 | use reqwest::{header, Client};
11 | use std::sync::Arc;
12 | use std::time::{Duration, Instant};
13 | use tokio::time::sleep;
14 | use tracing::{debug, trace, warn};
15 | 
16 | /// HTTP client wrapper with retry logic.
17 | #[derive(Debug, Clone)]
18 | pub struct ApiClient {
19 |     client: Client,
20 |     config: Arc<Config>,
21 | }
22 | 
23 | impl ApiClient {
24 |     /// Create a new API client.
25 |     pub fn new(config: Arc<Config>) -> Result<Self> {
26 |         let mut headers = header::HeaderMap::new();
27 |         headers.insert(
28 |             header::CONTENT_TYPE,
29 |             header::HeaderValue::from_static("application/json"),
30 |         );
31 |         headers.insert(
32 |             header::ACCEPT,
33 |             header::HeaderValue::from_static("application/json"),
34 |         );
35 | 
36 |         let client = Client::builder()
37 |             .timeout(config.request.timeout)
38 |             .pool_max_idle_per_host(config.request.workers)
39 |             .pool_idle_timeout(Duration::from_secs(90))
40 |             .tcp_keepalive(Duration::from_secs(60))
41 |             .tcp_nodelay(true)
42 |             .default_headers(headers)
43 |             .gzip(true)
44 |             .brotli(true)
45 |             .build()
46 |             .map_err(BlazeError::HttpRequest)?;
47 | 
48 |         Ok(Self {
49 |             client,
50 |             config,
51 |         })
52 |     }
53 | 
54 |     /// Send a request to an endpoint with retries.
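    ///
    /// Retries up to `retry.max_attempts` times with exponential backoff and
    /// jitter; 400, 401, 403, and 404 responses are treated as non-retryable
    /// and fail immediately. The endpoint's in-flight slot (acquired by the
    /// caller) is released here on both the success and failure paths.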
55 |     pub async fn send_with_retry(
56 |         &self,
57 |         request: &ApiRequest,
58 |         endpoint: Arc<Endpoint>,
59 |     ) -> RequestResult {
60 |         let mut attempts = 0;
61 |         let mut last_error: Option<String> = None;
62 |         let mut last_status: Option<u16> = None;
63 | 
64 |         let body = request.build_llm_body(endpoint.model());
65 |         let start = Instant::now();
66 | 
67 |         while attempts < self.config.retry.max_attempts {
68 |             attempts += 1;
69 | 
70 |             match self.send_once(&body, &endpoint).await {
71 |                 Ok(response) => {
72 |                     let latency = start.elapsed();
73 |                     endpoint.record_success(latency);
74 |                     endpoint.release();
75 | 
76 |                     let api_response = ApiResponse::new(request.input.clone(), response)
77 |                         .with_metadata(ResponseMetadata {
78 |                             endpoint: endpoint.url().to_string(),
79 |                             latency_ms: latency.as_millis() as u64,
80 |                             attempts,
81 |                         });
82 | 
83 |                     return RequestResult::Success(api_response);
84 |                 }
85 |                 Err((error, status)) => {
86 |                     last_error = Some(error.clone());
87 |                     last_status = status;
88 | 
89 |                     // Don't retry on certain status codes
90 |                     if let Some(code) = status {
91 |                         if code == 400 || code == 401 || code == 403 || code == 404 {
92 |                             warn!(
93 |                                 endpoint = endpoint.url(),
94 |                                 status = code,
95 |                                 "Non-retryable error"
96 |                             );
97 |                             break;
98 |                         }
99 |                     }
100 | 
101 |                     if attempts < self.config.retry.max_attempts {
102 |                         let backoff = self.calculate_backoff(attempts);
103 |                         debug!(
104 |                             attempt = attempts,
105 |                             max_attempts = self.config.retry.max_attempts,
106 |                             backoff_ms = backoff.as_millis(),
107 |                             error = %error,
108 |                             "Request failed, retrying"
109 |                         );
110 |                         sleep(backoff).await;
111 |                     }
112 |                 }
113 |             }
114 |         }
115 | 
116 |         endpoint.record_failure();
117 |         endpoint.release();
118 | 
119 |         let error_response =
120 |             ErrorResponse::new(request, last_error.unwrap_or_else(|| "Unknown error".to_string()), attempts);
121 | 
122 |         let error_response = if let Some(status) = last_status {
123 |             error_response.with_status(status)
124 |         } else {
125 |             error_response
126 |         };
127 | 
128 |         RequestResult::Failure(error_response)
129 |     }
130 | 
131 |     /// Send a single request without retries.
132 |     async fn send_once(
133 |         &self,
134 |         body: &serde_json::Value,
135 |         endpoint: &Endpoint,
136 |     ) -> std::result::Result<serde_json::Value, (String, Option<u16>)> {
137 |         let mut request = self.client.post(endpoint.url()).json(body);
138 | 
139 |         // Add authorization header if API key is configured
140 |         if let Some(api_key) = endpoint.api_key() {
141 |             request = request.header(header::AUTHORIZATION, format!("Bearer {}", api_key));
142 |         }
143 | 
144 |         trace!(endpoint = endpoint.url(), "Sending request");
145 | 
146 |         let response = request.send().await.map_err(|e| {
147 |             let error = format!("Request failed: {}", e);
148 |             (error, e.status().map(|s| s.as_u16()))
149 |         })?;
150 | 
151 |         let status = response.status();
152 | 
153 |         if status.is_success() {
154 |             let body: serde_json::Value = response.json().await.map_err(|e| {
155 |                 (format!("Failed to parse response: {}", e), Some(status.as_u16()))
156 |             })?;
157 |             Ok(body)
158 |         } else {
159 |             let error_body = response.text().await.unwrap_or_default();
160 |             let truncated = if error_body.chars().count() > 500 {
161 |                 format!("{}...", error_body.chars().take(500).collect::<String>())
162 |             } else {
163 |                 error_body
164 |             };
165 |             Err((
166 |                 format!("HTTP {}: {}", status.as_u16(), truncated),
167 |                 Some(status.as_u16()),
168 |             ))
169 |         }
170 |     }
171 | 
172 |     /// Calculate backoff duration for a given attempt.
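    ///
    /// The delay is `min(initial_backoff * multiplier^(attempt - 1), max_backoff)`
    /// with a uniform ±25% jitter applied before the cap, so with the defaults
    /// (100ms initial, 2.0 multiplier, 10s cap) the nominal sequence is
    /// 100ms, 200ms, 400ms, and so on.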
173 |     fn calculate_backoff(&self, attempt: u32) -> Duration {
174 |         let base = self.config.retry.initial_backoff.as_millis() as f64;
175 |         let multiplier = self.config.retry.multiplier.powi(attempt as i32 - 1);
176 |         let backoff_ms = base * multiplier;
177 | 
178 |         // Add jitter (±25%)
179 |         let jitter = 1.0 + (rand::random::<f64>() - 0.5) * 0.5;
180 |         let final_ms = (backoff_ms * jitter) as u64;
181 | 
182 |         Duration::from_millis(final_ms.min(self.config.retry.max_backoff.as_millis() as u64))
183 |     }
184 | }
185 | 
186 | #[cfg(test)]
187 | mod tests {
188 |     use super::*;
189 |     use crate::config::{EndpointConfig, RequestConfig, RetryConfig};
190 | 
191 |     fn test_config() -> Config {
192 |         Config {
193 |             endpoints: vec![EndpointConfig {
194 |                 url: "http://localhost:8080".to_string(),
195 |                 weight: 1,
196 |                 api_key: None,
197 |                 model: None,
198 |                 max_concurrent: 100,
199 |             }],
200 |             request: RequestConfig::default(),
201 |             retry: RetryConfig::default(),
202 |         }
203 |     }
204 | 
205 |     #[test]
206 |     fn test_backoff_calculation() {
207 |         let config = Arc::new(test_config());
208 |         let client = ApiClient::new(config).unwrap();
209 | 
210 |         let b1 = client.calculate_backoff(1);
211 |         let b2 = client.calculate_backoff(2);
212 |         let b3 = client.calculate_backoff(3);
213 | 
214 |         // Backoff should generally increase (allowing for jitter)
215 |         assert!(b1 < Duration::from_secs(1));
216 |         assert!(b2 < Duration::from_secs(2));
217 |         assert!(b3 < Duration::from_secs(5));
218 |     }
219 | }
220 | 
--------------------------------------------------------------------------------
/src/endpoint.rs:
--------------------------------------------------------------------------------
1 | //! Endpoint management with weighted load balancing.
2 | //!
3 | //! This module provides a load balancer that distributes requests
4 | //! across multiple endpoints based on configurable weights.
5 | 
6 | use crate::config::EndpointConfig;
7 | use crate::error::{BlazeError, Result};
8 | use parking_lot::RwLock;
9 | use rand::prelude::*;
10 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
11 | use std::sync::Arc;
12 | use std::time::{Duration, Instant};
13 | 
14 | /// A single API endpoint with health tracking.
15 | #[derive(Debug)]
16 | pub struct Endpoint {
17 |     /// Endpoint configuration.
18 |     pub config: EndpointConfig,
19 |     /// Current number of in-flight requests.
20 |     pub in_flight: AtomicUsize,
21 |     /// Total successful requests.
22 |     pub success_count: AtomicU64,
23 |     /// Total failed requests.
24 |     pub failure_count: AtomicU64,
25 |     /// Total latency in microseconds.
26 |     pub total_latency_us: AtomicU64,
27 |     /// Whether the endpoint is healthy.
28 |     healthy: RwLock<bool>,
29 |     /// Last health check time.
30 |     last_health_check: RwLock<Option<Instant>>,
31 |     /// Consecutive failures.
32 |     consecutive_failures: AtomicUsize,
33 | }
34 | 
35 | impl Endpoint {
36 |     /// Create a new endpoint from configuration.
37 |     pub fn new(config: EndpointConfig) -> Self {
38 |         Self {
39 |             config,
40 |             in_flight: AtomicUsize::new(0),
41 |             success_count: AtomicU64::new(0),
42 |             failure_count: AtomicU64::new(0),
43 |             total_latency_us: AtomicU64::new(0),
44 |             healthy: RwLock::new(true),
45 |             last_health_check: RwLock::new(None),
46 |             consecutive_failures: AtomicUsize::new(0),
47 |         }
48 |     }
49 | 
50 |     /// Get the endpoint URL.
51 |     pub fn url(&self) -> &str {
52 |         &self.config.url
53 |     }
54 | 
55 |     /// Get the API key if configured.
56 |     pub fn api_key(&self) -> Option<&str> {
57 |         self.config.api_key.as_deref()
58 |     }
59 | 
60 |     /// Get the model if configured.
61 |     pub fn model(&self) -> Option<&str> {
62 |         self.config.model.as_deref()
63 |     }
64 | 
65 |     /// Check if the endpoint is healthy.
66 |     pub fn is_healthy(&self) -> bool {
67 |         *self.healthy.read()
68 |     }
69 | 
70 |     /// Mark the endpoint as healthy.
71 |     pub fn mark_healthy(&self) {
72 |         *self.healthy.write() = true;
73 |         self.consecutive_failures.store(0, Ordering::Relaxed);
74 |     }
75 | 
76 |     /// Mark the endpoint as unhealthy.
77 |     pub fn mark_unhealthy(&self) {
78 |         *self.healthy.write() = false;
79 |         *self.last_health_check.write() = Some(Instant::now());
80 |     }
81 | 
82 |     /// Check if the endpoint should be retried (after cooldown).
83 |     pub fn should_retry(&self, cooldown: Duration) -> bool {
84 |         if self.is_healthy() {
85 |             return true;
86 |         }
87 | 
88 |         let last_check = self.last_health_check.read();
89 |         match *last_check {
90 |             Some(instant) => instant.elapsed() >= cooldown,
91 |             None => true,
92 |         }
93 |     }
94 | 
95 |     /// Record a successful request.
96 |     pub fn record_success(&self, latency: Duration) {
97 |         self.success_count.fetch_add(1, Ordering::Relaxed);
98 |         self.total_latency_us
99 |             .fetch_add(latency.as_micros() as u64, Ordering::Relaxed);
100 |         self.consecutive_failures.store(0, Ordering::Relaxed);
101 |         self.mark_healthy();
102 |     }
103 | 
104 |     /// Record a failed request.
105 |     pub fn record_failure(&self) {
106 |         self.failure_count.fetch_add(1, Ordering::Relaxed);
107 |         let failures = self.consecutive_failures.fetch_add(1, Ordering::Relaxed) + 1;
108 | 
109 |         // Mark unhealthy after 3 consecutive failures
110 |         if failures >= 3 {
111 |             self.mark_unhealthy();
112 |         }
113 |     }
114 | 
115 |     /// Check if we can send more requests to this endpoint.
116 |     pub fn can_accept(&self) -> bool {
117 |         self.in_flight.load(Ordering::Relaxed) < self.config.max_concurrent as usize
118 |     }
119 | 
120 |     /// Acquire a slot for sending a request.
121 |     pub fn acquire(&self) -> bool {
122 |         let current = self.in_flight.load(Ordering::Relaxed);
123 |         if current >= self.config.max_concurrent as usize {
124 |             return false;
125 |         }
126 |         self.in_flight.fetch_add(1, Ordering::Relaxed);
127 |         true
128 |     }
129 | 
130 |     /// Release a slot after completing a request.
131 |     pub fn release(&self) {
132 |         self.in_flight.fetch_sub(1, Ordering::Relaxed);
133 |     }
134 | 
135 |     /// Get average latency in milliseconds.
136 |     pub fn avg_latency_ms(&self) -> f64 {
137 |         let total = self.total_latency_us.load(Ordering::Relaxed);
138 |         let count = self.success_count.load(Ordering::Relaxed);
139 |         if count == 0 {
140 |             0.0
141 |         } else {
142 |             (total as f64 / count as f64) / 1000.0
143 |         }
144 |     }
145 | }
146 | 
147 | /// Weighted load balancer for distributing requests across endpoints.
148 | #[derive(Debug)]
149 | pub struct LoadBalancer {
150 |     endpoints: Vec<Arc<Endpoint>>,
151 |     #[allow(dead_code)]
152 |     total_weight: u32,
153 | }
154 | 
155 | impl LoadBalancer {
156 |     /// Create a new load balancer from endpoint configurations.
157 |     pub fn new(configs: Vec<EndpointConfig>) -> Result<Self> {
158 |         if configs.is_empty() {
159 |             return Err(BlazeError::NoEndpoints);
160 |         }
161 | 
162 |         let endpoints: Vec<Arc<Endpoint>> = configs
163 |             .into_iter()
164 |             .map(|c| Arc::new(Endpoint::new(c)))
165 |             .collect();
166 | 
167 |         let total_weight = endpoints.iter().map(|e| e.config.weight).sum();
168 | 
169 |         Ok(Self {
170 |             endpoints,
171 |             total_weight,
172 |         })
173 |     }
174 | 
175 |     /// Select an endpoint using weighted random selection.
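    ///
    /// Healthy endpoints with spare capacity are preferred; if none exist,
    /// endpoints whose 30-second unhealthy cooldown has elapsed are tried.
    /// Within each group the pick is weight-proportional, so a `weight: 2`
    /// endpoint receives roughly twice the traffic of a `weight: 1` peer.
    ///
    /// ```rust
    /// use blaze_api::{EndpointConfig, LoadBalancer};
    ///
    /// let lb = LoadBalancer::new(vec![EndpointConfig {
    ///     url: "http://a.test".to_string(),
    ///     weight: 2,
    ///     api_key: None,
    ///     model: None,
    ///     max_concurrent: 100,
    /// }])?;
    /// let endpoint = lb.select()?;
    /// assert_eq!(endpoint.url(), "http://a.test");
    /// # Ok::<(), blaze_api::BlazeError>(())
    /// ```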
176 |     pub fn select(&self) -> Result<Arc<Endpoint>> {
177 |         self.select_with_cooldown(Duration::from_secs(30))
178 |     }
179 | 
180 |     /// Select an endpoint with a custom cooldown for unhealthy endpoints.
181 |     pub fn select_with_cooldown(&self, cooldown: Duration) -> Result<Arc<Endpoint>> {
182 |         // First, try to find a healthy endpoint with capacity
183 |         let available: Vec<_> = self
184 |             .endpoints
185 |             .iter()
186 |             .filter(|e| e.is_healthy() && e.can_accept())
187 |             .collect();
188 | 
189 |         if !available.is_empty() {
190 |             return Ok(self.weighted_select(&available));
191 |         }
192 | 
193 |         // If no healthy endpoints, try endpoints past their cooldown
194 |         let recovering: Vec<_> = self
195 |             .endpoints
196 |             .iter()
197 |             .filter(|e| e.should_retry(cooldown) && e.can_accept())
198 |             .collect();
199 | 
200 |         if !recovering.is_empty() {
201 |             return Ok(self.weighted_select(&recovering));
202 |         }
203 | 
204 |         Err(BlazeError::AllEndpointsUnhealthy)
205 |     }
206 | 
207 |     /// Perform weighted random selection.
208 |     fn weighted_select(&self, endpoints: &[&Arc<Endpoint>]) -> Arc<Endpoint> {
209 |         let total: u32 = endpoints.iter().map(|e| e.config.weight).sum();
210 |         let mut rng = rand::rng();
211 |         let mut pick = rng.random_range(0..total);
212 | 
213 |         for endpoint in endpoints {
214 |             if pick < endpoint.config.weight {
215 |                 return Arc::clone(endpoint);
216 |             }
217 |             pick -= endpoint.config.weight;
218 |         }
219 | 
220 |         // Fallback to first endpoint (shouldn't happen)
221 |         Arc::clone(endpoints[0])
222 |     }
223 | 
224 |     /// Get all endpoints.
225 |     pub fn endpoints(&self) -> &[Arc<Endpoint>] {
226 |         &self.endpoints
227 |     }
228 | 
229 |     /// Get the number of healthy endpoints.
230 |     pub fn healthy_count(&self) -> usize {
231 |         self.endpoints.iter().filter(|e| e.is_healthy()).count()
232 |     }
233 | 
234 |     /// Get the total number of in-flight requests.
235 | pub fn total_in_flight(&self) -> usize { 236 | self.endpoints 237 | .iter() 238 | .map(|e| e.in_flight.load(Ordering::Relaxed)) 239 | .sum() 240 | } 241 | } 242 | 243 | #[cfg(test)] 244 | mod tests { 245 | use super::*; 246 | 247 | fn test_endpoint() -> EndpointConfig { 248 | EndpointConfig { 249 | url: "http://localhost:8080".to_string(), 250 | weight: 1, 251 | api_key: None, 252 | model: None, 253 | max_concurrent: 100, 254 | } 255 | } 256 | 257 | #[test] 258 | fn test_endpoint_health() { 259 | let endpoint = Endpoint::new(test_endpoint()); 260 | assert!(endpoint.is_healthy()); 261 | 262 | endpoint.mark_unhealthy(); 263 | assert!(!endpoint.is_healthy()); 264 | 265 | endpoint.mark_healthy(); 266 | assert!(endpoint.is_healthy()); 267 | } 268 | 269 | #[test] 270 | fn test_endpoint_stats() { 271 | let endpoint = Endpoint::new(test_endpoint()); 272 | 273 | endpoint.record_success(Duration::from_millis(100)); 274 | endpoint.record_success(Duration::from_millis(200)); 275 | 276 | assert_eq!(endpoint.success_count.load(Ordering::Relaxed), 2); 277 | assert_eq!(endpoint.avg_latency_ms(), 150.0); 278 | } 279 | 280 | #[test] 281 | fn test_load_balancer() { 282 | let configs = vec![ 283 | EndpointConfig { 284 | url: "http://a.test".to_string(), 285 | weight: 1, 286 | api_key: None, 287 | model: None, 288 | max_concurrent: 100, 289 | }, 290 | EndpointConfig { 291 | url: "http://b.test".to_string(), 292 | weight: 2, 293 | api_key: None, 294 | model: None, 295 | max_concurrent: 100, 296 | }, 297 | ]; 298 | 299 | let lb = LoadBalancer::new(configs).unwrap(); 300 | assert_eq!(lb.endpoints().len(), 2); 301 | assert_eq!(lb.healthy_count(), 2); 302 | } 303 | } 304 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | //! Configuration management for Blaze API. 2 | //! 3 | //! Supports configuration via CLI arguments, environment variables, 4 | //! and configuration files with sensible defaults. 5 | 6 | use crate::error::{BlazeError, Result}; 7 | use clap::Parser; 8 | use serde::{Deserialize, Serialize}; 9 | use std::num::NonZeroU32; 10 | use std::path::PathBuf; 11 | use std::time::Duration; 12 | 13 | /// CLI arguments for the Blaze API client. 
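///
/// All flags except `--dry-run` can also be supplied through the environment
/// variable named on their field (the `BLAZE_*` family), with explicit CLI
/// values taking precedence, so the same invocation works interactively and
/// in CI.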
14 | #[derive(Parser, Debug, Clone)]
15 | #[command(
16 |     name = "blaze",
17 |     author = "Yiğit Konur",
18 |     version,
19 |     about = "🔥 High-performance async API client with load balancing",
20 |     long_about = "Blaze API is a blazing-fast API client designed for batch LLM processing.\n\n\
21 |                   It supports weighted load balancing, automatic retries with exponential backoff,\n\
22 |                   and can handle 10,000+ requests per second on modest hardware.",
23 |     after_help = "EXAMPLES:\n  \
24 |                   blaze --input requests.jsonl --output results.jsonl\n  \
25 |                   blaze -i data.jsonl -o out.jsonl --rate 5000 --workers 100\n  \
26 |                   blaze --config endpoints.json --input batch.jsonl"
27 | )]
28 | pub struct Args {
29 |     /// Path to the JSONL file containing requests
30 |     #[arg(short, long, env = "BLAZE_INPUT")]
31 |     pub input: PathBuf,
32 | 
33 |     /// Path to save successful responses (optional)
34 |     #[arg(short, long, env = "BLAZE_OUTPUT")]
35 |     pub output: Option<PathBuf>,
36 | 
37 |     /// Path to save error responses
38 |     #[arg(short, long, default_value = "errors.jsonl", env = "BLAZE_ERRORS")]
39 |     pub errors: PathBuf,
40 | 
41 |     /// Maximum requests per second
42 |     #[arg(short, long, default_value = "1000", env = "BLAZE_RATE")]
43 |     pub rate: u32,
44 | 
45 |     /// Maximum retry attempts per request
46 |     #[arg(short = 'a', long, default_value = "3", env = "BLAZE_MAX_ATTEMPTS")]
47 |     pub max_attempts: u32,
48 | 
49 |     /// Number of concurrent workers
50 |     #[arg(short, long, default_value = "50", env = "BLAZE_WORKERS")]
51 |     pub workers: usize,
52 | 
53 |     /// Request timeout in seconds
54 |     #[arg(short, long, default_value = "30", env = "BLAZE_TIMEOUT")]
55 |     pub timeout: u64,
56 | 
57 |     /// Path to endpoint configuration file (JSON)
58 |     #[arg(short, long, env = "BLAZE_CONFIG")]
59 |     pub config: Option<PathBuf>,
60 | 
61 |     /// Enable verbose logging
62 |     #[arg(short, long, env = "BLAZE_VERBOSE")]
63 |     pub verbose: bool,
64 | 
65 |     /// Output logs as JSON
66 |     #[arg(long, env = "BLAZE_JSON_LOGS")]
67 |     pub json_logs: bool,
68 | 
69 |     /// Disable progress bar
70 |     #[arg(long, env = "BLAZE_NO_PROGRESS")]
71 |     pub no_progress: bool,
72 | 
73 |     /// Dry run - validate config without sending requests
74 |     #[arg(long)]
75 |     pub dry_run: bool,
76 | }
77 | 
78 | impl Args {
79 |     /// Parse CLI arguments.
80 |     pub fn parse_args() -> Self {
81 |         Self::parse()
82 |     }
83 | }
84 | 
85 | /// Configuration for a single API endpoint.
86 | #[derive(Debug, Clone, Serialize, Deserialize)]
87 | pub struct EndpointConfig {
88 |     /// The endpoint URL.
89 |     pub url: String,
90 | 
91 |     /// Weight for load balancing (higher = more traffic).
92 |     #[serde(default = "default_weight")]
93 |     pub weight: u32,
94 | 
95 |     /// API key for authentication.
96 |     #[serde(default)]
97 |     pub api_key: Option<String>,
98 | 
99 |     /// Model identifier (for LLM endpoints).
100 |     #[serde(default)]
101 |     pub model: Option<String>,
102 | 
103 |     /// Maximum concurrent requests to this endpoint.
104 |     #[serde(default = "default_max_concurrent")]
105 |     pub max_concurrent: u32,
106 | }
107 | 
108 | fn default_weight() -> u32 {
109 |     1
110 | }
111 | 
112 | fn default_max_concurrent() -> u32 {
113 |     100
114 | }
115 | 
116 | /// Full application configuration.
117 | #[derive(Debug, Clone, Serialize, Deserialize)]
118 | pub struct Config {
119 |     /// API endpoints for load balancing.
120 |     pub endpoints: Vec<EndpointConfig>,
121 | 
122 |     /// Request settings.
123 |     #[serde(default)]
124 |     pub request: RequestConfig,
125 | 
126 |     /// Retry settings.
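    /// Durations in the JSON config (`request.timeout` and the backoffs here)
    /// accept `"100ms"`, `"10s"`, or a bare-seconds string such as `"30"`.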
127 |     #[serde(default)]
128 |     pub retry: RetryConfig,
129 | }
130 | 
131 | /// Request-specific configuration.
132 | #[derive(Debug, Clone, Serialize, Deserialize)]
133 | pub struct RequestConfig {
134 |     /// Request timeout.
135 |     #[serde(with = "humantime_serde", default = "default_timeout")]
136 |     pub timeout: Duration,
137 | 
138 |     /// Maximum requests per second.
139 |     #[serde(default = "default_rate")]
140 |     pub rate_limit: u32,
141 | 
142 |     /// Number of concurrent workers.
143 |     #[serde(default = "default_workers")]
144 |     pub workers: usize,
145 | }
146 | 
147 | impl Default for RequestConfig {
148 |     fn default() -> Self {
149 |         Self {
150 |             timeout: default_timeout(),
151 |             rate_limit: default_rate(),
152 |             workers: default_workers(),
153 |         }
154 |     }
155 | }
156 | 
157 | fn default_timeout() -> Duration {
158 |     Duration::from_secs(30)
159 | }
160 | 
161 | fn default_rate() -> u32 {
162 |     1000
163 | }
164 | 
165 | fn default_workers() -> usize {
166 |     50
167 | }
168 | 
169 | /// Retry configuration.
170 | #[derive(Debug, Clone, Serialize, Deserialize)]
171 | pub struct RetryConfig {
172 |     /// Maximum number of retry attempts.
173 |     #[serde(default = "default_max_attempts")]
174 |     pub max_attempts: u32,
175 | 
176 |     /// Initial backoff duration.
177 |     #[serde(with = "humantime_serde", default = "default_initial_backoff")]
178 |     pub initial_backoff: Duration,
179 | 
180 |     /// Maximum backoff duration.
181 |     #[serde(with = "humantime_serde", default = "default_max_backoff")]
182 |     pub max_backoff: Duration,
183 | 
184 |     /// Backoff multiplier.
185 |     #[serde(default = "default_multiplier")]
186 |     pub multiplier: f64,
187 | }
188 | 
189 | impl Default for RetryConfig {
190 |     fn default() -> Self {
191 |         Self {
192 |             max_attempts: default_max_attempts(),
193 |             initial_backoff: default_initial_backoff(),
194 |             max_backoff: default_max_backoff(),
195 |             multiplier: default_multiplier(),
196 |         }
197 |     }
198 | }
199 | 
200 | fn default_max_attempts() -> u32 {
201 |     3
202 | }
203 | 
204 | fn default_initial_backoff() -> Duration {
205 |     Duration::from_millis(100)
206 | }
207 | 
208 | fn default_max_backoff() -> Duration {
209 |     Duration::from_secs(10)
210 | }
211 | 
212 | fn default_multiplier() -> f64 {
213 |     2.0
214 | }
215 | 
216 | impl Config {
217 |     /// Load configuration from a file.
218 |     pub fn from_file(path: &PathBuf) -> Result<Self> {
219 |         let content = std::fs::read_to_string(path).map_err(|e| BlazeError::InputFileRead {
220 |             path: path.clone(),
221 |             source: e,
222 |         })?;
223 | 
224 |         serde_json::from_str(&content).map_err(|e| BlazeError::JsonParse { line: 0, source: e })
225 |     }
226 | 
227 |     /// Create configuration from CLI arguments.
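    ///
    /// When `--config` is given, the file supplies the endpoints and the CLI
    /// values for rate, workers, timeout, and max attempts override it.
    /// Without a file, a single endpoint is built from `BLAZE_ENDPOINT_URL`
    /// (defaulting to `http://localhost:8080/v1/completions`) plus optional
    /// `BLAZE_API_KEY` and `BLAZE_MODEL`. The result is validated either way.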
228 |     pub fn from_args(args: &Args) -> Result<Self> {
229 |         let config = if let Some(config_path) = &args.config {
230 |             let mut config = Self::from_file(config_path)?;
231 |             // Override with CLI args
232 |             config.request.rate_limit = args.rate;
233 |             config.request.workers = args.workers;
234 |             config.request.timeout = Duration::from_secs(args.timeout);
235 |             config.retry.max_attempts = args.max_attempts;
236 |             config
237 |         } else {
238 |             // Use default endpoint from environment or error
239 |             let endpoint = EndpointConfig {
240 |                 url: std::env::var("BLAZE_ENDPOINT_URL")
241 |                     .unwrap_or_else(|_| "http://localhost:8080/v1/completions".to_string()),
242 |                 weight: 1,
243 |                 api_key: std::env::var("BLAZE_API_KEY").ok(),
244 |                 model: std::env::var("BLAZE_MODEL").ok(),
245 |                 max_concurrent: 100,
246 |             };
247 | 
248 |             Self {
249 |                 endpoints: vec![endpoint],
250 |                 request: RequestConfig {
251 |                     timeout: Duration::from_secs(args.timeout),
252 |                     rate_limit: args.rate,
253 |                     workers: args.workers,
254 |                 },
255 |                 retry: RetryConfig {
256 |                     max_attempts: args.max_attempts,
257 |                     ..Default::default()
258 |                 },
259 |             }
260 |         };
261 | 
262 |         config.validate()?;
263 |         Ok(config)
264 |     }
265 | 
266 |     /// Validate the configuration.
267 |     pub fn validate(&self) -> Result<()> {
268 |         if self.endpoints.is_empty() {
269 |             return Err(BlazeError::NoEndpoints);
270 |         }
271 | 
272 |         for endpoint in &self.endpoints {
273 |             if endpoint.url.is_empty() {
274 |                 return Err(BlazeError::InvalidConfig(
275 |                     "endpoint URL cannot be empty".to_string(),
276 |                 ));
277 |             }
278 |             if endpoint.weight == 0 {
279 |                 return Err(BlazeError::InvalidConfig(
280 |                     "endpoint weight must be greater than 0".to_string(),
281 |                 ));
282 |             }
283 |         }
284 | 
285 |         if self.request.workers == 0 {
286 |             return Err(BlazeError::InvalidConfig(
287 |                 "workers must be greater than 0".to_string(),
288 |             ));
289 |         }
290 | 
291 |         Ok(())
292 |     }
293 | 
294 |     /// Get the rate limit as a NonZeroU32.
295 |     pub fn rate_limit_nonzero(&self) -> NonZeroU32 {
296 |         NonZeroU32::new(self.request.rate_limit).unwrap_or(NonZeroU32::MIN)
297 |     }
298 | }
299 | 
300 | /// Custom serde module for humantime Duration parsing.
301 | mod humantime_serde {
302 |     use serde::{Deserialize, Deserializer, Serializer};
303 |     use std::time::Duration;
304 | 
305 |     pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
306 |     where
307 |         S: Serializer,
308 |     {
309 |         serializer.serialize_str(&format!("{}s", duration.as_secs()))
310 |     }
311 | 
312 |     pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
313 |     where
314 |         D: Deserializer<'de>,
315 |     {
316 |         let s = String::deserialize(deserializer)?;
317 |         // Check "ms" before 's': a value like "100ms" also ends in 's'.
318 |         if let Some(ms) = s.strip_suffix("ms") {
319 |             ms.parse::<u64>()
320 |                 .map(Duration::from_millis)
321 |                 .map_err(serde::de::Error::custom)
322 |         } else if let Some(secs) = s.strip_suffix('s') {
323 |             secs.parse::<u64>()
324 |                 .map(Duration::from_secs)
325 |                 .map_err(serde::de::Error::custom)
326 |         } else {
327 |             s.parse::<u64>()
328 |                 .map(Duration::from_secs)
329 |                 .map_err(serde::de::Error::custom)
330 |         }
331 |     }
332 | }
333 | 
--------------------------------------------------------------------------------
/src/processor.rs:
--------------------------------------------------------------------------------
1 | //! Main processing orchestration for batch API requests.
2 | //!
3 | //! This module coordinates reading requests, distributing them across
4 | //! endpoints, and writing results with rate limiting and concurrency control.
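//!
//! The pipeline reads every JSONL line up front (so progress totals are
//! known), then drives the requests through a `governor` rate limiter and
//! `buffer_unordered` with `workers` concurrent futures, appending each
//! result to the output or error file as it completes.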

use crate::client::ApiClient;
use crate::config::Config;
use crate::endpoint::LoadBalancer;
use crate::error::{BlazeError, Result};
use crate::request::{ApiRequest, RequestResult};
use crate::tracker::StatsTracker;
use futures::stream::{self, StreamExt};
use governor::{Quota, RateLimiter};
use indicatif::{ProgressBar, ProgressStyle};
use std::num::NonZeroU32;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::fs::File;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader, BufWriter};
// Async-aware mutex: the writer locks below are held across `.await` points,
// which a blocking mutex must never be.
use tokio::sync::Mutex;
use tracing::{info, warn};

/// Processor for batch API requests.
pub struct Processor {
    config: Arc<Config>,
    client: ApiClient,
    load_balancer: Arc<LoadBalancer>,
    stats: Arc<StatsTracker>,
}

impl Processor {
    /// Create a new processor.
    pub fn new(config: Config) -> Result<Self> {
        let config = Arc::new(config);
        let client = ApiClient::new(Arc::clone(&config))?;
        let load_balancer = Arc::new(LoadBalancer::new(config.endpoints.clone())?);
        let stats = Arc::new(StatsTracker::new());

        Ok(Self {
            config,
            client,
            load_balancer,
            stats,
        })
    }

    /// Process requests from a file.
    pub async fn process_file(
        &self,
        input_path: PathBuf,
        output_path: Option<PathBuf>,
        error_path: PathBuf,
        show_progress: bool,
    ) -> Result<ProcessingResult> {
        // Read all requests first to get total count
        let requests = self.read_requests(&input_path).await?;
        let total = requests.len();

        info!(total_requests = total, "Loaded requests from file");
        self.stats.set_total_lines(total);

        // Setup output files
        let output_writer = if let Some(path) = &output_path {
            let file = File::create(path).await.map_err(|e| BlazeError::OutputFileWrite {
                path: path.clone(),
                source: e,
            })?;
            Some(Arc::new(Mutex::new(BufWriter::new(file))))
        } else {
            None
        };

        let error_file = File::create(&error_path).await.map_err(|e| BlazeError::OutputFileWrite {
            path: error_path.clone(),
            source: e,
        })?;
        let error_writer = Arc::new(Mutex::new(BufWriter::new(error_file)));

        // Setup progress bar
        let progress = if show_progress {
            let pb = ProgressBar::new(total as u64);
            pb.set_style(
                ProgressStyle::default_bar()
                    .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({percent}%) | {msg}")
                    .unwrap()
                    .progress_chars("█▓▒░"),
            );
            pb.enable_steady_tick(Duration::from_millis(100));
            Some(pb)
        } else {
            None
        };

        // Setup rate limiter
        let rate_limiter = RateLimiter::direct(Quota::per_second(
            NonZeroU32::new(self.config.request.rate_limit).unwrap_or(NonZeroU32::MIN),
        ));

        // Process requests concurrently
        let workers = self.config.request.workers;
        let results = stream::iter(requests)
            .map(|request| {
                let client = self.client.clone();
                let lb = Arc::clone(&self.load_balancer);
                let stats = Arc::clone(&self.stats);
                let rate_limiter = &rate_limiter;
                let output = output_writer.clone();
                let errors = Arc::clone(&error_writer);
                let progress = progress.clone();

                async move {
                    // Wait for rate limiter
                    rate_limiter.until_ready().await;

                    // Select an endpoint
                    let endpoint = match lb.select() {
                        Ok(ep) => ep,
                        Err(e) => {
                            warn!("Failed to select endpoint: {}", e);
                            return Err(e);
                        }
                    };

                    // Acquire a slot (best effort: back off briefly if the
                    // endpoint is at capacity, then proceed regardless).
                    if !endpoint.acquire() {
                        tokio::time::sleep(Duration::from_millis(10)).await;
                        if !endpoint.acquire() {
                            warn!("Endpoint at capacity, waiting...");
                            tokio::time::sleep(Duration::from_millis(100)).await;
                            endpoint.acquire();
                        }
                    }

                    // Send request
                    let result = client.send_with_retry(&request, endpoint).await;

                    // Record stats and write output
                    match &result {
                        RequestResult::Success(response) => {
                            let latency = response
                                .metadata
                                .as_ref()
                                .map(|m| Duration::from_millis(m.latency_ms))
                                .unwrap_or_default();
                            stats.record_success(latency);

                            if let Some(writer) = &output {
                                let line = serde_json::to_string(&response).unwrap_or_default();
                                // The async lock lets us await the write without
                                // blocking other workers on this thread.
                                let mut w = writer.lock().await;
                                let _ = w.write_all(line.as_bytes()).await;
                                let _ = w.write_all(b"\n").await;
                            }
                        }
                        RequestResult::Failure(error) => {
                            stats.record_failure();
                            let line = serde_json::to_string(&error).unwrap_or_default();
                            let mut w = errors.lock().await;
                            let _ = w.write_all(line.as_bytes()).await;
                            let _ = w.write_all(b"\n").await;
                        }
                    }

                    // Update progress bar
                    if let Some(pb) = &progress {
                        let snapshot = stats.snapshot();
                        pb.set_message(format!(
                            "RPS: {:.0} | Success: {} | Failed: {} | Latency: {:.0}ms",
                            snapshot.current_rps,
                            snapshot.success_count,
                            snapshot.failure_count,
                            snapshot.avg_latency_ms
                        ));
                        pb.inc(1);
                    }

                    Ok(result)
                }
            })
            .buffer_unordered(workers)
            .collect::<Vec<_>>()
            .await;

        // Flush writers
        if let Some(writer) = &output_writer {
            let mut w = writer.lock().await;
            w.flush().await.ok();
        }
        {
            let mut w = error_writer.lock().await;
            w.flush().await.ok();
        }

        // Finish progress bar
        if let Some(pb) = &progress {
            pb.finish_with_message("Complete!");
        }

        // Build result
        let snapshot = self.stats.snapshot();
        let success_count = results
            .iter()
            .filter(|r| r.as_ref().map(|r| r.is_success()).unwrap_or(false))
            .count();
        let failure_count = results.len() - success_count;

        Ok(ProcessingResult {
            total_processed: results.len(),
            success_count,
            failure_count,
            elapsed: snapshot.elapsed,
            avg_latency_ms: snapshot.avg_latency_ms,
            overall_rps: snapshot.overall_rps,
        })
    }

    /// Read requests from a JSONL file.
    async fn read_requests(&self, path: &PathBuf) -> Result<Vec<ApiRequest>> {
        let file = File::open(path).await.map_err(|e| BlazeError::InputFileRead {
            path: path.clone(),
            source: e,
        })?;

        let reader = BufReader::new(file);
        let mut lines = reader.lines();
        let mut requests = Vec::new();
        let mut line_number = 0;

        while let Some(line) = lines.next_line().await.map_err(|e| BlazeError::InputFileRead {
            path: path.clone(),
            source: e,
        })?
        {
            line_number += 1;

            // Skip empty lines
            let trimmed = line.trim();
            if trimmed.is_empty() {
                continue;
            }

            let mut request: ApiRequest =
                serde_json::from_str(trimmed).map_err(|e| BlazeError::JsonParse {
                    line: line_number,
                    source: e,
                })?;

            request.line_number = line_number;
            requests.push(request);
        }

        Ok(requests)
    }

    /// Get the current stats snapshot.
    pub fn stats(&self) -> crate::tracker::StatsSnapshot {
        self.stats.snapshot()
    }

    /// Get the load balancer.
    pub fn load_balancer(&self) -> &LoadBalancer {
        &self.load_balancer
    }
}

/// Result of processing a batch of requests.
#[derive(Debug)]
pub struct ProcessingResult {
    /// Total requests processed.
    pub total_processed: usize,
    /// Successful requests.
    pub success_count: usize,
    /// Failed requests.
    pub failure_count: usize,
    /// Total elapsed time.
    pub elapsed: Duration,
    /// Average latency in milliseconds.
    pub avg_latency_ms: f64,
    /// Overall requests per second.
    pub overall_rps: f64,
}

impl ProcessingResult {
    /// Get the success rate as a percentage.
    pub fn success_rate(&self) -> f64 {
        if self.total_processed > 0 {
            (self.success_count as f64 / self.total_processed as f64) * 100.0
        } else {
            100.0
        }
    }

    /// Print a summary of the results.
    pub fn print_summary(&self) {
        println!("\n{}", "═".repeat(60));
        println!("  PROCESSING COMPLETE");
        println!("{}", "═".repeat(60));
        println!("  Total Processed:  {}", self.total_processed);
        println!(
            "  Successful:       {} ({:.1}%)",
            self.success_count,
            self.success_rate()
        );
        println!("  Failed:           {}", self.failure_count);
        println!("  Elapsed Time:     {:.2}s", self.elapsed.as_secs_f64());
        println!("  Avg Latency:      {:.1}ms", self.avg_latency_ms);
        println!("  Throughput:       {:.0} req/sec", self.overall_rps);
        println!("{}", "═".repeat(60));
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 🔥 Blaze API 🔥

**Stop waiting for API responses. Start blazing through them.**

The ultimate batch API client for your LLM workloads. It load-balances across endpoints, retries intelligently, and processes 10,000+ requests per second on a laptop.

<!-- badges: crates.io · rust · MIT license · platform · zero config · 10k rps -->

### 🧭 Quick Navigation

[**⚡ Get Started**](#-get-started-in-60-seconds) •
[**✨ Key Features**](#-feature-breakdown-the-secret-sauce) •
[**🎮 Usage & Examples**](#-usage-fire-and-forget) •
[**⚙️ Configuration**](#%EF%B8%8F-configuration) •
[**🆚 Why Blaze**](#-why-blaze-slaps-other-methods)

---

**Blaze API** is the batch processor your LLM workloads deserve. Stop writing brittle Python scripts that crash at 100 req/sec. This tool acts like a fleet of pro API consumers, intelligently distributing requests across endpoints, handling failures gracefully, and maxing out your API capacity without breaking a sweat.
| ⚡ **Blazing Fast** | 🎯 **Smart Load Balancing** | 🔄 **Auto Retry** | 📊 **Real-time Stats** |
| :---: | :---: | :---: | :---: |
| 10K+ req/sec on 8 cores | Weighted distribution across endpoints | Exponential backoff with jitter | Progress, RPS, latency tracking |
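The load-balancing card is easy to picture concretely: each endpoint receives traffic in proportion to its `weight`. A minimal, dependency-free sketch of weight-proportional selection (illustrative only — the crate's actual `LoadBalancer` also tracks per-endpoint health and concurrency):

```rust
/// Pick an index with frequency proportional to its weight.
fn pick_weighted(weights: &[u32], tick: u32) -> usize {
    let total: u32 = weights.iter().sum();
    let mut roll = tick % total; // deterministic stand-in for a random roll
    for (i, &w) in weights.iter().enumerate() {
        if roll < w {
            return i;
        }
        roll -= w;
    }
    unreachable!("total weight is positive")
}

fn main() {
    // Two endpoints weighted 2:1, as in the config examples below:
    // out of every 3 picks, endpoint 0 gets 2 and endpoint 1 gets 1.
    let picks: Vec<usize> = (0..6).map(|t| pick_weighted(&[2, 1], t)).collect();
    println!("{picks:?}"); // [0, 0, 1, 0, 0, 1]
}
```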
How it slaps:

- **You:** `blaze -i requests.jsonl -o results.jsonl`
- **Blaze:** Load balances, retries failures, tracks progress, writes results.
- **You:** Go grab a coffee while 100K requests complete. ☕
- **Result:** Perfectly formatted JSONL with every response. Zero babysitting.

---

## 💥 Why Blaze Slaps Other Methods

Manually scripting API requests is a vibe-killer. Blaze makes other methods look ancient.

| ❌ The Old Way (Pain) | ✅ The Blaze Way (Glory) |
| :--- | :--- |
| 1. Write Python script with asyncio.<br>2. Hit GIL limits at 500 req/sec.<br>3. Script crashes, lose progress.<br>4. Add retry logic, still flaky.<br>5. Manually restart, pray it works. | 1. `blaze -i data.jsonl -o out.jsonl`<br>2. Watch the progress bar fly.<br>3. Failures auto-retry with backoff.<br>4. Results stream to disk instantly.<br>5. Go grab a coffee. ☕ |

We're not just sending requests. We're building a **high-throughput, fault-tolerant pipeline** with weighted load balancing, connection pooling, and intelligent retry logic that actually respects your API provider's limits.
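That connection-pooling point is where much of the speed comes from: reusing warm connections skips a TCP + TLS handshake per request. A rough sketch of a pooled `reqwest` client in that spirit (illustrative settings, not Blaze's exact client setup):

```rust
use std::time::Duration;

fn build_client() -> reqwest::Result<reqwest::Client> {
    reqwest::Client::builder()
        // Keep warm connections around so requests skip the handshake.
        .pool_max_idle_per_host(100)
        .pool_idle_timeout(Duration::from_secs(90))
        .timeout(Duration::from_secs(30))
        .build()
}
```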
---

## 🚀 Get Started in 60 Seconds

| Platform | Method | Command |
|:--------:|:------:|:--------|
| 🦀 **All** | Cargo | `cargo install blaze-api` |
| 🍎 **macOS** | Homebrew | `brew install yigitkonur/tap/blaze` |
| 🐧 **Linux** | Binary | See [releases](https://github.com/yigitkonur/blaze-api/releases) |
| 🪟 **Windows** | Binary | See [releases](https://github.com/yigitkonur/blaze-api/releases) |
### 🦀 From Source (Recommended for Development)

```bash
# Clone and build
git clone https://github.com/yigitkonur/blaze-api.git
cd blaze-api
cargo build --release

# Binary is at ./target/release/blaze
```

### 📦 From crates.io

```bash
cargo install blaze-api
```

> **✨ Zero Config:** After installation, `blaze` is ready to go. Just point it at your JSONL file!

---

## 🎮 Usage: Fire and Forget

The workflow is dead simple.

### Basic Usage

```bash
# Process requests and save results
blaze --input requests.jsonl --output results.jsonl

# Short flags work too
blaze -i requests.jsonl -o results.jsonl

# High-throughput mode (10K req/sec)
blaze -i data.jsonl -o out.jsonl --rate 10000 --workers 200
```

### With Custom Endpoints

```bash
# Use a config file for multiple endpoints
blaze -i requests.jsonl -o results.jsonl --config endpoints.json

# Or set via environment
export BLAZE_ENDPOINT_URL="https://api.openai.com/v1/completions"
export BLAZE_API_KEY="sk-..."
export BLAZE_MODEL="gpt-4"
blaze -i requests.jsonl -o results.jsonl
```

### Input Format

Your `requests.jsonl` file should have one JSON object per line:

```jsonl
{"input": "What is the capital of France?"}
{"input": "Explain quantum computing in simple terms."}
{"input": "Write a haiku about Rust programming."}
```

Or with custom request bodies:

```jsonl
{"body": {"messages": [{"role": "user", "content": "Hello!"}], "model": "gpt-4"}}
{"body": {"messages": [{"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hi!"}]}}
```

### Output Format

Results are written as JSONL:

```jsonl
{"input": "What is the capital of France?", "response": {"choices": [...]}, "metadata": {"endpoint": "...", "latency_ms": 234, "attempts": 1}}
{"input": "Explain quantum computing...", "response": {"choices": [...]}, "metadata": {"endpoint": "...", "latency_ms": 189, "attempts": 1}}
```

Errors go to `errors.jsonl`:

```jsonl
{"input": "...", "error": "HTTP 429: Rate limit exceeded", "status_code": 429, "attempts": 3}
```
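Because each error record keeps the original `input`, failed requests can be replayed: extract the inputs back into request form and run Blaze again on the result. A small helper sketch using `serde_json` (assumes `input`-style requests; adapt for `body`-style records):

```rust
use std::fs;
use std::io::Write;

fn main() -> std::io::Result<()> {
    let mut out = fs::File::create("retry.jsonl")?;
    for line in fs::read_to_string("errors.jsonl")?.lines() {
        let record: serde_json::Value = serde_json::from_str(line).expect("valid JSONL");
        // Re-emit just the original input as a fresh request line.
        writeln!(out, "{}", serde_json::json!({ "input": record["input"] }))?;
    }
    Ok(())
}
```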
---

## ✨ Feature Breakdown: The Secret Sauce

| Feature | What It Does | Why You Care |
| :---: | :--- | :--- |
| **⚡ Async Everything**<br>`Tokio runtime` | Non-blocking I/O with work-stealing scheduler | Saturates your CPU cores efficiently |
| **🎯 Weighted Load Balancing**<br>`Smart distribution` | Route traffic based on endpoint capacity | Max out multiple API keys simultaneously |
| **🔄 Exponential Backoff**<br>`With jitter` | Intelligent retry with randomized delays | Respects rate limits, avoids thundering herd |
| **📊 Real-time Progress**<br>`Live stats` | RPS, success rate, latency, ETA | Know exactly what's happening |
| **🔌 Connection Pooling**<br>`HTTP/2 keep-alive` | Reuses connections across requests | Eliminates TCP handshake overhead |
| **💾 Streaming Output**<br>`Immediate writes` | Results written as they complete | Never lose progress on crashes |
| **🏥 Health Tracking**<br>`Per-endpoint` | Automatic failover on errors | Unhealthy endpoints get cooled off |
| **🔧 Flexible Config**<br>`CLI + ENV + JSON` | Configure via args, env vars, or files | Fits any workflow |

---

## ⚙️ Configuration

### CLI Flags

```
USAGE:
    blaze [OPTIONS] --input <INPUT>

OPTIONS:
    -i, --input <INPUT>                Path to JSONL input file [env: BLAZE_INPUT]
    -o, --output <OUTPUT>              Path for successful responses [env: BLAZE_OUTPUT]
    -e, --errors <ERRORS>              Path for error responses [default: errors.jsonl]
    -r, --rate <RATE>                  Max requests per second [default: 1000]
    -w, --workers <WORKERS>            Concurrent workers [default: 50]
    -t, --timeout <TIMEOUT>            Request timeout [default: 30]
    -a, --max-attempts <MAX_ATTEMPTS>  Max retry attempts [default: 3]
    -c, --config <CONFIG>              Endpoint config file (JSON)
    -v, --verbose                      Enable debug logging
        --json-logs                    Output logs as JSON
        --no-progress                  Disable progress bar
        --dry-run                      Validate config without processing
    -h, --help                         Print help
    -V, --version                      Print version
```

### Environment Variables

All options can be set via environment variables with the `BLAZE_` prefix:

```bash
export BLAZE_INPUT="requests.jsonl"
export BLAZE_OUTPUT="results.jsonl"
export BLAZE_RATE="5000"
export BLAZE_WORKERS="100"
export BLAZE_ENDPOINT_URL="https://api.example.com/v1/completions"
export BLAZE_API_KEY="your-api-key"
export BLAZE_MODEL="gpt-4"
```

### Configuration File

For multiple endpoints, create `endpoints.json`:

```json
{
  "endpoints": [
    {
      "url": "https://api.openai.com/v1/completions",
      "weight": 2,
      "api_key": "sk-key-1",
      "model": "gpt-4",
      "max_concurrent": 100
    },
    {
      "url": "https://api.openai.com/v1/completions",
      "weight": 1,
      "api_key": "sk-key-2",
      "model": "gpt-4",
      "max_concurrent": 50
    }
  ],
  "request": {
    "timeout": "30s",
    "rate_limit": 5000,
    "workers": 100
  },
  "retry": {
    "max_attempts": 3,
    "initial_backoff": "100ms",
    "max_backoff": "10s",
    "multiplier": 2.0
  }
}
```

Then run:

```bash
blaze -i requests.jsonl -o results.jsonl --config endpoints.json
```

---

## 📈 Performance Tips

### Maximize Throughput

```bash
# For maximum speed (adjust based on your API limits)
blaze -i data.jsonl -o out.jsonl \
  --rate 10000 \
  --workers 200 \
  --timeout 60
```

### Balance Load Across Keys

```json
{
  "endpoints": [
    {"url": "...", "api_key": "key-1", "weight": 3, "max_concurrent": 150},
    {"url": "...", "api_key": "key-2", "weight": 2, "max_concurrent": 100},
    {"url": "...", "api_key": "key-3", "weight": 1, "max_concurrent": 50}
  ]
}
```

### Handle Rate Limits Gracefully

```json
{
  "retry": {
    "max_attempts": 5,
    "initial_backoff": "500ms",
    "max_backoff": "30s",
    "multiplier": 2.0
  }
}
```
---

## 🛠️ For Developers & Tinkerers

### Building from Source

```bash
git clone https://github.com/yigitkonur/blaze-api.git
cd blaze-api

# Debug build
cargo build

# Release build (optimized)
cargo build --release

# Run tests
cargo test

# Run benchmarks
cargo bench
```

### Using as a Library

```rust
use blaze_api::{Config, EndpointConfig, Processor};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let config = Config {
        endpoints: vec![EndpointConfig {
            url: "https://api.example.com/v1/completions".to_string(),
            weight: 1,
            api_key: Some("your-key".to_string()),
            model: Some("gpt-4".to_string()),
            max_concurrent: 100,
        }],
        ..Default::default()
    };

    let processor = Processor::new(config)?;
    let result = processor.process_file(
        "requests.jsonl".into(),
        Some("results.jsonl".into()),
        "errors.jsonl".into(),
        true,
    ).await?;

    result.print_summary();
    Ok(())
}
```
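The returned `ProcessingResult` exposes the same numbers `print_summary` prints, so callers can act on them programmatically — for example, failing a batch job when the success rate dips below a threshold (illustrative cutoff):

```rust
// Continuing from `result` above:
if result.success_rate() < 99.0 {
    eprintln!("Success rate too low: {:.1}%", result.success_rate());
    std::process::exit(1);
}
```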
### Project Structure

```
src/
├── lib.rs          # Library entry point
├── main.rs         # CLI binary
├── config.rs       # Configuration management
├── client.rs       # HTTP client with retry logic
├── endpoint.rs     # Load balancer implementation
├── processor.rs    # Main processing orchestration
├── request.rs      # Request/response types
├── tracker.rs      # Statistics tracking
└── error.rs        # Error types
```

---

## 🔥 Common Issues & Quick Fixes

| Problem | Solution |
| :--- | :--- |
| **"Too many open files"** | Increase ulimit: `ulimit -n 65535` |
| **Connection timeouts** | Increase `--timeout` or reduce `--workers` |
| **Rate limit errors (429)** | Lower `--rate` or add more API keys |
| **Memory usage high** | Reduce `--workers` for large requests |
| **Progress bar not showing** | Don't pipe output, or use `--no-progress --json-logs` |

**Build Issues:**

| Problem | Solution |
| :--- | :--- |
| **OpenSSL errors** | Install OpenSSL dev: `apt install libssl-dev` or use `--features rustls` |
| **Rust version error** | Update Rust: `rustup update stable` (requires 1.75+) |
---

## 🤝 Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

```bash
# Fork the repo, then:
git clone https://github.com/YOUR_USERNAME/blaze-api.git
cd blaze-api
cargo test
# Make your changes
cargo fmt
cargo clippy
cargo test
# Submit PR
```

---

## 📄 License

MIT © [Yiğit Konur](https://github.com/yigitkonur)

---
**Built with 🔥 because waiting for API responses is a soul-crushing waste of time.**

[⬆ Back to Top](#-blaze-api-)
--------------------------------------------------------------------------------