├── tests ├── test_providers.py ├── benchmarks │ ├── __init__.py │ └── README.md ├── test_utils.py ├── test_hf_api.py ├── test_exceptions.py ├── test_together.py └── __init__.py ├── cascadeflow ├── integrations │ └── langchain │ │ ├── tests │ │ └── __init__.py │ │ ├── routers │ │ └── __init__.py │ │ ├── __init__.py │ │ └── types.py ├── limits │ └── __init__.py ├── tools │ ├── __init__.py │ ├── examples.py │ ├── formats.py │ ├── result.py │ └── call.py ├── core │ ├── __init__.py │ └── batch_config.py ├── ml │ └── __init__.py ├── resilience │ └── __init__.py ├── dynamic_config │ └── __init__.py ├── utils │ ├── __init__.py │ └── caching.py ├── guardrails │ ├── __init__.py │ └── manager.py ├── profiles │ ├── __init__.py │ ├── profile_manager.py │ └── tier_config.py ├── interface │ └── __init__.py ├── streaming │ └── __init__.py ├── providers │ ├── deepseek.py │ └── __init__.py ├── schema │ └── __init__.py └── scripts │ ├── format_code.bat │ └── format_code.sh ├── packages ├── ml │ ├── .npmignore │ ├── src │ │ ├── index.ts │ │ └── types.ts │ ├── tsconfig.json │ └── package.json ├── core │ ├── .gitignore │ ├── examples │ │ ├── browser │ │ │ ├── vercel-edge │ │ │ │ ├── vercel.json │ │ │ │ ├── package.json │ │ │ │ └── api │ │ │ │ │ └── chat.ts │ │ │ └── README.md │ │ ├── nodejs │ │ │ ├── tsconfig.json │ │ │ ├── package.json │ │ │ └── test-complexity-quick.ts │ │ ├── package.json │ │ ├── run-example.sh │ │ └── scripts │ │ │ └── test-typescript.sh │ ├── src │ │ ├── config │ │ │ └── index.ts │ │ ├── tools │ │ │ └── index.ts │ │ └── streaming │ │ │ └── index.ts │ ├── tsconfig.json │ ├── .eslintrc.js │ ├── typedoc.json │ ├── quick-perf-test.sh │ ├── test-examples.sh │ ├── package.json │ └── tests │ │ └── basic-test.ts ├── integrations │ └── n8n │ │ ├── gulpfile.js │ │ ├── .eslintrc.js │ │ ├── tsconfig.json │ │ ├── nodes │ │ └── LmChatCascadeFlow │ │ │ └── cascadeflow.svg │ │ ├── package.json │ │ ├── credentials │ │ └── CascadeFlowApi.credentials.ts │ │ ├── DEPRECATE_5.0.x.sh │ 
│ └── TROUBLESHOOTING.md └── langchain-cascadeflow │ ├── vitest.config.ts │ ├── tsconfig.json │ ├── examples │ ├── inspect-metadata.ts │ ├── streaming-cascade.ts │ └── analyze-models.ts │ ├── package.json │ └── src │ ├── index.ts │ └── types.ts ├── .github ├── assets │ ├── n8n-CF.png │ ├── LC-logo-bright.png │ ├── LC-logo-dark.png │ ├── n8n-CF-domains.jpg │ ├── CF_icon_dark.svg │ ├── CF_icon_bright.svg │ ├── CF_n8n_color.svg │ ├── CF_ts_color.svg │ ├── README.md │ ├── CF_python_color.svg │ ├── Lemony_logo_dark.svg │ ├── Lemony_logo_bright.svg │ └── CF_logo_dark.svg ├── ISSUE_TEMPLATE │ ├── milestone.md │ ├── config.yml │ └── question.yml ├── workflows │ ├── labeler.yml │ └── release.yml ├── dependabot.yml ├── labeler.yml └── CODEOWNERS ├── pnpm-workspace.yaml ├── examples ├── integrations │ ├── prometheus.yml │ ├── grafana-datasource.yml │ ├── otel-collector-config.yaml │ └── docker-compose.yml ├── batch_processing.py ├── docker │ └── multi-instance-ollama │ │ └── docker-compose.yml ├── vllm_example.py ├── guardrails_usage.py └── langchain_basic_usage.py ├── turbo.json ├── package.json ├── LICENSE ├── requirements.txt ├── scripts └── test-typescript-examples.sh ├── docs └── README.md └── requirements-dev.txt /tests/test_providers.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cascadeflow/integrations/langchain/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for LangChain integration.""" 2 | -------------------------------------------------------------------------------- /packages/ml/.npmignore: -------------------------------------------------------------------------------- 1 | src/ 2 | tsconfig.json 3 | *.test.ts 4 | .DS_Store 5 | node_modules/ 6 | -------------------------------------------------------------------------------- /packages/core/.gitignore: 
-------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | *.log 4 | .DS_Store 5 | coverage/ 6 | .turbo/ 7 | -------------------------------------------------------------------------------- /.github/assets/n8n-CF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/n8n-CF.png -------------------------------------------------------------------------------- /.github/assets/LC-logo-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/LC-logo-bright.png -------------------------------------------------------------------------------- /.github/assets/LC-logo-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/LC-logo-dark.png -------------------------------------------------------------------------------- /.github/assets/n8n-CF-domains.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/n8n-CF-domains.jpg -------------------------------------------------------------------------------- /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - 'packages/*' 3 | - 'packages/integrations/*' 4 | - 'packages/core/examples' 5 | -------------------------------------------------------------------------------- /packages/integrations/n8n/gulpfile.js: -------------------------------------------------------------------------------- 1 | const { src, dest } = require('gulp'); 2 | 3 | function buildIcons() { 4 | return src('nodes/**/*.svg').pipe(dest('dist/nodes')); 5 | } 6 | 7 | exports['build:icons'] = buildIcons; 8 | 
-------------------------------------------------------------------------------- /packages/core/examples/browser/vercel-edge/vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "functions": { 3 | "api/**/*.ts": { 4 | "runtime": "@vercel/node@3.0.0" 5 | } 6 | }, 7 | "env": { 8 | "OPENAI_API_KEY": "@openai-api-key" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /examples/integrations/prometheus.yml: -------------------------------------------------------------------------------- 1 | # Prometheus Configuration for cascadeflow 2 | # 3 | # Scrapes metrics from OpenTelemetry Collector 4 | 5 | global: 6 | scrape_interval: 15s 7 | evaluation_interval: 15s 8 | 9 | scrape_configs: 10 | - job_name: 'otel-collector' 11 | static_configs: 12 | - targets: ['otel-collector:8889'] 13 | -------------------------------------------------------------------------------- /examples/integrations/grafana-datasource.yml: -------------------------------------------------------------------------------- 1 | # Grafana Datasource Configuration 2 | # 3 | # Automatically configures Prometheus as a datasource 4 | 5 | apiVersion: 1 6 | 7 | datasources: 8 | - name: Prometheus 9 | type: prometheus 10 | access: proxy 11 | url: http://prometheus:9090 12 | isDefault: true 13 | editable: true 14 | -------------------------------------------------------------------------------- /packages/ml/src/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @cascadeflow/ml - ML semantic detection for cascadeflow TypeScript 3 | * 4 | * Brings TypeScript to feature parity with Python's ML capabilities using Transformers.js. 
5 | */ 6 | 7 | export { UnifiedEmbeddingService, EmbeddingCache } from './embedding'; 8 | export type { EmbeddingVector, CacheInfo } from './types'; 9 | -------------------------------------------------------------------------------- /packages/core/examples/nodejs/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "ESNext", 5 | "moduleResolution": "bundler", 6 | "lib": ["ES2022"], 7 | "strict": true, 8 | "esModuleInterop": true, 9 | "skipLibCheck": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "resolveJsonModule": true, 12 | "types": ["node"] 13 | }, 14 | "include": ["*.ts"], 15 | "exclude": ["node_modules"] 16 | } 17 | -------------------------------------------------------------------------------- /packages/core/examples/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cascadeflow-examples", 3 | "version": "0.1.0", 4 | "private": true, 5 | "type": "module", 6 | "description": "TypeScript examples for cascadeflow", 7 | "dependencies": { 8 | "@cascadeflow/core": "workspace:*", 9 | "openai": "^4.73.1", 10 | "@anthropic-ai/sdk": "^0.30.0", 11 | "groq-sdk": "^0.5.0" 12 | }, 13 | "devDependencies": { 14 | "tsx": "^4.7.0", 15 | "typescript": "^5.3.3" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /packages/langchain-cascadeflow/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config'; 2 | 3 | export default defineConfig({ 4 | test: { 5 | globals: true, 6 | environment: 'node', 7 | coverage: { 8 | provider: 'v8', 9 | reporter: ['text', 'json', 'html'], 10 | exclude: [ 11 | 'node_modules/', 12 | 'dist/', 13 | 'examples/', 14 | '**/*.d.ts', 15 | '**/*.config.*', 16 | '**/types.ts', 17 | ], 18 | }, 19 | }, 20 | }); 21 | 
-------------------------------------------------------------------------------- /packages/core/examples/browser/vercel-edge/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cascadeflow-vercel-edge-example", 3 | "version": "1.0.0", 4 | "private": true, 5 | "description": "cascadeflow Vercel Edge Function example", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "vercel dev", 9 | "deploy": "vercel deploy --prod" 10 | }, 11 | "dependencies": { 12 | "@cascadeflow/core": "workspace:*", 13 | "openai": "^4.73.1" 14 | }, 15 | "devDependencies": { 16 | "vercel": "^37.0.0" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /turbo.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://turbo.build/schema.json", 3 | "pipeline": { 4 | "build": { 5 | "dependsOn": ["^build"], 6 | "outputs": ["dist/**", ".next/**", "!.next/cache/**"] 7 | }, 8 | "test": { 9 | "dependsOn": ["build"], 10 | "outputs": ["coverage/**"] 11 | }, 12 | "lint": { 13 | "outputs": [] 14 | }, 15 | "dev": { 16 | "cache": false, 17 | "persistent": true 18 | }, 19 | "clean": { 20 | "cache": false 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /.github/assets/CF_icon_dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /packages/core/src/config/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Configuration module exports 3 | */ 4 | 5 | export { 6 | type DomainConfig, 7 | type DomainConfigMap, 8 | type DomainValidationMethod, 9 | DEFAULT_DOMAIN_CONFIG, 10 | BUILTIN_DOMAIN_CONFIGS, 11 | createDomainConfig, 12 | validateDomainConfig, 13 | getBuiltinDomainConfig, 14 | 
validationMethodToDomain, 15 | domainValidationToMethod, 16 | } from './domain-config'; 17 | 18 | export { 19 | type ModelRegistryEntry, 20 | ModelRegistry, 21 | defaultModelRegistry, 22 | getModel, 23 | hasModel, 24 | } from './model-registry'; 25 | -------------------------------------------------------------------------------- /packages/integrations/n8n/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parser: '@typescript-eslint/parser', 3 | parserOptions: { 4 | ecmaVersion: 2020, 5 | sourceType: 'module', 6 | }, 7 | plugins: ['eslint-plugin-n8n-nodes-base'], 8 | extends: [ 9 | 'plugin:n8n-nodes-base/nodes', 10 | 'plugin:n8n-nodes-base/credentials', 11 | 'plugin:n8n-nodes-base/community', 12 | ], 13 | rules: { 14 | // Disable conflicting rules for documentationUrl 15 | 'n8n-nodes-base/cred-class-field-documentation-url-miscased': 'off', 16 | 'n8n-nodes-base/cred-class-field-documentation-url-not-http-url': 'off', 17 | }, 18 | }; 19 | -------------------------------------------------------------------------------- /packages/ml/src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Type definitions for ML package 3 | */ 4 | 5 | /** 6 | * Embedding vector with dimensions 7 | */ 8 | export interface EmbeddingVector { 9 | /** Float32Array containing the embedding data */ 10 | data: Float32Array; 11 | /** Number of dimensions (384 for BGE-small-en-v1.5) */ 12 | dimensions: number; 13 | } 14 | 15 | /** 16 | * Cache information for debugging 17 | */ 18 | export interface CacheInfo { 19 | /** Number of cached embeddings */ 20 | size: number; 21 | /** List of cached text keys (limited to first 5 for debugging) */ 22 | texts: string[]; 23 | } 24 | -------------------------------------------------------------------------------- /packages/ml/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020"], 6 | "moduleResolution": "node", 7 | "resolveJsonModule": true, 8 | "declaration": true, 9 | "declarationMap": true, 10 | "sourceMap": true, 11 | "outDir": "./dist", 12 | "rootDir": "./src", 13 | "strict": true, 14 | "esModuleInterop": true, 15 | "skipLibCheck": true, 16 | "forceConsistentCasingInFileNames": true, 17 | "allowSyntheticDefaultImports": true 18 | }, 19 | "include": ["src/**/*"], 20 | "exclude": ["node_modules", "dist", "**/*.test.ts"] 21 | } 22 | -------------------------------------------------------------------------------- /packages/core/examples/run-example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Helper script to run TypeScript examples with proper module resolution 3 | # 4 | # Usage: ./run-example.sh nodejs/basic-usage.ts 5 | 6 | set -e 7 | 8 | # Ensure we're in the core package directory 9 | cd "$(dirname "$0")/.." 10 | 11 | # Build if needed 12 | if [ ! -d "dist" ]; then 13 | echo "📦 Building @cascadeflow/core..." 
14 | pnpm build 15 | fi 16 | 17 | # Load environment variables 18 | if [ -f "../../../.env" ]; then 19 | set -a 20 | source ../../../.env 21 | set +a 22 | fi 23 | 24 | # Run with tsx and use node_modules resolution 25 | npx tsx --conditions=import "examples/$1" 26 | -------------------------------------------------------------------------------- /packages/core/examples/scripts/test-typescript.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Test TypeScript examples 3 | 4 | cd packages/core/examples || exit 1 5 | 6 | # Load env vars 7 | set -a 8 | source ../../../.env 2>/dev/null || true 9 | set +a 10 | 11 | PASSED=0 12 | FAILED=0 13 | 14 | for example in nodejs/*.ts streaming.ts; do 15 | [ -f "$example" ] || continue 16 | echo "Testing: $example" 17 | if npx tsx "$example" > /dev/null 2>&1; then 18 | echo "✅ PASSED" 19 | ((PASSED++)) 20 | else 21 | echo "❌ FAILED" 22 | ((FAILED++)) 23 | fi 24 | done 25 | 26 | echo "" 27 | echo "Passed: $PASSED, Failed: $FAILED" 28 | exit $FAILED 29 | -------------------------------------------------------------------------------- /packages/integrations/n8n/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "commonjs", 5 | "lib": ["ES2020"], 6 | "outDir": "dist", 7 | "rootDir": ".", 8 | "declaration": true, 9 | "declarationMap": true, 10 | "sourceMap": true, 11 | "strict": true, 12 | "esModuleInterop": true, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "resolveJsonModule": true, 16 | "moduleResolution": "node", 17 | "types": ["node"] 18 | }, 19 | "include": [ 20 | "credentials/**/*", 21 | "nodes/**/*" 22 | ], 23 | "exclude": [ 24 | "node_modules", 25 | "dist" 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /examples/integrations/otel-collector-config.yaml: 
-------------------------------------------------------------------------------- 1 | # OpenTelemetry Collector Configuration for cascadeflow 2 | # 3 | # This configuration: 4 | # 1. Receives metrics via OTLP HTTP (port 4318) 5 | # 2. Exports metrics to Prometheus (port 8889) 6 | 7 | receivers: 8 | otlp: 9 | protocols: 10 | http: 11 | endpoint: 0.0.0.0:4318 12 | grpc: 13 | endpoint: 0.0.0.0:4317 14 | 15 | processors: 16 | batch: 17 | timeout: 10s 18 | send_batch_size: 1024 19 | 20 | exporters: 21 | prometheus: 22 | endpoint: "0.0.0.0:8889" 23 | namespace: cascadeflow 24 | 25 | logging: 26 | loglevel: info 27 | 28 | service: 29 | pipelines: 30 | metrics: 31 | receivers: [otlp] 32 | processors: [batch] 33 | exporters: [prometheus, logging] 34 | -------------------------------------------------------------------------------- /.github/assets/CF_icon_bright.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/milestone.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Milestone 3 | about: Track implementation milestones 4 | title: '[MILESTONE] ' 5 | labels: milestone 6 | assignees: '' 7 | --- 8 | 9 | ## Milestone Overview 10 | 11 | 12 | ## Tasks 13 | 14 | - [ ] Task 1 15 | - [ ] Task 2 16 | - [ ] Task 3 17 | 18 | ## Acceptance Criteria 19 | 20 | - [ ] Criterion 1 21 | - [ ] Criterion 2 22 | 23 | ## Tests Required 24 | 25 | - [ ] Unit tests: X+ 26 | - [ ] Integration tests: Y+ 27 | 28 | ## Documentation 29 | 30 | - [ ] API documentation 31 | - [ ] Usage examples 32 | - [ ] README updates 33 | 34 | ## Estimated Duration 35 | 36 | X-Y days 37 | -------------------------------------------------------------------------------- /packages/integrations/n8n/nodes/LmChatCascadeFlow/cascadeflow.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /cascadeflow/integrations/langchain/routers/__init__.py: -------------------------------------------------------------------------------- 1 | """Router framework for CascadeFlow LangChain integration. 2 | 3 | This module contains the PreRouter and base router classes for 4 | intelligent query routing based on complexity detection. 5 | """ 6 | 7 | from .base import ( 8 | Router, 9 | RouterChain, 10 | RoutingDecision, 11 | RoutingDecisionHelper, 12 | RoutingStrategy, 13 | ) 14 | from .pre_router import ( 15 | PreRouter, 16 | PreRouterConfig, 17 | PreRouterStats, 18 | create_pre_router, 19 | ) 20 | 21 | __all__ = [ 22 | # Base router framework 23 | "Router", 24 | "RouterChain", 25 | "RoutingDecision", 26 | "RoutingDecisionHelper", 27 | "RoutingStrategy", 28 | # PreRouter 29 | "PreRouter", 30 | "PreRouterConfig", 31 | "PreRouterStats", 32 | "create_pre_router", 33 | ] 34 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cascadeflow-monorepo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "description": "cascadeflow monorepo - Python and TypeScript libraries", 6 | "author": { 7 | "name": "Lemony Inc.", 8 | "email": "hello@lemony.ai" 9 | }, 10 | "license": "MIT", 11 | "repository": { 12 | "type": "git", 13 | "url": "https://github.com/lemony-ai/cascadeflow.git" 14 | }, 15 | "scripts": { 16 | "build": "turbo run build", 17 | "dev": "turbo run dev", 18 | "test": "turbo run test", 19 | "lint": "turbo run lint", 20 | "clean": "turbo run clean" 21 | }, 22 | "devDependencies": { 23 | "turbo": "^1.11.0" 24 | }, 25 | "engines": { 26 | "node": ">=18.0.0", 27 | "pnpm": ">=8.0.0" 28 | }, 29 | "packageManager": 
"pnpm@8.15.0", 30 | "dependencies": { 31 | "tsx": "^4.7.0" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /packages/core/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020"], 6 | "moduleResolution": "bundler", 7 | "resolveJsonModule": true, 8 | "allowJs": false, 9 | "checkJs": false, 10 | "outDir": "./dist", 11 | "rootDir": "./src", 12 | "removeComments": true, 13 | "declaration": true, 14 | "declarationMap": true, 15 | "sourceMap": true, 16 | "strict": true, 17 | "noUnusedLocals": false, 18 | "noUnusedParameters": false, 19 | "noImplicitReturns": true, 20 | "noFallthroughCasesInSwitch": true, 21 | "esModuleInterop": true, 22 | "skipLibCheck": true, 23 | "forceConsistentCasingInFileNames": true, 24 | "allowSyntheticDefaultImports": true 25 | }, 26 | "include": ["src/**/*"], 27 | "exclude": ["node_modules", "dist", "tests"] 28 | } 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: 📚 Documentation 4 | url: https://github.com/lemony-ai/cascadeflow/blob/main/README.md 5 | about: Check out our comprehensive documentation and guides 6 | - name: 💬 GitHub Discussions 7 | url: https://github.com/lemony-ai/cascadeflow/discussions 8 | about: Join the community discussion for questions and ideas 9 | - name: 📖 Examples 10 | url: https://github.com/lemony-ai/cascadeflow/tree/main/examples 11 | about: Browse working examples for all supported providers 12 | - name: 🐛 Known Issues 13 | url: https://github.com/lemony-ai/cascadeflow/issues?q=is%3Aissue+label%3Abug 14 | about: Check if your issue has already been reported 15 | - name: 💼 Support 16 | url: mailto:hello@lemony.ai 17 
| about: Contact us for support, consulting, or enterprise inquiries -------------------------------------------------------------------------------- /packages/langchain-cascadeflow/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020"], 6 | "moduleResolution": "bundler", 7 | "resolveJsonModule": true, 8 | "allowJs": false, 9 | "checkJs": false, 10 | "outDir": "./dist", 11 | "rootDir": "./src", 12 | "removeComments": true, 13 | "declaration": true, 14 | "declarationMap": true, 15 | "sourceMap": true, 16 | "strict": true, 17 | "noUnusedLocals": false, 18 | "noUnusedParameters": false, 19 | "noImplicitReturns": true, 20 | "noFallthroughCasesInSwitch": true, 21 | "esModuleInterop": true, 22 | "skipLibCheck": true, 23 | "forceConsistentCasingInFileNames": true, 24 | "allowSyntheticDefaultImports": true 25 | }, 26 | "include": ["src/**/*"], 27 | "exclude": ["node_modules", "dist", "tests"] 28 | } 29 | -------------------------------------------------------------------------------- /packages/core/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parser: '@typescript-eslint/parser', 3 | parserOptions: { 4 | ecmaVersion: 2020, 5 | sourceType: 'module', 6 | project: './tsconfig.json', 7 | }, 8 | plugins: ['@typescript-eslint'], 9 | extends: [ 10 | 'eslint:recommended', 11 | 'plugin:@typescript-eslint/recommended', 12 | ], 13 | rules: { 14 | // Type safety (warnings to allow gradual improvement) 15 | '@typescript-eslint/no-explicit-any': 'warn', 16 | '@typescript-eslint/no-unused-vars': ['warn', { argsIgnorePattern: '^_' }], 17 | '@typescript-eslint/no-var-requires': 'warn', // Allow require() for now 18 | 19 | // Code quality (warnings instead of errors) 20 | 'prefer-const': 'warn', 21 | 'no-useless-escape': 'warn', 22 | }, 23 | ignorePatterns: [ 24 | 'dist', 25 | 
'node_modules', 26 | '*.js', 27 | 'examples', 28 | '__tests__', 29 | ], 30 | }; 31 | -------------------------------------------------------------------------------- /packages/core/examples/nodejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cascadeflow-nodejs-examples", 3 | "version": "0.1.0", 4 | "private": true, 5 | "description": "Node.js examples for @cascadeflow/core", 6 | "type": "module", 7 | "scripts": { 8 | "basic": "tsx basic-usage.ts", 9 | "tools": "tsx tool-calling.ts", 10 | "multi": "tsx multi-provider.ts" 11 | }, 12 | "dependencies": { 13 | "@cascadeflow/core": "workspace:*" 14 | }, 15 | "devDependencies": { 16 | "tsx": "^4.7.0", 17 | "typescript": "^5.3.3", 18 | "dotenv": "^16.3.1", 19 | "@types/node": "^20.10.0" 20 | }, 21 | "peerDependencies": { 22 | "openai": "^4.0.0", 23 | "@anthropic-ai/sdk": "^0.30.0", 24 | "groq-sdk": "^0.5.0" 25 | }, 26 | "peerDependenciesMeta": { 27 | "openai": { 28 | "optional": true 29 | }, 30 | "@anthropic-ai/sdk": { 31 | "optional": true 32 | }, 33 | "groq-sdk": { 34 | "optional": true 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /cascadeflow/limits/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rate Limiting for cascadeflow. 3 | 4 | Provides per-user and per-tier rate limiting with sliding window algorithm 5 | for controlling API usage and enforcing subscription tier limits. 
6 | 7 | Key components: 8 | - RateLimiter: Sliding window rate limiter 9 | - RateLimitError: Exception for rate limit violations 10 | 11 | Example usage: 12 | from cascadeflow.limits import RateLimiter 13 | from cascadeflow import UserProfile, TierLevel 14 | 15 | profile = UserProfile.from_tier(TierLevel.PRO, user_id="user_123") 16 | limiter = RateLimiter() 17 | 18 | # Check if request is allowed 19 | if await limiter.check_rate_limit(profile): 20 | # Process request 21 | result = await agent.run(query) 22 | else: 23 | # Rate limit exceeded 24 | raise RateLimitError("Rate limit exceeded") 25 | """ 26 | 27 | from .rate_limiter import RateLimiter, RateLimitState, RateLimitError 28 | 29 | __all__ = [ 30 | "RateLimiter", 31 | "RateLimitState", 32 | "RateLimitError", 33 | ] 34 | -------------------------------------------------------------------------------- /cascadeflow/tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | cascadeflow tool calling system. 3 | 4 | This is an OPTIONAL feature - existing cascadeflow code works unchanged. 
5 | 6 | Quick Start: 7 | from cascadeflow.tools import tool, ToolExecutor 8 | 9 | @tool 10 | def get_weather(city: str) -> dict: 11 | '''Get weather for a city.''' 12 | return {"temp": 22, "condition": "sunny"} 13 | 14 | executor = ToolExecutor([get_weather]) 15 | result = await executor.execute(tool_call) 16 | """ 17 | 18 | from .call import ToolCall, ToolCallFormat 19 | from .config import ToolConfig, create_tool_from_function, tool 20 | from .examples import example_calculator, example_get_weather 21 | from .executor import ToolExecutor 22 | from .result import ToolResult 23 | 24 | __version__ = "0.1.0" 25 | 26 | __all__ = [ 27 | # Core classes 28 | "ToolConfig", 29 | "ToolCall", 30 | "ToolResult", 31 | "ToolExecutor", 32 | # Enums 33 | "ToolCallFormat", 34 | # Utilities 35 | "tool", 36 | "create_tool_from_function", 37 | # Examples 38 | "example_calculator", 39 | "example_get_weather", 40 | ] 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Lemony Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /examples/batch_processing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example: Batch Processing with cascadeflow v0.2.1 3 | 4 | This example demonstrates batch processing capabilities. 5 | """ 6 | 7 | import asyncio 8 | 9 | from cascadeflow import CascadeAgent 10 | 11 | 12 | async def main(): 13 | # Create agent 14 | agent = CascadeAgent.from_env() 15 | 16 | # Simple batch processing 17 | queries = [ 18 | "What is Python?", 19 | "What is JavaScript?", 20 | "What is Rust?", 21 | ] 22 | 23 | print("Processing 3 queries in batch...") 24 | result = await agent.run_batch(queries) 25 | 26 | print(f"\n✓ Success: {result.success_count}/{len(queries)}") 27 | print(f"✓ Total cost: ${result.total_cost:.4f}") 28 | print(f"✓ Average cost: ${result.average_cost:.4f}") 29 | print(f"✓ Total time: {result.total_time:.2f}s") 30 | print(f"✓ Strategy: {result.strategy_used}") 31 | 32 | for i, cascade_result in enumerate(result.results): 33 | if cascade_result: 34 | print(f"\nQuery {i+1}: {cascade_result.content[:100]}...") 35 | 36 | 37 | if __name__ == "__main__": 38 | asyncio.run(main()) 39 | -------------------------------------------------------------------------------- /cascadeflow/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Core cascade execution engine. 
/**
 * Tools Module
 *
 * Universal tool configuration and execution for LLM function calling.
 *
 * Barrel file: re-exports the public surface of the tools subsystem
 * (configuration, execution, calls, results, provider format conversion,
 * and validation) from their implementation modules.
 *
 * @module tools
 */

// Tool Configuration — schema building and tool declaration helpers.
export {
  ToolConfig,
  createTool,
  tool,
  inferJsonType,
  buildParameterSchema,
} from './config';

export type {
  ToolFunction,
  ToolParameters,
  ToolConfigOptions,
} from './config';

// Tool Execution — runs declared tools against incoming calls.
export { ToolExecutor } from './executor';

// Tool Calls — a single invocation request emitted by a model.
export { ToolCall } from './call';
export type { ToolCallOptions } from './call';

// Tool Results — the outcome of executing a tool call.
export { ToolResult } from './result';
export type { ToolResultOptions } from './result';

// Format Conversion — translate tool definitions/calls to provider wire formats.
export {
  ToolCallFormat,
  toOpenAIFormat,
  toAnthropicFormat,
  toOllamaFormat,
  toProviderFormat,
  getProviderFormatType,
} from './formats';

// Tool Validation — quality scoring for tool definitions.
export { ToolValidator, formatToolQualityScore } from './validator';
export type { ToolQualityScore, ComplexityLevel } from './validator';
"""
ML Module for cascadeflow

Optional machine learning components for enhanced quality validation,
domain detection, and complexity analysis.

All ML features:
- Are completely OPTIONAL (graceful degradation)
- Require `fastembed` package (pip install fastembed)
- Use lightweight ONNX models (~40MB)
- Provide better accuracy than rule-based
- Add ~25-50ms latency (optimized)

Components:
- UnifiedEmbeddingService: Single embedding model for all tasks
- EmbeddingCache: Request-scoped caching for performance

Example:
    >>> from cascadeflow.ml import UnifiedEmbeddingService
    >>>
    >>> # Initialize (lazy loads model)
    >>> embedder = UnifiedEmbeddingService()
    >>>
    >>> if embedder.is_available:
    ...     similarity = embedder.similarity("query", "response")
    ...     print(f"Similarity: {similarity:.2%}")
"""

# Public re-exports from the embedding submodule.
from .embedding import (
    UnifiedEmbeddingService,
    EmbeddingCache,
)

# Explicit public API for `from cascadeflow.ml import *`.
__all__ = [
    "UnifiedEmbeddingService",
    "EmbeddingCache",
]

__version__ = "0.7.0"  # ML integration
"""
Dynamic Configuration Management for CascadeFlow.

Provides runtime configuration updates without service restart:
- ConfigManager: Central config management with event system
- ConfigWatcher: File watching for automatic config reload
- Thread-safe configuration updates
- Event callbacks for config changes

Example:
    >>> from cascadeflow.dynamic_config import ConfigManager, ConfigWatcher
    >>>
    >>> # Create manager with initial config
    >>> manager = ConfigManager(config_path="cascadeflow.yaml")
    >>>
    >>> # Register callback for config changes
    >>> @manager.on_change("quality_threshold")
    ... def on_threshold_change(old, new):
    ...     print(f"Threshold changed: {old} -> {new}")
    >>>
    >>> # Enable file watching for auto-reload
    >>> watcher = ConfigWatcher(manager, interval=5.0)
    >>> watcher.start()
    >>>
    >>> # Manual config update
    >>> manager.update(quality_threshold=0.85)
"""

# Core config management with change events.
from .manager import (
    ConfigManager,
    ConfigChangeEvent,
    ConfigSection,
)
# File-watching support for automatic reloads.
from .watcher import (
    ConfigWatcher,
)

# Explicit public API for `from cascadeflow.dynamic_config import *`.
__all__ = [
    "ConfigManager",
    "ConfigChangeEvent",
    "ConfigSection",
    "ConfigWatcher",
]
"""
Guardrails for cascadeflow - Content Safety and Compliance.

Provides basic content moderation and PII detection for production safety.

Key components:
- ContentModerator: Detect harmful content
- PIIDetector: Detect personally identifiable information
- GuardrailsManager: Centralized guardrails management

Example usage:
    from cascadeflow.guardrails import GuardrailsManager
    from cascadeflow import UserProfile, TierLevel

    profile = UserProfile.from_tier(
        TierLevel.PRO,
        user_id="user_123",
        enable_content_moderation=True,
        enable_pii_detection=True
    )

    manager = GuardrailsManager()

    # Check content before processing
    safe, violations = await manager.check_content(
        text="User input here",
        profile=profile
    )

    if not safe:
        raise GuardrailViolation(f"Content blocked: {violations}")
"""

# Public re-exports from the guardrails submodules.
from .content_moderator import ContentModerator, ModerationResult
from .pii_detector import PIIDetector, PIIMatch
from .manager import GuardrailsManager, GuardrailViolation

# Explicit public API for `from cascadeflow.guardrails import *`.
__all__ = [
    "ContentModerator",
    "ModerationResult",
    "PIIDetector",
    "PIIMatch",
    "GuardrailsManager",
    "GuardrailViolation",
]
"""
User Profile System for cascadeflow.

This module provides a comprehensive user profile system for managing
thousands of users with different subscription tiers, limits, and preferences.

Key components:
- TierConfig: Predefined subscription tiers (FREE, STARTER, PRO, BUSINESS, ENTERPRISE)
- UserProfile: Multi-dimensional user profile (identity, tier, limits, preferences, guardrails, telemetry)
- UserProfileManager: Profile management at scale with caching and database integration

Example usage:
    from cascadeflow.profiles import UserProfile, TierLevel
    from cascadeflow import CascadeAgent

    # Create profile from tier preset
    profile = UserProfile.from_tier(TierLevel.PRO, user_id="user_123")

    # Create agent from profile
    agent = CascadeAgent.from_profile(profile)

    # Use profile manager for scaling
    from cascadeflow.profiles import UserProfileManager

    manager = UserProfileManager(cache_ttl_seconds=300)
    profile = await manager.get_profile("user_123")
"""

# Public re-exports from the profile submodules.
from .tier_config import TierConfig, TierLevel, TIER_PRESETS
from .user_profile import UserProfile
from .profile_manager import UserProfileManager

# Explicit public API for `from cascadeflow.profiles import *`.
__all__ = [
    # Tier system
    "TierConfig",
    "TierLevel",
    "TIER_PRESETS",
    # User profiles
    "UserProfile",
    # Profile management
    "UserProfileManager",
]
6 | 7 | Current Components: 8 | ------------------- 9 | - VisualConsumer: Base visual feedback system 10 | - TerminalVisualConsumer: Terminal streaming with indicators 11 | - SilentConsumer: No-output consumer for testing 12 | 13 | Future Extensions: 14 | ------------------ 15 | - TerminalUI: Enhanced terminal with Rich formatting 16 | - Progress bars, tables, live updates 17 | - Color schemes, themes 18 | - ASCII art, spinners 19 | 20 | - JupyterUI: Jupyter notebook widgets 21 | - IPython display integration 22 | - Interactive widgets 23 | - Plotly/Matplotlib charts 24 | - Real-time metrics 25 | 26 | - WebUI: Web dashboard interface 27 | - FastAPI/Flask endpoints 28 | - WebSocket streaming 29 | - React/Vue components 30 | - REST API 31 | 32 | - Formatters: Output formatting 33 | - JSON exporter 34 | - Markdown formatter 35 | - CSV exporter 36 | - HTML reports 37 | 38 | Usage: 39 | ------ 40 | ```python 41 | from cascadeflow.interface import TerminalVisualConsumer 42 | 43 | # Create consumer 44 | consumer = TerminalVisualConsumer(enable_visual=True) 45 | 46 | # Use with agent 47 | result = await agent.run_streaming(query) 48 | ``` 49 | """ 50 | 51 | from .visual_consumer import ( 52 | SilentConsumer, 53 | TerminalVisualConsumer, 54 | ) 55 | 56 | __all__ = [ 57 | "TerminalVisualConsumer", 58 | "SilentConsumer", 59 | ] 60 | 61 | __version__ = "2.2.0" 62 | -------------------------------------------------------------------------------- /tests/benchmarks/README.md: -------------------------------------------------------------------------------- 1 | ### Benchmark Suite 2 | 3 | Professional benchmarks to validate CascadeFlow performance across real-world use cases. 4 | 5 | #### Datasets 6 | 7 | 1. **HumanEval** - Code generation (164 programming problems) 8 | 2. **Bitext Customer Support** - Customer service Q&A (27,000+ examples) 9 | 3. **Banking77** - Banking intent classification (13,000+ examples) 10 | 4. 
**GSM8K** - Grade school math reasoning (8,500+ problems) 11 | 12 | #### Metrics 13 | 14 | Each benchmark measures: 15 | - **Cost savings** vs. always-powerful-model baseline 16 | - **Quality maintenance** (accuracy/pass rate) 17 | - **Latency** improvements 18 | - **Escalation rates** (drafter acceptance %) 19 | 20 | #### Running Benchmarks 21 | 22 | ```bash 23 | # Run a single benchmark 24 | python -m benchmarks.datasets.humaneval 25 | 26 | # Run all benchmarks 27 | python -m benchmarks.run_all 28 | 29 | # View results 30 | ls benchmarks/results/ 31 | ``` 32 | 33 | #### Output 34 | 35 | - **JSON**: Detailed results for analysis 36 | - **CSV**: Tabular data for Excel/graphs 37 | - **Markdown**: Human-readable reports with ROI calculations 38 | 39 | #### Structure 40 | 41 | ``` 42 | benchmarks/ 43 | ├── base.py # Abstract benchmark class 44 | ├── metrics.py # Cost/latency/quality calculations 45 | ├── reporter.py # Report generation 46 | ├── humaneval.py # Code generation benchmark 47 | ├── customer_support.py # Customer service Q&A 48 | ├── banking77.py # Banking intent classification 49 | ├── gsm8k.py # Math reasoning 50 | └── results/ # Output directory 51 | ``` 52 | 53 | All benchmarks extend the `Benchmark` base class. 
async def test_hf():
    """Probe a few HF models via the router endpoint; return the first that answers 200."""
    token = os.getenv("HF_TOKEN")

    # Candidate models, probed in order until one responds successfully.
    candidates = ["distilgpt2", "gpt2", "openai-community/gpt2"]

    for candidate in candidates:
        print(f"\nTesting: {candidate}")
        print("=" * 50)

        # Legacy https://api-inference.huggingface.co host was deprecated
        # (Jan 2025); requests now go through the router endpoint instead.
        url = f"https://router.huggingface.co/hf-inference/models/{candidate}"
        request_headers = {"Authorization": f"Bearer {token}"}

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=request_headers,
                    json={"inputs": "Hello"},
                )

            print(f"Status: {response.status_code}")

            if response.status_code != 200:
                print(f"❌ Error: {response.text[:200]}")
                continue

            data = response.json()
            print(f"✅ Success: {data}")
            return candidate  # First model that worked.
        except Exception as e:
            print(f"❌ Exception: {e}")

    # No candidate responded successfully.
    return None


if __name__ == "__main__":
    chosen = asyncio.run(test_hf())
    if chosen:
        print(f"\n✅ Use this model in tests: {chosen}")
    else:
        print("\n❌ No models working - HF API may be down or account issue")
import pytest 4 | 5 | from cascadeflow import ( 6 | BudgetExceededError, 7 | ModelError, 8 | ProviderError, 9 | QualityThresholdError, 10 | cascadeflowError, 11 | ) 12 | 13 | 14 | def test_base_exception(): 15 | """Test base cascadeflowError.""" 16 | error = cascadeflowError("Test error") 17 | assert "Test error" in str(error) 18 | 19 | 20 | def test_budget_exceeded_error(): 21 | """Test BudgetExceededError creation and attributes.""" 22 | error = BudgetExceededError("Budget exceeded", remaining=0.5) 23 | 24 | assert "Budget exceeded" in str(error) 25 | assert error.remaining == 0.5 26 | assert isinstance(error, cascadeflowError) 27 | 28 | 29 | def test_quality_threshold_error(): 30 | """Test QualityThresholdError.""" 31 | error = QualityThresholdError("Quality too low") 32 | 33 | assert "Quality too low" in str(error) 34 | assert isinstance(error, cascadeflowError) 35 | 36 | 37 | def test_provider_error(): 38 | """Test ProviderError with provider attribute.""" 39 | error = ProviderError("API failed", provider="openai") 40 | 41 | assert "API failed" in str(error) 42 | assert error.provider == "openai" 43 | assert isinstance(error, cascadeflowError) 44 | 45 | 46 | def test_model_error(): 47 | """Test ModelError with model and provider attributes.""" 48 | error = ModelError("Model failed", model="gpt-4", provider="openai") 49 | 50 | assert "Model failed" in str(error) 51 | assert error.model == "gpt-4" 52 | assert error.provider == "openai" 53 | assert isinstance(error, cascadeflowError) 54 | 55 | 56 | if __name__ == "__main__": 57 | pytest.main([__file__, "-v"]) 58 | -------------------------------------------------------------------------------- /packages/core/typedoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://typedoc.org/schema.json", 3 | "entryPoints": ["src/index.ts"], 4 | "out": "docs/api", 5 | "plugin": [], 6 | "exclude": [ 7 | "**/*.test.ts", 8 | "**/__tests__/**", 9 | 
"**/node_modules/**" 10 | ], 11 | "excludePrivate": true, 12 | "excludeProtected": false, 13 | "excludeInternal": false, 14 | "includeVersion": true, 15 | "sort": ["source-order"], 16 | "kindSortOrder": [ 17 | "Class", 18 | "Interface", 19 | "Function", 20 | "Variable", 21 | "TypeAlias", 22 | "Enum" 23 | ], 24 | "categorizeByGroup": true, 25 | "categoryOrder": [ 26 | "Core", 27 | "Models", 28 | "Quality", 29 | "Streaming", 30 | "Tools", 31 | "Providers", 32 | "Utilities", 33 | "*" 34 | ], 35 | "readme": "README.md", 36 | "name": "@cascadeflow/core", 37 | "navigationLinks": { 38 | "GitHub": "https://github.com/lemony-ai/cascadeflow", 39 | "Examples": "https://github.com/lemony-ai/cascadeflow/tree/main/packages/core/examples", 40 | "Python Docs": "https://github.com/lemony-ai/cascadeflow/tree/main/docs" 41 | }, 42 | "searchInComments": true, 43 | "validation": { 44 | "notExported": false, 45 | "invalidLink": false, 46 | "notDocumented": false 47 | }, 48 | "skipErrorChecking": true, 49 | "visibilityFilters": { 50 | "protected": false, 51 | "private": false, 52 | "inherited": true, 53 | "external": false 54 | }, 55 | "githubPages": true, 56 | "gitRevision": "main", 57 | "sourceLinkTemplate": "https://github.com/lemony-ai/cascadeflow/blob/{gitRevision}/packages/core/{path}#L{line}", 58 | "theme": "default", 59 | "hideGenerator": false, 60 | "basePath": ".", 61 | "treatWarningsAsErrors": false 62 | } 63 | -------------------------------------------------------------------------------- /examples/docker/multi-instance-ollama/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | # Draft model - Fast, small model for initial responses 5 | # Uses GPU 0 for quick inference 6 | ollama-draft: 7 | image: ollama/ollama:latest 8 | container_name: ollama-draft 9 | ports: 10 | - "11434:11434" 11 | volumes: 12 | - ollama-draft-data:/root/.ollama 13 | environment: 14 | - OLLAMA_HOST=0.0.0.0:11434 15 
| deploy: 16 | resources: 17 | reservations: 18 | devices: 19 | - driver: nvidia 20 | device_ids: ['0'] # Use first GPU 21 | capabilities: [gpu] 22 | restart: unless-stopped 23 | healthcheck: 24 | test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"] 25 | interval: 30s 26 | timeout: 10s 27 | retries: 3 28 | 29 | # Verifier model - Powerful, large model for quality checks 30 | # Uses GPU 1 for accurate responses 31 | ollama-verifier: 32 | image: ollama/ollama:latest 33 | container_name: ollama-verifier 34 | ports: 35 | - "11435:11434" # Different external port 36 | volumes: 37 | - ollama-verifier-data:/root/.ollama 38 | environment: 39 | - OLLAMA_HOST=0.0.0.0:11434 40 | deploy: 41 | resources: 42 | reservations: 43 | devices: 44 | - driver: nvidia 45 | device_ids: ['1'] # Use second GPU 46 | capabilities: [gpu] 47 | restart: unless-stopped 48 | healthcheck: 49 | test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"] 50 | interval: 30s 51 | timeout: 10s 52 | retries: 3 53 | 54 | volumes: 55 | ollama-draft-data: 56 | driver: local 57 | ollama-verifier-data: 58 | driver: local 59 | -------------------------------------------------------------------------------- /packages/ml/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@cascadeflow/ml", 3 | "version": "0.6.5", 4 | "description": "ML semantic detection for cascadeflow TypeScript - Feature parity with Python", 5 | "author": { 6 | "name": "Lemony Inc.", 7 | "email": "hello@lemony.ai" 8 | }, 9 | "license": "MIT", 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/lemony-ai/cascadeflow.git", 13 | "directory": "packages/ml" 14 | }, 15 | "main": "./dist/index.js", 16 | "module": "./dist/index.mjs", 17 | "types": "./dist/index.d.ts", 18 | "exports": { 19 | ".": { 20 | "types": "./dist/index.d.ts", 21 | "import": "./dist/index.mjs", 22 | "require": "./dist/index.js" 23 | } 24 | }, 25 | "files": [ 26 | "dist", 27 | 
"""
cascadeflow Streaming Module
=============================

Provides real-time streaming for both text and tool-calling cascades.

Modules:
- base: Text streaming (StreamManager, StreamEvent)
- tools: Tool streaming (ToolStreamManager, ToolStreamEvent)
- utils: Shared utilities (ProgressiveJSONParser, etc.)

Usage:
    # Text streaming
    from cascadeflow.streaming import StreamManager, StreamEvent, StreamEventType

    manager = StreamManager(cascade)
    async for event in manager.stream(query):
        if event.type == StreamEventType.CHUNK:
            print(event.content, end='')

    # Tool streaming
    from cascadeflow.streaming import ToolStreamManager, ToolStreamEventType

    tool_manager = ToolStreamManager(cascade)
    async for event in tool_manager.stream(query, tools=tools):
        if event.type == ToolStreamEventType.TOOL_CALL_START:
            print(f"[Calling: {event.tool_call['name']}]")
"""

# Text streaming
from .base import (
    StreamEvent,
    StreamEventType,
    StreamManager,
)

# Tool streaming
from .tools import (
    ToolStreamEvent,
    ToolStreamEventType,
    ToolStreamManager,
)

# Utilities
from .utils import (
    JSONParseState,
    ProgressiveJSONParser,
)

# Explicit public API for `from cascadeflow.streaming import *`.
__all__ = [
    # Text streaming
    "StreamEventType",
    "StreamEvent",
    "StreamManager",
    # Tool streaming
    "ToolStreamEventType",
    "ToolStreamEvent",
    "ToolStreamManager",
    # Utilities
    "ProgressiveJSONParser",
    "JSONParseState",
]

__version__ = "2.0.0"
def example_calculator(operation: str, x: float, y: float) -> float:
    """
    Perform basic arithmetic operations.

    Args:
        operation: Operation to perform (add, subtract, multiply, divide)
        x: First number
        y: Second number

    Returns:
        Result of the operation

    Raises:
        ValueError: If operation is not recognized
    """
    # Closed set of supported operations, in documentation order.
    valid_ops = ("add", "subtract", "multiply", "divide")

    if operation not in valid_ops:
        raise ValueError(
            f"Unknown operation: {operation}. " f"Valid operations: {', '.join(valid_ops)}"
        )

    if operation == "add":
        return x + y
    if operation == "subtract":
        return x - y
    if operation == "multiply":
        return x * y
    # Division guards against a zero divisor by returning infinity
    # rather than raising ZeroDivisionError.
    return x / y if y != 0 else float("inf")
'../streaming'; 21 | export type { 22 | StreamEvent, 23 | StreamEventData, 24 | StreamChunk, 25 | StreamOptions, 26 | } from '../streaming'; 27 | 28 | // Stream Manager 29 | export { 30 | StreamManager, 31 | createStreamManager, 32 | } from './stream-manager'; 33 | export type { 34 | StreamManagerConfig, 35 | StreamOptions as StreamManagerOptions, 36 | } from './stream-manager'; 37 | 38 | // Tool Stream Manager 39 | export { 40 | ToolStreamManager, 41 | createToolStreamManager, 42 | ToolStreamEventType, 43 | } from './tool-stream-manager'; 44 | export type { 45 | ToolStreamEvent, 46 | ToolStreamManagerConfig, 47 | ToolStreamOptions, 48 | } from './tool-stream-manager'; 49 | 50 | // Utilities 51 | export { 52 | ProgressiveJSONParser, 53 | ToolCallValidator, 54 | JSONParseState, 55 | estimateConfidenceFromLogprobs, 56 | estimateConfidenceFromContent, 57 | estimateTokens, 58 | } from './utils'; 59 | export type { 60 | ParseResult, 61 | } from './utils'; 62 | 63 | // Event Formatter 64 | export { 65 | EventFormatter, 66 | createEventFormatter, 67 | getDefaultFormatter, 68 | quickFormat, 69 | VISUAL_ICONS, 70 | COLORS, 71 | } from './event-formatter'; 72 | export type { 73 | EventFormatterConfig, 74 | } from './event-formatter'; 75 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # CascadeFlow Production Requirements 2 | # Minimal core dependencies only - provider SDKs are optional extras 3 | 4 | # ============================================================================ 5 | # CORE DEPENDENCIES (Always Required) 6 | # ============================================================================ 7 | 8 | # Data validation and settings 9 | pydantic>=2.0.0 10 | 11 | # HTTP client for API calls 12 | httpx>=0.25.0 13 | 14 | # Token counting and cost estimation 15 | tiktoken>=0.5.0 16 | 17 | # Terminal output and logging 18 | rich>=13.0.0 
19 | 20 | 21 | # ============================================================================ 22 | # PROVIDER INSTALLATION (Optional - Use Extras) 23 | # ============================================================================ 24 | 25 | # Install specific providers as needed: 26 | # pip install cascadeflow[openai] → Adds openai>=1.0.0 27 | # pip install cascadeflow[anthropic] → Adds anthropic>=0.8.0 28 | # pip install cascadeflow[groq] → Adds groq>=0.4.0 29 | # pip install cascadeflow[providers] → Adds OpenAI + Anthropic + Groq 30 | # pip install cascadeflow[all] → Everything 31 | 32 | # FREE/LOCAL OPTIONS (No Python packages needed): 33 | # Ollama → Just HTTP to localhost:11434 34 | # vLLM → Can use HTTP to vLLM server (or install cascadeflow[vllm]) 35 | 36 | 37 | # ============================================================================ 38 | # INSTALLATION EXAMPLES 39 | # ============================================================================ 40 | 41 | # Minimal (core only): 42 | # pip install cascadeflow 43 | # 44 | # With OpenAI: 45 | # pip install cascadeflow[openai] 46 | # 47 | # With common providers: 48 | # pip install cascadeflow[providers] 49 | # 50 | # Everything: 51 | # pip install cascadeflow[all] -------------------------------------------------------------------------------- /packages/integrations/n8n/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@cascadeflow/n8n-nodes-cascadeflow", 3 | "version": "0.6.7", 4 | "description": "n8n node for cascadeflow - Smart AI model cascading with 40-85% cost savings", 5 | "keywords": [ 6 | "n8n-community-node-package", 7 | "n8n", 8 | "cascadeflow", 9 | "ai", 10 | "llm", 11 | "cost-optimization" 12 | ], 13 | "license": "MIT", 14 | "homepage": "https://github.com/lemony-ai/cascadeflow", 15 | "author": { 16 | "name": "Lemony Inc.", 17 | "email": "hello@lemony.ai" 18 | }, 19 | "repository": { 20 | "type": "git", 21 | "url": 
"https://github.com/lemony-ai/cascadeflow.git", 22 | "directory": "packages/integrations/n8n" 23 | }, 24 | "scripts": { 25 | "build": "tsc && gulp build:icons", 26 | "dev": "tsc --watch", 27 | "format": "prettier nodes credentials --write", 28 | "lint": "eslint \"nodes/**/*.ts\" \"credentials/**/*.ts\" package.json", 29 | "lintfix": "eslint \"nodes/**/*.ts\" \"credentials/**/*.ts\" package.json --fix", 30 | "prepublishOnly": "npm run build && npm run lint" 31 | }, 32 | "files": [ 33 | "dist" 34 | ], 35 | "n8n": { 36 | "n8nNodesApiVersion": 1, 37 | "credentials": [ 38 | "dist/credentials/CascadeFlowApi.credentials.js" 39 | ], 40 | "nodes": [ 41 | "dist/nodes/LmChatCascadeFlow/LmChatCascadeFlow.node.js" 42 | ] 43 | }, 44 | "devDependencies": { 45 | "@types/node": "^20.10.0", 46 | "@typescript-eslint/parser": "^6.0.0", 47 | "eslint": "^8.42.0", 48 | "eslint-plugin-n8n-nodes-base": "^1.11.0", 49 | "gulp": "^4.0.2", 50 | "n8n-workflow": "^1.0.0", 51 | "prettier": "^2.7.1", 52 | "typescript": "^5.1.6" 53 | }, 54 | "peerDependencies": { 55 | "n8n-workflow": "*" 56 | }, 57 | "dependencies": { 58 | "@langchain/core": "^0.3.0", 59 | "@cascadeflow/core": "^0.6.0" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /packages/core/examples/browser/vercel-edge/api/chat.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Vercel Edge Function for cascadeflow 3 | * 4 | * This edge function runs globally on Vercel's network for low-latency AI inference. 
5 | * 6 | * Deploy: vercel deploy 7 | * Test locally: vercel dev 8 | */ 9 | 10 | import { CascadeAgent } from '@cascadeflow/core'; 11 | 12 | export const config = { 13 | runtime: 'edge', 14 | }; 15 | 16 | export default async function handler(req: Request) { 17 | // Only allow POST requests 18 | if (req.method !== 'POST') { 19 | return new Response('Method not allowed', { status: 405 }); 20 | } 21 | 22 | try { 23 | const { query, options } = await req.json(); 24 | 25 | if (!query || typeof query !== 'string') { 26 | return new Response('Invalid query', { status: 400 }); 27 | } 28 | 29 | // Create cascade agent 30 | const agent = new CascadeAgent({ 31 | models: [ 32 | { 33 | name: 'gpt-4o-mini', 34 | provider: 'openai', 35 | cost: 0.00015, 36 | apiKey: process.env.OPENAI_API_KEY, 37 | }, 38 | { 39 | name: 'gpt-4o', 40 | provider: 'openai', 41 | cost: 0.00625, 42 | apiKey: process.env.OPENAI_API_KEY, 43 | }, 44 | ], 45 | }); 46 | 47 | // Run cascade 48 | const result = await agent.run(query, options); 49 | 50 | // Return result 51 | return new Response(JSON.stringify(result), { 52 | status: 200, 53 | headers: { 54 | 'Content-Type': 'application/json', 55 | 'Access-Control-Allow-Origin': '*', // Adjust for production 56 | }, 57 | }); 58 | } catch (error: any) { 59 | console.error('Edge function error:', error); 60 | return new Response( 61 | JSON.stringify({ 62 | error: error.message || 'Internal server error', 63 | }), 64 | { 65 | status: 500, 66 | headers: { 'Content-Type': 'application/json' }, 67 | } 68 | ); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /examples/vllm_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | vLLM provider example. 3 | 4 | Demonstrates using vLLM for high-performance local inference. 5 | 6 | Prerequisites: 7 | 1. Install vLLM: pip install vllm 8 | 2. 
Start vLLM server: 9 | python -m vllm.entrypoints.openai.api_server \ 10 | --model meta-llama/Llama-3-8B-Instruct \ 11 | --host 0.0.0.0 \ 12 | --port 8000 13 | """ 14 | 15 | import asyncio 16 | 17 | from cascadeflow.providers.vllm import VLLMProvider 18 | 19 | 20 | async def main(): 21 | """Test vLLM provider.""" 22 | 23 | print("vLLM Provider Test\n") 24 | 25 | # Initialize provider 26 | provider = VLLMProvider(base_url="http://localhost:8000/v1") 27 | 28 | try: 29 | # List available models 30 | print("Checking available models...") 31 | models = await provider.list_models() 32 | print(f"Available models: {models}\n") 33 | 34 | if not models: 35 | print("No models found. Make sure vLLM server is running.") 36 | return 37 | 38 | # Use first available model 39 | model = models[0] 40 | print(f"Using model: {model}\n") 41 | 42 | # Test completion 43 | print("Testing completion...") 44 | result = await provider.complete( 45 | prompt="Explain AI in one sentence", model=model, max_tokens=100 46 | ) 47 | 48 | print(f"Response: {result.content}") 49 | print(f"Tokens: {result.tokens_used}") 50 | print(f"Latency: {result.latency_ms:.0f}ms") 51 | print(f"Cost: ${result.cost:.4f} (self-hosted)") 52 | 53 | except Exception as e: 54 | print(f"Error: {e}") 55 | print("\nMake sure vLLM server is running:") 56 | print(" python -m vllm.entrypoints.openai.api_server \\") 57 | print(" --model meta-llama/Llama-3-8B-Instruct \\") 58 | print(" --host 0.0.0.0 --port 8000") 59 | 60 | finally: 61 | await provider.client.aclose() 62 | 63 | 64 | if __name__ == "__main__": 65 | asyncio.run(main()) 66 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Python dependencies - DISABLED: No automatic PRs 4 | - package-ecosystem: "pip" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | day: "monday" 9 | 
open-pull-requests-limit: 0 10 | labels: 11 | - "dependencies" 12 | - "lang: python" 13 | commit-message: 14 | prefix: "chore(deps)" 15 | include: "scope" 16 | 17 | # TypeScript/npm dependencies (root) - DISABLED: No automatic PRs 18 | - package-ecosystem: "npm" 19 | directory: "/" 20 | schedule: 21 | interval: "weekly" 22 | day: "monday" 23 | open-pull-requests-limit: 0 24 | labels: 25 | - "dependencies" 26 | - "lang: typescript" 27 | commit-message: 28 | prefix: "chore(deps)" 29 | include: "scope" 30 | 31 | # TypeScript Core package - DISABLED: No automatic PRs 32 | - package-ecosystem: "npm" 33 | directory: "/packages/core" 34 | schedule: 35 | interval: "weekly" 36 | day: "monday" 37 | open-pull-requests-limit: 0 38 | labels: 39 | - "dependencies" 40 | - "lang: typescript" 41 | - "core" 42 | commit-message: 43 | prefix: "chore(deps-core)" 44 | include: "scope" 45 | 46 | # n8n Integration package - DISABLED: No automatic PRs 47 | - package-ecosystem: "npm" 48 | directory: "/packages/integrations/n8n" 49 | schedule: 50 | interval: "weekly" 51 | day: "monday" 52 | open-pull-requests-limit: 0 53 | labels: 54 | - "dependencies" 55 | - "integration: n8n" 56 | commit-message: 57 | prefix: "chore(deps-n8n)" 58 | include: "scope" 59 | 60 | # GitHub Actions - DISABLED: No automatic PRs 61 | - package-ecosystem: "github-actions" 62 | directory: "/" 63 | schedule: 64 | interval: "weekly" 65 | day: "monday" 66 | open-pull-requests-limit: 0 67 | labels: 68 | - "dependencies" 69 | - "ci/cd" 70 | commit-message: 71 | prefix: "chore(deps-actions)" 72 | include: "scope" 73 | -------------------------------------------------------------------------------- /.github/assets/README.md: -------------------------------------------------------------------------------- 1 | # cascadeflow Assets 2 | 3 | This directory contains brand assets and logos for cascadeflow used across documentation and READMEs. 
4 | 5 | ## Logo Files 6 | 7 | ### Main Logos 8 | 9 | - **`CF_logo_bright.svg`** - cascadeflow logo for light mode/bright backgrounds 10 | - **`CF_logo_dark.svg`** - cascadeflow logo for dark mode/dark backgrounds 11 | 12 | ### Platform Icons 13 | 14 | - **`CF_python_color.svg`** - Python platform icon (color) 15 | - **`CF_ts_color.svg`** - TypeScript platform icon (color) 16 | - **`CF_n8n_color.svg`** - n8n integration icon (color) 17 | 18 | ## Usage 19 | 20 | ### Main Logo with Dark/Light Mode Support 21 | 22 | ```markdown 23 | 24 | 25 | 26 | cascadeflow Logo 27 | 28 | ``` 29 | 30 | ### Inline Platform Icons 31 | 32 | ```markdown 33 | Python 34 | TypeScript 35 | n8n 36 | ``` 37 | 38 | ## Current Usage 39 | 40 | These assets are used in: 41 | 42 | - **Main README** (`/README.md`) - Logo header + navigation icons 43 | - **TypeScript README** (`/packages/core/README.md`) - Logo header + TypeScript icon 44 | - **n8n Integration README** (`/packages/integrations/n8n/README.md`) - Logo header + n8n icon 45 | 46 | ## Brand Guidelines 47 | 48 | - **Logo Usage**: The main logo should be displayed at 80% width with responsive scaling for README headers 49 | - **Logo Styling**: Use `margin: 20px auto;` for proper spacing and centering 50 | - **Icon Usage**: Platform icons should be used at 20-24px for inline navigation 51 | - **Colors**: All icons use official brand colors (Python blue/yellow, TypeScript blue, n8n pink/purple) 52 | - **File Format**: All assets are SVG for scalability and quality at any size 53 | -------------------------------------------------------------------------------- /packages/core/examples/nodejs/test-complexity-quick.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Quick test of complexity detection improvements 3 | */ 4 | 5 | import { ComplexityDetector } from '@cascadeflow/core'; 6 | 7 | const detector = new ComplexityDetector(); 8 | 9 | const testQueries = [ 10 | { q: "What color is the sky?", 
expected: "trivial" }, 11 | { q: "What's the capital of France?", expected: "trivial" }, 12 | { q: "Translate 'hello' to Spanish", expected: "simple" }, 13 | { q: "Explain the difference between lists and tuples in Python", expected: "moderate" }, 14 | { q: "Write a function to reverse a string in Python", expected: "moderate" }, 15 | { q: "Explain quantum entanglement and its implications for quantum computing in detail", expected: "expert" }, 16 | { q: "Design a microservices architecture for a large-scale e-commerce platform with high availability", expected: "expert" }, 17 | { q: "Analyze the philosophical implications of consciousness and free will in the context of determinism", expected: "expert" }, 18 | ]; 19 | 20 | console.log('='.repeat(80)); 21 | console.log('COMPLEXITY DETECTION TEST'); 22 | console.log('='.repeat(80)); 23 | console.log(); 24 | 25 | let correct = 0; 26 | let total = testQueries.length; 27 | 28 | for (const test of testQueries) { 29 | const result = detector.detect(test.q, true); 30 | const match = result.complexity === test.expected ? 
'✅' : '❌'; 31 | 32 | if (result.complexity === test.expected) correct++; 33 | 34 | console.log(`${match} Query: ${test.q}`); 35 | console.log(` Expected: ${test.expected}, Got: ${result.complexity} (conf: ${result.confidence.toFixed(2)})`); 36 | 37 | if (result.metadata?.technicalTerms && result.metadata.technicalTerms.length > 0) { 38 | console.log(` Technical Terms: ${result.metadata.technicalTerms.join(', ')}`); 39 | } 40 | if (result.metadata?.domains && result.metadata.domains.size > 0) { 41 | console.log(` Domains: ${Array.from(result.metadata.domains).join(', ')}`); 42 | } 43 | console.log(); 44 | } 45 | 46 | console.log('='.repeat(80)); 47 | console.log(`ACCURACY: ${correct}/${total} (${((correct/total)*100).toFixed(1)}%)`); 48 | console.log('='.repeat(80)); 49 | -------------------------------------------------------------------------------- /cascadeflow/core/batch_config.py: -------------------------------------------------------------------------------- 1 | """Batch processing configuration for cascadeflow.""" 2 | 3 | from dataclasses import dataclass, field 4 | from enum import Enum 5 | from typing import Any, Optional 6 | 7 | 8 | class BatchStrategy(str, Enum): 9 | """Batch processing strategy""" 10 | 11 | LITELLM_NATIVE = "litellm_native" # Use LiteLLM batch API (preferred) 12 | SEQUENTIAL = "sequential" # Sequential with concurrency control 13 | AUTO = "auto" # Auto-detect best strategy 14 | 15 | 16 | @dataclass 17 | class BatchConfig: 18 | """ 19 | Configuration for batch processing. 
20 | 21 | Example: 22 | config = BatchConfig( 23 | batch_size=10, 24 | max_parallel=3, 25 | timeout_per_query=30.0, 26 | strategy=BatchStrategy.AUTO 27 | ) 28 | """ 29 | 30 | # Batch settings 31 | batch_size: int = 10 32 | """Maximum number of queries in a single batch""" 33 | 34 | max_parallel: int = 3 35 | """Maximum number of parallel requests (fallback mode)""" 36 | 37 | timeout_per_query: float = 30.0 38 | """Timeout per query in seconds""" 39 | 40 | total_timeout: Optional[float] = None 41 | """Total timeout for entire batch (default: timeout_per_query * batch_size)""" 42 | 43 | # Strategy 44 | strategy: BatchStrategy = BatchStrategy.AUTO 45 | """Batch processing strategy""" 46 | 47 | # Error handling 48 | stop_on_error: bool = False 49 | """Stop processing batch if any query fails""" 50 | 51 | retry_failed: bool = True 52 | """Retry failed queries once""" 53 | 54 | # Cost & quality 55 | track_cost: bool = True 56 | """Track cost for each query in batch""" 57 | 58 | validate_quality: bool = True 59 | """Validate quality for each query in batch""" 60 | 61 | # Advanced 62 | preserve_order: bool = True 63 | """Preserve query order in results""" 64 | 65 | metadata: dict[str, Any] = field(default_factory=dict) 66 | """Custom metadata for batch""" 67 | 68 | def __post_init__(self): 69 | if self.total_timeout is None: 70 | self.total_timeout = self.timeout_per_query * self.batch_size 71 | -------------------------------------------------------------------------------- /.github/assets/CF_python_color.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # Labeler configuration for automatic PR labeling 2 | 3 | # Python 4 | 'lang: python': 5 | - changed-files: 6 | - any-glob-to-any-file: 7 | - 'cascadeflow/**/*.py' 8 | - 'tests/**/*.py' 9 | - 
'examples/**/*.py' 10 | - 'pyproject.toml' 11 | - 'requirements*.txt' 12 | 13 | # TypeScript 14 | 'lang: typescript': 15 | - changed-files: 16 | - any-glob-to-any-file: 17 | - 'packages/**/*.ts' 18 | - 'packages/**/*.tsx' 19 | - 'packages/**/tsconfig.json' 20 | - 'packages/**/package.json' 21 | 22 | # n8n 23 | 'integration: n8n': 24 | - changed-files: 25 | - any-glob-to-any-file: 26 | - 'packages/integrations/n8n/**/*' 27 | 28 | # Documentation 29 | 'documentation': 30 | - changed-files: 31 | - any-glob-to-any-file: 32 | - '**/*.md' 33 | - 'docs/**/*' 34 | 35 | # Workflows 36 | 'ci/cd': 37 | - changed-files: 38 | - any-glob-to-any-file: 39 | - '.github/workflows/**/*' 40 | - '.github/**/*.yml' 41 | 42 | # Dependencies 43 | 'dependencies': 44 | - changed-files: 45 | - any-glob-to-any-file: 46 | - 'package.json' 47 | - 'pnpm-lock.yaml' 48 | - 'requirements*.txt' 49 | - 'pyproject.toml' 50 | 51 | # Tests 52 | 'tests': 53 | - changed-files: 54 | - any-glob-to-any-file: 55 | - 'tests/**/*' 56 | - 'packages/**/__tests__/**/*' 57 | - 'packages/**/*.test.ts' 58 | - 'packages/**/*.spec.ts' 59 | 60 | # Examples 61 | 'examples': 62 | - changed-files: 63 | - any-glob-to-any-file: 64 | - 'examples/**/*' 65 | - 'packages/**/examples/**/*' 66 | 67 | # Core 68 | 'core': 69 | - changed-files: 70 | - any-glob-to-any-file: 71 | - 'cascadeflow/**/*.py' 72 | - 'packages/core/src/**/*' 73 | 74 | # Providers 75 | 'providers': 76 | - changed-files: 77 | - any-glob-to-any-file: 78 | - 'cascadeflow/providers/**/*' 79 | - 'packages/core/src/providers/**/*' 80 | 81 | # Configuration 82 | 'configuration': 83 | - changed-files: 84 | - any-glob-to-any-file: 85 | - 'turbo.json' 86 | - 'pnpm-workspace.yaml' 87 | - '.eslintrc*' 88 | - '.prettierrc*' 89 | - 'ruff.toml' 90 | - 'pyproject.toml' 91 | -------------------------------------------------------------------------------- /packages/integrations/n8n/credentials/CascadeFlowApi.credentials.ts: 
-------------------------------------------------------------------------------- 1 | import { 2 | IAuthenticateGeneric, 3 | ICredentialTestRequest, 4 | ICredentialType, 5 | INodeProperties, 6 | } from 'n8n-workflow'; 7 | 8 | export class CascadeFlowApi implements ICredentialType { 9 | name = 'cascadeFlowApi'; 10 | displayName = 'Cascadeflow API'; 11 | documentationUrl = 'https://github.com/lemony-ai/cascadeflow'; 12 | properties: INodeProperties[] = [ 13 | { 14 | displayName: 'OpenAI API Key', 15 | name: 'openaiApiKey', 16 | type: 'string', 17 | typeOptions: { password: true }, 18 | default: '', 19 | description: 'API key for OpenAI (GPT-4, GPT-4o, etc.)', 20 | placeholder: 'sk-...', 21 | }, 22 | { 23 | displayName: 'Anthropic API Key', 24 | name: 'anthropicApiKey', 25 | type: 'string', 26 | typeOptions: { password: true }, 27 | default: '', 28 | description: 'API key for Anthropic (Claude models)', 29 | placeholder: 'sk-ant-...', 30 | }, 31 | { 32 | displayName: 'Groq API Key', 33 | name: 'groqApiKey', 34 | type: 'string', 35 | typeOptions: { password: true }, 36 | default: '', 37 | description: 'API key for Groq (fast Llama inference)', 38 | placeholder: 'gsk_...', 39 | }, 40 | { 41 | displayName: 'Together AI API Key', 42 | name: 'togetherApiKey', 43 | type: 'string', 44 | typeOptions: { password: true }, 45 | default: '', 46 | description: 'API key for Together AI', 47 | placeholder: '', 48 | }, 49 | { 50 | displayName: 'HuggingFace API Key', 51 | name: 'huggingfaceApiKey', 52 | type: 'string', 53 | typeOptions: { password: true }, 54 | default: '', 55 | description: 'API key for HuggingFace Inference', 56 | placeholder: 'hf_...', 57 | }, 58 | ]; 59 | 60 | authenticate: IAuthenticateGeneric = { 61 | type: 'generic', 62 | properties: {}, 63 | }; 64 | 65 | test: ICredentialTestRequest = { 66 | request: { 67 | baseURL: 'https://api.openai.com/v1', 68 | url: '/models', 69 | method: 'GET', 70 | headers: { 71 | Authorization: '=Bearer {{$credentials.openaiApiKey}}', 
72 | }, 73 | }, 74 | }; 75 | } 76 | -------------------------------------------------------------------------------- /packages/langchain-cascadeflow/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@cascadeflow/langchain", 3 | "version": "0.6.5", 4 | "description": "LangChain integration for cascadeflow - Add intelligent cost optimization to your LangChain models", 5 | "author": { 6 | "name": "Lemony Inc.", 7 | "email": "hello@lemony.ai" 8 | }, 9 | "license": "MIT", 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/lemony-ai/cascadeflow.git", 13 | "directory": "packages/langchain-cascadeflow" 14 | }, 15 | "main": "./dist/index.js", 16 | "module": "./dist/index.mjs", 17 | "types": "./dist/index.d.ts", 18 | "exports": { 19 | ".": { 20 | "types": "./dist/index.d.ts", 21 | "import": "./dist/index.mjs", 22 | "require": "./dist/index.js" 23 | } 24 | }, 25 | "files": [ 26 | "dist", 27 | "examples", 28 | "README.md" 29 | ], 30 | "scripts": { 31 | "build": "tsup src/index.ts --format cjs,esm --dts --clean", 32 | "dev": "tsup src/index.ts --format cjs,esm --dts --watch", 33 | "test": "vitest run", 34 | "test:watch": "vitest", 35 | "lint": "eslint src --ext .ts", 36 | "typecheck": "tsc --noEmit", 37 | "clean": "rm -rf dist" 38 | }, 39 | "keywords": [ 40 | "langchain", 41 | "cascadeflow", 42 | "ai", 43 | "llm", 44 | "cost-optimization", 45 | "model-routing", 46 | "cascade", 47 | "typescript" 48 | ], 49 | "dependencies": { 50 | "@cascadeflow/core": "workspace:^", 51 | "@cascadeflow/ml": "workspace:^", 52 | "@langchain/anthropic": "^1.0.1", 53 | "@langchain/google-genai": "^1.0.1" 54 | }, 55 | "peerDependencies": { 56 | "@langchain/core": "^0.3.0", 57 | "langchain": "^0.3.0" 58 | }, 59 | "peerDependenciesMeta": { 60 | "langchain": { 61 | "optional": true 62 | } 63 | }, 64 | "devDependencies": { 65 | "@langchain/core": "^0.3.24", 66 | "@langchain/openai": "^0.3.17", 67 | "@types/node": 
"^20.10.0", 68 | "@typescript-eslint/eslint-plugin": "^6.15.0", 69 | "@typescript-eslint/parser": "^6.15.0", 70 | "eslint": "^8.55.0", 71 | "langchain": "^0.3.13", 72 | "openai": "^4.73.1", 73 | "tsup": "^8.0.1", 74 | "typescript": "^5.3.3", 75 | "vitest": "^1.0.4" 76 | }, 77 | "engines": { 78 | "node": ">=18.0.0" 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /packages/integrations/n8n/DEPRECATE_5.0.x.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to deprecate incorrect 5.0.x versions of @cascadeflow/n8n-nodes-cascadeflow 4 | # Run this AFTER publishing v0.5.0 to npm 5 | 6 | echo "Deprecating versions 5.0.1 through 5.0.7..." 7 | echo "" 8 | echo "⚠️ Make sure you are authenticated to npm with the correct account" 9 | echo "⚠️ Run: npm whoami" 10 | echo "" 11 | read -p "Press Enter to continue or Ctrl+C to cancel..." 12 | 13 | # Deprecate each version 14 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.1 "Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 15 | 16 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.2 "Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 17 | 18 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.3 "Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 19 | 20 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.4 "Version numbering error. Please uninstall and install v0.5.0 or later. 
See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 21 | 22 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.5 "Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 23 | 24 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.6 "Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 25 | 26 | npm deprecate @cascadeflow/n8n-nodes-cascadeflow@5.0.7 "Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration" 27 | 28 | echo "" 29 | echo "✅ All 5.0.x versions deprecated successfully!" 30 | echo "" 31 | echo "Users will see deprecation warnings when installing these versions." 32 | echo "npm will still recommend v0.5.0 as the latest stable version." 
33 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' # Trigger on version tags like v0.1.0, v1.0.0 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | create-release: 13 | name: Create GitHub Release 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 # Get all history for changelog 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: '3.11' 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install build twine 31 | 32 | - name: Get version from tag 33 | id: get_version 34 | run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT 35 | 36 | - name: Build package 37 | run: python -m build 38 | 39 | - name: Generate changelog 40 | id: changelog 41 | run: | 42 | # Get commits since last tag 43 | PREV_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "") 44 | if [ -z "$PREV_TAG" ]; then 45 | CHANGELOG=$(git log --pretty=format:"- %s (%h)" --reverse) 46 | else 47 | CHANGELOG=$(git log ${PREV_TAG}..HEAD --pretty=format:"- %s (%h)" --reverse) 48 | fi 49 | 50 | # Write to file for multiline handling 51 | echo "$CHANGELOG" > changelog.txt 52 | 53 | - name: Create Release 54 | id: create_release # Required: the upload step below reads steps.create_release.outputs.upload_url 55 | uses: actions/create-release@v1 # NOTE(review): this action is archived; consider softprops/action-gh-release 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 58 | with: 59 | tag_name: ${{ github.ref }} 60 | release_name: Release ${{ steps.get_version.outputs.VERSION }} 61 | body_path: changelog.txt 62 | draft: false 63 | prerelease: false 64 | 65 | - name: Upload Release Assets 66 | uses: actions/upload-release-asset@v1 67 | env: 68 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 69 | with: 70 | upload_url: ${{ steps.create_release.outputs.upload_url }} 71 |
asset_path: ./dist/cascadeflow-${{ steps.get_version.outputs.VERSION }}.tar.gz 71 | asset_name: cascadeflow-${{ steps.get_version.outputs.VERSION }}.tar.gz 72 | asset_content_type: application/gzip -------------------------------------------------------------------------------- /examples/integrations/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Docker Compose for cascadeflow OpenTelemetry + Grafana Stack 2 | # 3 | # This stack includes: 4 | # - OpenTelemetry Collector (receives metrics) 5 | # - Prometheus (stores metrics) 6 | # - Grafana (visualizes metrics) 7 | # 8 | # Usage: 9 | # docker-compose up -d # Start stack 10 | # docker-compose down # Stop stack 11 | # docker-compose logs -f # View logs 12 | # 13 | # Access: 14 | # - Grafana: http://localhost:3000 (admin/admin) 15 | # - Prometheus: http://localhost:9090 16 | 17 | version: '3.8' 18 | 19 | services: 20 | # OpenTelemetry Collector - Receives metrics from cascadeflow 21 | otel-collector: 22 | image: otel/opentelemetry-collector:latest 23 | container_name: cascadeflow-otel-collector 24 | command: ["--config=/etc/otel-collector-config.yaml"] 25 | volumes: 26 | - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml 27 | ports: 28 | - "4318:4318" # OTLP HTTP receiver 29 | - "4317:4317" # OTLP gRPC receiver 30 | - "8888:8888" # Prometheus metrics exposed by the collector 31 | - "8889:8889" # Prometheus exporter metrics 32 | networks: 33 | - cascadeflow 34 | 35 | # Prometheus - Stores metrics 36 | prometheus: 37 | image: prom/prometheus:latest 38 | container_name: cascadeflow-prometheus 39 | command: 40 | - '--config.file=/etc/prometheus/prometheus.yml' 41 | - '--storage.tsdb.path=/prometheus' 42 | - '--web.console.libraries=/usr/share/prometheus/console_libraries' 43 | - '--web.console.templates=/usr/share/prometheus/consoles' 44 | volumes: 45 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 46 | - prometheus-data:/prometheus 47 | ports: 48 
| - "9090:9090" 49 | networks: 50 | - cascadeflow 51 | depends_on: 52 | - otel-collector 53 | 54 | # Grafana - Visualizes metrics 55 | grafana: 56 | image: grafana/grafana:latest 57 | container_name: cascadeflow-grafana 58 | environment: 59 | - GF_SECURITY_ADMIN_PASSWORD=admin 60 | - GF_SECURITY_ADMIN_USER=admin 61 | - GF_USERS_ALLOW_SIGN_UP=false 62 | volumes: 63 | - grafana-data:/var/lib/grafana 64 | - ./grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml 65 | ports: 66 | - "3000:3000" 67 | networks: 68 | - cascadeflow 69 | depends_on: 70 | - prometheus 71 | 72 | networks: 73 | cascadeflow: 74 | driver: bridge 75 | 76 | volumes: 77 | prometheus-data: 78 | grafana-data: 79 | -------------------------------------------------------------------------------- /cascadeflow/providers/deepseek.py: -------------------------------------------------------------------------------- 1 | """DeepSeek provider implementation. 2 | 3 | DeepSeek uses an OpenAI-compatible API, making it easy to integrate. 4 | The provider extends the OpenAI provider with DeepSeek-specific configuration. 5 | 6 | Environment Variables: 7 | DEEPSEEK_API_KEY: Your DeepSeek API key 8 | 9 | Models: 10 | - deepseek-coder: Specialized for code generation and understanding 11 | - deepseek-chat: General-purpose chat model 12 | 13 | Example: 14 | >>> from cascadeflow import CascadeAgent, ModelConfig 15 | >>> agent = CascadeAgent( 16 | ... models=[ 17 | ... ModelConfig(name="deepseek-coder", provider="deepseek", cost=0.00014), 18 | ... ] 19 | ... ) 20 | """ 21 | 22 | import os 23 | from typing import Optional 24 | 25 | from .openai import OpenAIProvider 26 | 27 | 28 | class DeepSeekProvider(OpenAIProvider): 29 | """ 30 | DeepSeek provider using OpenAI-compatible API. 31 | 32 | DeepSeek models are particularly strong at: 33 | - Code generation and understanding 34 | - Mathematical reasoning 35 | - General chat 36 | 37 | The API is fully compatible with OpenAI's API format. 
38 | All methods are inherited from OpenAIProvider - only the 39 | base URL and API key source are different. 40 | """ 41 | 42 | # DeepSeek API base URL 43 | BASE_URL = "https://api.deepseek.com" 44 | 45 | def __init__( 46 | self, 47 | api_key: Optional[str] = None, 48 | base_url: Optional[str] = None, 49 | **kwargs, 50 | ): 51 | """ 52 | Initialize DeepSeek provider. 53 | 54 | Args: 55 | api_key: DeepSeek API key (defaults to DEEPSEEK_API_KEY env var) 56 | base_url: Custom base URL (defaults to DeepSeek API) 57 | **kwargs: Additional OpenAI provider options 58 | """ 59 | # Get API key from environment if not provided 60 | deepseek_api_key = api_key or os.getenv("DEEPSEEK_API_KEY") 61 | 62 | if not deepseek_api_key: 63 | raise ValueError( 64 | "DeepSeek API key not found. " 65 | "Set DEEPSEEK_API_KEY environment variable or pass api_key parameter." 66 | ) 67 | 68 | # Initialize parent OpenAI provider with DeepSeek API key 69 | super().__init__(api_key=deepseek_api_key, **kwargs) 70 | 71 | # Override base URL to use DeepSeek API 72 | self.base_url = base_url or self.BASE_URL 73 | 74 | @property 75 | def name(self) -> str: 76 | """Provider name.""" 77 | return "deepseek" 78 | -------------------------------------------------------------------------------- /cascadeflow/integrations/langchain/__init__.py: -------------------------------------------------------------------------------- 1 | """CascadeFlow LangChain Integration. 2 | 3 | Transparent wrapper for LangChain chat models with intelligent cascade logic 4 | for cost optimization. 5 | 6 | Example: 7 | >>> from langchain_openai import ChatOpenAI 8 | >>> from cascadeflow.langchain import CascadeFlow 9 | >>> 10 | >>> drafter = ChatOpenAI(model='gpt-4o-mini') 11 | >>> verifier = ChatOpenAI(model='gpt-4o') 12 | >>> 13 | >>> cascade = CascadeFlow( 14 | ... drafter=drafter, 15 | ... verifier=verifier, 16 | ... quality_threshold=0.7 17 | ... 
) 18 | >>> 19 | >>> result = await cascade.ainvoke("What is TypeScript?") 20 | """ 21 | 22 | from .wrapper import CascadeFlow, with_cascade 23 | from .types import CascadeConfig, CascadeResult, CostMetadata, TokenUsage 24 | from .utils import ( 25 | calculate_quality, 26 | calculate_cost, 27 | calculate_savings, 28 | create_cost_metadata, 29 | extract_token_usage, 30 | MODEL_PRICING, 31 | ) 32 | 33 | # Model discovery utilities - optional feature 34 | from .models import ( 35 | MODEL_PRICING_REFERENCE, 36 | analyze_cascade_pair, 37 | suggest_cascade_pairs, 38 | discover_cascade_pairs, 39 | analyze_model, 40 | compare_models, 41 | find_best_cascade_pair, 42 | validate_cascade_pair, 43 | extract_model_name, 44 | get_provider, 45 | ) 46 | from .cost_tracking import ( 47 | BudgetTracker, 48 | CostHistory, 49 | CostEntry, 50 | track_costs, 51 | ) 52 | from .langchain_callbacks import ( 53 | CascadeFlowCallbackHandler, 54 | get_cascade_callback, 55 | ) 56 | 57 | __all__ = [ 58 | # Main classes 59 | "CascadeFlow", 60 | "with_cascade", 61 | # Types 62 | "CascadeConfig", 63 | "CascadeResult", 64 | "CostMetadata", 65 | "TokenUsage", 66 | # Utilities 67 | "calculate_quality", 68 | "calculate_cost", 69 | "calculate_savings", 70 | "create_cost_metadata", 71 | "extract_token_usage", 72 | "MODEL_PRICING", 73 | # Model discovery 74 | "MODEL_PRICING_REFERENCE", 75 | "analyze_cascade_pair", 76 | "suggest_cascade_pairs", 77 | "discover_cascade_pairs", 78 | "analyze_model", 79 | "compare_models", 80 | "find_best_cascade_pair", 81 | "validate_cascade_pair", 82 | "extract_model_name", 83 | "get_provider", 84 | # Cost tracking (Python-specific features) 85 | "BudgetTracker", 86 | "CostHistory", 87 | "CostEntry", 88 | "track_costs", 89 | # LangChain callback handlers 90 | "CascadeFlowCallbackHandler", 91 | "get_cascade_callback", 92 | ] 93 | -------------------------------------------------------------------------------- /packages/core/quick-perf-test.sh: 
#!/bin/bash
# Quick Performance Test - 3 runs for cost efficiency
# Simplified version for faster analysis
#
# Runs the basic-usage example RUNS times, extracts the reported savings and
# cost from each run, and prints per-run numbers plus the average savings.
#
# Fixes vs. previous version:
# - Failed runs now append empty placeholder entries so the per-run report
#   never misattributes a later run's numbers to an earlier run index.
# - The loops honor $RUNS instead of a hard-coded {1..3} range.

set -e

RUNS=3
OUTPUT_DIR="/tmp/cascadeflow-quickperf-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$OUTPUT_DIR"

echo "╔═══════════════════════════════════════════════════════════════╗"
echo "║ cascadeflow Quick Performance Test (3 runs) ║"
echo "╚═══════════════════════════════════════════════════════════════╝"
echo ""

# Load environment
if [ -f "../../.env" ]; then
    set -a
    source ../../.env
    set +a
fi

# Entry i-1 always corresponds to run i (empty string when the run failed).
declare -a savings_array
declare -a cost_array

for i in $(seq 1 "$RUNS"); do
    echo ""
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo "Run $i/$RUNS"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    OUTPUT_FILE="$OUTPUT_DIR/run-$i.log"

    if npx tsx examples/nodejs/basic-usage.ts > "$OUTPUT_FILE" 2>&1; then
        echo "✓ Run $i completed"

        # Extract savings percentage and cost from the example's output.
        savings=$(grep "💰 SAVINGS:" "$OUTPUT_FILE" | awk '{print $3}' | sed 's/[($%)]//g' || echo "0")
        cost=$(grep "Total Cost:" "$OUTPUT_FILE" | awk '{print $3}' | sed 's/\$//g' || echo "0")

        savings_array+=("$savings")
        cost_array+=("$cost")

        echo " Savings: ${savings}%"
        echo " Cost: \$${cost}"
    else
        echo "✗ Run $i failed"
        # Keep the arrays aligned with run numbers; empty entries are
        # skipped by the average and the per-run report below.
        savings_array+=("")
        cost_array+=("")
    fi

    # Delay between runs
    if [ "$i" -lt "$RUNS" ]; then
        sleep 3
    fi
done

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "📊 RESULTS"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Average over successful runs only (placeholders are empty and skipped).
sum=0
count=0
for val in "${savings_array[@]}"; do
    if [ -n "$val" ]; then
        sum=$(echo "$sum + $val" | bc)
        count=$((count + 1))
    fi
done

if [ $count -gt 0 ]; then
    avg=$(echo "scale=2; $sum / $count" | bc)
else
    avg="0"
fi

echo ""
echo "Average Savings: ${avg}%"
echo "Target: 40-60%"
if (( $(echo "$avg >= 40" | bc -l) )); then
    echo "Status: ✓ Target achieved!"
else
    echo "Status: ⚠ Below target"
fi

echo ""
echo "Individual runs:"
for i in $(seq 1 "$RUNS"); do
    if [ -n "${savings_array[$i-1]}" ]; then
        echo " Run $i: ${savings_array[$i-1]}% (Cost: \$${cost_array[$i-1]})"
    fi
done

echo ""
echo "Logs: $OUTPUT_DIR"
echo ""
17 | * const cascadeModel = withCascade({ 18 | * drafter, 19 | * verifier, 20 | * qualityThreshold: 0.7 21 | * }); 22 | * 23 | * // Use like any LangChain model - all features preserved! 24 | * const result = await cascadeModel.invoke("What is TypeScript?"); 25 | * console.log(result); 26 | * 27 | * // Check cascade stats 28 | * const stats = cascadeModel.getLastCascadeResult(); 29 | * console.log(`Saved: ${stats.savingsPercentage}%`); 30 | * ``` 31 | */ 32 | 33 | export { CascadeFlow } from './wrapper.js'; 34 | export type { CascadeConfig, CascadeResult, CostMetadata } from './types.js'; 35 | export * from './utils.js'; 36 | export { analyzeCascadePair, suggestCascadePairs } from './helpers.js'; 37 | export type { CascadeAnalysis } from './helpers.js'; 38 | 39 | // Routers and complexity detection 40 | export { PreRouter, createPreRouter } from './routers/pre-router.js'; 41 | export type { PreRouterConfig, PreRouterStats } from './routers/pre-router.js'; 42 | export { Router, RoutingStrategy, RoutingDecisionHelper, RouterChain } from './routers/base.js'; 43 | export type { RoutingDecision } from './routers/base.js'; 44 | export { ComplexityDetector } from './complexity.js'; 45 | export type { QueryComplexity, ComplexityResult } from './complexity.js'; 46 | 47 | // Model discovery (works with YOUR models!) 
"""
Example: Guardrails (Content Moderation + PII Detection) v0.2.1

Demonstrates content safety and PII detection for production use.
"""

import asyncio

from cascadeflow import (
    GuardrailsManager,
    TierLevel,
    UserProfile,
)

# Visual separators reused throughout the demo output.
RULE = "=" * 60
SUB_RULE = "-" * 60


async def main():
    """Walk through the guardrails API: safe content, PII detection,
    PII redaction, and a profile with guardrails fully disabled."""
    print(RULE)
    print("cascadeflow v0.2.1 - Guardrails")
    print(RULE)

    # A PRO-tier profile with both guardrails switched on.
    pro_profile = UserProfile.from_tier(
        TierLevel.PRO,
        user_id="secure_user",
        enable_content_moderation=True,
        enable_pii_detection=True,
    )

    print("\nUser profile:")
    print(f" Tier: {pro_profile.tier.name}")
    print(f" Content moderation: {pro_profile.enable_content_moderation}")
    print(f" PII detection: {pro_profile.enable_pii_detection}")

    guardrails = GuardrailsManager()

    # Example 1: Safe content
    print("\n1. Safe content check")
    print(SUB_RULE)
    benign_query = "What is the capital of France?"
    check = await guardrails.check_content(benign_query, pro_profile)
    print(f"Text: {benign_query}")
    print(f"Safe: {check.is_safe}")

    # Example 2: PII detection
    print("\n2. PII detection")
    print(SUB_RULE)
    sensitive_text = "My email is john.doe@example.com and phone is 555-123-4567"
    check = await guardrails.check_content(sensitive_text, pro_profile)
    print(f"Text: {sensitive_text}")
    print(f"Safe: {check.is_safe}")
    if check.pii_detected:
        print(f"PII detected: {len(check.pii_detected)} matches")
        for match in check.pii_detected:
            print(f" - {match.pii_type}: {match.value}")

    # Example 3: PII redaction
    print("\n3. PII redaction")
    print(SUB_RULE)
    redacted_text, _ = await guardrails.redact_pii(sensitive_text, pro_profile)
    print(f"Original: {sensitive_text}")
    print(f"Redacted: {redacted_text}")

    # Example 4: Disable guardrails
    print("\n4. Disabled guardrails")
    print(SUB_RULE)
    unrestricted_profile = UserProfile.from_tier(
        TierLevel.FREE,
        user_id="basic_user",
        enable_content_moderation=False,
        enable_pii_detection=False,
    )
    check = await guardrails.check_content(sensitive_text, unrestricted_profile)
    print(f"Content moderation: {unrestricted_profile.enable_content_moderation}")
    print(f"PII detection: {unrestricted_profile.enable_pii_detection}")
    print(f"Result: {check.is_safe} (guardrails disabled)")

    print("\n" + RULE)
    print("Guardrails examples completed!")
    print(RULE)


if __name__ == "__main__":
    asyncio.run(main())
always using verifier 23 | model_used: Which model was used ('drafter' or 'verifier') 24 | accepted: Whether drafter response was accepted 25 | drafter_quality: Quality score of drafter response (0-1) 26 | """ 27 | 28 | drafter_tokens: TokenUsage 29 | verifier_tokens: Optional[TokenUsage] 30 | drafter_cost: float 31 | verifier_cost: float 32 | total_cost: float 33 | savings_percentage: float 34 | model_used: str 35 | accepted: bool 36 | drafter_quality: float 37 | 38 | 39 | class CascadeResult(TypedDict): 40 | """Result of cascade execution. 41 | 42 | Attributes: 43 | content: Final response content 44 | model_used: Which model was used ('drafter' or 'verifier') 45 | accepted: Whether drafter response was accepted 46 | drafter_quality: Quality score of drafter response (0-1) 47 | drafter_cost: Cost of drafter execution in USD 48 | verifier_cost: Cost of verifier execution in USD 49 | total_cost: Total cost in USD 50 | savings_percentage: Savings percentage vs. always using verifier 51 | latency_ms: Total latency in milliseconds 52 | """ 53 | 54 | content: str 55 | model_used: str 56 | accepted: bool 57 | drafter_quality: float 58 | drafter_cost: float 59 | verifier_cost: float 60 | total_cost: float 61 | savings_percentage: float 62 | latency_ms: float 63 | 64 | 65 | class CascadeConfig(TypedDict, total=False): 66 | """Configuration for cascade behavior. 
67 | 68 | Attributes: 69 | quality_threshold: Quality threshold for accepting drafter responses (0-1) 70 | enable_cost_tracking: Enable automatic cost tracking 71 | cost_tracking_provider: Cost tracking provider ('langsmith' or 'cascadeflow') 72 | enable_pre_router: Enable pre-routing based on query complexity 73 | cascade_complexities: Complexity levels that should use cascade 74 | """ 75 | 76 | quality_threshold: float 77 | enable_cost_tracking: bool 78 | cost_tracking_provider: str 79 | enable_pre_router: bool 80 | cascade_complexities: list[str] 81 | -------------------------------------------------------------------------------- /packages/langchain-cascadeflow/examples/streaming-cascade.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Streaming Cascade Example 3 | * 4 | * Demonstrates real-time streaming with CascadeFlow: 5 | * 1. Stream drafter optimistically (user sees output immediately) 6 | * 2. Check quality after drafter completes 7 | * 3. If quality insufficient, show switch message and stream verifier 8 | */ 9 | 10 | import { ChatOpenAI } from '@langchain/openai'; 11 | import { withCascade } from '../src/index.js'; 12 | 13 | async function main() { 14 | console.log('🌊 CascadeFlow Streaming Example\n'); 15 | 16 | // Configure cascade with drafter and verifier 17 | const cascade = withCascade({ 18 | drafter: new ChatOpenAI({ 19 | model: 'gpt-4o-mini', 20 | temperature: 0.7, 21 | }), 22 | verifier: new ChatOpenAI({ 23 | model: 'gpt-4o', 24 | temperature: 0.7, 25 | }), 26 | qualityThreshold: 0.7, 27 | }); 28 | 29 | // Example 1: Simple query (likely accepted by drafter) 30 | console.log('Example 1: Simple Query (likely cascaded)\n'); 31 | console.log('Q: What is 2+2?\n'); 32 | console.log('A: '); 33 | 34 | const stream1 = await cascade.stream('What is 2+2?'); 35 | 36 | for await (const chunk of stream1) { 37 | const content = typeof chunk.content === 'string' ? 
#!/bin/bash
# Test TypeScript examples with proper workspace setup
#
# This script:
# 1. Ensures the package is built
# 2. Loads environment variables
# 3. Runs each TypeScript example
# 4. Reports results
#
# Fix: the previous "((PASSED++))" / "((FAILED++))" counters returned a
# non-zero exit status when the pre-increment value was 0, which aborts the
# whole script under `set -e` after the first counted example. Plain
# assignments are always status 0 and therefore safe with errexit.

set -e

echo "====================================="
echo "TypeScript Examples Testing"
echo "====================================="

# Navigate to examples directory
cd "$(dirname "$0")/../packages/core/examples" || exit 1

# Build parent package if needed
if [ ! -d "../dist" ]; then
    echo "📦 Building @cascadeflow/core..."
    cd ..
    pnpm build
    cd examples
fi

# Load environment variables
if [ -f "../../../.env" ]; then
    set -a
    source ../../../.env
    set +a
    echo "✅ Environment variables loaded"
else
    echo "⚠️ No .env file found"
fi

echo ""
echo "Running TypeScript examples..."
echo "-------------------------------------"

PASSED=0
FAILED=0

# Test Node.js examples
NODE_EXAMPLES=(
    "nodejs/basic-usage.ts"
    "nodejs/tool-calling.ts"
    "nodejs/multi-provider.ts"
    "nodejs/production-patterns.ts"
)

for example in "${NODE_EXAMPLES[@]}"; do
    echo ""
    echo "Testing: $example"

    if npx tsx "$example" > /dev/null 2>&1; then
        echo "✅ PASSED"
        PASSED=$((PASSED + 1))
    else
        echo "❌ FAILED"
        # Show error (rerun; the pipeline's status is tail's, so errexit-safe)
        npx tsx "$example" 2>&1 | tail -20
        FAILED=$((FAILED + 1))
    fi
done

# Test streaming example
if [ -f "streaming.ts" ]; then
    echo ""
    echo "Testing: streaming.ts"

    if npx tsx streaming.ts > /dev/null 2>&1; then
        echo "✅ PASSED"
        PASSED=$((PASSED + 1))
    else
        echo "❌ FAILED"
        npx tsx streaming.ts 2>&1 | tail -20
        FAILED=$((FAILED + 1))
    fi
fi

# Validate browser example (compile only)
if [ -f "browser/vercel-edge/api/chat.ts" ]; then
    echo ""
    echo "Validating: browser/vercel-edge/api/chat.ts"

    # Just check if it compiles
    if npx tsc --noEmit browser/vercel-edge/api/chat.ts 2>/dev/null; then
        echo "✅ VALIDATED (compiles)"
        PASSED=$((PASSED + 1))
    else
        echo "⚠️ VALIDATION WARNING"
        npx tsc --noEmit browser/vercel-edge/api/chat.ts 2>&1 | tail -10
        # Don't count as failure - browser examples need special env
    fi
fi

echo ""
echo "====================================="
echo "Summary:"
echo " ✅ Passed: $PASSED"
echo " ❌ Failed: $FAILED"
echo "====================================="

exit $FAILED
-f "examples/nodejs/$example" ]; then 40 | echo -e "${YELLOW}⏭️ SKIPPED${NC}: File not found" 41 | ((SKIPPED++)) 42 | return 43 | fi 44 | 45 | # Run example (no timeout on macOS) 46 | if npx tsx "examples/nodejs/$example" > "/tmp/test-$example.log" 2>&1; then 47 | echo -e "${GREEN}✅ PASSED${NC}" 48 | ((PASSED++)) 49 | # Show last 20 lines of output 50 | echo "--- Last 20 lines of output ---" 51 | tail -20 "/tmp/test-$example.log" 52 | else 53 | EXIT_CODE=$? 54 | echo -e "${RED}❌ FAILED${NC} (exit code: $EXIT_CODE)" 55 | ((FAILED++)) 56 | FAILED_EXAMPLES+=("$example") 57 | # Show last 50 lines for debugging 58 | echo "--- Last 50 lines of output ---" 59 | tail -50 "/tmp/test-$example.log" 60 | fi 61 | } 62 | 63 | # Phase 1: Test remaining untested examples 64 | echo "🚀 PHASE 1: Testing Remaining Examples" 65 | echo "========================================" 66 | 67 | test_example "cost-tracking.ts" 60 68 | test_example "free-models-cascade.ts" 60 69 | test_example "multi-instance-ollama.ts" 120 70 | test_example "multi-instance-vllm.ts" 120 71 | test_example "production-patterns.ts" 90 72 | test_example "reasoning-models.ts" 120 73 | test_example "semantic-quality.ts" 90 74 | test_example "user-profiles-workflows.ts" 60 75 | test_example "factory-methods.ts" 60 76 | 77 | # Summary 78 | echo "" 79 | echo "==========================================" 80 | echo "📊 TEST SUMMARY" 81 | echo "==========================================" 82 | echo -e "${GREEN}Passed: $PASSED${NC}" 83 | echo -e "${RED}Failed: $FAILED${NC}" 84 | echo -e "${YELLOW}Skipped: $SKIPPED${NC}" 85 | echo "" 86 | 87 | if [ $FAILED -gt 0 ]; then 88 | echo "Failed examples:" 89 | for example in "${FAILED_EXAMPLES[@]}"; do 90 | echo " ❌ $example" 91 | done 92 | echo "" 93 | echo "Logs available in /tmp/test-*.log" 94 | exit 1 95 | else 96 | echo "✅ All tests passed!" 
97 | exit 0 98 | fi 99 | -------------------------------------------------------------------------------- /packages/core/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@cascadeflow/core", 3 | "version": "0.6.5", 4 | "description": "cascadeflow TypeScript library - Smart AI model cascading for cost optimization", 5 | "author": { 6 | "name": "Lemony Inc.", 7 | "email": "hello@lemony.ai" 8 | }, 9 | "license": "MIT", 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/lemony-ai/cascadeflow.git", 13 | "directory": "packages/core" 14 | }, 15 | "main": "./dist/index.js", 16 | "module": "./dist/index.mjs", 17 | "types": "./dist/index.d.ts", 18 | "exports": { 19 | ".": { 20 | "import": "./dist/index.mjs", 21 | "require": "./dist/index.js", 22 | "types": "./dist/index.d.ts" 23 | } 24 | }, 25 | "files": [ 26 | "dist", 27 | "README.md" 28 | ], 29 | "scripts": { 30 | "build": "tsup src/index.ts --format cjs,esm --dts --clean", 31 | "dev": "tsup src/index.ts --format cjs,esm --dts --watch", 32 | "test": "vitest run", 33 | "test:watch": "vitest", 34 | "lint": "eslint src --ext .ts", 35 | "typecheck": "tsc --noEmit", 36 | "typecheck:examples": "tsc --noEmit -p examples/nodejs/tsconfig.json", 37 | "docs": "typedoc", 38 | "docs:watch": "typedoc --watch", 39 | "clean": "rm -rf dist" 40 | }, 41 | "keywords": [ 42 | "ai", 43 | "llm", 44 | "cost-optimization", 45 | "model-routing", 46 | "cascade", 47 | "openai", 48 | "anthropic", 49 | "groq", 50 | "huggingface", 51 | "ollama", 52 | "vllm", 53 | "together", 54 | "typescript", 55 | "browser", 56 | "edge-functions" 57 | ], 58 | "dependencies": { 59 | "@cascadeflow/ml": "^0.6.5" 60 | }, 61 | "peerDependencies": { 62 | "@anthropic-ai/sdk": "^0.30.0", 63 | "@huggingface/inference": "^2.8.0", 64 | "@xenova/transformers": "^2.17.2", 65 | "groq-sdk": "^0.5.0", 66 | "openai": "^4.0.0" 67 | }, 68 | "peerDependenciesMeta": { 69 | "openai": { 70 | "optional": 
true 71 | }, 72 | "@anthropic-ai/sdk": { 73 | "optional": true 74 | }, 75 | "groq-sdk": { 76 | "optional": true 77 | }, 78 | "@huggingface/inference": { 79 | "optional": true 80 | }, 81 | "@xenova/transformers": { 82 | "optional": true 83 | } 84 | }, 85 | "devDependencies": { 86 | "@anthropic-ai/sdk": "^0.30.0", 87 | "@huggingface/inference": "^2.8.0", 88 | "@types/express": "^5.0.5", 89 | "@types/node": "^20.10.0", 90 | "@typescript-eslint/eslint-plugin": "^6.15.0", 91 | "@typescript-eslint/parser": "^6.15.0", 92 | "eslint": "^8.55.0", 93 | "express": "^5.1.0", 94 | "groq-sdk": "^0.5.0", 95 | "openai": "^4.73.1", 96 | "tsup": "^8.0.1", 97 | "typedoc": "^0.28.14", 98 | "typescript": "^5.3.3", 99 | "vitest": "^1.0.4", 100 | "zod": "^4.1.12" 101 | }, 102 | "engines": { 103 | "node": ">=18.0.0" 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /packages/core/tests/basic-test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Basic integration test for cascadeflow TypeScript library 3 | * 4 | * This test validates: 5 | * - Package imports work correctly 6 | * - OpenAI provider initializes 7 | * - Cascade logic executes 8 | * - Real API calls work 9 | * - Cost tracking functions 10 | */ 11 | 12 | import { describe, it, expect } from 'vitest'; 13 | import { CascadeAgent } from '../src'; 14 | 15 | // Check for API key 16 | const apiKey = process.env.OPENAI_API_KEY; 17 | const hasApiKey = !!apiKey; 18 | 19 | describe('Basic Integration Tests', () => { 20 | it.skipIf(!hasApiKey)('should run basic cascade with OpenAI', async () => { 21 | console.log('🧪 Testing cascadeflow TypeScript Library\n'); 22 | 23 | console.log('✅ OpenAI API key found'); 24 | console.log(` Key: ${apiKey!.substring(0, 10)}...${apiKey!.substring(apiKey!.length - 4)}\n`); 25 | 26 | // Create agent with two-tier cascade 27 | console.log('📦 Creating CascadeAgent...'); 28 | const agent = new CascadeAgent({ 29 
| models: [ 30 | { 31 | name: 'gpt-4o-mini', 32 | provider: 'openai', 33 | cost: 0.00015, 34 | apiKey, 35 | }, 36 | { 37 | name: 'gpt-4o', 38 | provider: 'openai', 39 | cost: 0.00625, 40 | apiKey, 41 | }, 42 | ], 43 | }); 44 | 45 | console.log(`✅ Agent created with ${agent.getModelCount()} models`); 46 | console.log(` Models: ${agent.getModels().map(m => m.name).join(' → ')}\n`); 47 | 48 | // Test 1: Simple query (should use draft model) 49 | console.log('🔍 Test 1: Simple query (expect draft model)'); 50 | console.log(' Query: "What is TypeScript?"\n'); 51 | 52 | const startTime = Date.now(); 53 | const result1 = await agent.run('What is TypeScript?'); 54 | const elapsed = Date.now() - startTime; 55 | 56 | console.log('📊 Result:'); 57 | console.log(` Model used: ${result1.modelUsed}`); 58 | console.log(` Response: ${result1.content.substring(0, 100)}...`); 59 | console.log(` Cost: $${result1.totalCost.toFixed(6)}`); 60 | console.log(` Latency: ${elapsed}ms`); 61 | console.log(` Cascaded: ${result1.cascaded ? 'Yes' : 'No'}`); 62 | console.log(` Draft accepted: ${result1.draftAccepted ? 
'Yes' : 'No'}`); 63 | 64 | if (result1.savingsPercentage !== undefined) { 65 | console.log(` Savings: ${result1.savingsPercentage.toFixed(1)}%`); 66 | } 67 | 68 | console.log(''); 69 | 70 | // Validate result 71 | expect(result1.content).toBeTruthy(); 72 | expect(result1.content.length).toBeGreaterThan(0); 73 | expect(result1.totalCost).toBeGreaterThan(0); 74 | 75 | console.log('✅ Test 1 passed!\n'); 76 | 77 | // Summary 78 | console.log('═══════════════════════════════════════════'); 79 | console.log('🎉 All tests passed!'); 80 | console.log('═══════════════════════════════════════════'); 81 | console.log(`Total cost: $${result1.totalCost.toFixed(6)}`); 82 | console.log(`Total time: ${elapsed}ms`); 83 | console.log(''); 84 | }, 30000); // 30 second timeout for API calls 85 | }); 86 | -------------------------------------------------------------------------------- /cascadeflow/schema/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data schemas and configuration for cascadeflow. 3 | 4 | This module contains: 5 | - Configuration dataclasses (ModelConfig, CascadeConfig, etc.) 
6 | - Domain configuration (DomainConfig, DomainValidationMethod) 7 | - Model registry (ModelRegistry, ModelRegistryEntry) 8 | - Result dataclasses (CascadeResult) 9 | - Custom exceptions 10 | """ 11 | 12 | from .config import ( 13 | DEFAULT_TIERS, 14 | EXAMPLE_WORKFLOWS, 15 | CascadeConfig, 16 | LatencyProfile, 17 | ModelConfig, 18 | OptimizationWeights, 19 | UserTier, 20 | WorkflowProfile, 21 | ) 22 | from .domain_config import ( 23 | BUILTIN_DOMAIN_CONFIGS, 24 | DomainConfig, 25 | DomainValidationMethod, 26 | create_domain_config, 27 | get_builtin_domain_config, 28 | # Domain string constants (avoid circular imports) 29 | DOMAIN_CODE, 30 | DOMAIN_DATA, 31 | DOMAIN_STRUCTURED, 32 | DOMAIN_RAG, 33 | DOMAIN_CONVERSATION, 34 | DOMAIN_TOOL, 35 | DOMAIN_CREATIVE, 36 | DOMAIN_SUMMARY, 37 | DOMAIN_TRANSLATION, 38 | DOMAIN_MATH, 39 | DOMAIN_SCIENCE, 40 | DOMAIN_MEDICAL, 41 | DOMAIN_LEGAL, 42 | DOMAIN_FINANCIAL, 43 | DOMAIN_GENERAL, 44 | ) 45 | from .exceptions import ( 46 | AuthenticationError, 47 | BudgetExceededError, 48 | cascadeflowError, 49 | ConfigError, 50 | ModelError, 51 | ProviderError, 52 | QualityThresholdError, 53 | RateLimitError, 54 | RoutingError, 55 | TimeoutError, 56 | ToolExecutionError, 57 | ValidationError, 58 | ) 59 | from .model_registry import ( 60 | ModelRegistry, 61 | ModelRegistryEntry, 62 | get_default_registry, 63 | get_model, 64 | has_model, 65 | ) 66 | from .result import CascadeResult 67 | 68 | __all__ = [ 69 | # Configuration 70 | "ModelConfig", 71 | "CascadeConfig", 72 | "UserTier", 73 | "WorkflowProfile", 74 | "LatencyProfile", 75 | "OptimizationWeights", 76 | "DEFAULT_TIERS", 77 | "EXAMPLE_WORKFLOWS", 78 | # Domain Configuration 79 | "DomainConfig", 80 | "DomainValidationMethod", 81 | "BUILTIN_DOMAIN_CONFIGS", 82 | "create_domain_config", 83 | "get_builtin_domain_config", 84 | # Domain string constants 85 | "DOMAIN_CODE", 86 | "DOMAIN_DATA", 87 | "DOMAIN_STRUCTURED", 88 | "DOMAIN_RAG", 89 | "DOMAIN_CONVERSATION", 90 | "DOMAIN_TOOL", 91 
| "DOMAIN_CREATIVE", 92 | "DOMAIN_SUMMARY", 93 | "DOMAIN_TRANSLATION", 94 | "DOMAIN_MATH", 95 | "DOMAIN_SCIENCE", 96 | "DOMAIN_MEDICAL", 97 | "DOMAIN_LEGAL", 98 | "DOMAIN_FINANCIAL", 99 | "DOMAIN_GENERAL", 100 | # Model Registry 101 | "ModelRegistry", 102 | "ModelRegistryEntry", 103 | "get_default_registry", 104 | "get_model", 105 | "has_model", 106 | # Exceptions 107 | "cascadeflowError", 108 | "ConfigError", 109 | "ProviderError", 110 | "AuthenticationError", 111 | "TimeoutError", 112 | "ModelError", 113 | "BudgetExceededError", 114 | "RateLimitError", 115 | "QualityThresholdError", 116 | "RoutingError", 117 | "ValidationError", 118 | "ToolExecutionError", 119 | # Results 120 | "CascadeResult", 121 | ] 122 | -------------------------------------------------------------------------------- /cascadeflow/providers/__init__.py: -------------------------------------------------------------------------------- 1 | """Provider implementations for cascadeflow.""" 2 | 3 | import logging 4 | from typing import Dict, Optional 5 | 6 | from .anthropic import AnthropicProvider 7 | from .base import PROVIDER_CAPABILITIES, BaseProvider, ModelResponse 8 | from .deepseek import DeepSeekProvider 9 | from .groq import GroqProvider 10 | from .huggingface import HuggingFaceProvider 11 | from .ollama import OllamaProvider 12 | from .openai import OpenAIProvider 13 | from .openrouter import OpenRouterProvider 14 | from .together import TogetherProvider 15 | from .vllm import VLLMProvider 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | # Provider registry - simple dict mapping 21 | PROVIDER_REGISTRY = { 22 | "openai": OpenAIProvider, 23 | "anthropic": AnthropicProvider, 24 | "ollama": OllamaProvider, 25 | "groq": GroqProvider, 26 | "vllm": VLLMProvider, 27 | "huggingface": HuggingFaceProvider, 28 | "together": TogetherProvider, 29 | "openrouter": OpenRouterProvider, 30 | "deepseek": DeepSeekProvider, 31 | } 32 | 33 | 34 | # Optional convenience functions (can be removed if 
def get_provider(provider_name: str) -> Optional[BaseProvider]:
    """
    Return an initialized provider instance, or None if it cannot be created.

    Convenience wrapper around PROVIDER_REGISTRY: unknown names and
    initialization failures (e.g. a missing API key) are logged and
    reported as None instead of raising.

    Args:
        provider_name: Registry key of the provider (e.g. 'openai', 'anthropic')

    Returns:
        Provider instance or None if initialization fails
    """
    provider_class = PROVIDER_REGISTRY.get(provider_name)
    if provider_class is None:
        logger.warning(f"Unknown provider: {provider_name}")
        return None

    try:
        instance = provider_class()
    except Exception as exc:
        # Expected when credentials/config are absent; debug-level on purpose.
        logger.debug(f"Could not initialize {provider_name}: {exc}")
        return None

    logger.debug(f"Initialized {provider_name} provider")
    return instance
Check API keys in .env") 83 | 84 | return providers 85 | 86 | 87 | # Exports 88 | __all__ = [ 89 | "BaseProvider", 90 | "ModelResponse", 91 | "PROVIDER_CAPABILITIES", 92 | "OpenAIProvider", 93 | "AnthropicProvider", 94 | "OllamaProvider", 95 | "GroqProvider", 96 | "VLLMProvider", 97 | "HuggingFaceProvider", 98 | "TogetherProvider", 99 | "OpenRouterProvider", 100 | "DeepSeekProvider", 101 | "PROVIDER_REGISTRY", 102 | "get_provider", 103 | "get_available_providers", 104 | ] 105 | -------------------------------------------------------------------------------- /tests/test_together.py: -------------------------------------------------------------------------------- 1 | """Tests for Together.ai provider.""" 2 | 3 | import os 4 | from unittest.mock import MagicMock, patch 5 | 6 | import pytest 7 | 8 | from cascadeflow.providers.base import ModelResponse 9 | from cascadeflow.providers.together import TogetherProvider 10 | 11 | 12 | @pytest.fixture 13 | def mock_env(): 14 | """Mock environment variables.""" 15 | with patch.dict(os.environ, {"TOGETHER_API_KEY": "test_key"}): 16 | yield 17 | 18 | 19 | @pytest.fixture 20 | def together_provider(mock_env): 21 | """Create Together.ai provider for testing.""" 22 | return TogetherProvider() 23 | 24 | 25 | @pytest.fixture 26 | def mock_together_response(): 27 | """Mock successful Together.ai API response.""" 28 | return { 29 | "choices": [{"message": {"content": "This is a test response."}, "finish_reason": "stop"}], 30 | "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}, 31 | } 32 | 33 | 34 | class TestTogetherProvider: 35 | """Tests for Together.ai provider.""" 36 | 37 | def test_init_with_api_key(self): 38 | """Test initialization with explicit API key.""" 39 | provider = TogetherProvider(api_key="explicit_key") 40 | assert provider.api_key == "explicit_key" 41 | 42 | def test_init_from_env(self, mock_env): 43 | """Test initialization from environment variable.""" 44 | provider = TogetherProvider() 45 
| assert provider.api_key == "test_key" 46 | 47 | def test_init_no_api_key(self): 48 | """Test initialization fails without API key.""" 49 | with patch.dict(os.environ, {}, clear=True): 50 | with pytest.raises(ValueError, match="Together.ai API key not found"): 51 | TogetherProvider() 52 | 53 | @pytest.mark.asyncio 54 | async def test_complete_success(self, together_provider, mock_together_response): 55 | """Test successful completion.""" 56 | with patch.object(together_provider.client, "post") as mock_post: 57 | mock_response = MagicMock() 58 | mock_response.json.return_value = mock_together_response 59 | mock_response.raise_for_status = MagicMock() 60 | mock_post.return_value = mock_response 61 | 62 | result = await together_provider.complete( 63 | prompt="Test prompt", model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo" 64 | ) 65 | 66 | assert isinstance(result, ModelResponse) 67 | assert result.content == "This is a test response." 68 | assert result.model == "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo" 69 | assert result.provider == "together" 70 | assert result.tokens_used == 30 71 | 72 | def test_estimate_cost_8b(self, together_provider): 73 | """Test cost estimation for 8B model.""" 74 | cost = together_provider.estimate_cost(1000, "Llama-3.1-8B-Instruct-Turbo") 75 | # Uses blended pricing 76 | assert 0.00015 < cost < 0.00025 # Approximately $0.0002 per 1K tokens 77 | 78 | def test_estimate_cost_70b(self, together_provider): 79 | """Test cost estimation for 70B model.""" 80 | cost = together_provider.estimate_cost(1000, "Llama-3.1-70B-Instruct-Turbo") 81 | # Uses blended pricing 82 | assert 0.0007 < cost < 0.0010 # Approximately $0.0008 per 1K tokens 83 | 84 | 85 | if __name__ == "__main__": 86 | pytest.main([__file__, "-v"]) 87 | -------------------------------------------------------------------------------- /cascadeflow/tools/formats.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provider format 
import logging
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class ToolCallFormat(Enum):
    """Tool call format by provider."""

    OPENAI = "openai"  # OpenAI, Groq, Together
    ANTHROPIC = "anthropic"  # Claude
    OLLAMA = "ollama"  # Ollama
    VLLM = "vllm"  # vLLM
    HUGGINGFACE = "huggingface"  # Via Inference Providers


# Single source of truth for the provider -> format mapping, so that
# to_provider_format() and get_provider_format_type() cannot drift apart.
# Providers not listed here fall back to the OpenAI format (most common).
_PROVIDER_FORMATS: dict[str, ToolCallFormat] = {
    "openai": ToolCallFormat.OPENAI,
    "groq": ToolCallFormat.OPENAI,
    "together": ToolCallFormat.OPENAI,
    "vllm": ToolCallFormat.OPENAI,
    "huggingface": ToolCallFormat.OPENAI,
    "anthropic": ToolCallFormat.ANTHROPIC,
    "ollama": ToolCallFormat.OLLAMA,
}


def to_openai_format(name: str, description: str, parameters: dict[str, Any]) -> dict[str, Any]:
    """
    Convert to OpenAI tool format.

    Used by: OpenAI, Groq, Together, vLLM

    Args:
        name: Tool name
        description: Tool description
        parameters: Tool parameters (JSON schema)

    Returns:
        Tool schema in OpenAI's nested 'function' wrapper format
    """
    return {
        "type": "function",
        "function": {"name": name, "description": description, "parameters": parameters},
    }


def to_anthropic_format(name: str, description: str, parameters: dict[str, Any]) -> dict[str, Any]:
    """
    Convert to Anthropic tool format.

    Key difference: Uses 'input_schema' instead of 'parameters', with no
    nested 'function' wrapper.
    """
    return {
        "name": name,
        "description": description,
        "input_schema": parameters,  # Anthropic uses input_schema
    }


def to_ollama_format(name: str, description: str, parameters: dict[str, Any]) -> dict[str, Any]:
    """Convert to Ollama tool format (same as OpenAI)."""
    return to_openai_format(name, description, parameters)


def to_provider_format(
    provider: str, name: str, description: str, parameters: dict[str, Any]
) -> dict[str, Any]:
    """
    Convert to provider-specific format.

    Args:
        provider: Provider name (openai, anthropic, ollama, groq, together, vllm)
        name: Tool name
        description: Tool description
        parameters: Tool parameters (JSON schema)

    Returns:
        Tool schema in provider's expected format; unknown providers fall
        back to the OpenAI format with a warning.
    """
    format_type = _PROVIDER_FORMATS.get(provider.lower())
    if format_type is None:
        # Default to OpenAI format (most common)
        logger.warning(f"Unknown provider '{provider}', using OpenAI format")
        format_type = ToolCallFormat.OPENAI

    if format_type is ToolCallFormat.ANTHROPIC:
        return to_anthropic_format(name, description, parameters)
    if format_type is ToolCallFormat.OLLAMA:
        return to_ollama_format(name, description, parameters)
    return to_openai_format(name, description, parameters)


def get_provider_format_type(provider: str) -> ToolCallFormat:
    """
    Get the format type for a provider.

    Args:
        provider: Provider name (case-insensitive)

    Returns:
        ToolCallFormat enum value (OPENAI for unknown providers)
    """
    return _PROVIDER_FORMATS.get(provider.lower(), ToolCallFormat.OPENAI)
**Stop the workflow** in n8n 10 | 2. **Restart the workflow** (or restart n8n if that doesn't work) 11 | 3. **Look for initialization log**: 12 | ``` 13 | 🚀 CascadeFlow initialized 14 | PORT MAPPING: 15 | ├─ TOP port (labeled "Verifier") → VERIFIER model: lazy-loaded (will fetch only if needed) 16 | └─ BOTTOM port (labeled "Drafter") → DRAFTER model: [type] ([name]) 17 | ``` 18 | 19 | This shows which models are ACTUALLY connected. 20 | 21 | ### Verifying Correct Operation 22 | 23 | **Expected logs when drafter is accepted:** 24 | ``` 25 | 🎯 CascadeFlow: Trying drafter model (from BOTTOM port): ollama (gemma3:1b) 26 | 📊 Simple quality check: confidence=0.75 27 | 28 | ┌─────────────────────────────────────────┐ 29 | │ ✅ FLOW: DRAFTER ACCEPTED (FAST PATH) │ 30 | └─────────────────────────────────────────┘ 31 | Model used: ollama (gemma3:1b) 32 | Confidence: 0.75 (threshold: 0.64) 33 | ``` 34 | 35 | **Expected logs when verifier is triggered:** 36 | ``` 37 | 🎯 CascadeFlow: Trying drafter model (from BOTTOM port): ollama (gemma3:1b) 38 | 📊 Simple quality check: confidence=0.50 39 | 40 | ┌────────────────────────────────────────────────┐ 41 | │ ⚠️ FLOW: ESCALATED TO VERIFIER (SLOW PATH) │ 42 | └────────────────────────────────────────────────┘ 43 | 🔄 Loading verifier model from TOP port (labeled "Verifier")... 44 | ✓ Verifier model loaded: ollama (mistral:7b-instruct) 45 | ✅ Verifier completed successfully 46 | Model used: ollama (mistral:7b-instruct) 47 | ``` 48 | 49 | ## Issue: "Only drafts getting accepted" 50 | 51 | ### Is this a problem? 
52 | **NO - This is correct behavior!** 53 | 54 | With quality threshold 0.64: 55 | - If drafter produces good responses → Quality check passes → Use cheap model (SAVE MONEY ✅) 56 | - If drafter produces poor responses → Quality check fails → Escalate to verifier 57 | 58 | ### When to adjust threshold 59 | 60 | **See 100% drafter acceptance?** 61 | - Your drafter is doing well for these queries 62 | - Consider raising the threshold to 0.70-0.80 if you want stricter quality 63 | 64 | **See 100% verifier escalation?** 65 | - Drafter quality too low for these queries 66 | - Lower the threshold to 0.50-0.55 to accept more drafts 67 | - Or use a better drafter model 68 | 69 | ### Testing Verifier Triggering 70 | 71 | To force verifier usage, try: 72 | 1. Raise the quality threshold to 0.90 (very strict) 73 | 2. Ask complex questions that drafter struggles with 74 | 3. Use a weaker drafter model 75 | 76 | ## Checking Model Connections 77 | 78 | **Initialization log location:** 79 | - n8n workflow logs (when workflow starts) 80 | - Server console logs (if running n8n manually) 81 | 82 | **Per-request logs:** 83 | - Show in n8n execution logs 84 | - Show actual model used: `Model used: [type] ([name])` 85 | 86 | ## Common Mistakes 87 | 88 | ❌ **Connecting models to wrong ports** 89 | - TOP port = Verifier (expensive, high quality) 90 | - BOTTOM port = Drafter (cheap, tried first) 91 | 92 | ❌ **Not restarting workflow after changing connections** 93 | - Must restart for new models to be loaded 94 | 95 | ❌ **Expecting verifier to be called every time** 96 | - Verifier is ONLY called when drafter quality < threshold 97 | - This is the cost-saving feature!
98 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: ❓ Question or Discussion 2 | description: Ask a question or start a discussion about cascadeflow 3 | title: "[Question]: " 4 | labels: ["question"] 5 | assignees: [] 6 | 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for your interest in cascadeflow! 12 | 13 | **Before asking, please check:** 14 | - [Documentation](https://github.com/lemony-ai/cascadeflow/blob/main/README.md) 15 | - [Existing issues](https://github.com/lemony-ai/cascadeflow/issues) 16 | - [Discussions](https://github.com/lemony-ai/cascadeflow/discussions) 17 | 18 | - type: dropdown 19 | id: topic 20 | attributes: 21 | label: Topic area 22 | description: What is your question about? 23 | options: 24 | - Getting started / Installation 25 | - Configuration / Setup 26 | - Provider integration (OpenAI, Anthropic, etc.) 27 | - Routing strategies 28 | - Cost optimization 29 | - Quality estimation 30 | - Performance / Speed 31 | - Tool calling / Functions 32 | - Streaming 33 | - Error handling 34 | - Best practices 35 | - Use case / Architecture 36 | - Comparison with alternatives 37 | - Contributing 38 | - Other 39 | validations: 40 | required: true 41 | 42 | - type: textarea 43 | id: question 44 | attributes: 45 | label: Your question 46 | description: What would you like to know? 47 | placeholder: | 48 | I'm trying to understand how to... 49 | 50 | What's the best way to... 51 | 52 | How does X compare to Y... 53 | validations: 54 | required: true 55 | 56 | - type: textarea 57 | id: context 58 | attributes: 59 | label: Context 60 | description: Provide any relevant context about your use case or what you're trying to achieve 61 | placeholder: | 62 | I'm building an application that... 63 | My goal is to... 64 | I've tried... 
65 | validations: 66 | required: false 67 | 68 | - type: textarea 69 | id: code 70 | attributes: 71 | label: Code example (if applicable) 72 | description: Share relevant code to help us understand your question 73 | placeholder: | 74 | ```python 75 | from cascadeflow import CascadeAgent 76 | 77 | # Your code here 78 | ``` 79 | render: python 80 | validations: 81 | required: false 82 | 83 | - type: textarea 84 | id: attempted 85 | attributes: 86 | label: What have you tried? 87 | description: Have you already attempted to solve this or looked anywhere for answers? 88 | placeholder: | 89 | - I've read the documentation at... 90 | - I've tried the following approach... 91 | - I've searched for... 92 | validations: 93 | required: false 94 | 95 | - type: input 96 | id: version 97 | attributes: 98 | label: cascadeflow version (if relevant) 99 | placeholder: e.g., 0.1.0 100 | validations: 101 | required: false 102 | 103 | - type: checkboxes 104 | id: checks 105 | attributes: 106 | label: Pre-submission checklist 107 | options: 108 | - label: I have searched existing issues and discussions 109 | required: true 110 | - label: I have checked the documentation 111 | required: true 112 | - label: This is not a bug report (use bug report template instead) 113 | required: true -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Quick test to verify all imports work after __init__.py update.""" 2 | 3 | 4 | def test_core_imports(): 5 | """Test core configuration imports.""" 6 | from cascadeflow import CascadeAgent, CascadeConfig, CascadeResult, ModelConfig, UserTier 7 | 8 | print("✓ Core imports working") 9 | 10 | 11 | def test_day42_config_imports(): 12 | """Test Day 4.2 configuration imports.""" 13 | from cascadeflow import ( 14 | DEFAULT_TIERS, 15 | EXAMPLE_WORKFLOWS, 16 | LatencyProfile, 17 | OptimizationWeights, 18 | WorkflowProfile, 19 | ) 
def _assert_all_defined(*objects) -> None:
    """Fail if any re-exported name resolved to None (broken lazy export)."""
    assert all(obj is not None for obj in objects)


def test_intelligence_imports():
    """Test intelligence layer imports."""
    from cascadeflow import (
        ComplexityDetector,
        DomainDetector,
        ExecutionPlan,
        ExecutionStrategy,
        LatencyAwareExecutionPlanner,
        ModelScorer,
        QueryComplexity,
    )

    # Reference the names so linters don't flag them as unused (F401).
    _assert_all_defined(
        ComplexityDetector,
        DomainDetector,
        ExecutionPlan,
        ExecutionStrategy,
        LatencyAwareExecutionPlanner,
        ModelScorer,
        QueryComplexity,
    )
    print("✓ Intelligence layer imports working")


def test_speculative_imports():
    """Test speculative cascade imports."""
    from cascadeflow import (
        DeferralStrategy,
        FlexibleDeferralRule,
        SpeculativeCascade,
        SpeculativeResult,
    )

    _assert_all_defined(
        DeferralStrategy, FlexibleDeferralRule, SpeculativeCascade, SpeculativeResult
    )
    print("✓ Speculative cascade imports working")


def test_features_imports():
    """Test supporting features imports."""
    from cascadeflow import (
        CallbackData,
        CallbackEvent,
        CallbackManager,
        CascadePresets,
        ResponseCache,
        StreamManager,
    )

    _assert_all_defined(
        CallbackData, CallbackEvent, CallbackManager, CascadePresets, ResponseCache, StreamManager
    )
    print("✓ Supporting features imports working")


def test_providers_imports():
    """Test provider imports."""
    from cascadeflow import PROVIDER_REGISTRY, BaseProvider, ModelResponse

    _assert_all_defined(PROVIDER_REGISTRY, BaseProvider, ModelResponse)
    print("✓ Provider imports working")


def test_utils_imports():
    """Test utility imports."""
    from cascadeflow import estimate_tokens, format_cost, setup_logging

    _assert_all_defined(estimate_tokens, format_cost, setup_logging)
    print("✓ Utility imports working")


def test_exceptions_imports():
    """Test exception imports."""
    from cascadeflow import (
        BudgetExceededError,
        cascadeflowError,
        ConfigError,
        ModelError,
        ProviderError,
        QualityThresholdError,
        RateLimitError,
        RoutingError,
        ValidationError,
    )

    _assert_all_defined(
        BudgetExceededError,
        cascadeflowError,
        ConfigError,
        ModelError,
        ProviderError,
        QualityThresholdError,
        RateLimitError,
        RoutingError,
        ValidationError,
    )
    print("✓ Exception imports working")
print(f"✓ Version: {__version__}") 103 | assert __version__ == "0.4.2" 104 | 105 | 106 | if __name__ == "__main__": 107 | print("Testing cascadeflow imports...\n") 108 | 109 | try: 110 | test_core_imports() 111 | test_day42_config_imports() 112 | test_intelligence_imports() 113 | test_speculative_imports() 114 | test_features_imports() 115 | test_providers_imports() 116 | test_utils_imports() 117 | test_exceptions_imports() 118 | test_version() 119 | 120 | print("\n✅ All imports successful!") 121 | 122 | except ImportError as e: 123 | print(f"\n❌ Import failed: {e}") 124 | import traceback 125 | 126 | traceback.print_exc() 127 | -------------------------------------------------------------------------------- /packages/langchain-cascadeflow/examples/analyze-models.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Model Analysis Example 3 | * 4 | * Demonstrates how to analyze and validate cascade configurations 5 | * using your existing LangChain model instances. 
6 | */ 7 | 8 | import { ChatOpenAI } from '@langchain/openai'; 9 | import { analyzeCascadePair, suggestCascadePairs } from '../src/index.js'; 10 | 11 | async function main() { 12 | console.log('=== CascadeFlow Model Analysis Demo ===\n'); 13 | 14 | // Example 1: Analyze a specific drafter/verifier pair 15 | console.log('--- Example 1: Analyze OpenAI Cascade Pair ---'); 16 | 17 | const drafterOpenAI = new ChatOpenAI({ model: 'gpt-4o-mini' }); 18 | const verifierOpenAI = new ChatOpenAI({ model: 'gpt-4o' }); 19 | 20 | const analysis1 = analyzeCascadePair(drafterOpenAI, verifierOpenAI); 21 | 22 | console.log(`Drafter: ${analysis1.drafterModel}`); 23 | console.log(`Verifier: ${analysis1.verifierModel}`); 24 | console.log(`\nPricing (per 1M tokens):`); 25 | console.log(` Drafter: $${analysis1.drafterCost.input} input / $${analysis1.drafterCost.output} output`); 26 | console.log(` Verifier: $${analysis1.verifierCost.input} input / $${analysis1.verifierCost.output} output`); 27 | console.log(`\nEstimated Savings: ${analysis1.estimatedSavings.toFixed(1)}%`); 28 | console.log(`Configuration Valid: ${analysis1.valid ? '✅' : '❌'}`); 29 | console.log(`Recommendation: ${analysis1.recommendation}`); 30 | 31 | if (analysis1.warnings.length > 0) { 32 | console.log(`\nWarnings:`); 33 | analysis1.warnings.forEach(w => console.log(` ⚠️ ${w}`)); 34 | } 35 | console.log('\n'); 36 | 37 | // Example 2: Detect misconfiguration (drafter more expensive than verifier) 38 | console.log('--- Example 2: Detect Misconfiguration ---'); 39 | 40 | const expensiveDrafter = new ChatOpenAI({ model: 'gpt-4o' }); 41 | const cheapVerifier = new ChatOpenAI({ model: 'gpt-4o-mini' }); 42 | 43 | const analysis2 = analyzeCascadePair(expensiveDrafter, cheapVerifier); 44 | 45 | console.log(`Drafter: ${analysis2.drafterModel}`); 46 | console.log(`Verifier: ${analysis2.verifierModel}`); 47 | console.log(`Configuration Valid: ${analysis2.valid ? 
'✅' : '❌'}`); 48 | console.log(`Recommendation: ${analysis2.recommendation}`); 49 | 50 | if (analysis2.warnings.length > 0) { 51 | console.log(`\nWarnings:`); 52 | analysis2.warnings.forEach(w => console.log(` ⚠️ ${w}`)); 53 | } 54 | console.log('\n'); 55 | 56 | // Example 3: Suggest optimal pairs from available models 57 | console.log('--- Example 3: Suggest Optimal Cascade Pairs ---'); 58 | 59 | const availableModels = [ 60 | new ChatOpenAI({ model: 'gpt-4o-mini' }), 61 | new ChatOpenAI({ model: 'gpt-4o' }), 62 | new ChatOpenAI({ model: 'gpt-3.5-turbo' }), 63 | new ChatOpenAI({ model: 'gpt-4-turbo' }), 64 | ]; 65 | 66 | console.log(`Analyzing ${availableModels.length} available models...\n`); 67 | 68 | const suggestions = suggestCascadePairs(availableModels); 69 | 70 | console.log(`Found ${suggestions.length} viable cascade configurations:\n`); 71 | 72 | suggestions.slice(0, 5).forEach((suggestion, idx) => { 73 | const { drafter, verifier, analysis } = suggestion; 74 | console.log(`${idx + 1}. 
${analysis.drafterModel} → ${analysis.verifierModel}`); 75 | console.log(` Estimated Savings: ${analysis.estimatedSavings.toFixed(1)}%`); 76 | console.log(` ${analysis.recommendation}`); 77 | console.log(); 78 | }); 79 | 80 | console.log('=== Analysis Complete ==='); 81 | console.log('\n💡 Use analyzeCascadePair() to validate your cascade configuration'); 82 | console.log('💡 Use suggestCascadePairs() to find optimal pairs from your models'); 83 | } 84 | 85 | main().catch(console.error); 86 | -------------------------------------------------------------------------------- /packages/langchain-cascadeflow/src/types.ts: -------------------------------------------------------------------------------- 1 | import type { PreRouter } from './routers/pre-router.js'; 2 | import type { QueryComplexity } from './complexity.js'; 3 | 4 | /** 5 | * Configuration for the CascadeFlow wrapper 6 | */ 7 | export interface CascadeConfig { 8 | /** 9 | * The drafter model (cheap, fast) - tries first 10 | */ 11 | drafter: any; // BaseChatModel from @langchain/core 12 | 13 | /** 14 | * The verifier model (expensive, accurate) - used when quality is insufficient 15 | */ 16 | verifier: any; // BaseChatModel from @langchain/core 17 | 18 | /** 19 | * Quality threshold for accepting drafter responses (0-1) 20 | * @default 0.7 21 | */ 22 | qualityThreshold?: number; 23 | 24 | /** 25 | * Enable automatic cost tracking 26 | * @default true 27 | */ 28 | enableCostTracking?: boolean; 29 | 30 | /** 31 | * Cost tracking provider 32 | * - 'langsmith': Use LangSmith's server-side cost calculation (default, requires LANGSMITH_API_KEY) 33 | * - 'cascadeflow': Use CascadeFlow's built-in pricing table (no external dependencies) 34 | * @default 'langsmith' 35 | */ 36 | costTrackingProvider?: 'langsmith' | 'cascadeflow'; 37 | 38 | /** 39 | * Custom quality validator function 40 | * Returns confidence score between 0-1 41 | */ 42 | qualityValidator?: (response: any) => Promise | number; 43 | 44 | /** 45 | * 
/**
 * Cascade execution result with cost metadata
 *
 * NOTE(review): cost fields appear to be USD amounts and savingsPercentage
 * appears to be measured against a verifier-only baseline — confirm against
 * the cost-calculation implementation before relying on either.
 */
export interface CascadeResult {
  /**
   * The final response content
   */
  content: string;

  /**
   * Model that provided the final response ('drafter' | 'verifier')
   */
  modelUsed: 'drafter' | 'verifier';

  /**
   * Quality score of the drafter response (0-1)
   * Optional — presumably absent when the drafter was skipped entirely
   * (e.g. pre-routed straight to the verifier); TODO confirm
   */
  drafterQuality?: number;

  /**
   * Whether the drafter response was accepted
   */
  accepted: boolean;

  /**
   * Cost of the drafter call
   */
  drafterCost: number;

  /**
   * Cost of the verifier call (0 if not used)
   */
  verifierCost: number;

  /**
   * Total cost of the cascade
   */
  totalCost: number;

  /**
   * Cost savings percentage (0-100)
   */
  savingsPercentage: number;

  /**
   * Latency in milliseconds
   */
  latencyMs: number;
}
number; 131 | modelUsed: 'drafter' | 'verifier'; 132 | accepted: boolean; 133 | drafterQuality?: number; 134 | } 135 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # cascadeflow Documentation 2 | 3 | Welcome to cascadeflow documentation! 🌊 4 | 5 | ## 📖 Quick Links 6 | 7 | - [Installation Guide](INSTALLATION.md) 8 | - [Quick Start Guide](guides/quickstart.md) 9 | 10 | ## 🚀 Getting Started 11 | 12 | ### Core Concepts 13 | - [Quickstart](guides/quickstart.md) - Get started with cascadeflow in 5 minutes 14 | - [Providers](guides/providers.md) - Configure and use different AI providers (OpenAI, Anthropic, Groq, Ollama, etc.) 15 | - [Presets](guides/presets.md) - Use built-in presets for common use cases 16 | 17 | ### Core Features 18 | - [Streaming](guides/streaming.md) - Stream responses from cascade agents 19 | - [Tools](guides/tools.md) - Function calling and tool usage with cascades 20 | - [Cost Tracking](guides/cost_tracking.md) - Track and analyze API costs across queries 21 | 22 | ## 🏭 Production & Advanced 23 | 24 | ### Production Deployment 25 | - [Production Guide](guides/production.md) - Best practices for production deployments 26 | - [Performance Guide](guides/performance.md) - Optimize cascade performance and latency 27 | - [FastAPI Integration](guides/fastapi.md) - Integrate cascadeflow with FastAPI applications 28 | 29 | ### Advanced Topics 30 | - [Custom Cascades](guides/custom_cascade.md) - Build custom cascade strategies 31 | - [Custom Validation](guides/custom_validation.md) - Implement custom quality validators 32 | - [Edge Device Deployment](guides/edge_device.md) - Deploy cascades on edge devices (Jetson, etc.) 
33 | - [Browser/Edge Runtime](guides/browser_cascading.md) - Run cascades in browser or edge environments 34 | 35 | ### Integrations 36 | - [n8n Integration](guides/n8n_integration.md) - Use cascadeflow in n8n workflows 37 | 38 | ## 📚 Examples 39 | 40 | Comprehensive working code samples: 41 | 42 | **Python Examples:** [examples/](../examples/) 43 | - Basic usage, preset usage, multi-provider 44 | - Tool execution, streaming, cost tracking 45 | - Production patterns, FastAPI integration 46 | - Edge device deployment, vLLM integration 47 | - Custom cascades and validation 48 | 49 | **TypeScript Examples:** [packages/core/examples/](../packages/core/examples/) 50 | - Basic usage, tool calling, multi-provider 51 | - Streaming responses 52 | - Production patterns 53 | - Browser/Vercel Edge deployment 54 | 55 | ## 🤝 Need Help? 56 | 57 | - 📖 [GitHub Discussions](https://github.com/lemony-ai/cascadeflow/discussions) - Q&A and community support 58 | - 🐛 [GitHub Issues](https://github.com/lemony-ai/cascadeflow/issues) - Bug reports and feature requests 59 | - 📧 [Email Support](mailto:hello@lemony.ai) - Direct support 60 | 61 | ## 📦 API Reference 62 | 63 | Comprehensive API documentation for all classes and methods: 64 | 65 | - **[API Overview](./api/README.md)** - Complete API reference for Python and TypeScript 66 | - **Python API** 67 | - [CascadeAgent](./api/python/agent.md) - Main agent class 68 | - [ModelConfig](./api/python/config.md) - Model and cascade configuration 69 | - [CascadeResult](./api/python/result.md) - Result object with 30+ diagnostic fields 70 | - **TypeScript API** 71 | - See [TypeScript Package](../packages/core/README.md) for API documentation 72 | 73 | See also: Comprehensive examples in [/examples](../examples/) directory 74 | 75 | ## 🏗️ Architecture & Contributing 76 | 77 | For contributors and advanced users: 78 | 79 | - **[Architecture Guide](ARCHITECTURE.md)** - Detailed architecture, data flow, and code organization 80 | - **[Contributing 
Guide](../CONTRIBUTING.md)** - How to contribute to cascadeflow 81 | 82 | The architecture guide covers: 83 | - Directory structure (monorepo layout) 84 | - Core components and design patterns 85 | - Data flow and execution paths 86 | - Adding new providers, quality checks, and routing strategies 87 | - Testing strategy and development workflow 88 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # cascadeflow Development Requirements 2 | # Everything needed for development, testing, and contributing 3 | 4 | # ============================================================================ 5 | # CORE DEPENDENCIES 6 | # ============================================================================ 7 | 8 | -r requirements.txt 9 | 10 | 11 | # ============================================================================ 12 | # PROVIDER SDKs (For Testing All Providers) 13 | # ============================================================================ 14 | 15 | # API-based providers (require API keys) 16 | openai>=1.0.0 17 | anthropic>=0.8.0 18 | groq>=0.4.0 19 | huggingface-hub>=0.19.0 20 | together>=0.2.0 21 | 22 | # Local inference (optional - can also use HTTP) 23 | vllm>=0.2.0 24 | 25 | # Note: Ollama doesn't need a Python package - uses HTTP 26 | 27 | 28 | # ============================================================================ 29 | # TESTING 30 | # ============================================================================ 31 | 32 | pytest>=7.4.0 33 | pytest-asyncio>=0.21.0 34 | pytest-cov>=4.1.0 35 | pytest-mock>=3.12.0 36 | 37 | # Environment variable management for tests 38 | python-dotenv>=1.0.0 39 | 40 | 41 | # ============================================================================ 42 | # CODE QUALITY 43 | # ============================================================================ 44 | 45 | # Formatting 46 | 
black>=23.0.0 47 | isort>=5.12.0 48 | 49 | # Linting 50 | ruff>=0.1.0 51 | 52 | # Type checking 53 | mypy>=1.5.0 54 | 55 | # Pre-commit hooks 56 | pre-commit>=3.5.0 57 | 58 | 59 | # ============================================================================ 60 | # SECURITY SCANNING 61 | # ============================================================================ 62 | 63 | # Python security linter 64 | bandit>=1.7.0 65 | 66 | # Check for known vulnerabilities in dependencies 67 | safety>=2.3.0 68 | 69 | # Audit Python packages for known vulnerabilities 70 | pip-audit>=2.4.0 71 | 72 | 73 | # ============================================================================ 74 | # DEVELOPMENT UTILITIES 75 | # ============================================================================ 76 | 77 | # Rich terminal output (for development/debugging) 78 | rich>=13.0.0 79 | 80 | # Web framework for API examples 81 | fastapi>=0.104.0 82 | uvicorn>=0.24.0 83 | 84 | # HTTP client (for health checks in examples) 85 | httpx>=0.25.0 86 | 87 | # Type stubs 88 | types-requests>=2.31.0 89 | 90 | 91 | # ============================================================================ 92 | # DOCUMENTATION (Optional) 93 | # ============================================================================ 94 | 95 | # Uncomment if building docs: 96 | # mkdocs>=1.5.0 97 | # mkdocs-material>=9.4.0 98 | # mkdocstrings[python]>=0.23.0 99 | 100 | 101 | # ============================================================================ 102 | # SEMANTIC FEATURES (For ML-based functionality) 103 | # ============================================================================ 104 | 105 | # Lightweight embedding model for semantic quality checks 106 | # Note: This is optional but required for semantic quality tests 107 | fastembed>=0.2.0 108 | 109 | 110 | # ============================================================================ 111 | # INSTALLATION 112 | # 
"""
Guardrails manager for coordinating content safety checks.
"""

from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Optional

from .content_moderator import ContentModerator, ModerationResult
from .pii_detector import PIIDetector, PIIMatch

if TYPE_CHECKING:
    from cascadeflow.profiles import UserProfile


class GuardrailViolation(Exception):
    """Exception raised when content violates guardrails."""

    def __init__(self, message: str, violations: list[str]):
        super().__init__(message)
        # Keep the individual violation strings so callers can inspect
        # exactly which checks failed.
        self.violations = violations


@dataclass
class GuardrailsCheck:
    """Result from a guardrails check.

    Attributes:
        is_safe: True when no guardrail was violated.
        content_moderation: Raw moderation result, or None when content
            moderation was disabled for the profile.
        pii_detected: PII matches found, or None when PII detection was
            disabled for the profile.
        violations: Human-readable descriptions of every violation.
    """

    is_safe: bool
    content_moderation: Optional[ModerationResult] = None
    pii_detected: Optional[list[PIIMatch]] = None
    # default_factory gives each instance its own list (the previous
    # `= None` default forced a __post_init__ workaround and mistyped
    # the field).
    violations: list[str] = field(default_factory=list)

    def __post_init__(self):
        # Backward compatibility: callers that still pass violations=None
        # explicitly get the old normalization to an empty list.
        if self.violations is None:
            self.violations = []


class GuardrailsManager:
    """
    Centralized guardrails management.

    Coordinates content moderation and PII detection based on
    user profile settings.

    Example:
        >>> manager = GuardrailsManager()
        >>> result = await manager.check_content(
        ...     text="user input",
        ...     profile=profile
        ... )
        >>> if not result.is_safe:
        ...     raise GuardrailViolation("Content blocked", result.violations)
    """

    def __init__(self):
        """Initialize guardrails manager with default moderator/detector."""
        self._content_moderator = ContentModerator()
        self._pii_detector = PIIDetector()

    async def check_content(
        self,
        text: str,
        profile: "UserProfile",
    ) -> GuardrailsCheck:
        """
        Check content against enabled guardrails.

        Args:
            text: Text to check
            profile: User profile with guardrail settings

        Returns:
            GuardrailsCheck with results
        """
        violations: list[str] = []
        moderation_result = None
        pii_matches = None

        # Check content moderation if enabled
        if profile.enable_content_moderation:
            moderation_result = await self._content_moderator.check_async(text)
            if not moderation_result.is_safe:
                violations.extend(moderation_result.violations)

        # Check PII if enabled
        if profile.enable_pii_detection:
            pii_matches = await self._pii_detector.detect_async(text)
            if pii_matches:
                pii_types = {m.pii_type for m in pii_matches}
                # Sort for a deterministic message; set iteration order
                # is arbitrary and would make logs/tests flaky.
                violations.append(f"PII detected: {', '.join(sorted(pii_types))}")

        return GuardrailsCheck(
            is_safe=not violations,
            content_moderation=moderation_result,
            pii_detected=pii_matches,
            violations=violations,
        )

    async def redact_pii(
        self,
        text: str,
        profile: "UserProfile",
    ) -> tuple[str, list[PIIMatch]]:
        """
        Redact PII from text if PII detection is enabled.

        Args:
            text: Text to redact
            profile: User profile

        Returns:
            Tuple of (redacted_text, pii_matches)
        """
        if not profile.enable_pii_detection:
            # Detection disabled: return the text untouched.
            return text, []

        return self._pii_detector.redact(text)
50 | echo -------------------------------------------- 51 | isort cascadeflow\ tests\ examples\ --profile black --line-length 100 52 | if errorlevel 1 ( 53 | echo ❌ isort failed 54 | exit /b 1 55 | ) 56 | echo ✅ Import sorting complete 57 | echo. 58 | 59 | REM Step 3: Ruff - Linting and auto-fix 60 | echo 🔍 Step 3/4: Running Ruff (linter)... 61 | echo ------------------------------------ 62 | echo Checking for issues... 63 | ruff check cascadeflow\ tests\ examples\ --fix 64 | if errorlevel 1 ( 65 | echo ⚠️ Ruff found some issues 66 | echo Attempting to auto-fix... 67 | ruff check cascadeflow\ tests\ examples\ --fix --unsafe-fixes 68 | if errorlevel 1 ( 69 | echo ❌ Some issues need manual fixing 70 | echo Review the output above and fix manually 71 | exit /b 1 72 | ) 73 | ) 74 | echo ✅ Linting complete 75 | echo. 76 | 77 | REM Step 4: mypy - Type checking (optional, won't fail) 78 | echo 🔤 Step 4/4: Running mypy (type checker)... 79 | echo ----------------------------------------- 80 | mypy cascadeflow\ --ignore-missing-imports --no-strict-optional 81 | if errorlevel 1 ( 82 | echo ⚠️ Type checking found some issues (non-critical) 83 | echo Consider fixing these before launch, but not required 84 | ) 85 | echo ✅ Type checking complete 86 | echo. 87 | 88 | REM Final verification 89 | echo 🧪 Running quick verification... 90 | echo ------------------------------ 91 | python -m py_compile cascadeflow\*.py 2>nul 92 | if errorlevel 1 ( 93 | echo ❌ Syntax errors detected in cascadeflow\ 94 | exit /b 1 95 | ) 96 | echo ✅ Syntax verification passed 97 | echo. 98 | 99 | REM Summary 100 | echo ====================================== 101 | echo ✨ Code Formatting Complete! 102 | echo ====================================== 103 | echo. 104 | echo Summary: 105 | echo ✅ Black formatting applied 106 | echo ✅ Imports sorted with isort 107 | echo ✅ Linting issues fixed with Ruff 108 | echo ✅ Type checking completed 109 | echo ✅ Syntax verification passed 110 | echo. 
"""
Tool result formatting for cascadeflow.

Handles formatting tool execution results for different providers.
"""

import logging
from dataclasses import dataclass
from typing import Any, Optional

logger = logging.getLogger(__name__)


@dataclass
class ToolResult:
    """
    Result from executing a tool.

    This is passed back to the model after tool execution.
    """

    call_id: str  # ID of the tool call this result answers
    name: str  # Tool name
    result: Any  # Tool output
    error: Optional[str] = None  # Error message if tool failed
    execution_time_ms: Optional[float] = None  # How long tool took

    @property
    def success(self) -> bool:
        """Whether tool execution succeeded (no error was recorded)."""
        return self.error is None

    def _content_text(self) -> str:
        """Render the payload: the error (prefixed) if set, else str(result)."""
        if self.error:
            return f"Error: {self.error}"
        return str(self.result)

    def to_openai_message(self) -> dict[str, Any]:
        """
        Format as OpenAI tool result message.

        Used by: OpenAI, Groq, Together, vLLM

        Shape: {"tool_call_id": ..., "role": "tool", "name": ..., "content": ...}
        """
        return {
            "tool_call_id": self.call_id,
            "role": "tool",
            "name": self.name,
            "content": self._content_text(),
        }

    def to_anthropic_message(self) -> dict[str, Any]:
        """
        Format as Anthropic tool result message.

        Key difference from OpenAI: a user-role message carrying a
        "tool_result" content block instead of role="tool".
        """
        result_block = {
            "type": "tool_result",
            "tool_use_id": self.call_id,
            "content": self._content_text(),
            "is_error": self.error is not None,
        }
        return {"role": "user", "content": [result_block]}

    def to_ollama_message(self) -> dict[str, Any]:
        """Format as Ollama tool result (same as OpenAI)."""
        return self.to_openai_message()

    def to_vllm_message(self) -> dict[str, Any]:
        """Format as vLLM tool result (same as OpenAI)."""
        return self.to_openai_message()

    def to_provider_message(self, provider: str) -> dict[str, Any]:
        """
        Format as provider-specific message.

        Args:
            provider: Provider name (case-insensitive)

        Returns:
            Tool result in provider's expected format
        """
        key = provider.lower()

        if key == "anthropic":
            return self.to_anthropic_message()
        if key == "ollama":
            return self.to_ollama_message()
        if key == "vllm":
            return self.to_vllm_message()
        if key not in ("openai", "groq", "together", "huggingface"):
            # Unknown providers fall back to the most widely-adopted shape.
            logger.warning(f"Unknown provider '{provider}', using OpenAI format")
        return self.to_openai_message()
async def main():
    """Run the three-part CascadeFlow + LangChain demo.

    Requires OPENAI_API_KEY in the environment. Each section invokes the
    cascade, then prints the response together with the diagnostics dict
    returned by get_last_cascade_result() (model routing, costs, latency).
    """
    # Verify API key is set
    if not os.getenv("OPENAI_API_KEY"):
        print("Error: OPENAI_API_KEY environment variable not set")
        return

    print("=" * 60)
    print("CascadeFlow LangChain Integration - Basic Usage")
    print("=" * 60)

    # Setup drafter (cheap, fast) and verifier (expensive, accurate)
    drafter = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    verifier = ChatOpenAI(model="gpt-4o", temperature=0)

    # Create cascade with quality threshold
    # (drafter answers below this quality score are escalated to the verifier)
    cascade = CascadeFlow(
        drafter=drafter,
        verifier=verifier,
        quality_threshold=0.7,
        enable_cost_tracking=True,
        cost_tracking_provider="cascadeflow",  # Use built-in pricing
    )

    print("\n1. Testing with simple question (should use drafter):")
    print("-" * 60)

    response = await cascade.ainvoke("What is 2+2?")
    result = cascade.get_last_cascade_result()

    print("\nQuestion: What is 2+2?")
    print(f"Response: {response.content}")
    print(f"\nModel used: {result['model_used']}")
    print(f"Drafter quality: {result.get('drafter_quality', 0):.2f}")
    print(f"Accepted: {result['accepted']}")
    print(f"Drafter cost: ${result['drafter_cost']:.6f}")
    print(f"Verifier cost: ${result['verifier_cost']:.6f}")
    print(f"Total cost: ${result['total_cost']:.6f}")
    print(f"Savings: {result['savings_percentage']:.1f}%")
    print(f"Latency: {result['latency_ms']:.0f}ms")

    print("\n2. Testing with complex question (may use verifier):")
    print("-" * 60)

    # A harder prompt: the drafter's answer may score below the threshold,
    # in which case the verifier is consulted and billed as well.
    response = await cascade.ainvoke(
        "Explain the difference between synchronous and asynchronous programming "
        "in Python, including examples and best practices."
    )
    result = cascade.get_last_cascade_result()

    print("\nQuestion: Explain sync vs async in Python...")
    print(f"Response: {response.content[:200]}...")
    print(f"\nModel used: {result['model_used']}")
    print(f"Drafter quality: {result.get('drafter_quality', 0):.2f}")
    print(f"Accepted: {result['accepted']}")
    print(f"Drafter cost: ${result['drafter_cost']:.6f}")
    print(f"Verifier cost: ${result['verifier_cost']:.6f}")
    print(f"Total cost: ${result['total_cost']:.6f}")
    print(f"Savings: {result['savings_percentage']:.1f}%")
    print(f"Latency: {result['latency_ms']:.0f}ms")

    print("\n3. Testing bind() method:")
    print("-" * 60)

    # Create a bound instance with temperature
    # (bind() follows the standard LangChain Runnable convention)
    bound_cascade = cascade.bind(temperature=1.0)

    response = await bound_cascade.ainvoke("Tell me a creative story in one sentence.")
    result = bound_cascade.get_last_cascade_result()

    print("\nQuestion: Tell me a creative story...")
    print(f"Response: {response.content}")
    print(f"\nModel used: {result['model_used']}")
    print(f"Accepted: {result['accepted']}")

    print("\n" + "=" * 60)
    print("Basic usage demo complete!")
    print("=" * 60)


if __name__ == "__main__":
    asyncio.run(main())
class UserProfileManager:
    """
    Manage user profiles at scale (thousands of users).

    Features:
    - In-memory caching (configurable TTL)
    - Database integration (via callback)
    - Bulk operations
    - Tier upgrades/downgrades
    """

    def __init__(
        self,
        cache_ttl_seconds: int = 300,  # 5 minutes
        load_callback: Optional[Callable[[str], Awaitable[Optional["UserProfile"]]]] = None,
        save_callback: Optional[Callable[["UserProfile"], Awaitable[None]]] = None,
    ):
        """
        Initialize profile manager.

        Args:
            cache_ttl_seconds: How long to cache profiles in memory
            load_callback: Async function to load profile from database
            save_callback: Async function to save profile to database
        """
        # Maps user_id -> (profile, timestamp when it was cached).
        self._cache: dict[str, tuple["UserProfile", datetime]] = {}
        self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
        self._load_callback = load_callback
        self._save_callback = save_callback
        # Serializes the slow path (database loads) across coroutines.
        self._lock = asyncio.Lock()

    def _cached_fresh(self, user_id: str) -> Optional["UserProfile"]:
        """Return the cached profile if present and not expired, else None.

        NOTE(review): datetime.utcnow() is naive and deprecated in 3.12+;
        all timestamps here are produced the same way so comparisons stay
        consistent, but migrating to datetime.now(timezone.utc) is advisable.
        """
        entry = self._cache.get(user_id)
        if entry is None:
            return None
        profile, cached_at = entry
        if datetime.utcnow() - cached_at < self._cache_ttl:
            return profile
        return None

    async def get_profile(self, user_id: str) -> "UserProfile":
        """
        Get user profile (from cache or load).

        Fast path: Cached profile (microseconds)
        Slow path: Load from database (milliseconds)
        Default path: Create free tier profile (microseconds)
        """
        # Fast path: fresh cache hit, no lock needed.
        profile = self._cached_fresh(user_id)
        if profile is not None:
            return profile

        async with self._lock:
            # Double-check under the lock: a concurrent caller may have
            # loaded this profile while we waited, so re-checking avoids
            # issuing duplicate database loads for the same user.
            profile = self._cached_fresh(user_id)
            if profile is not None:
                return profile

            # Load from database
            if self._load_callback:
                loaded = await self._load_callback(user_id)
                if loaded:
                    self._cache[user_id] = (loaded, datetime.utcnow())
                    return loaded

            # Default: Create free tier profile
            profile = UserProfile.from_tier(TierLevel.FREE, user_id=user_id)
            self._cache[user_id] = (profile, datetime.utcnow())
            return profile

    async def save_profile(self, profile: "UserProfile") -> None:
        """Save profile to cache and (if configured) the database."""
        self._cache[profile.user_id] = (profile, datetime.utcnow())
        if self._save_callback:
            await self._save_callback(profile)

    async def update_tier(self, user_id: str, new_tier: "TierLevel") -> "UserProfile":
        """Upgrade/downgrade user tier and persist the change."""
        # Local import avoids a circular dependency at module load time.
        from .tier_config import TierConfig

        profile = await self.get_profile(user_id)
        profile.tier = TierConfig.from_preset(new_tier)
        await self.save_profile(profile)
        return profile

    def invalidate_cache(self, user_id: str) -> None:
        """Invalidate cached profile (e.g., after tier change)."""
        # pop() with a default is a single lookup-and-remove and is safe
        # when the entry is already absent.
        self._cache.pop(user_id, None)

    def create_bulk(self, user_data: list[dict]) -> list["UserProfile"]:
        """Create multiple profiles efficiently.

        Each dict must contain "user_id" and may contain "tier"
        (defaults to "free").
        """
        profiles: list["UserProfile"] = []
        now = datetime.utcnow()
        for data in user_data:
            tier = TierLevel(data.get("tier", "free"))
            profile = UserProfile.from_tier(tier, user_id=data["user_id"])
            profiles.append(profile)
            self._cache[profile.user_id] = (profile, now)
        return profiles
"""Tier configuration for user profiles."""

from dataclasses import dataclass, replace
from enum import Enum
from typing import Optional


class TierLevel(str, Enum):
    """Predefined tier levels."""

    FREE = "free"
    STARTER = "starter"
    PRO = "pro"
    BUSINESS = "business"
    ENTERPRISE = "enterprise"


@dataclass
class TierConfig:
    """
    Tier configuration (one dimension of UserProfile).

    This represents subscription tiers with predefined limits and features.
    Can be used as-is or customized per user. A limit of None means
    "unlimited" for that dimension.
    """

    name: str

    # Budget limits (USD); None = unlimited
    daily_budget: Optional[float] = None
    weekly_budget: Optional[float] = None
    monthly_budget: Optional[float] = None

    # Rate limits; None = unlimited
    requests_per_hour: Optional[int] = None
    requests_per_day: Optional[int] = None
    tokens_per_minute: Optional[int] = None

    # Feature flags
    enable_streaming: bool = True
    enable_batch: bool = False
    enable_embeddings: bool = False

    # Quality settings
    min_quality: float = 0.60
    target_quality: float = 0.80

    # Model access; None = no restriction
    allowed_models: Optional[list[str]] = None
    blocked_models: Optional[list[str]] = None

    # Support level
    support_priority: str = "community"  # community, priority, dedicated

    @classmethod
    def from_preset(cls, tier: TierLevel) -> "TierConfig":
        """Create TierConfig from a predefined preset.

        Returns a *copy* of the preset so that per-user customization
        (e.g. mutating the returned config's budget) cannot corrupt the
        shared TIER_PRESETS entry that every other user of the same tier
        resolves to.
        """
        return replace(TIER_PRESETS[tier])


# Predefined tier presets (shared templates; hand out copies via from_preset)
TIER_PRESETS = {
    TierLevel.FREE: TierConfig(
        name="free",
        daily_budget=0.10,
        requests_per_hour=10,
        requests_per_day=100,
        enable_streaming=False,
        enable_batch=False,
        enable_embeddings=False,
        min_quality=0.60,
        target_quality=0.70,
        support_priority="community",
    ),
    TierLevel.STARTER: TierConfig(
        name="starter",
        daily_budget=1.00,
        requests_per_hour=100,
        requests_per_day=1000,
        enable_streaming=True,
        enable_batch=False,
        enable_embeddings=False,
        min_quality=0.70,
        target_quality=0.80,
        support_priority="community",
    ),
    TierLevel.PRO: TierConfig(
        name="pro",
        daily_budget=10.00,
        requests_per_hour=1000,
        requests_per_day=10000,
        tokens_per_minute=100000,
        enable_streaming=True,
        enable_batch=True,
        enable_embeddings=True,
        min_quality=0.75,
        target_quality=0.85,
        allowed_models=None,  # All models
        support_priority="priority",
    ),
    TierLevel.BUSINESS: TierConfig(
        name="business",
        daily_budget=50.00,
        requests_per_hour=5000,
        requests_per_day=50000,
        tokens_per_minute=500000,
        enable_streaming=True,
        enable_batch=True,
        enable_embeddings=True,
        min_quality=0.80,
        target_quality=0.90,
        support_priority="priority",
    ),
    TierLevel.ENTERPRISE: TierConfig(
        name="enterprise",
        daily_budget=None,  # Unlimited
        requests_per_hour=None,  # Unlimited
        requests_per_day=None,  # Unlimited
        tokens_per_minute=None,  # Unlimited
        enable_streaming=True,
        enable_batch=True,
        enable_embeddings=True,
        min_quality=0.85,
        target_quality=0.95,
        support_priority="dedicated",
    ),
}
@dataclass
class ToolCall:
    """
    Represents a tool call request from the model.

    This is returned by the model when it wants to use a tool.
    """

    id: str  # Unique call ID (for tracking)
    name: str  # Tool name
    arguments: dict[str, Any]  # Tool arguments
    provider_format: ToolCallFormat  # Original format from provider

    @classmethod
    def from_openai(cls, tool_call: dict[str, Any]) -> "ToolCall":
        """
        Parse the OpenAI tool-call shape.

        The arguments arrive as a JSON-encoded string under
        ``function.arguments``; a malformed or missing payload is logged
        and treated as empty arguments rather than raising.
        """
        try:
            parsed_args = json.loads(tool_call["function"]["arguments"])
        except (json.JSONDecodeError, KeyError) as exc:
            logger.error(f"Failed to parse OpenAI tool call arguments: {exc}")
            parsed_args = {}

        return cls(
            id=tool_call["id"],
            name=tool_call["function"]["name"],
            arguments=parsed_args,
            provider_format=ToolCallFormat.OPENAI,
        )

    @classmethod
    def from_anthropic(cls, tool_use: dict[str, Any]) -> "ToolCall":
        """
        Parse the Anthropic ``tool_use`` content block.

        Arguments are already a dict under ``input`` (no JSON decoding
        needed); a missing ``input`` defaults to empty arguments.
        """
        return cls(
            id=tool_use["id"],
            name=tool_use["name"],
            arguments=tool_use.get("input", {}),
            provider_format=ToolCallFormat.ANTHROPIC,
        )

    @classmethod
    def from_ollama(cls, tool_call: dict[str, Any]) -> "ToolCall":
        """Parse Ollama tool call format (same as OpenAI)."""
        return cls.from_openai(tool_call)

    @classmethod
    def from_vllm(cls, tool_call: dict[str, Any]) -> "ToolCall":
        """Parse vLLM tool call format (same as OpenAI)."""
        return cls.from_openai(tool_call)

    @classmethod
    def from_provider(cls, provider: str, tool_call: dict[str, Any]) -> "ToolCall":
        """
        Parse tool call from any provider format.

        Args:
            provider: Provider name (case-insensitive)
            tool_call: Raw tool call from provider response

        Returns:
            Standardized ToolCall object

        Raises:
            ValueError: If an unknown provider's payload cannot be parsed
                as the OpenAI fallback format.
        """
        key = provider.lower()

        if key == "anthropic":
            return cls.from_anthropic(tool_call)
        if key == "ollama":
            return cls.from_ollama(tool_call)
        if key == "vllm":
            return cls.from_vllm(tool_call)
        if key in ("openai", "groq", "together", "huggingface"):
            return cls.from_openai(tool_call)

        # Unknown provider: optimistically try the OpenAI shape before
        # giving up.
        try:
            return cls.from_openai(tool_call)
        except Exception as exc:
            logger.error(f"Failed to parse tool call from {provider}: {exc}")
            raise ValueError(f"Unsupported tool call format from provider '{provider}'")
every commit and definitely before launch! 4 | 5 | set -e # Exit on any error 6 | 7 | echo "🎨 cascadeflow - Code Formatting Script" 8 | echo "========================================" 9 | echo "" 10 | 11 | # Check if virtual environment is activated 12 | if [[ -z "$VIRTUAL_ENV" ]]; then 13 | echo "⚠️ Virtual environment not activated!" 14 | echo "Please run: source .venv/bin/activate" 15 | exit 1 16 | fi 17 | 18 | # Check if we're in the right directory 19 | if [[ ! -f "pyproject.toml" ]]; then 20 | echo "❌ Error: Not in project root directory" 21 | echo "Please run this script from the cascadeflow/ directory" 22 | exit 1 23 | fi 24 | 25 | echo "✅ Virtual environment: $VIRTUAL_ENV" 26 | echo "✅ Working directory: $(pwd)" 27 | echo "" 28 | 29 | # Install formatting tools if not present 30 | echo "📦 Checking formatting tools..." 31 | pip install --quiet --upgrade black isort ruff mypy 2>/dev/null || { 32 | echo "Installing formatting tools..." 33 | pip install black isort ruff mypy 34 | } 35 | echo "✅ Formatting tools ready" 36 | echo "" 37 | 38 | # Step 1: Black - Code formatting 39 | echo "🎨 Step 1/4: Running Black (code formatter)..." 40 | echo "-------------------------------------------" 41 | black cascadeflow/ tests/ examples/ --line-length 100 || { 42 | echo "❌ Black formatting failed" 43 | exit 1 44 | } 45 | echo "✅ Black formatting complete" 46 | echo "" 47 | 48 | # Step 2: isort - Import sorting 49 | echo "📦 Step 2/4: Running isort (import sorter)..." 50 | echo "--------------------------------------------" 51 | isort cascadeflow/ tests/ examples/ --profile black --line-length 100 || { 52 | echo "❌ isort failed" 53 | exit 1 54 | } 55 | echo "✅ Import sorting complete" 56 | echo "" 57 | 58 | # Step 3: Ruff - Linting and auto-fix 59 | echo "🔍 Step 3/4: Running Ruff (linter)..." 60 | echo "------------------------------------" 61 | echo "Checking for issues..." 
62 | ruff check cascadeflow/ tests/ examples/ --fix || { 63 | echo "⚠️ Ruff found some issues" 64 | echo "Attempting to auto-fix..." 65 | ruff check cascadeflow/ tests/ examples/ --fix --unsafe-fixes || { 66 | echo "❌ Some issues need manual fixing" 67 | echo "Review the output above and fix manually" 68 | exit 1 69 | } 70 | } 71 | echo "✅ Linting complete" 72 | echo "" 73 | 74 | # Step 4: mypy - Type checking (optional, won't fail) 75 | echo "🔤 Step 4/4: Running mypy (type checker)..." 76 | echo "-----------------------------------------" 77 | mypy cascadeflow/ --ignore-missing-imports --no-strict-optional || { 78 | echo "⚠️ Type checking found some issues (non-critical)" 79 | echo "Consider fixing these before launch, but not required" 80 | } 81 | echo "✅ Type checking complete" 82 | echo "" 83 | 84 | # Final verification 85 | echo "🧪 Running quick verification..." 86 | echo "------------------------------" 87 | 88 | # Check if there are any .py files with syntax errors 89 | python -m py_compile cascadeflow/*.py 2>/dev/null || { 90 | echo "❌ Syntax errors detected in cascadeflow/" 91 | exit 1 92 | } 93 | 94 | python -m py_compile tests/*.py 2>/dev/null || { 95 | echo "⚠️ Syntax errors in tests/ (check manually)" 96 | } 97 | 98 | echo "✅ Syntax verification passed" 99 | echo "" 100 | 101 | # Summary 102 | echo "======================================" 103 | echo "✨ Code Formatting Complete!" 104 | echo "======================================" 105 | echo "" 106 | echo "Summary:" 107 | echo " ✅ Black formatting applied" 108 | echo " ✅ Imports sorted with isort" 109 | echo " ✅ Linting issues fixed with Ruff" 110 | echo " ✅ Type checking completed" 111 | echo " ✅ Syntax verification passed" 112 | echo "" 113 | echo "Next steps:" 114 | echo " 1. Review changes: git diff" 115 | echo " 2. Run tests: pytest tests/ -v" 116 | echo " 3. Commit: git add . && git commit -m 'style: Format code with Black/isort/Ruff'" 117 | echo "" 118 | echo "🚀 Ready for launch!" 
-------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CascadeFlow Code Owners 2 | # ======================= 3 | # 4 | # This file defines who is responsible for reviewing code changes in specific 5 | # parts of the repository. Code owners are automatically requested for review 6 | # when someone opens a pull request that modifies code they own. 7 | # 8 | # Learn more: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners 9 | 10 | # ============================================================================ 11 | # GLOBAL OWNER (All files) 12 | # ============================================================================ 13 | # The repository owner must approve ALL pull requests 14 | * @saschabuehrle 15 | 16 | # ============================================================================ 17 | # CRITICAL FILES (Extra protection for package distribution) 18 | # ============================================================================ 19 | 20 | # Python package configuration 21 | /pyproject.toml @saschabuehrle 22 | /setup.py @saschabuehrle 23 | /setup.cfg @saschabuehrle 24 | /requirements*.txt @saschabuehrle 25 | /MANIFEST.in @saschabuehrle 26 | 27 | # TypeScript/JavaScript package configuration 28 | /packages/*/package.json @saschabuehrle 29 | /package.json @saschabuehrle 30 | /pnpm-workspace.yaml @saschabuehrle 31 | /pnpm-lock.yaml @saschabuehrle 32 | /tsconfig*.json @saschabuehrle 33 | 34 | # GitHub workflows, actions, and settings 35 | /.github/workflows/ @saschabuehrle 36 | /.github/actions/ @saschabuehrle 37 | /.github/CODEOWNERS @saschabuehrle 38 | 39 | # Security 40 | /SECURITY.md @saschabuehrle 41 | /.env.example @saschabuehrle 42 | 43 | # ============================================================================ 44 | # CORE LIBRARY CODE 
(Python) 45 | # ============================================================================ 46 | 47 | # Core agent and orchestration 48 | /cascadeflow/agent.py @saschabuehrle 49 | /cascadeflow/config.py @saschabuehrle 50 | /cascadeflow/schema/ @saschabuehrle 51 | 52 | # Provider implementations - critical for integrations 53 | /cascadeflow/providers/ @saschabuehrle 54 | 55 | # Quality validation and routing systems 56 | /cascadeflow/quality/ @saschabuehrle 57 | /cascadeflow/routing/ @saschabuehrle 58 | 59 | # Utilities and helpers 60 | /cascadeflow/utils/ @saschabuehrle 61 | 62 | # All other cascadeflow code 63 | /cascadeflow/ @saschabuehrle 64 | 65 | # ============================================================================ 66 | # CORE LIBRARY CODE (TypeScript) 67 | # ============================================================================ 68 | /packages/core/src/ @saschabuehrle 69 | /packages/integrations/ @saschabuehrle 70 | /packages/ml/ @saschabuehrle 71 | 72 | # ============================================================================ 73 | # TESTS 74 | # ============================================================================ 75 | /tests/ @saschabuehrle 76 | /packages/*/tests/ @saschabuehrle 77 | /packages/*/__tests__/ @saschabuehrle 78 | 79 | # ============================================================================ 80 | # DOCUMENTATION 81 | # ============================================================================ 82 | /docs/ @saschabuehrle 83 | /README.md @saschabuehrle 84 | /CHANGELOG.md @saschabuehrle 85 | /CONTRIBUTING.md @saschabuehrle 86 | /LICENSE @saschabuehrle 87 | 88 | # ============================================================================ 89 | # EXAMPLES 90 | # ============================================================================ 91 | /examples/ @saschabuehrle 92 | /packages/*/examples/ @saschabuehrle 93 | 94 | # ============================================================================ 95 | # NOTES FOR 
CONTRIBUTORS 96 | # ============================================================================ 97 | # - All PRs require approval from @saschabuehrle (enforced by branch protection) 98 | # - Direct commits to main are blocked (enforced by branch protection) 99 | # - All changes must go through feature branches and PRs 100 | # - Even repository admins cannot bypass these rules (enforce_admins: true) -------------------------------------------------------------------------------- /packages/core/examples/browser/README.md: -------------------------------------------------------------------------------- 1 | # Browser Examples for cascadeflow 2 | 3 | This directory contains examples for using cascadeflow in browser environments. 4 | 5 | ## Security Note 6 | 7 | **NEVER expose API keys in browser code!** All examples use a backend proxy or edge function to securely handle API keys. 8 | 9 | ## Examples 10 | 11 | ### 1. Vercel Edge Function (`vercel-edge/`) 12 | 13 | Deploy cascadeflow as a Vercel Edge Function for global, low-latency inference. 
14 | 15 | **Pros:** 16 | - Global edge network (low latency) 17 | - Serverless (no infrastructure) 18 | - Easy deployment 19 | 20 | **Cons:** 21 | - Vendor lock-in (Vercel) 22 | - Cold starts 23 | 24 | ## Quick Start 25 | 26 | ```bash 27 | cd vercel-edge 28 | npm install 29 | vercel dev # Test locally 30 | vercel deploy # Deploy to production 31 | ``` 32 | 33 | ## Usage Patterns 34 | 35 | ### Pattern 1: Edge Function (Serverless) 36 | 37 | Best for: Public-facing apps, global users, low latency 38 | 39 | ```typescript 40 | // Edge function handles everything 41 | import { CascadeAgent } from '@cascadeflow/core'; 42 | 43 | export default async function handler(req: Request) { 44 | // Recommended: Claude Haiku + GPT-5 45 | const agent = new CascadeAgent({ 46 | models: [ 47 | { name: 'claude-3-5-haiku-20241022', provider: 'anthropic', cost: 0.0008, apiKey: process.env.ANTHROPIC_API_KEY }, 48 | { name: 'gpt-5', provider: 'openai', cost: 0.00125, apiKey: process.env.OPENAI_API_KEY } 49 | ] 50 | }); 51 | 52 | const { query } = await req.json(); 53 | const result = await agent.run(query); 54 | 55 | return Response.json(result); 56 | } 57 | ``` 58 | 59 | ### Pattern 2: Backend API + Frontend 60 | 61 | Best for: Enterprise apps, existing backends, fine-grained control 62 | 63 | ```typescript 64 | // Backend (Express) 65 | app.post('/api/cascade', async (req, res) => { 66 | const agent = new CascadeAgent({ /* config */ }); 67 | const result = await agent.run(req.body.query); 68 | res.json(result); 69 | }); 70 | 71 | // Frontend (Browser) 72 | const response = await fetch('/api/cascade', { 73 | method: 'POST', 74 | headers: { 'Content-Type': 'application/json' }, 75 | body: JSON.stringify({ query: 'What is TypeScript?' 
}) 76 | }); 77 | const result = await response.json(); 78 | ``` 79 | 80 | ### Pattern 3: Direct Browser (Multi-Provider Support) 81 | 82 | Best for: When you already have a proxy endpoint 83 | 84 | All providers automatically work in browser through runtime detection: 85 | 86 | ```typescript 87 | import { CascadeAgent } from '@cascadeflow/core'; 88 | 89 | const agent = new CascadeAgent({ 90 | models: [ 91 | { 92 | name: 'claude-3-5-haiku-20241022', 93 | provider: 'anthropic', 94 | cost: 0.0008, 95 | proxyUrl: '/api/anthropic-proxy' // Your proxy endpoint 96 | }, 97 | { 98 | name: 'gpt-5', 99 | provider: 'openai', 100 | cost: 0.00125, 101 | proxyUrl: '/api/openai-proxy' // Your proxy endpoint 102 | } 103 | ] 104 | }); 105 | 106 | const result = await agent.run('Hello!'); 107 | console.log(`Savings: ${result.savingsPercentage}%`); 108 | ``` 109 | 110 | **All 7 providers work in browser:** 111 | OpenAI, Anthropic, Groq, Together AI, Ollama, HuggingFace, vLLM 112 | 113 | ## Environment Variables 114 | 115 | All examples require: 116 | 117 | ```bash 118 | OPENAI_API_KEY=sk-... 119 | ``` 120 | 121 | For Vercel: 122 | ```bash 123 | vercel env add OPENAI_API_KEY 124 | ``` 125 | 126 | For Cloudflare: 127 | ```bash 128 | npx wrangler secret put OPENAI_API_KEY 129 | ``` 130 | 131 | For Express: 132 | ```bash 133 | # Create .env file 134 | echo "OPENAI_API_KEY=sk-..." > .env 135 | ``` 136 | 137 | ## Cost Tracking in Browser 138 | 139 | All examples return full CascadeResult: 140 | 141 | ```typescript 142 | { 143 | content: "...", 144 | modelUsed: "gpt-4o-mini", 145 | totalCost: 0.000211, 146 | savingsPercentage: 97.8, 147 | cascaded: true, 148 | draftAccepted: true, 149 | // ... 
more fields 150 | } 151 | ``` 152 | 153 | Display savings to users: 154 | 155 | ```javascript 156 | document.getElementById('savings').textContent = 157 | `Saved ${result.savingsPercentage}% vs using ${result.verifierModel || 'best model'}`; 158 | ``` 159 | -------------------------------------------------------------------------------- /cascadeflow/utils/caching.py: -------------------------------------------------------------------------------- 1 | """ 2 | Response caching system. 3 | 4 | Provides: 5 | - In-memory LRU cache 6 | - Cache key generation 7 | - TTL support 8 | - Cache statistics 9 | """ 10 | 11 | import hashlib 12 | import logging 13 | import time 14 | from collections import OrderedDict 15 | from typing import Any, Optional 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class ResponseCache: 21 | """ 22 | Simple in-memory LRU cache for responses. 23 | 24 | Example: 25 | >>> cache = ResponseCache(max_size=1000, default_ttl=3600) 26 | >>> 27 | >>> # Store response 28 | >>> cache.set("What is 2+2?", response_data, ttl=600) 29 | >>> 30 | >>> # Retrieve response 31 | >>> cached = cache.get("What is 2+2?") 32 | >>> if cached: 33 | ... print("Cache hit!") 34 | """ 35 | 36 | def __init__(self, max_size: int = 1000, default_ttl: int = 3600): 37 | """ 38 | Initialize cache. 
39 | 40 | Args: 41 | max_size: Maximum number of cached items 42 | default_ttl: Default TTL in seconds 43 | """ 44 | self.max_size = max_size 45 | self.default_ttl = default_ttl 46 | self.cache: OrderedDict = OrderedDict() 47 | self.stats = {"hits": 0, "misses": 0, "sets": 0, "evictions": 0} 48 | 49 | def _generate_key( 50 | self, query: str, model: Optional[str] = None, params: Optional[dict[str, Any]] = None 51 | ) -> str: 52 | """Generate cache key from query and parameters.""" 53 | key_data = {"query": query, "model": model, "params": params or {}} 54 | key_str = str(sorted(key_data.items())) 55 | return hashlib.sha256(key_str.encode()).hexdigest() 56 | 57 | def get( 58 | self, query: str, model: Optional[str] = None, params: Optional[dict[str, Any]] = None 59 | ) -> Optional[dict[str, Any]]: 60 | """ 61 | Get cached response. 62 | 63 | Returns None if not found or expired. 64 | """ 65 | key = self._generate_key(query, model, params) 66 | 67 | if key not in self.cache: 68 | self.stats["misses"] += 1 69 | return None 70 | 71 | # Check TTL 72 | entry = self.cache[key] 73 | if time.time() > entry["expires_at"]: 74 | # Expired 75 | del self.cache[key] 76 | self.stats["misses"] += 1 77 | return None 78 | 79 | # Move to end (LRU) 80 | self.cache.move_to_end(key) 81 | self.stats["hits"] += 1 82 | 83 | logger.debug(f"Cache hit for query: {query[:50]}...") 84 | return entry["response"] 85 | 86 | def set( 87 | self, 88 | query: str, 89 | response: dict[str, Any], 90 | model: Optional[str] = None, 91 | params: Optional[dict[str, Any]] = None, 92 | ttl: Optional[int] = None, 93 | ): 94 | """Set cache entry.""" 95 | key = self._generate_key(query, model, params) 96 | 97 | # Evict if full 98 | if len(self.cache) >= self.max_size: 99 | # Remove oldest (first item) 100 | self.cache.popitem(last=False) 101 | self.stats["evictions"] += 1 102 | 103 | # Add entry 104 | self.cache[key] = { 105 | "response": response, 106 | "created_at": time.time(), 107 | "expires_at": time.time() 
+ (ttl or self.default_ttl), 108 | } 109 | self.stats["sets"] += 1 110 | 111 | logger.debug(f"Cached response for query: {query[:50]}...") 112 | 113 | def clear(self): 114 | """Clear all cache.""" 115 | self.cache.clear() 116 | logger.info("Cache cleared") 117 | 118 | def get_stats(self) -> dict[str, Any]: 119 | """Get cache statistics.""" 120 | hit_rate = ( 121 | self.stats["hits"] / (self.stats["hits"] + self.stats["misses"]) 122 | if self.stats["hits"] + self.stats["misses"] > 0 123 | else 0 124 | ) 125 | 126 | return { 127 | **self.stats, 128 | "size": len(self.cache), 129 | "max_size": self.max_size, 130 | "hit_rate": hit_rate, 131 | } 132 | -------------------------------------------------------------------------------- /.github/assets/CF_logo_dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | --------------------------------------------------------------------------------